1 # -*- coding: utf-8 -*-
2 #Author: Pierre Ratinaud
3 #Copyright (c) 2008-2013 Pierre Ratinaud
6 from chemins import ffr, simipath
7 #from corpus import Corpus
9 from analysetxt import AnalyseText
10 #from ConfigParser import RawConfigParser
11 #from guifunct import getPage, getCorpus
12 from dialog import StatDialog
13 from guifunct import SelectColumn, PrepSimi
14 from functions import indices_simi, progressbar, treat_var_mod, read_list_file, print_liste
15 #from tableau import Tableau
16 #from tabsimi import DoSimi
17 from PrintRScript import PrintSimiScript
23 log = logging.getLogger('iramuteq.textsimi')
# Similarity analysis of a text corpus, built on AnalyseText.
# NOTE(review): the statements below form a method body whose `def` line is
# not visible in this excerpt (SimiFromCluster names its counterpart
# `doanalyse`); other lines of the body are missing as well.
25 class SimiTxt(AnalyseText):
# Tag the run as 'simitxt', hand the simipath table to pathout.basefiles
# (presumably registers the output file names -- confirm) and expose the
# available similarity indices on the instance.
27 self.parametres['type'] = 'simitxt'
28 self.pathout.basefiles(simipath)
29 self.indices = indices_simi
# Active forms come from corpus.make_actives_limit(3); dictcol maps each
# position to [form, value returned by corpus.getlemeff(form)].
32 self.actives = self.corpus.make_actives_limit(3)
33 dictcol = dict([[i, [act, self.corpus.getlemeff(act)]] for i, act in enumerate(self.actives)])
36 #cont = SelectColumn(self.ira, dictcol, self.actives, self.pathout['selected.csv'], dlg = self.dlg)
# The list returned by corpus.make_etoiles() is copied twice, so self.stars
# and parametres['stars'] do not share one list object with self.listet.
38 self.listet = self.corpus.make_etoiles()
40 self.stars = copy(self.listet)
41 self.parametres['stars'] = copy(self.listet)
42 self.parametres['sfromchi'] = False
# PrepSimi receives the current parameters, the active forms, the indices
# and dictcol as its word list; its parameters replace self.parametres when
# it reports wx.ID_OK.
# NOTE(review): the other statements of this branch and any `else` are not
# visible in this excerpt.
43 prep = PrepSimi(self.ira, self, self.parametres, self.pathout['selected.csv'], self.actives, indices_simi, wordlist=dictcol)
44 if prep.val == wx.ID_OK :
46 self.parametres = prep.parametres
# Build the R script for this analysis and run it through doR; what happens
# when doR returns a false value is not visible in this excerpt.
49 script = PrintSimiScript(self)
51 if not self.doR(script.scriptout, dlg = self.dlg, message = 'R...') :
# For type_graph == 1, record [script file basename, script.txtgraph] under
# 'liste_graph': appended to the list read from the existing file, or used
# as a new single-entry list (the `else` line separating the two cases is
# missing from this excerpt), then passed to print_liste.
54 if self.parametres['type_graph'] == 1:
55 if os.path.exists(self.pathout['liste_graph']):
56 graph_simi = read_list_file(self.pathout['liste_graph'])
57 graph_simi.append([os.path.basename(script.filename), script.txtgraph])
59 graph_simi = [[os.path.basename(script.filename), script.txtgraph]]
60 print_liste(self.pathout['liste_graph'], graph_simi)
64 # def preferences(self) :
65 # dial = StatDialog(self, self.parent)
66 # dial.CenterOnParent()
67 # val = dial.ShowModal()
69 # if dial.radio_lem.GetSelection() == 0 :
73 # self.parametres['lem'] = lem
75 # return self.parametres
# Store the default similarity settings in self.paramsimi and merge them
# into self.parametres.
# NOTE(review): most entries of the dict literal (and its closing brace) are
# missing from this excerpt; only 'coeff' and 'cola' are visible.
80 def makesimiparam(self) :
81 self.paramsimi = {'coeff' : 0,
103 'cola' : (200,200,200),
111 #'ira' : self.pathout['Analyse.ira']
# update() lets these defaults replace same-named keys already present in
# self.parametres (assuming parametres behaves like a dict -- confirm).
113 self.parametres.update(self.paramsimi)
def makefiles(self, lim=3) :
    """Record the matrix settings and write the input files of the analysis.

    lim -- stored as parametres['eff_min_forme']; self.actives is not
           filtered with it here.

    The sparse matrix is produced by
    corpus.make_and_write_sparse_matrix_from_uces with the 'mat01.csv' and
    'listeuce1.csv' output paths; the active forms are then written one per
    line to 'actives.csv', encoded with self.ira.syscoding.
    """
    # A former variant recomputed self.actives and lim through
    # corpus.make_actives_nb(parametres.get('max_actives', 1500), 1).
    settings = self.parametres
    settings['eff_min_forme'] = lim
    settings['nbactives'] = len(self.actives)
    settings['fromprof'] = False

    matrix_path = self.pathout['mat01.csv']
    uce_list_path = self.pathout['listeuce1.csv']
    self.corpus.make_and_write_sparse_matrix_from_uces(self.actives, matrix_path, uce_list_path)

    actives_path = self.pathout['actives.csv']
    with open(actives_path, 'w') as handle :
        encoded = '\n'.join(self.actives).encode(self.ira.syscoding)
        handle.write(encoded)
124 class SimiFromCluster(SimiTxt) :
def __init__(self, ira, corpus, actives, numcluster, parametres = None, dlg = False) :
    """Prepare a similarity analysis restricted to one cluster.

    ira, corpus, dlg -- forwarded unchanged to SimiTxt.__init__
    actives          -- forms to analyse, stored on the instance as given
    numcluster       -- cluster index, stored on the instance; the analysis
                        name is built from numcluster + 1
    parametres       -- settings mapping; it is updated in place with the
                        'name' entry before being forwarded. A new dict is
                        created when the argument is omitted.
    """
    # The None default used to be subscripted directly, so leaving out
    # `parametres` raised TypeError on the 'name' assignment below.
    if parametres is None :
        parametres = {}
    self.actives = actives
    self.numcluster = numcluster
    parametres['name'] = 'simi_classe_%i' % (numcluster + 1)
    # lemdial is always passed as False for the cluster variant.
    SimiTxt.__init__(self, ira, corpus, parametres, dlg, lemdial = False)
def preferences(self) :
    """Hand back the stored parameters as they are.

    This version only returns the settings already held in self.parametres
    (the same object, not a copy).
    """
    current = self.parametres
    return current
# Run the similarity analysis for the cluster stored in self.numcluster.
# NOTE(review): several lines of this method (branch bodies, `else` lines,
# return statements) are missing from this excerpt.
134 def doanalyse(self) :
# Tag the run as 'clustersimitxt', hand the simipath table to
# pathout.basefiles (presumably registers the output file names -- confirm)
# and expose the available similarity indices on the instance.
135 self.parametres['type'] = 'clustersimitxt'
136 self.pathout.basefiles(simipath)
137 self.indices = indices_simi
# Remove any 'bystar' entry from the parameters.
139 if 'bystar' in self.parametres :
140 del self.parametres['bystar']
# dictcol maps each position to
# [form, value returned by corpus.getlemclustereff(form, numcluster)].
141 dictcol = dict([[i, [act, self.corpus.getlemclustereff(act, self.numcluster)]] for i, act in enumerate(self.actives)])
144 #self.listet = self.corpus.make_etoiles()
# No star list is built here: self.stars is empty, parametres['stars'] is
# False and 'sfromchi' is switched on.
146 self.stars = []#copy(self.listet)
147 self.parametres['stars'] = False#copy(self.listet)
148 self.parametres['sfromchi'] = True
# PrepSimi receives the current parameters, the active forms, the indices
# and dictcol as its word list; its parameters replace self.parametres when
# it reports wx.ID_OK.
# NOTE(review): the other statements of this branch and any `else` are not
# visible in this excerpt.
149 prep = PrepSimi(self.ira, self, self.parametres, self.pathout['selected.csv'], self.actives, indices_simi, wordlist=dictcol)
150 if prep.val == wx.ID_OK :
152 self.parametres = prep.parametres
# Build the R script for this analysis and run it through doR; what happens
# when doR returns a false value is not visible in this excerpt.
157 script = PrintSimiScript(self)
159 if not self.doR(script.scriptout, dlg = self.dlg, message = 'R ...') :
# For type_graph == 1, record [script file basename, script.txtgraph] under
# 'liste_graph': appended to the list read from the existing file, or used
# as a new single-entry list (the `else` line separating the two cases is
# missing from this excerpt), then passed to print_liste.
161 if self.parametres['type_graph'] == 1:
162 if os.path.exists(self.pathout['liste_graph']):
163 graph_simi = read_list_file(self.pathout['liste_graph'])
164 graph_simi.append([os.path.basename(script.filename), script.txtgraph])
166 graph_simi = [[os.path.basename(script.filename), script.txtgraph]]
167 print_liste(self.pathout['liste_graph'], graph_simi)
def makefiles(self, lim=3) :
    """Record the matrix settings and write the input files for the cluster.

    lim -- stored as parametres['eff_min_forme']. It defaults to 3, the
           value that used to be hard-coded, so the override accepts the
           same call as SimiTxt.makefiles(lim=...). self.actives is not
           filtered with it here.

    The sparse matrix is produced by
    corpus.make_and_write_sparse_matrix_from_classe from the active forms
    and corpus.lc[self.numcluster], written to 'mat01.csv'; the active
    forms are then written one per line to 'actives.csv', encoded with
    self.ira.syscoding.
    """
    self.parametres['eff_min_forme'] = lim
    self.parametres['nbactives'] = len(self.actives)
    # Unlike SimiTxt.makefiles, 'fromprof' is set to True here.
    self.parametres['fromprof'] = True
    self.corpus.make_and_write_sparse_matrix_from_classe(self.actives, self.corpus.lc[self.numcluster], self.pathout['mat01.csv'])
    with open(self.pathout['actives.csv'], 'w') as f :
        f.write('\n'.join(self.actives).encode(self.ira.syscoding))
179 # self.tableau = Tableau(self.parent, '')
180 # self.tableau.listactives = self.actives
181 # self.tableau.parametre['fromtxt'] = True
182 # self.corpus.lems_eff = dict([[lem,[self.corpus.lems[lem].freq]] for lem in self.actives])
183 # #print('ATTENTION ETOILES')
184 # #self.paramsimi['bystar'] = True
185 # self.tableau.listet = copy(self.listet)
186 # #self.paramsimi['cexfromchi'] = True
187 # #self.paramsimi['vlabcolor'] = True
188 # self.tableau.actives = copy(self.corpus.lems_eff)
189 # DoSimi(self, fromprof = self.pathout['mat01.csv'], param = self.paramsimi, pathout = self.pathout.dirout)
192 # def __init__(self, parent, cmd = False, param = None):
193 # self.parent = parent
195 # self.ConfigPath = parent.ConfigPath
196 # self.DictPath = parent.DictPath
197 # self.KeyConf = RawConfigParser()
198 # self.KeyConf.read(self.ConfigPath['key'])
199 # self.indices = indices_simi
200 # self.paramsimi = {'coeff' : 0,
211 # 'coeff_temax' : 10,
215 # 'cexfromchi' : False,
221 # 'cols' : (255,0,0),
222 # 'cola' : (200,200,200),
227 # 'keep_coord' : True,
231 # page = getPage(self.parent)
232 # if page is not None :
233 # self.corpus = getCorpus(page)
234 # if self.corpus is not None :
235 # self.pathout = ConstructPathOut(self.corpus.parametre['openpath'], 'simitxt')
236 # self.dictpathout = construct_simipath(self.pathout)
237 # self.val = wx.ID_OK
241 # self.corpus = Corpus(parent)
242 # self.corpus.content = self.parent.content
243 # self.corpus.parametre['encodage'] = parent.corpus_encodage
244 # self.corpus.parametre['lang'] = parent.corpus_lang
245 # self.corpus.parametre['filename'] = parent.filename
246 # self.corpus.parametre['eff_min_uce'] = None
247 # self.pathout = ConstructPathOut(self.corpus.parametre['filename'], 'simitxt')
248 # self.dictpathout = construct_simipath(self.pathout)
249 # dial = StatDialog(self, self.parent)
250 # dial.check_uce.SetValue(True)
251 # dial.check_uce.Enable(False)
252 # dial.OnCheckUce(wx.EVT_MENU)
253 # self.val = dial.ShowModal()
254 # if self.val == wx.ID_OK :
255 # with open(self.parent.ConfigPath['key'], 'w') as f:
256 # self.KeyConf.write(f)
257 # if dial.radio_lem.GetSelection() == 0 : lem = True
259 # if dial.exp.GetSelection() == 0 : exp = True
262 # self.corpus.parametre['lem'] = lem
263 # self.corpus.parametre['expressions'] = exp
264 # self.corpus.parametre['nbforme_uce'] = dial.spin_ctrl_4.GetValue()
265 # self.corpus.parametre['eff_min_forme'] = dial.spin_ctrl_5.GetValue()
266 # self.corpus.parametre['max_actives'] = dial.spin_max_actives.GetValue()
273 # def make_corpus(self) :
274 # print 'make corpus'
276 # dlg = progressbar(self, maxi = 6)
277 # self.corpus.supplementaires = [option for option in self.KeyConf.options('KEYS') if self.KeyConf.get('KEYS', option) == "2"]
278 # self.corpus.typeactive = [option for option in self.KeyConf.options('KEYS') if self.KeyConf.get('KEYS', option) == "1"]
279 # ucis_txt, ucis_paras_txt = self.corpus.start_analyse(self.parent, dlg = dlg, cmd = self.cmd)
283 # dlg.Update(5, '%i ucis - Construction des uces' % len(ucis_paras_txt))
284 # self.corpus.make_ucis_paras_uces(ucis_paras_txt, make_uce = True)
287 # if self.corpus.para_coords != [[] for val in self.corpus.para_coords] :
288 # self.corpus.parametre['para'] = True
290 # self.corpus.parametre['para'] = False
291 # self.corpus.make_etoiles(self.corpus.para_coords)
292 # print 'len(ucis_paras_uces)', len(self.corpus.ucis_paras_uces)
295 # dlg.Update(6, u'Dictionnaires')
296 # uces, self.orderuces = self.corpus.make_forms_and_uces()
297 # self.corpus.ucenb = len(uces)
298 # self.corpus.make_lems(self.parent.lexique)
300 # self.corpus.make_var_actives()
301 # self.corpus.make_var_supp()
302 # self.corpus.lems_eff = self.corpus.make_lem_eff()
304 # #variables = treat_var_mod(listet)
306 # #self.corpus.write_etoiles(self.dictpathout['etoiles'])
310 # def make_table(self) :
311 # if 'orderuces' not in dir(self) :
312 # self.orderuces = [(i,j,k) for i, uci in enumerate(self.corpus.ucis_paras_uces) for j, para in enumerate(uci) for k, uce in enumerate(para)]
313 # self.orderuces = dict([[val, i] for i, val in enumerate(self.orderuces)])
314 # self.corpus.ucenb = len(self.orderuces)
315 # #tabuc1 = self.corpus.make_table_with_uce(self.orderuces)
316 # #tabuc1.insert(0,self.corpus.actives)
317 # #tabuc1 = self.corpus.make_sparse_matrix_with_uce(self.orderuces)
318 # #self.corpus.write_sparse_matrix(self.dictpathout['mat01'], tabuc1, self.corpus.ucenb, len(self.corpus.actives))
319 # if self.corpus.actives is None :
320 # self.corpus.typeactive = [option for option in self.KeyConf.options('KEYS') if self.KeyConf.get('KEYS', option) == "1"]
321 # self.corpus.min_eff_formes()
322 # self.corpus.make_var_actives()
323 # self.corpus.make_and_write_sparse_matrix_from_uce(self.orderuces, self.dictpathout['mat01'])
324 # #self.corpus.write_tab(tabuc1,self.dictpathout['mat01'])
326 # def make_simi(self) :
327 # self.tableau = Tableau(self.parent, '')
328 # self.tableau.listactives = self.corpus.actives
329 # self.tableau.parametre['fromtxt'] = True
330 # if 'lems_eff' not in dir(self.corpus) :
331 # self.corpus.lems_eff = self.corpus.make_lem_eff()
332 # #print('ATTENTION ETOILES')
333 # #self.paramsimi['bystar'] = True
334 # self.listet = self.corpus.get_unique_etoiles()
336 # self.tableau.listet = copy(self.listet)
337 # self.paramsimi['stars'] = copy(self.listet)
338 # #self.paramsimi['cexfromchi'] = True
339 # self.paramsimi['sfromchi'] = False
340 # #self.paramsimi['vlabcolor'] = True
341 # self.tableau.actives = dict([[lem, self.corpus.lems_eff[lem]] for lem in self.corpus.actives])
342 # self.corpus.save_corpus(self.dictpathout['corpus'])
343 # DoSimi(self, fromprof = self.dictpathout['mat01'], param = self.paramsimi, pathout = self.pathout)