1 # -*- coding: utf-8 -*-
2 #Author: Pierre Ratinaud
3 #Copyright (c) 2008-2013 Pierre Ratinaud
6 from chemins import ffr, simipath
7 #from corpus import Corpus
9 from analysetxt import AnalyseText
10 #from ConfigParser import RawConfigParser
11 #from guifunct import getPage, getCorpus
12 from dialog import StatDialog
13 from guifunct import SelectColumn, PrepSimi
14 from functions import indices_simi, progressbar, treat_var_mod, read_list_file, print_liste
15 #from tableau import Tableau
16 #from tabsimi import DoSimi
17 from PrintRScript import PrintSimiScript
# Module-level logger, registered under the name 'iramuteq.textsimi'.
23 log = logging.getLogger('iramuteq.textsimi')
# Text similarity analysis, implemented as an AnalyseText subclass.
25 class SimiTxt(AnalyseText):
# NOTE(review): the statements below read as the body of the analysis entry
# point; its `def` line and several branch lines (else/return) are missing
# from this listing -- confirm the control flow against the complete file.
# Mark the run as 'simitxt', pass simipath to pathout.basefiles() and keep
# the indices_simi list on the instance.
27 self.parametres['type'] = 'simitxt'
28 self.pathout.basefiles(simipath)
29 self.indices = indices_simi
# Active forms come from corpus.make_actives_limit(3)
# (presumably a minimum-frequency threshold of 3 -- TODO confirm).
32 self.actives = self.corpus.make_actives_limit(3)
# dictcol maps position -> [form, corpus.getlemeff(form)] for every active
# form; it is handed to PrepSimi below as its `wordlist`.
33 dictcol = dict([[i, [act, self.corpus.getlemeff(act)]] for i, act in enumerate(self.actives)])
36 #cont = SelectColumn(self.ira, dictcol, self.actives, self.pathout['selected.csv'], dlg = self.dlg)
# The result of corpus.make_etoiles() (presumably the corpus "star"
# metadata variables -- verify) is copied into self.stars and
# parametres['stars'], so the three containers are independent objects.
38 self.listet = self.corpus.make_etoiles()
40 self.stars = copy(self.listet)
41 self.parametres['stars'] = copy(self.listet)
42 self.parametres['sfromchi'] = False
# PrepSimi receives the current parameters, the 'selected.csv' path and the
# active forms; when its .val equals wx.ID_OK, prep.parametres replaces
# self.parametres. What happens otherwise is not visible here.
43 prep = PrepSimi(self.ira, self, self.parametres, self.pathout['selected.csv'], self.actives, indices_simi, wordlist=dictcol)
44 if prep.val == wx.ID_OK :
46 self.parametres = prep.parametres
# Build the script object for this analysis and run script.scriptout through
# doR(); the branch taken when doR() returns a falsy value is not shown.
49 script = PrintSimiScript(self)
51 if not self.doR(script.scriptout, dlg = self.dlg, message = 'R...') :
# When type_graph == 1: record [script file basename, script.txtgraph] in
# the list file at pathout['liste_graph'] -- appended to the list read from
# the file when it exists, otherwise (presumably under an `else:` that is
# not shown) a new one-entry list -- then write the list with print_liste().
54 if self.parametres['type_graph'] == 1:
55 if os.path.exists(self.pathout['liste_graph']):
56 graph_simi = read_list_file(self.pathout['liste_graph'])
57 graph_simi.append([os.path.basename(script.filename), script.txtgraph])
59 graph_simi = [[os.path.basename(script.filename), script.txtgraph]]
60 print_liste(self.pathout['liste_graph'], graph_simi)
64 # def preferences(self) :
65 # dial = StatDialog(self, self.parent)
66 # dial.CenterOnParent()
67 # val = dial.ShowModal()
69 # if dial.radio_lem.GetSelection() == 0 :
73 # self.parametres['lem'] = lem
75 # return self.parametres
# Fill self.paramsimi with a literal settings dict, then merge that dict
# into self.parametres through its update() method.
# NOTE(review): most entries of the literal are missing from this listing --
# check the complete file before relying on the key set.
80 def makesimiparam(self) :
81 self.paramsimi = {'coeff' : 0,
# 'cola' holds a three-integer tuple -- presumably an RGB colour; TODO confirm.
103 'cola' : (200,200,200),
111 #'ira' : self.pathout['Analyse.ira']
113 self.parametres.update(self.paramsimi)
def makefiles(self, lim=3) :
    """Write the input files consumed by the similarity script.

    Stores ``lim`` as ``parametres['eff_min_forme']``, the number of active
    forms as ``parametres['nbactives']`` and ``False`` as
    ``parametres['fromprof']``. It then asks the corpus to write the sparse
    matrix to ``pathout['mat01.csv']`` (together with
    ``pathout['listeuce1.csv']``) and finally writes the active forms, one
    per line, to ``pathout['actives.csv']`` encoded with
    ``self.ira.syscoding``.

    :param lim: value recorded as ``eff_min_forme``; it is only stored here,
        the active forms themselves are taken from ``self.actives``.
    """
    # Local import keeps this method self-contained; io.open gives a
    # text-mode handle with an explicit encoding on Python 2.6+ and 3.
    import io

    #self.actives, lim = self.corpus.make_actives_nb(self.parametres.get('max_actives',1500), 1)
    self.parametres['eff_min_forme'] = lim
    self.parametres['nbactives'] = len(self.actives)
    self.parametres['fromprof'] = False
    self.corpus.make_and_write_sparse_matrix_from_uces(self.actives, self.pathout['mat01.csv'], self.pathout['listeuce1.csv'])
    # Let the file object do the encoding: writing pre-encoded bytes to a
    # text-mode handle raises TypeError on Python 3.
    with io.open(self.pathout['actives.csv'], 'w', encoding=self.ira.syscoding) as f :
        f.write(u'\n'.join(self.actives))
# SimiTxt variant for a single cluster: the active forms and the cluster
# index are supplied by the caller, and the analysis type is recorded as
# 'clustersimitxt'.
126 class SimiFromCluster(SimiTxt) :
def __init__(self, ira, corpus, actives, numcluster, parametres = None, dlg = False) :
    """Store the cluster-specific inputs, then run the SimiTxt initialiser.

    :param ira: forwarded unchanged to ``SimiTxt.__init__``.
    :param corpus: forwarded unchanged to ``SimiTxt.__init__``.
    :param actives: active forms to analyse; stored as ``self.actives``.
    :param numcluster: zero-based cluster index; stored as
        ``self.numcluster`` and used (plus one) in the analysis name.
    :param parametres: parameter dict; its ``'name'`` entry is set in place
        to ``'simi_classe_<numcluster + 1>'``. When omitted, a new dict is
        created.
    :param dlg: forwarded unchanged to ``SimiTxt.__init__``.
    """
    # The declared default is None, which cannot be subscripted: fall back
    # to a fresh dict so the default is actually usable.
    if parametres is None :
        parametres = {}
    self.actives = actives
    self.numcluster = numcluster
    parametres['name'] = 'simi_classe_%i' % (numcluster + 1)
    SimiTxt.__init__(self, ira, corpus, parametres, dlg)
def preferences(self) :
    """Return this analysis's ``parametres`` object as-is, unmodified."""
    current = self.parametres
    return current
# Run the similarity analysis for the cluster selected at construction time.
# NOTE(review): several lines of this method (else/return branches among
# them) are missing from this listing -- confirm the control flow against
# the complete file.
136 def doanalyse(self) :
# Mark the run as 'clustersimitxt', pass simipath to pathout.basefiles() and
# keep the indices_simi list on the instance.
137 self.parametres['type'] = 'clustersimitxt'
138 self.pathout.basefiles(simipath)
139 self.indices = indices_simi
# Drop any 'bystar' entry from the parameters before going on.
141 if 'bystar' in self.parametres :
142 del self.parametres['bystar']
# dictcol maps position -> [form, corpus.getlemclustereff(form, numcluster)]
# for every active form; it is handed to PrepSimi below as its `wordlist`.
143 dictcol = dict([[i, [act, self.corpus.getlemclustereff(act, self.numcluster)]] for i, act in enumerate(self.actives)])
146 #self.listet = self.corpus.make_etoiles()
# Unlike SimiTxt, no star list is built here: stars is empty,
# parametres['stars'] is False and 'sfromchi' is switched on.
148 self.stars = []#copy(self.listet)
149 self.parametres['stars'] = False#copy(self.listet)
150 self.parametres['sfromchi'] = True
# PrepSimi receives the current parameters, the 'selected.csv' path and the
# active forms; when its .val equals wx.ID_OK, prep.parametres replaces
# self.parametres. What happens otherwise is not visible here.
151 prep = PrepSimi(self.ira, self, self.parametres, self.pathout['selected.csv'], self.actives, indices_simi, wordlist=dictcol)
152 if prep.val == wx.ID_OK :
154 self.parametres = prep.parametres
# Build the script object for this analysis and run script.scriptout through
# doR(); the branch taken when doR() returns a falsy value is not shown.
159 script = PrintSimiScript(self)
161 if not self.doR(script.scriptout, dlg = self.dlg, message = 'R ...') :
# When type_graph == 1: record [script file basename, script.txtgraph] in
# the list file at pathout['liste_graph'] -- appended to the list read from
# the file when it exists, otherwise (presumably under an `else:` that is
# not shown) a new one-entry list -- then write the list with print_liste().
163 if self.parametres['type_graph'] == 1:
164 if os.path.exists(self.pathout['liste_graph']):
165 graph_simi = read_list_file(self.pathout['liste_graph'])
166 graph_simi.append([os.path.basename(script.filename), script.txtgraph])
168 graph_simi = [[os.path.basename(script.filename), script.txtgraph]]
169 print_liste(self.pathout['liste_graph'], graph_simi)
def makefiles(self) :
    """Write the input files consumed by the similarity script.

    Stores ``3`` as ``parametres['eff_min_forme']``, the number of active
    forms as ``parametres['nbactives']`` and ``True`` as
    ``parametres['fromprof']``. It then asks the corpus to write the sparse
    matrix for ``corpus.lc[self.numcluster]`` to ``pathout['mat01.csv']``
    and finally writes the active forms, one per line, to
    ``pathout['actives.csv']`` encoded with ``self.ira.syscoding``.
    """
    # Local import keeps this method self-contained; io.open gives a
    # text-mode handle with an explicit encoding on Python 2.6+ and 3.
    import io

    self.parametres['eff_min_forme'] = 3
    self.parametres['nbactives'] = len(self.actives)
    self.parametres['fromprof'] = True
    self.corpus.make_and_write_sparse_matrix_from_classe(self.actives, self.corpus.lc[self.numcluster], self.pathout['mat01.csv'])
    # Let the file object do the encoding: writing pre-encoded bytes to a
    # text-mode handle raises TypeError on Python 3.
    with io.open(self.pathout['actives.csv'], 'w', encoding=self.ira.syscoding) as f :
        f.write(u'\n'.join(self.actives))
181 # self.tableau = Tableau(self.parent, '')
182 # self.tableau.listactives = self.actives
183 # self.tableau.parametre['fromtxt'] = True
184 # self.corpus.lems_eff = dict([[lem,[self.corpus.lems[lem].freq]] for lem in self.actives])
185 # #print('ATTENTION ETOILES')
186 # #self.paramsimi['bystar'] = True
187 # self.tableau.listet = copy(self.listet)
188 # #self.paramsimi['cexfromchi'] = True
189 # #self.paramsimi['vlabcolor'] = True
190 # self.tableau.actives = copy(self.corpus.lems_eff)
191 # DoSimi(self, fromprof = self.pathout['mat01.csv'], param = self.paramsimi, pathout = self.pathout.dirout)
194 # def __init__(self, parent, cmd = False, param = None):
195 # self.parent = parent
197 # self.ConfigPath = parent.ConfigPath
198 # self.DictPath = parent.DictPath
199 # self.KeyConf = RawConfigParser()
200 # self.KeyConf.read(self.ConfigPath['key'])
201 # self.indices = indices_simi
202 # self.paramsimi = {'coeff' : 0,
213 # 'coeff_temax' : 10,
217 # 'cexfromchi' : False,
223 # 'cols' : (255,0,0),
224 # 'cola' : (200,200,200),
229 # 'keep_coord' : True,
233 # page = getPage(self.parent)
234 # if page is not None :
235 # self.corpus = getCorpus(page)
236 # if self.corpus is not None :
237 # self.pathout = ConstructPathOut(self.corpus.parametre['openpath'], 'simitxt')
238 # self.dictpathout = construct_simipath(self.pathout)
239 # self.val = wx.ID_OK
243 # self.corpus = Corpus(parent)
244 # self.corpus.content = self.parent.content
245 # self.corpus.parametre['encodage'] = parent.corpus_encodage
246 # self.corpus.parametre['lang'] = parent.corpus_lang
247 # self.corpus.parametre['filename'] = parent.filename
248 # self.corpus.parametre['eff_min_uce'] = None
249 # self.pathout = ConstructPathOut(self.corpus.parametre['filename'], 'simitxt')
250 # self.dictpathout = construct_simipath(self.pathout)
251 # dial = StatDialog(self, self.parent)
252 # dial.check_uce.SetValue(True)
253 # dial.check_uce.Enable(False)
254 # dial.OnCheckUce(wx.EVT_MENU)
255 # self.val = dial.ShowModal()
256 # if self.val == wx.ID_OK :
257 # with open(self.parent.ConfigPath['key'], 'w') as f:
258 # self.KeyConf.write(f)
259 # if dial.radio_lem.GetSelection() == 0 : lem = True
261 # if dial.exp.GetSelection() == 0 : exp = True
264 # self.corpus.parametre['lem'] = lem
265 # self.corpus.parametre['expressions'] = exp
266 # self.corpus.parametre['nbforme_uce'] = dial.spin_ctrl_4.GetValue()
267 # self.corpus.parametre['eff_min_forme'] = dial.spin_ctrl_5.GetValue()
268 # self.corpus.parametre['max_actives'] = dial.spin_max_actives.GetValue()
275 # def make_corpus(self) :
276 # print 'make corpus'
278 # dlg = progressbar(self, maxi = 6)
279 # self.corpus.supplementaires = [option for option in self.KeyConf.options('KEYS') if self.KeyConf.get('KEYS', option) == "2"]
280 # self.corpus.typeactive = [option for option in self.KeyConf.options('KEYS') if self.KeyConf.get('KEYS', option) == "1"]
281 # ucis_txt, ucis_paras_txt = self.corpus.start_analyse(self.parent, dlg = dlg, cmd = self.cmd)
285 # dlg.Update(5, '%i ucis - Construction des uces' % len(ucis_paras_txt))
286 # self.corpus.make_ucis_paras_uces(ucis_paras_txt, make_uce = True)
289 # if self.corpus.para_coords != [[] for val in self.corpus.para_coords] :
290 # self.corpus.parametre['para'] = True
292 # self.corpus.parametre['para'] = False
293 # self.corpus.make_etoiles(self.corpus.para_coords)
294 # print 'len(ucis_paras_uces)', len(self.corpus.ucis_paras_uces)
297 # dlg.Update(6, u'Dictionnaires')
298 # uces, self.orderuces = self.corpus.make_forms_and_uces()
299 # self.corpus.ucenb = len(uces)
300 # self.corpus.make_lems(self.parent.lexique)
302 # self.corpus.make_var_actives()
303 # self.corpus.make_var_supp()
304 # self.corpus.lems_eff = self.corpus.make_lem_eff()
306 # #variables = treat_var_mod(listet)
308 # #self.corpus.write_etoiles(self.dictpathout['etoiles'])
312 # def make_table(self) :
313 # if 'orderuces' not in dir(self) :
314 # self.orderuces = [(i,j,k) for i, uci in enumerate(self.corpus.ucis_paras_uces) for j, para in enumerate(uci) for k, uce in enumerate(para)]
315 # self.orderuces = dict([[val, i] for i, val in enumerate(self.orderuces)])
316 # self.corpus.ucenb = len(self.orderuces)
317 # #tabuc1 = self.corpus.make_table_with_uce(self.orderuces)
318 # #tabuc1.insert(0,self.corpus.actives)
319 # #tabuc1 = self.corpus.make_sparse_matrix_with_uce(self.orderuces)
320 # #self.corpus.write_sparse_matrix(self.dictpathout['mat01'], tabuc1, self.corpus.ucenb, len(self.corpus.actives))
321 # if self.corpus.actives is None :
322 # self.corpus.typeactive = [option for option in self.KeyConf.options('KEYS') if self.KeyConf.get('KEYS', option) == "1"]
323 # self.corpus.min_eff_formes()
324 # self.corpus.make_var_actives()
325 # self.corpus.make_and_write_sparse_matrix_from_uce(self.orderuces, self.dictpathout['mat01'])
326 # #self.corpus.write_tab(tabuc1,self.dictpathout['mat01'])
328 # def make_simi(self) :
329 # self.tableau = Tableau(self.parent, '')
330 # self.tableau.listactives = self.corpus.actives
331 # self.tableau.parametre['fromtxt'] = True
332 # if 'lems_eff' not in dir(self.corpus) :
333 # self.corpus.lems_eff = self.corpus.make_lem_eff()
334 # #print('ATTENTION ETOILES')
335 # #self.paramsimi['bystar'] = True
336 # self.listet = self.corpus.get_unique_etoiles()
338 # self.tableau.listet = copy(self.listet)
339 # self.paramsimi['stars'] = copy(self.listet)
340 # #self.paramsimi['cexfromchi'] = True
341 # self.paramsimi['sfromchi'] = False
342 # #self.paramsimi['vlabcolor'] = True
343 # self.tableau.actives = dict([[lem, self.corpus.lems_eff[lem]] for lem in self.corpus.actives])
344 # self.corpus.save_corpus(self.dictpathout['corpus'])
345 # DoSimi(self, fromprof = self.dictpathout['mat01'], param = self.paramsimi, pathout = self.pathout)