1 # -*- coding: utf-8 -*-
2 #Author: Pierre Ratinaud
3 #Copyright (c) 2008-2011 Pierre Ratinaud
6 from chemins import ConstructPathOut, construct_simipath
7 from corpus import Corpus
9 from analysetxt import AnalyseText
10 from ConfigParser import RawConfigParser
11 from guifunct import getPage, getCorpus
12 from dialog import StatDialog
13 from functions import indices_simi, progressbar, treat_var_mod
14 from tableau import Tableau
15 from tabsimi import DoSimi
16 from PrintRScript import PrintRScript
# Module-level logger, registered under the name 'iramuteq.textsimi'.
# NOTE(review): the `import logging` line is not visible in this excerpt - confirm it exists.
22 logger = logging.getLogger('iramuteq.textsimi')
26 class SimiTxt(AnalyseText):
28 self.indices = indices_simi
31 prep = PrepSimi(self.ira, self.parametres, indices_simi)
32 self.parametres = prep.parametres
33 script = PrintSimScript(self)
36 def preferences(self) :
37 dial = StatDialog(self, self.parent)
39 val = dial.ShowModal()
41 if dial.radio_lem.GetSelection() == 0 :
45 self.parametres['lem'] = lem
47 return self.parametres
52 def makesimiparam(self) :
53 self.paramsimi = {'coeff' : 0,
75 'cola' : (200,200,200),
83 #'ira' : self.pathout['Analyse.ira']
85 self.parametres.update(self.paramsimi)
88 self.actives, lim = self.corpus.make_actives_nb(self.parametres.get('max_actives',1500), 1)
89 self.parametres['eff_min_forme'] = lim
90 self.parametres['nbactives'] = len(self.actives)
91 self.parametres['fromprof'] = True
92 self.corpus.make_and_write_sparse_matrix_from_uces(self.actives, self.pathout['mat01.csv'], self.pathout['listeuce1.csv'])
93 with open(self.pathout['actives.csv'], 'w') as f :
94 f.write('\n'.join(self.actives).encode(self.ira.syscoding))
96 self.listet = self.corpus.make_etoiles()
98 self.parametres['stars'] = copy(self.listet)
99 self.parametres['sfromchi'] = False
102 def _init_(self, parent, parametres, indices_simi) :
103 self.parametres = parametres
104 self.dial = PrefSimi(parent, -1, self.parametres, indices_simi)
105 self.dial.CenterOnParent()
106 self.val = self.dial.ShowModal()
107 if self.val == wx.ID_OK :
# Read the current values of the dialog controls into a dict and merge that
# dict into self.parametres.
# NOTE(review): the assignment of `keep_coord` and the closing brace of the
# dict literal are on lines not visible in this excerpt.
110 def make_param(self) :
# State of the `check_colch` checkbox, kept on the instance.
111 self.select = self.dial.check_colch.GetValue()
112 param = {'coeff' : self.dial.choice1.GetSelection(),
113 'layout' : self.dial.choice2.GetSelection(),
114 'type' : self.dial.choice3.GetSelection(),
115 'arbremax' : self.dial.check1.GetValue(),
116 'coeff_tv' : self.dial.check_s_size.GetValue(),
117 'coeff_tv_nb' : self.dial.spin_tv.GetValue(),
118 'tvprop' : self.dial.check2.GetValue(),
119 'tvmin' : self.dial.spin_tvmin.GetValue(),
120 'tvmax' : self.dial.spin_tvmax.GetValue(),
121 'coeff_te' : self.dial.check3.GetValue(),
122 'coeff_temin' : self.dial.spin_temin.GetValue(),
123 'coeff_temax' : self.dial.spin_temax.GetValue(),
124 'label_e' : self.dial.check_elab.GetValue(),
125 'label_v' : self.dial.check_vlab.GetValue(),
126 'vcex' : self.dial.check_vcex.GetValue(),
127 'vcexmin' : self.dial.spin_vcexmin.GetValue(),
128 'vcexmax' : self.dial.spin_vcexmax.GetValue(),
129 'cex' : self.dial.spin_cex.GetValue(),
130 'seuil_ok' : self.dial.check_seuil.GetValue(),
131 'seuil' : self.dial.spin_seuil.GetValue(),
132 'cols' : self.dial.cols.GetColour(),
133 'cola' : self.dial.cola.GetColour(),
134 'width' : self.dial.spin_width.GetValue(),
135 'height' : self.dial.spin_height.GetValue(),
137 'keep_coord' : keep_coord,
138 'alpha' : self.dial.slider_sphere.GetValue(),
139 'film' : self.dial.film.GetValue()
# The following keys are only refreshed when they already exist in
# self.parametres.
141 if 'cexfromchi' in self.parametres :
142 param['cexfromchi'] = self.dial.checkit.GetValue()
143 if 'sfromchi' in self.parametres :
144 param['sfromchi'] = self.dial.checki.GetValue()
# 'vlabcolor' is carried over from the existing parameters, not read from the dialog.
145 if 'vlabcolor' in self.parametres :
146 param['vlabcolor'] = self.parametres['vlabcolor']
# Only dialogs that expose a `check_bystar` control contribute 'bystar';
# 'stars' is then copied from the existing parameters.
147 if 'check_bystar' in dir(self.dial) :
148 param['bystar'] = self.dial.check_bystar.GetValue()
149 param['stars'] = self.parametres['stars']
150 self.parametres.update(param)
152 class PrintSimiScript(PrintRScript) :
153 def make_script(self) :
154 self.load(['igraph', 'proxy', 'Matrix'])
155 self.source([self.analyse.parent.RscriptsPath['simi'], self.analyse.parent.RscriptsPath['Rgraph']])
160 """ % (self.pathout['mat01.csv'], self.pathout['actives.csv'], self.pathout['selected.csv'])
163 dm <- dm[, selected.col+1]
165 if self.parametres['coeff'] == 0 :
175 if self.parametres['coeff'] == 1 :
179 mat <- simil(dm, method = 'Russel', diag = TRUE, upper = TRUE, by_rows = FALSE)
181 elif self.analyses.indices[self.parametres['coeff']] == 'binomial' :
188 method = self.types[self.paramsimi['coeff']]
191 mat <- simil(dm, method = method, diag = TRUE, upper = TRUE, by_rows = FALSE)
192 """ % self.analyse.indices[self.parametres['coeff']]
194 mat <- as.matrix(stats::as.dist(mat,diag=TRUE,upper=TRUE))
196 mat[is.infinite(mat)] <- 0
198 if self.parametres['layout'] == 0 : layout = 'random'
199 if self.parametres['layout'] == 1 : layout = 'circle'
200 if self.parametres['layout'] == 2 : layout = 'frutch'
201 if self.parametres['layout'] == 3 : layout = 'kawa'
202 if self.parametres['layout'] == 4 : layout = 'graphopt'
206 g.ori <- graph.adjacency(mat, mode='lower', weighted = TRUE)
207 w.ori <- E(g.ori)$weight
209 if (method == 'cooc') {
210 E(g.ori)$weight <- 1 / w.ori
212 E(g.ori)$weigth <- 1 - w.ori
214 g.max <- minimum.spanning.tree(g.ori)
215 if (method == 'cooc') {
216 E(g.max)$weight <- 1 / E(g.max)$weight
218 E(g.max)$weight <- 1 - E(g.max)$weight
# NOTE(review): the `def` header enclosing these statements is not visible in
# this excerpt.
# Prepare a Tableau carrying the active forms and the stars, then hand over
# to DoSimi with the 'mat01.csv' matrix and the paramsimi settings.
228 self.tableau = Tableau(self.parent, '')
229 self.tableau.listactives = self.actives
230 self.tableau.parametre['fromtxt'] = True
# Map each active lemma to a one-element list holding its frequency.
231 self.corpus.lems_eff = dict([[lem,[self.corpus.lems[lem].freq]] for lem in self.actives])
232 #print('ATTENTION ETOILES')
233 #self.paramsimi['bystar'] = True
234 self.tableau.listet = copy(self.listet)
235 #self.paramsimi['cexfromchi'] = True
236 #self.paramsimi['vlabcolor'] = True
237 self.tableau.actives = copy(self.corpus.lems_eff)
238 DoSimi(self, fromprof = self.pathout['mat01.csv'], param = self.paramsimi, pathout = self.pathout.dirout)
241 # def __init__(self, parent, cmd = False, param = None):
242 # self.parent = parent
244 # self.ConfigPath = parent.ConfigPath
245 # self.DictPath = parent.DictPath
246 # self.KeyConf = RawConfigParser()
247 # self.KeyConf.read(self.ConfigPath['key'])
248 # self.indices = indices_simi
249 # self.paramsimi = {'coeff' : 0,
260 # 'coeff_temax' : 10,
264 # 'cexfromchi' : False,
270 # 'cols' : (255,0,0),
271 # 'cola' : (200,200,200),
276 # 'keep_coord' : True,
280 # page = getPage(self.parent)
281 # if page is not None :
282 # self.corpus = getCorpus(page)
283 # if self.corpus is not None :
284 # self.pathout = ConstructPathOut(self.corpus.parametre['openpath'], 'simitxt')
285 # self.dictpathout = construct_simipath(self.pathout)
286 # self.val = wx.ID_OK
290 # self.corpus = Corpus(parent)
291 # self.corpus.content = self.parent.content
292 # self.corpus.parametre['encodage'] = parent.corpus_encodage
293 # self.corpus.parametre['lang'] = parent.corpus_lang
294 # self.corpus.parametre['filename'] = parent.filename
295 # self.corpus.parametre['eff_min_uce'] = None
296 # self.pathout = ConstructPathOut(self.corpus.parametre['filename'], 'simitxt')
297 # self.dictpathout = construct_simipath(self.pathout)
298 # dial = StatDialog(self, self.parent)
299 # dial.check_uce.SetValue(True)
300 # dial.check_uce.Enable(False)
301 # dial.OnCheckUce(wx.EVT_MENU)
302 # self.val = dial.ShowModal()
303 # if self.val == wx.ID_OK :
304 # with open(self.parent.ConfigPath['key'], 'w') as f:
305 # self.KeyConf.write(f)
306 # if dial.radio_lem.GetSelection() == 0 : lem = True
308 # if dial.exp.GetSelection() == 0 : exp = True
311 # self.corpus.parametre['lem'] = lem
312 # self.corpus.parametre['expressions'] = exp
313 # self.corpus.parametre['nbforme_uce'] = dial.spin_ctrl_4.GetValue()
314 # self.corpus.parametre['eff_min_forme'] = dial.spin_ctrl_5.GetValue()
315 # self.corpus.parametre['max_actives'] = dial.spin_max_actives.GetValue()
322 # def make_corpus(self) :
323 # print 'make corpus'
325 # dlg = progressbar(self, maxi = 6)
326 # self.corpus.supplementaires = [option for option in self.KeyConf.options('KEYS') if self.KeyConf.get('KEYS', option) == "2"]
327 # self.corpus.typeactive = [option for option in self.KeyConf.options('KEYS') if self.KeyConf.get('KEYS', option) == "1"]
328 # ucis_txt, ucis_paras_txt = self.corpus.start_analyse(self.parent, dlg = dlg, cmd = self.cmd)
332 # dlg.Update(5, '%i ucis - Construction des uces' % len(ucis_paras_txt))
333 # self.corpus.make_ucis_paras_uces(ucis_paras_txt, make_uce = True)
336 # if self.corpus.para_coords != [[] for val in self.corpus.para_coords] :
337 # self.corpus.parametre['para'] = True
339 # self.corpus.parametre['para'] = False
340 # self.corpus.make_etoiles(self.corpus.para_coords)
341 # print 'len(ucis_paras_uces)', len(self.corpus.ucis_paras_uces)
344 # dlg.Update(6, u'Dictionnaires')
345 # uces, self.orderuces = self.corpus.make_forms_and_uces()
346 # self.corpus.ucenb = len(uces)
347 # self.corpus.make_lems(self.parent.lexique)
349 # self.corpus.make_var_actives()
350 # self.corpus.make_var_supp()
351 # self.corpus.lems_eff = self.corpus.make_lem_eff()
353 # #variables = treat_var_mod(listet)
355 # #self.corpus.write_etoiles(self.dictpathout['etoiles'])
359 # def make_table(self) :
360 # if 'orderuces' not in dir(self) :
361 # self.orderuces = [(i,j,k) for i, uci in enumerate(self.corpus.ucis_paras_uces) for j, para in enumerate(uci) for k, uce in enumerate(para)]
362 # self.orderuces = dict([[val, i] for i, val in enumerate(self.orderuces)])
363 # self.corpus.ucenb = len(self.orderuces)
364 # #tabuc1 = self.corpus.make_table_with_uce(self.orderuces)
365 # #tabuc1.insert(0,self.corpus.actives)
366 # #tabuc1 = self.corpus.make_sparse_matrix_with_uce(self.orderuces)
367 # #self.corpus.write_sparse_matrix(self.dictpathout['mat01'], tabuc1, self.corpus.ucenb, len(self.corpus.actives))
368 # if self.corpus.actives is None :
369 # self.corpus.typeactive = [option for option in self.KeyConf.options('KEYS') if self.KeyConf.get('KEYS', option) == "1"]
370 # self.corpus.min_eff_formes()
371 # self.corpus.make_var_actives()
372 # self.corpus.make_and_write_sparse_matrix_from_uce(self.orderuces, self.dictpathout['mat01'])
373 # #self.corpus.write_tab(tabuc1,self.dictpathout['mat01'])
375 # def make_simi(self) :
376 # self.tableau = Tableau(self.parent, '')
377 # self.tableau.listactives = self.corpus.actives
378 # self.tableau.parametre['fromtxt'] = True
379 # if 'lems_eff' not in dir(self.corpus) :
380 # self.corpus.lems_eff = self.corpus.make_lem_eff()
381 # #print('ATTENTION ETOILES')
382 # #self.paramsimi['bystar'] = True
383 # self.listet = self.corpus.get_unique_etoiles()
385 # self.tableau.listet = copy(self.listet)
386 # self.paramsimi['stars'] = copy(self.listet)
387 # #self.paramsimi['cexfromchi'] = True
388 # self.paramsimi['sfromchi'] = False
389 # #self.paramsimi['vlabcolor'] = True
390 # self.tableau.actives = dict([[lem, self.corpus.lems_eff[lem]] for lem in self.corpus.actives])
391 # self.corpus.save_corpus(self.dictpathout['corpus'])
392 # DoSimi(self, fromprof = self.dictpathout['mat01'], param = self.paramsimi, pathout = self.pathout)