1 # -*- coding: utf-8 -*-
2 #Author: Pierre Ratinaud
3 #Copyright (c) 2008-2013 Pierre Ratinaud
6 from chemins import ffr, simipath
7 #from corpus import Corpus
9 from analysetxt import AnalyseText
10 #from ConfigParser import RawConfigParser
11 #from guifunct import getPage, getCorpus
12 from dialog import StatDialog
13 from guifunct import SelectColumn, PrepSimi
14 from functions import indices_simi, progressbar, treat_var_mod, read_list_file, print_liste
15 #from tableau import Tableau
16 #from tabsimi import DoSimi
17 from PrintRScript import PrintSimiScript
# Module-level logger, registered under the name 'iramuteq.textsimi'.
23 logger = logging.getLogger('iramuteq.textsimi')
# Text similarity analysis, subclass of AnalyseText.
# NOTE(review): this listing has lost its indentation and its embedded line
# numbers jump (25 -> 27, 29 -> 32, 44 -> 46, 56 -> 58, ...), so some original
# lines are not shown. The statements below presumably form the body of a
# `doanalyse` method (the subclass SimiFromCluster defines one with the same
# sequence of steps) -- confirm against the full file.
25 class SimiTxt(AnalyseText):
# Tag the run as 'simitxt', hand simipath to pathout.basefiles() and expose
# the similarity indices on the instance.
27 self.parametres['type'] = 'simitxt'
28 self.pathout.basefiles(simipath)
29 self.indices = indices_simi
# Active forms come from corpus.make_actives_limit(3)
# (presumably 3 is a minimum frequency -- TODO confirm against the corpus class).
32 self.actives = self.corpus.make_actives_limit(3)
# Map each position i to [form, corpus.getlemeff(form)]; passed to PrepSimi as wordlist.
33 dictcol = dict([[i, [act, self.corpus.getlemeff(act)]] for i, act in enumerate(self.actives)])
36 #cont = SelectColumn(self.ira, dictcol, self.actives, self.pathout['selected.csv'], dlg = self.dlg)
# The list returned by corpus.make_etoiles() is kept in self.listet; self.stars
# and parametres['stars'] each receive their own copy() of it.
38 self.listet = self.corpus.make_etoiles()
40 self.stars = copy(self.listet)
41 self.parametres['stars'] = copy(self.listet)
42 self.parametres['sfromchi'] = False
# Build the PrepSimi preparation object; its `val` is compared with wx.ID_OK below.
43 prep = PrepSimi(self.ira, self, self.parametres, self.pathout['selected.csv'], self.actives, indices_simi, wordlist=dictcol)
44 if prep.val == wx.ID_OK :
# Adopt the parameters held by the PrepSimi object.
46 self.parametres = prep.parametres
# Build the script object and pass script.scriptout to self.doR().
# NOTE(review): the body of the `if not ...` test below is not shown in this listing.
49 script = PrintSimiScript(self)
51 if not self.doR(script.scriptout) :
# When type_graph == 1, record [script file basename, script.txtgraph] in the
# list stored at pathout['liste_graph']: extend the existing list if the file
# exists, otherwise start a new one (the line between 56 and 58 is missing,
# presumably an `else`). The list is then written with print_liste(); the
# exact nesting cannot be determined from this flattened listing.
53 if self.parametres['type_graph'] == 1:
54 if os.path.exists(self.pathout['liste_graph']):
55 graph_simi = read_list_file(self.pathout['liste_graph'])
56 graph_simi.append([os.path.basename(script.filename), script.txtgraph])
58 graph_simi = [[os.path.basename(script.filename), script.txtgraph]]
59 print_liste(self.pathout['liste_graph'], graph_simi)
# Open a StatDialog, store a `lem` value in self.parametres['lem'] and return
# self.parametres.
# NOTE(review): the embedded numbering jumps (64 -> 66, 68 -> 72, 72 -> 74), so
# the lines that assign `lem` and any handling of `val` are not shown here.
63 def preferences(self) :
64 dial = StatDialog(self, self.parent)
# Run the dialog modally; `val` holds the ShowModal() result.
66 val = dial.ShowModal()
# Test whether the first entry of the radio_lem control is selected
# (branch body missing from this listing).
68 if dial.radio_lem.GetSelection() == 0 :
72 self.parametres['lem'] = lem
74 return self.parametres
# Build the self.paramsimi settings dict and merge it into self.parametres.
79 def makesimiparam(self) :
# NOTE(review): the embedded numbering jumps from 80 to 102 and on to 110/112,
# so most entries of this dict literal (and its closing brace) are not shown.
80 self.paramsimi = {'coeff' : 0,
102 'cola' : (200,200,200),
110 #'ira' : self.pathout['Analyse.ira']
# update() overwrites any key already present in self.parametres
# (assuming parametres is a plain dict -- TODO confirm).
112 self.parametres.update(self.paramsimi)
def makefiles(self, lim=3) :
    """Record the matrix settings and write the input files of the analysis.

    lim is only stored under parametres['eff_min_forme']; this method does
    not filter self.actives with it.

    Side effects:
    - sets 'eff_min_forme', 'nbactives' and 'fromprof' in self.parametres;
    - asks the corpus to write the sparse matrix (pathout['mat01.csv']) and
      the uce list (pathout['listeuce1.csv']) for self.actives;
    - writes self.actives, one per line, to pathout['actives.csv'], encoded
      with self.ira.syscoding.
    """
    # Earlier variant, left disabled in the original code:
    #self.actives, lim = self.corpus.make_actives_nb(self.parametres.get('max_actives',1500), 1)
    settings = (
        ('eff_min_forme', lim),
        ('nbactives', len(self.actives)),
        ('fromprof', False),
    )
    for key, value in settings :
        self.parametres[key] = value

    matrix_path = self.pathout['mat01.csv']
    uce_list_path = self.pathout['listeuce1.csv']
    self.corpus.make_and_write_sparse_matrix_from_uces(self.actives, matrix_path, uce_list_path)

    # The text is joined and encoded only after the file has been opened,
    # so the file is created/truncated even if encoding fails.
    with open(self.pathout['actives.csv'], 'w') as out :
        joined = '\n'.join(self.actives)
        out.write(joined.encode(self.ira.syscoding))
# Variant of SimiTxt bound to a single cluster: it is constructed with a
# ready-made `actives` list and a `numcluster` index, and tags its run as
# 'clustersimitxt' instead of 'simitxt'.
125 class SimiFromCluster(SimiTxt) :
def __init__(self, ira, corpus, actives, numcluster, parametres = None, dlg = False) :
    """Set up a similarity analysis restricted to one cluster.

    actives and numcluster are stored on the instance before the parent
    constructor runs; ira, corpus, parametres and dlg are forwarded to
    SimiTxt.__init__.

    parametres may be omitted: an empty dict is then created so that the
    'name' entry can be set. A dict supplied by the caller is modified in
    place (its 'name' key is overwritten), exactly as before.
    """
    if parametres is None :
        # The declared default is None, which cannot be subscripted below.
        parametres = {}
    self.actives = actives
    self.numcluster = numcluster
    # The analysis name uses numcluster + 1 (presumably numcluster is a
    # 0-based index, since it is also used as corpus.lc[self.numcluster]).
    parametres['name'] = 'simi_classe_%i' % (numcluster + 1)
    SimiTxt.__init__(self, ira, corpus, parametres, dlg)
def preferences(self) :
    """Return self.parametres as-is.

    Unlike SimiTxt.preferences, no StatDialog is created here.
    """
    current = self.parametres
    return current
# Run the similarity analysis for the cluster given at construction time.
# NOTE(review): this listing has lost its indentation and its embedded line
# numbers jump (138 -> 140, 142 -> 145, 153 -> 158, 165 -> 167, ...), so some
# original lines are not shown and the exact nesting cannot be determined.
135 def doanalyse(self) :
# Tag the run as 'clustersimitxt', hand simipath to pathout.basefiles() and
# expose the similarity indices on the instance.
136 self.parametres['type'] = 'clustersimitxt'
137 self.pathout.basefiles(simipath)
138 self.indices = indices_simi
# Drop any 'bystar' entry from the parameters.
140 if 'bystar' in self.parametres :
141 del self.parametres['bystar']
# Map each position i to [form, corpus.getlemclustereff(form, numcluster)];
# passed to PrepSimi as wordlist.
142 dictcol = dict([[i, [act, self.corpus.getlemclustereff(act, self.numcluster)]] for i, act in enumerate(self.actives)])
145 #self.listet = self.corpus.make_etoiles()
# No stars here: self.stars is an empty list and parametres['stars'] is False
# (SimiTxt stores copies of the corpus star list instead); 'sfromchi' is True.
147 self.stars = []#copy(self.listet)
148 self.parametres['stars'] = False#copy(self.listet)
149 self.parametres['sfromchi'] = True
# Build the PrepSimi preparation object; its `val` is compared with wx.ID_OK below.
150 prep = PrepSimi(self.ira, self, self.parametres, self.pathout['selected.csv'], self.actives, indices_simi, wordlist=dictcol)
151 if prep.val == wx.ID_OK :
# Adopt the parameters held by the PrepSimi object.
153 self.parametres = prep.parametres
# Build the script object and pass script.scriptout to self.doR().
# NOTE(review): the test below is not negated, whereas the corresponding line
# in SimiTxt reads `if not self.doR(...)`; the branch body is not shown here,
# so whether this difference is intentional cannot be told -- confirm.
158 script = PrintSimiScript(self)
160 if self.doR(script.scriptout) :
# When type_graph == 1, record [script file basename, script.txtgraph] in the
# list stored at pathout['liste_graph']: extend the existing list if the file
# exists, otherwise start a new one (the line between 165 and 167 is missing,
# presumably an `else`). The list is then written with print_liste().
162 if self.parametres['type_graph'] == 1:
163 if os.path.exists(self.pathout['liste_graph']):
164 graph_simi = read_list_file(self.pathout['liste_graph'])
165 graph_simi.append([os.path.basename(script.filename), script.txtgraph])
167 graph_simi = [[os.path.basename(script.filename), script.txtgraph]]
168 print_liste(self.pathout['liste_graph'], graph_simi)
def makefiles(self, lim=3) :
    """Record the matrix settings and write the input files for one cluster.

    lim defaults to 3, the value that was previously hard-coded, and mirrors
    the `lim` parameter of SimiTxt.makefiles so both methods accept the same
    call. It is only stored under parametres['eff_min_forme']; this method
    does not filter self.actives with it.

    Side effects:
    - sets 'eff_min_forme', 'nbactives' and 'fromprof' (True) in
      self.parametres;
    - asks the corpus to write the sparse matrix for self.actives and
      self.corpus.lc[self.numcluster] to pathout['mat01.csv'];
    - writes self.actives, one per line, to pathout['actives.csv'], encoded
      with self.ira.syscoding.
    """
    self.parametres['eff_min_forme'] = lim
    self.parametres['nbactives'] = len(self.actives)
    self.parametres['fromprof'] = True
    self.corpus.make_and_write_sparse_matrix_from_classe(self.actives, self.corpus.lc[self.numcluster], self.pathout['mat01.csv'])
    with open(self.pathout['actives.csv'], 'w') as f :
        f.write('\n'.join(self.actives).encode(self.ira.syscoding))
180 # self.tableau = Tableau(self.parent, '')
181 # self.tableau.listactives = self.actives
182 # self.tableau.parametre['fromtxt'] = True
183 # self.corpus.lems_eff = dict([[lem,[self.corpus.lems[lem].freq]] for lem in self.actives])
184 # #print('ATTENTION ETOILES')
185 # #self.paramsimi['bystar'] = True
186 # self.tableau.listet = copy(self.listet)
187 # #self.paramsimi['cexfromchi'] = True
188 # #self.paramsimi['vlabcolor'] = True
189 # self.tableau.actives = copy(self.corpus.lems_eff)
190 # DoSimi(self, fromprof = self.pathout['mat01.csv'], param = self.paramsimi, pathout = self.pathout.dirout)
193 # def __init__(self, parent, cmd = False, param = None):
194 # self.parent = parent
196 # self.ConfigPath = parent.ConfigPath
197 # self.DictPath = parent.DictPath
198 # self.KeyConf = RawConfigParser()
199 # self.KeyConf.read(self.ConfigPath['key'])
200 # self.indices = indices_simi
201 # self.paramsimi = {'coeff' : 0,
212 # 'coeff_temax' : 10,
216 # 'cexfromchi' : False,
222 # 'cols' : (255,0,0),
223 # 'cola' : (200,200,200),
228 # 'keep_coord' : True,
232 # page = getPage(self.parent)
233 # if page is not None :
234 # self.corpus = getCorpus(page)
235 # if self.corpus is not None :
236 # self.pathout = ConstructPathOut(self.corpus.parametre['openpath'], 'simitxt')
237 # self.dictpathout = construct_simipath(self.pathout)
238 # self.val = wx.ID_OK
242 # self.corpus = Corpus(parent)
243 # self.corpus.content = self.parent.content
244 # self.corpus.parametre['encodage'] = parent.corpus_encodage
245 # self.corpus.parametre['lang'] = parent.corpus_lang
246 # self.corpus.parametre['filename'] = parent.filename
247 # self.corpus.parametre['eff_min_uce'] = None
248 # self.pathout = ConstructPathOut(self.corpus.parametre['filename'], 'simitxt')
249 # self.dictpathout = construct_simipath(self.pathout)
250 # dial = StatDialog(self, self.parent)
251 # dial.check_uce.SetValue(True)
252 # dial.check_uce.Enable(False)
253 # dial.OnCheckUce(wx.EVT_MENU)
254 # self.val = dial.ShowModal()
255 # if self.val == wx.ID_OK :
256 # with open(self.parent.ConfigPath['key'], 'w') as f:
257 # self.KeyConf.write(f)
258 # if dial.radio_lem.GetSelection() == 0 : lem = True
260 # if dial.exp.GetSelection() == 0 : exp = True
263 # self.corpus.parametre['lem'] = lem
264 # self.corpus.parametre['expressions'] = exp
265 # self.corpus.parametre['nbforme_uce'] = dial.spin_ctrl_4.GetValue()
266 # self.corpus.parametre['eff_min_forme'] = dial.spin_ctrl_5.GetValue()
267 # self.corpus.parametre['max_actives'] = dial.spin_max_actives.GetValue()
274 # def make_corpus(self) :
275 # print 'make corpus'
277 # dlg = progressbar(self, maxi = 6)
278 # self.corpus.supplementaires = [option for option in self.KeyConf.options('KEYS') if self.KeyConf.get('KEYS', option) == "2"]
279 # self.corpus.typeactive = [option for option in self.KeyConf.options('KEYS') if self.KeyConf.get('KEYS', option) == "1"]
280 # ucis_txt, ucis_paras_txt = self.corpus.start_analyse(self.parent, dlg = dlg, cmd = self.cmd)
284 # dlg.Update(5, '%i ucis - Construction des uces' % len(ucis_paras_txt))
285 # self.corpus.make_ucis_paras_uces(ucis_paras_txt, make_uce = True)
288 # if self.corpus.para_coords != [[] for val in self.corpus.para_coords] :
289 # self.corpus.parametre['para'] = True
291 # self.corpus.parametre['para'] = False
292 # self.corpus.make_etoiles(self.corpus.para_coords)
293 # print 'len(ucis_paras_uces)', len(self.corpus.ucis_paras_uces)
296 # dlg.Update(6, u'Dictionnaires')
297 # uces, self.orderuces = self.corpus.make_forms_and_uces()
298 # self.corpus.ucenb = len(uces)
299 # self.corpus.make_lems(self.parent.lexique)
301 # self.corpus.make_var_actives()
302 # self.corpus.make_var_supp()
303 # self.corpus.lems_eff = self.corpus.make_lem_eff()
305 # #variables = treat_var_mod(listet)
307 # #self.corpus.write_etoiles(self.dictpathout['etoiles'])
311 # def make_table(self) :
312 # if 'orderuces' not in dir(self) :
313 # self.orderuces = [(i,j,k) for i, uci in enumerate(self.corpus.ucis_paras_uces) for j, para in enumerate(uci) for k, uce in enumerate(para)]
314 # self.orderuces = dict([[val, i] for i, val in enumerate(self.orderuces)])
315 # self.corpus.ucenb = len(self.orderuces)
316 # #tabuc1 = self.corpus.make_table_with_uce(self.orderuces)
317 # #tabuc1.insert(0,self.corpus.actives)
318 # #tabuc1 = self.corpus.make_sparse_matrix_with_uce(self.orderuces)
319 # #self.corpus.write_sparse_matrix(self.dictpathout['mat01'], tabuc1, self.corpus.ucenb, len(self.corpus.actives))
320 # if self.corpus.actives is None :
321 # self.corpus.typeactive = [option for option in self.KeyConf.options('KEYS') if self.KeyConf.get('KEYS', option) == "1"]
322 # self.corpus.min_eff_formes()
323 # self.corpus.make_var_actives()
324 # self.corpus.make_and_write_sparse_matrix_from_uce(self.orderuces, self.dictpathout['mat01'])
325 # #self.corpus.write_tab(tabuc1,self.dictpathout['mat01'])
327 # def make_simi(self) :
328 # self.tableau = Tableau(self.parent, '')
329 # self.tableau.listactives = self.corpus.actives
330 # self.tableau.parametre['fromtxt'] = True
331 # if 'lems_eff' not in dir(self.corpus) :
332 # self.corpus.lems_eff = self.corpus.make_lem_eff()
333 # #print('ATTENTION ETOILES')
334 # #self.paramsimi['bystar'] = True
335 # self.listet = self.corpus.get_unique_etoiles()
337 # self.tableau.listet = copy(self.listet)
338 # self.paramsimi['stars'] = copy(self.listet)
339 # #self.paramsimi['cexfromchi'] = True
340 # self.paramsimi['sfromchi'] = False
341 # #self.paramsimi['vlabcolor'] = True
342 # self.tableau.actives = dict([[lem, self.corpus.lems_eff[lem]] for lem in self.corpus.actives])
343 # self.corpus.save_corpus(self.dictpathout['corpus'])
344 # DoSimi(self, fromprof = self.dictpathout['mat01'], param = self.paramsimi, pathout = self.pathout)