1 # -*- coding: utf-8 -*-
2 #Author: Pierre Ratinaud
3 #Copyright (c) 2008-2011 Pierre Ratinaud
6 from chemins import ffr, simipath
7 #from corpus import Corpus
9 from analysetxt import AnalyseText
10 from ConfigParser import RawConfigParser
11 from guifunct import getPage, getCorpus
12 from dialog import StatDialog
13 from guifunct import SelectColumn, PrepSimi
14 from functions import indices_simi, progressbar, treat_var_mod, read_list_file, print_liste
15 from tableau import Tableau
16 from tabsimi import DoSimi
17 from PrintRScript import PrintSimiScript
23 logger = logging.getLogger('iramuteq.textsimi')
# Similarity analysis run on a text corpus; registers itself under the
# analysis type 'simitxt' and reuses the AnalyseText machinery.
25 class SimiTxt(AnalyseText):
27 self.parametres['type'] = 'simitxt'
28 self.pathout.basefiles(simipath)
29 self.indices = indices_simi
# Candidate active forms; the argument 3 is presumably a minimum-frequency
# limit -- confirm against Corpus.make_actives_limit.
32 self.actives = self.corpus.make_actives_limit(3)
# index -> [form, value returned by corpus.getlemeff(form)]; handed to
# PrepSimi below as its word list.
33 dictcol = dict([[i, [act, self.corpus.getlemeff(act)]] for i, act in enumerate(self.actives)])
36 #cont = SelectColumn(self.ira, dictcol, self.actives, self.pathout['selected.csv'], dlg = self.dlg)
38 self.listet = self.corpus.make_etoiles()
# Keep separate copies of the list for self.stars and for the parameters.
40 self.stars = copy(self.listet)
41 self.parametres['stars'] = copy(self.listet)
42 self.parametres['sfromchi'] = False
43 prep = PrepSimi(self.ira, self, self.parametres, self.pathout['selected.csv'], self.actives, indices_simi, wordlist=dictcol)
# Only proceed when the preparation dialog was validated.
44 if prep.val == wx.ID_OK :
# From here on, work with the parameters as edited in the dialog.
46 self.parametres = prep.parametres
49 script = PrintSimiScript(self)
51 if not self.doR(script.scriptout) :
# For graph type 1, record this run's [file name, graph text] pair in the
# 'liste_graph' file: extend the list already stored there when the file
# exists, otherwise start a new list.
53 if self.parametres['type_graph'] == 1:
54 if os.path.exists(self.pathout['liste_graph']):
55 graph_simi = read_list_file(self.pathout['liste_graph'])
56 graph_simi.append([os.path.basename(script.filename), script.txtgraph])
58 graph_simi = [[os.path.basename(script.filename), script.txtgraph]]
59 print_liste(self.pathout['liste_graph'], graph_simi)
63 def preferences(self) :
# Show the StatDialog modally, store the lemmatisation choice under
# self.parametres['lem'] and return the parameter dictionary.
64 dial = StatDialog(self, self.parent)
66 val = dial.ShowModal()
# The radio_lem selection decides the value of `lem`; index 0 presumably
# means "lemmatise" -- confirm against StatDialog.
68 if dial.radio_lem.GetSelection() == 0 :
72 self.parametres['lem'] = lem
74 return self.parametres
79 def makesimiparam(self) :
# Build the default settings dictionary in self.paramsimi, then merge it
# into self.parametres (dict.update overwrites keys that already exist).
80 self.paramsimi = {'coeff' : 0,
102 'cola' : (200,200,200),
110 #'ira' : self.pathout['Analyse.ira']
112 self.parametres.update(self.paramsimi)
def makefiles(self, lim=3) :
    """Record the run parameters and write the analysis input files.

    Stores `lim`, the number of active forms and the 'fromprof' flag in
    self.parametres, asks the corpus to write its sparse matrix to the
    'mat01.csv' and 'listeuce1.csv' output paths, then writes the active
    forms, one per line, to 'actives.csv' using the system encoding.

    lim -- value stored as parametres['eff_min_forme'] (default 3).
    """
    # Imported locally so the method stays self-contained and the module's
    # import block does not need to change.
    import io
    #self.actives, lim = self.corpus.make_actives_nb(self.parametres.get('max_actives',1500), 1)
    self.parametres['eff_min_forme'] = lim
    self.parametres['nbactives'] = len(self.actives)
    self.parametres['fromprof'] = True
    self.corpus.make_and_write_sparse_matrix_from_uces(self.actives, self.pathout['mat01.csv'], self.pathout['listeuce1.csv'])
    # Let the file object do the encoding: writing already-encoded bytes to a
    # text-mode handle only works on Python 2 and raises TypeError on
    # Python 3. io.open behaves the same on both and produces the same bytes.
    with io.open(self.pathout['actives.csv'], 'w', encoding=self.ira.syscoding) as f :
        # u'' keeps the joined value a text string even when the list is empty.
        f.write(u'\n'.join(self.actives))
127 # self.tableau = Tableau(self.parent, '')
128 # self.tableau.listactives = self.actives
129 # self.tableau.parametre['fromtxt'] = True
130 # self.corpus.lems_eff = dict([[lem,[self.corpus.lems[lem].freq]] for lem in self.actives])
131 # #print('ATTENTION ETOILES')
132 # #self.paramsimi['bystar'] = True
133 # self.tableau.listet = copy(self.listet)
134 # #self.paramsimi['cexfromchi'] = True
135 # #self.paramsimi['vlabcolor'] = True
136 # self.tableau.actives = copy(self.corpus.lems_eff)
137 # DoSimi(self, fromprof = self.pathout['mat01.csv'], param = self.paramsimi, pathout = self.pathout.dirout)
140 # def __init__(self, parent, cmd = False, param = None):
141 # self.parent = parent
143 # self.ConfigPath = parent.ConfigPath
144 # self.DictPath = parent.DictPath
145 # self.KeyConf = RawConfigParser()
146 # self.KeyConf.read(self.ConfigPath['key'])
147 # self.indices = indices_simi
148 # self.paramsimi = {'coeff' : 0,
159 # 'coeff_temax' : 10,
163 # 'cexfromchi' : False,
169 # 'cols' : (255,0,0),
170 # 'cola' : (200,200,200),
175 # 'keep_coord' : True,
179 # page = getPage(self.parent)
180 # if page is not None :
181 # self.corpus = getCorpus(page)
182 # if self.corpus is not None :
183 # self.pathout = ConstructPathOut(self.corpus.parametre['openpath'], 'simitxt')
184 # self.dictpathout = construct_simipath(self.pathout)
185 # self.val = wx.ID_OK
189 # self.corpus = Corpus(parent)
190 # self.corpus.content = self.parent.content
191 # self.corpus.parametre['encodage'] = parent.corpus_encodage
192 # self.corpus.parametre['lang'] = parent.corpus_lang
193 # self.corpus.parametre['filename'] = parent.filename
194 # self.corpus.parametre['eff_min_uce'] = None
195 # self.pathout = ConstructPathOut(self.corpus.parametre['filename'], 'simitxt')
196 # self.dictpathout = construct_simipath(self.pathout)
197 # dial = StatDialog(self, self.parent)
198 # dial.check_uce.SetValue(True)
199 # dial.check_uce.Enable(False)
200 # dial.OnCheckUce(wx.EVT_MENU)
201 # self.val = dial.ShowModal()
202 # if self.val == wx.ID_OK :
203 # with open(self.parent.ConfigPath['key'], 'w') as f:
204 # self.KeyConf.write(f)
205 # if dial.radio_lem.GetSelection() == 0 : lem = True
207 # if dial.exp.GetSelection() == 0 : exp = True
210 # self.corpus.parametre['lem'] = lem
211 # self.corpus.parametre['expressions'] = exp
212 # self.corpus.parametre['nbforme_uce'] = dial.spin_ctrl_4.GetValue()
213 # self.corpus.parametre['eff_min_forme'] = dial.spin_ctrl_5.GetValue()
214 # self.corpus.parametre['max_actives'] = dial.spin_max_actives.GetValue()
221 # def make_corpus(self) :
222 # print 'make corpus'
224 # dlg = progressbar(self, maxi = 6)
225 # self.corpus.supplementaires = [option for option in self.KeyConf.options('KEYS') if self.KeyConf.get('KEYS', option) == "2"]
226 # self.corpus.typeactive = [option for option in self.KeyConf.options('KEYS') if self.KeyConf.get('KEYS', option) == "1"]
227 # ucis_txt, ucis_paras_txt = self.corpus.start_analyse(self.parent, dlg = dlg, cmd = self.cmd)
231 # dlg.Update(5, '%i ucis - Construction des uces' % len(ucis_paras_txt))
232 # self.corpus.make_ucis_paras_uces(ucis_paras_txt, make_uce = True)
235 # if self.corpus.para_coords != [[] for val in self.corpus.para_coords] :
236 # self.corpus.parametre['para'] = True
238 # self.corpus.parametre['para'] = False
239 # self.corpus.make_etoiles(self.corpus.para_coords)
240 # print 'len(ucis_paras_uces)', len(self.corpus.ucis_paras_uces)
243 # dlg.Update(6, u'Dictionnaires')
244 # uces, self.orderuces = self.corpus.make_forms_and_uces()
245 # self.corpus.ucenb = len(uces)
246 # self.corpus.make_lems(self.parent.lexique)
248 # self.corpus.make_var_actives()
249 # self.corpus.make_var_supp()
250 # self.corpus.lems_eff = self.corpus.make_lem_eff()
252 # #variables = treat_var_mod(listet)
254 # #self.corpus.write_etoiles(self.dictpathout['etoiles'])
258 # def make_table(self) :
259 # if 'orderuces' not in dir(self) :
260 # self.orderuces = [(i,j,k) for i, uci in enumerate(self.corpus.ucis_paras_uces) for j, para in enumerate(uci) for k, uce in enumerate(para)]
261 # self.orderuces = dict([[val, i] for i, val in enumerate(self.orderuces)])
262 # self.corpus.ucenb = len(self.orderuces)
263 # #tabuc1 = self.corpus.make_table_with_uce(self.orderuces)
264 # #tabuc1.insert(0,self.corpus.actives)
265 # #tabuc1 = self.corpus.make_sparse_matrix_with_uce(self.orderuces)
266 # #self.corpus.write_sparse_matrix(self.dictpathout['mat01'], tabuc1, self.corpus.ucenb, len(self.corpus.actives))
267 # if self.corpus.actives is None :
268 # self.corpus.typeactive = [option for option in self.KeyConf.options('KEYS') if self.KeyConf.get('KEYS', option) == "1"]
269 # self.corpus.min_eff_formes()
270 # self.corpus.make_var_actives()
271 # self.corpus.make_and_write_sparse_matrix_from_uce(self.orderuces, self.dictpathout['mat01'])
272 # #self.corpus.write_tab(tabuc1,self.dictpathout['mat01'])
274 # def make_simi(self) :
275 # self.tableau = Tableau(self.parent, '')
276 # self.tableau.listactives = self.corpus.actives
277 # self.tableau.parametre['fromtxt'] = True
278 # if 'lems_eff' not in dir(self.corpus) :
279 # self.corpus.lems_eff = self.corpus.make_lem_eff()
280 # #print('ATTENTION ETOILES')
281 # #self.paramsimi['bystar'] = True
282 # self.listet = self.corpus.get_unique_etoiles()
284 # self.tableau.listet = copy(self.listet)
285 # self.paramsimi['stars'] = copy(self.listet)
286 # #self.paramsimi['cexfromchi'] = True
287 # self.paramsimi['sfromchi'] = False
288 # #self.paramsimi['vlabcolor'] = True
289 # self.tableau.actives = dict([[lem, self.corpus.lems_eff[lem]] for lem in self.corpus.actives])
290 # self.corpus.save_corpus(self.dictpathout['corpus'])
291 # DoSimi(self, fromprof = self.dictpathout['mat01'], param = self.paramsimi, pathout = self.pathout)