1 # -*- coding: utf-8 -*-
2 #Author: Pierre Ratinaud
3 #Copyright (c) 2008-2011 Pierre Ratinaud
6 from chemins import ffr, simipath
7 #from corpus import Corpus
9 from analysetxt import AnalyseText
10 from ConfigParser import RawConfigParser
11 from guifunct import getPage, getCorpus
12 from dialog import StatDialog
13 from guifunct import SelectColumn, PrepSimi
14 from functions import indices_simi, progressbar, treat_var_mod, read_list_file, print_liste
15 from tableau import Tableau
16 from tabsimi import DoSimi
17 from PrintRScript import PrintSimiScript
# Module-level logger, registered under the name 'iramuteq.textsimi'.
# NOTE(review): the `logging` import is not among the visible import lines;
# presumably it sits on one of the lines missing from this listing - confirm.
23 logger = logging.getLogger('iramuteq.textsimi')
# NOTE(review): every line of this listing carries a leading original-line
# number and has lost its indentation. Gaps in that numbering suggest that some
# lines are not visible here (for instance the `def` that opens the first
# method, and `else` branches). The comments below describe only what the
# visible lines show; anything depending on a hidden line is marked as such.
#
# SimiTxt: similarity analysis on a text corpus, implemented as a subclass of
# AnalyseText. It relies on attributes it does not set in the visible lines
# (self.corpus, self.pathout, self.parametres, self.ira, self.dlg, self.parent),
# presumably provided by AnalyseText - confirm against that class.
25 class SimiTxt(AnalyseText):
# --- first method (its `def` line is not visible; name unknown from here) ---
# Tags the run as 'simitxt', hands `simipath` to self.pathout.basefiles() and
# keeps the `indices_simi` list imported from `functions`.
27 self.parametres['type'] = 'simitxt'
28 self.pathout.basefiles(simipath)
29 self.indices = indices_simi
# Active forms come from the corpus with a limit of 3; dictcol maps each
# position in self.actives to [form, self.corpus.getlemeff(form)].
32 self.actives = self.corpus.make_actives_limit(3)
33 dictcol = dict([[i, [act, self.corpus.getlemeff(act)]] for i, act in enumerate(self.actives)])
36 #cont = SelectColumn(self.ira, dictcol, self.actives, self.pathout['selected.csv'], dlg = self.dlg)
# The result of make_etoiles() is kept in self.listet; independent shallow
# copies go to self.stars and to the 'stars' parameter.
38 self.listet = self.corpus.make_etoiles()
40 self.stars = copy(self.listet)
41 self.parametres['stars'] = copy(self.listet)
42 self.parametres['sfromchi'] = False
# PrepSimi receives the current parameters, the 'selected.csv' path, the active
# forms, the indices and dictcol. When its `val` equals wx.ID_OK, its
# `parametres` replace self.parametres.
43 prep = PrepSimi(self.ira, self, self.parametres, self.pathout['selected.csv'], self.actives, indices_simi, wordlist=dictcol)
44 if prep.val == wx.ID_OK :
46 self.parametres = prep.parametres
# A PrintSimiScript is built from this analysis and its `scriptout` is passed
# to self.doR().
49 script = PrintSimiScript(self)
51 self.doR(script.scriptout)
# For type_graph == 1, the pair [basename of script.filename, script.txtgraph]
# is recorded in the 'liste_graph' list: appended to the list read from the
# existing file when that file exists, otherwise used as the only entry
# (presumably under an `else` on a hidden line - confirm). The list is then
# passed to print_liste() together with the 'liste_graph' path.
52 if self.parametres['type_graph'] == 1:
53 if os.path.exists(self.pathout['liste_graph']):
54 graph_simi = read_list_file(self.pathout['liste_graph'])
55 graph_simi.append([os.path.basename(script.filename), script.txtgraph])
57 graph_simi = [[os.path.basename(script.filename), script.txtgraph]]
58 print_liste(self.pathout['liste_graph'], graph_simi)
# --- preferences ---
# Shows a StatDialog modally, inspects the selection of its `radio_lem`
# control, stores `lem` under the 'lem' parameter and returns self.parametres.
# NOTE(review): the assignment of `lem`, any test on `val`, and whatever
# happens when the dialog is not accepted are on lines not visible here.
62 def preferences(self) :
63 dial = StatDialog(self, self.parent)
65 val = dial.ShowModal()
67 if dial.radio_lem.GetSelection() == 0 :
71 self.parametres['lem'] = lem
73 return self.parametres
# --- makesimiparam ---
# Builds the self.paramsimi dict and merges it into self.parametres with
# dict.update(), so keys already present in self.parametres are overwritten.
# Only the 'coeff' and 'cola' entries of the literal are visible in this
# listing; the remaining entries and the closing brace are on hidden lines.
78 def makesimiparam(self) :
79 self.paramsimi = {'coeff' : 0,
101 'cola' : (200,200,200),
109 #'ira' : self.pathout['Analyse.ira']
111 self.parametres.update(self.paramsimi)
# --- makefiles ---
# Records `lim` as 'eff_min_forme', the number of active forms as 'nbactives'
# and sets 'fromprof' to True; then asks the corpus to write the sparse matrix
# ('mat01.csv') and the 'listeuce1.csv' file for self.actives, and finally
# writes the active forms, one per line, to 'actives.csv' encoded with
# self.ira.syscoding.
# `lim` (default 3) is only stored as a parameter in the visible lines; the
# selection of self.actives itself is not done here.
113 def makefiles(self, lim=3) :
114 #self.actives, lim = self.corpus.make_actives_nb(self.parametres.get('max_actives',1500), 1)
115 self.parametres['eff_min_forme'] = lim
116 self.parametres['nbactives'] = len(self.actives)
117 self.parametres['fromprof'] = True
118 self.corpus.make_and_write_sparse_matrix_from_uces(self.actives, self.pathout['mat01.csv'], self.pathout['listeuce1.csv'])
119 with open(self.pathout['actives.csv'], 'w') as f :
120 f.write('\n'.join(self.actives).encode(self.ira.syscoding))
126 # self.tableau = Tableau(self.parent, '')
127 # self.tableau.listactives = self.actives
128 # self.tableau.parametre['fromtxt'] = True
129 # self.corpus.lems_eff = dict([[lem,[self.corpus.lems[lem].freq]] for lem in self.actives])
130 # #print('ATTENTION ETOILES')
131 # #self.paramsimi['bystar'] = True
132 # self.tableau.listet = copy(self.listet)
133 # #self.paramsimi['cexfromchi'] = True
134 # #self.paramsimi['vlabcolor'] = True
135 # self.tableau.actives = copy(self.corpus.lems_eff)
136 # DoSimi(self, fromprof = self.pathout['mat01.csv'], param = self.paramsimi, pathout = self.pathout.dirout)
139 # def __init__(self, parent, cmd = False, param = None):
140 # self.parent = parent
142 # self.ConfigPath = parent.ConfigPath
143 # self.DictPath = parent.DictPath
144 # self.KeyConf = RawConfigParser()
145 # self.KeyConf.read(self.ConfigPath['key'])
146 # self.indices = indices_simi
147 # self.paramsimi = {'coeff' : 0,
158 # 'coeff_temax' : 10,
162 # 'cexfromchi' : False,
168 # 'cols' : (255,0,0),
169 # 'cola' : (200,200,200),
174 # 'keep_coord' : True,
178 # page = getPage(self.parent)
179 # if page is not None :
180 # self.corpus = getCorpus(page)
181 # if self.corpus is not None :
182 # self.pathout = ConstructPathOut(self.corpus.parametre['openpath'], 'simitxt')
183 # self.dictpathout = construct_simipath(self.pathout)
184 # self.val = wx.ID_OK
188 # self.corpus = Corpus(parent)
189 # self.corpus.content = self.parent.content
190 # self.corpus.parametre['encodage'] = parent.corpus_encodage
191 # self.corpus.parametre['lang'] = parent.corpus_lang
192 # self.corpus.parametre['filename'] = parent.filename
193 # self.corpus.parametre['eff_min_uce'] = None
194 # self.pathout = ConstructPathOut(self.corpus.parametre['filename'], 'simitxt')
195 # self.dictpathout = construct_simipath(self.pathout)
196 # dial = StatDialog(self, self.parent)
197 # dial.check_uce.SetValue(True)
198 # dial.check_uce.Enable(False)
199 # dial.OnCheckUce(wx.EVT_MENU)
200 # self.val = dial.ShowModal()
201 # if self.val == wx.ID_OK :
202 # with open(self.parent.ConfigPath['key'], 'w') as f:
203 # self.KeyConf.write(f)
204 # if dial.radio_lem.GetSelection() == 0 : lem = True
206 # if dial.exp.GetSelection() == 0 : exp = True
209 # self.corpus.parametre['lem'] = lem
210 # self.corpus.parametre['expressions'] = exp
211 # self.corpus.parametre['nbforme_uce'] = dial.spin_ctrl_4.GetValue()
212 # self.corpus.parametre['eff_min_forme'] = dial.spin_ctrl_5.GetValue()
213 # self.corpus.parametre['max_actives'] = dial.spin_max_actives.GetValue()
220 # def make_corpus(self) :
221 # print 'make corpus'
223 # dlg = progressbar(self, maxi = 6)
224 # self.corpus.supplementaires = [option for option in self.KeyConf.options('KEYS') if self.KeyConf.get('KEYS', option) == "2"]
225 # self.corpus.typeactive = [option for option in self.KeyConf.options('KEYS') if self.KeyConf.get('KEYS', option) == "1"]
226 # ucis_txt, ucis_paras_txt = self.corpus.start_analyse(self.parent, dlg = dlg, cmd = self.cmd)
230 # dlg.Update(5, '%i ucis - Construction des uces' % len(ucis_paras_txt))
231 # self.corpus.make_ucis_paras_uces(ucis_paras_txt, make_uce = True)
234 # if self.corpus.para_coords != [[] for val in self.corpus.para_coords] :
235 # self.corpus.parametre['para'] = True
237 # self.corpus.parametre['para'] = False
238 # self.corpus.make_etoiles(self.corpus.para_coords)
239 # print 'len(ucis_paras_uces)', len(self.corpus.ucis_paras_uces)
242 # dlg.Update(6, u'Dictionnaires')
243 # uces, self.orderuces = self.corpus.make_forms_and_uces()
244 # self.corpus.ucenb = len(uces)
245 # self.corpus.make_lems(self.parent.lexique)
247 # self.corpus.make_var_actives()
248 # self.corpus.make_var_supp()
249 # self.corpus.lems_eff = self.corpus.make_lem_eff()
251 # #variables = treat_var_mod(listet)
253 # #self.corpus.write_etoiles(self.dictpathout['etoiles'])
257 # def make_table(self) :
258 # if 'orderuces' not in dir(self) :
259 # self.orderuces = [(i,j,k) for i, uci in enumerate(self.corpus.ucis_paras_uces) for j, para in enumerate(uci) for k, uce in enumerate(para)]
260 # self.orderuces = dict([[val, i] for i, val in enumerate(self.orderuces)])
261 # self.corpus.ucenb = len(self.orderuces)
262 # #tabuc1 = self.corpus.make_table_with_uce(self.orderuces)
263 # #tabuc1.insert(0,self.corpus.actives)
264 # #tabuc1 = self.corpus.make_sparse_matrix_with_uce(self.orderuces)
265 # #self.corpus.write_sparse_matrix(self.dictpathout['mat01'], tabuc1, self.corpus.ucenb, len(self.corpus.actives))
266 # if self.corpus.actives is None :
267 # self.corpus.typeactive = [option for option in self.KeyConf.options('KEYS') if self.KeyConf.get('KEYS', option) == "1"]
268 # self.corpus.min_eff_formes()
269 # self.corpus.make_var_actives()
270 # self.corpus.make_and_write_sparse_matrix_from_uce(self.orderuces, self.dictpathout['mat01'])
271 # #self.corpus.write_tab(tabuc1,self.dictpathout['mat01'])
273 # def make_simi(self) :
274 # self.tableau = Tableau(self.parent, '')
275 # self.tableau.listactives = self.corpus.actives
276 # self.tableau.parametre['fromtxt'] = True
277 # if 'lems_eff' not in dir(self.corpus) :
278 # self.corpus.lems_eff = self.corpus.make_lem_eff()
279 # #print('ATTENTION ETOILES')
280 # #self.paramsimi['bystar'] = True
281 # self.listet = self.corpus.get_unique_etoiles()
283 # self.tableau.listet = copy(self.listet)
284 # self.paramsimi['stars'] = copy(self.listet)
285 # #self.paramsimi['cexfromchi'] = True
286 # self.paramsimi['sfromchi'] = False
287 # #self.paramsimi['vlabcolor'] = True
288 # self.tableau.actives = dict([[lem, self.corpus.lems_eff[lem]] for lem in self.corpus.actives])
289 # self.corpus.save_corpus(self.dictpathout['corpus'])
290 # DoSimi(self, fromprof = self.dictpathout['mat01'], param = self.paramsimi, pathout = self.pathout)