1 # -*- coding: utf-8 -*-
2 #Author: Pierre Ratinaud
3 #Copyright (c) 2008-2011 Pierre Ratinaud
6 from chemins import ffr, simipath
7 #from corpus import Corpus
9 from analysetxt import AnalyseText
10 from ConfigParser import RawConfigParser
11 from guifunct import getPage, getCorpus
12 from dialog import StatDialog
13 from guifunct import SelectColumn, PrepSimi
14 from functions import indices_simi, progressbar, treat_var_mod, read_list_file, print_liste
15 from tableau import Tableau
16 from tabsimi import DoSimi
17 from PrintRScript import PrintSimiScript
# Module-level logger, registered under the name 'iramuteq.textsimi'.
23 logger = logging.getLogger('iramuteq.textsimi')
# SimiTxt subclasses AnalyseText (imported from analysetxt above). Judging by
# the names involved it presumably drives the similarity analysis of a text
# corpus -- confirm against AnalyseText.
# NOTE(review): in this listing the indentation is gone and the embedded line
# numbers skip (25 -> 27, 29 -> 32, 34 -> 36, 38 -> 40, 44 -> 46), so the
# header of the method that owns the statements below, and a few statements
# (including the branch keyword between lines 44 and 46), are not visible.
25 class SimiTxt(AnalyseText):
# Tag the parameter set with the analysis type, hand the simipath table to the
# output-path object and expose indices_simi on the instance.
27 self.parametres['type'] = 'simitxt'
28 self.pathout.basefiles(simipath)
29 self.indices = indices_simi
# Active forms are requested from the corpus with a hard-coded argument of 3
# (presumably a minimum frequency -- TODO confirm in make_actives_limit).
32 self.actives = self.corpus.make_actives_limit(3)
# dictcol maps each position in self.actives to
# [form, self.corpus.getlemeff(form)].
33 dictcol = dict([[i, [act, self.corpus.getlemeff(act)]] for i, act in enumerate(self.actives)])
# SelectColumn receives self.ira, that mapping, the list of active forms and
# the path registered as 'selected.csv'; what it does with them is not visible
# here (presumably a column-selection dialog -- confirm in guifunct).
34 SelectColumn(self.ira, dictcol, self.actives, self.pathout['selected.csv'])
# PrepSimi is built from self.ira, the current parameters and indices_simi;
# its .parametres attribute then replaces self.parametres.
36 prep = PrepSimi(self.ira, self.parametres, indices_simi)
37 self.parametres = prep.parametres
# PrintSimiScript (from PrintRScript) is built from this analysis object and
# its scriptout attribute is passed to self.doR.
38 script = PrintSimiScript(self)
40 self.doR(script.scriptout)
# Only when type_graph == 1: record [basename of script.filename,
# script.txtgraph] in the list kept at pathout['liste_graph'].
41 if self.parametres['type_graph'] == 1:
# If that list file already exists it is read back and extended ...
42 if os.path.exists(self.pathout['liste_graph']):
43 graph_simi = read_list_file(self.pathout['liste_graph'])
44 graph_simi.append([os.path.basename(script.filename), script.txtgraph])
# ... otherwise a new one-entry list is started (presumably under an `else`
# whose keyword is missing from this listing -- confirm).
46 graph_simi = [[os.path.basename(script.filename), script.txtgraph]]
# The list and its path go to print_liste (presumably writes the list to that
# file -- confirm in functions.print_liste).
47 print_liste(self.pathout['liste_graph'], graph_simi)
# Open a StatDialog modally, store the lemmatisation choice under
# self.parametres['lem'] and return self.parametres.
# NOTE(review): the embedded numbering skips 51, 53, 55-57 and 59, so the
# handling of `val`, the body of the `if` below and the assignment of `lem`
# are not visible in this listing.
49 def preferences(self) :
# The dialog is built with this analysis object and its parent.
50 dial = StatDialog(self, self.parent)
# val receives the result of ShowModal(); no visible line reads it.
52 val = dial.ShowModal()
# Tests whether the first entry (index 0) of the radio_lem control is selected.
54 if dial.radio_lem.GetSelection() == 0 :
# `lem` is bound in the lines missing from this listing (presumably True/False
# depending on the test above -- confirm).
58 self.parametres['lem'] = lem
60 return self.parametres
# Fill self.paramsimi from a dict literal and merge it into self.parametres.
# NOTE(review): only the first entry and one later entry of the literal are
# visible here (the embedded numbering jumps 66 -> 88 -> 96 -> 98); the other
# entries and the closing brace are missing from this listing.
65 def makesimiparam(self) :
66 self.paramsimi = {'coeff' : 0,
# Three-component tuple; presumably an RGB colour -- TODO confirm.
88 'cola' : (200,200,200),
# Disabled entry kept from the original.
96 #'ira' : self.pathout['Analyse.ira']
# Every key of self.paramsimi overwrites the same key in self.parametres.
98 self.parametres.update(self.paramsimi)
def makefiles(self, lim=3):
    """Write the input files of the analysis and record its settings.

    Stores bookkeeping entries in ``self.parametres``, asks the corpus to
    write the sparse matrix for ``self.actives`` (which must already be set
    by the caller), writes the active forms one per line to the path
    registered as ``'actives.csv'`` and keeps the result of
    ``self.corpus.make_etoiles()`` in ``self.listet``.

    lim -- value recorded under ``'eff_min_forme'``; this method does not
           use it for any filtering.
    """
    # NOTE(review): the listing this method was taken from skips embedded
    # line numbers 108 and 110; confirm that no statement was lost there.
    # Disabled alternative kept from the original:
    #self.actives, lim = self.corpus.make_actives_nb(self.parametres.get('max_actives',1500), 1)
    self.parametres.update(
        eff_min_forme=lim,
        nbactives=len(self.actives),
        fromprof=True,
    )

    # The corpus writes both the matrix and the uce list itself.
    matrix_path = self.pathout['mat01.csv']
    uce_list_path = self.pathout['listeuce1.csv']
    self.corpus.make_and_write_sparse_matrix_from_uces(
        self.actives, matrix_path, uce_list_path)

    # One active form per line, encoded with the application's system coding.
    with open(self.pathout['actives.csv'], 'w') as out:
        joined = '\n'.join(self.actives)
        out.write(joined.encode(self.ira.syscoding))

    self.listet = self.corpus.make_etoiles()
    # A copy is stored so later changes to self.listet do not leak into the
    # parameters.
    self.parametres['stars'] = copy(self.listet)
    self.parametres['sfromchi'] = False
116 # self.tableau = Tableau(self.parent, '')
117 # self.tableau.listactives = self.actives
118 # self.tableau.parametre['fromtxt'] = True
119 # self.corpus.lems_eff = dict([[lem,[self.corpus.lems[lem].freq]] for lem in self.actives])
120 # #print('ATTENTION ETOILES')
121 # #self.paramsimi['bystar'] = True
122 # self.tableau.listet = copy(self.listet)
123 # #self.paramsimi['cexfromchi'] = True
124 # #self.paramsimi['vlabcolor'] = True
125 # self.tableau.actives = copy(self.corpus.lems_eff)
126 # DoSimi(self, fromprof = self.pathout['mat01.csv'], param = self.paramsimi, pathout = self.pathout.dirout)
129 # def __init__(self, parent, cmd = False, param = None):
130 # self.parent = parent
132 # self.ConfigPath = parent.ConfigPath
133 # self.DictPath = parent.DictPath
134 # self.KeyConf = RawConfigParser()
135 # self.KeyConf.read(self.ConfigPath['key'])
136 # self.indices = indices_simi
137 # self.paramsimi = {'coeff' : 0,
148 # 'coeff_temax' : 10,
152 # 'cexfromchi' : False,
158 # 'cols' : (255,0,0),
159 # 'cola' : (200,200,200),
164 # 'keep_coord' : True,
168 # page = getPage(self.parent)
169 # if page is not None :
170 # self.corpus = getCorpus(page)
171 # if self.corpus is not None :
172 # self.pathout = ConstructPathOut(self.corpus.parametre['openpath'], 'simitxt')
173 # self.dictpathout = construct_simipath(self.pathout)
174 # self.val = wx.ID_OK
178 # self.corpus = Corpus(parent)
179 # self.corpus.content = self.parent.content
180 # self.corpus.parametre['encodage'] = parent.corpus_encodage
181 # self.corpus.parametre['lang'] = parent.corpus_lang
182 # self.corpus.parametre['filename'] = parent.filename
183 # self.corpus.parametre['eff_min_uce'] = None
184 # self.pathout = ConstructPathOut(self.corpus.parametre['filename'], 'simitxt')
185 # self.dictpathout = construct_simipath(self.pathout)
186 # dial = StatDialog(self, self.parent)
187 # dial.check_uce.SetValue(True)
188 # dial.check_uce.Enable(False)
189 # dial.OnCheckUce(wx.EVT_MENU)
190 # self.val = dial.ShowModal()
191 # if self.val == wx.ID_OK :
192 # with open(self.parent.ConfigPath['key'], 'w') as f:
193 # self.KeyConf.write(f)
194 # if dial.radio_lem.GetSelection() == 0 : lem = True
196 # if dial.exp.GetSelection() == 0 : exp = True
199 # self.corpus.parametre['lem'] = lem
200 # self.corpus.parametre['expressions'] = exp
201 # self.corpus.parametre['nbforme_uce'] = dial.spin_ctrl_4.GetValue()
202 # self.corpus.parametre['eff_min_forme'] = dial.spin_ctrl_5.GetValue()
203 # self.corpus.parametre['max_actives'] = dial.spin_max_actives.GetValue()
210 # def make_corpus(self) :
211 # print 'make corpus'
213 # dlg = progressbar(self, maxi = 6)
214 # self.corpus.supplementaires = [option for option in self.KeyConf.options('KEYS') if self.KeyConf.get('KEYS', option) == "2"]
215 # self.corpus.typeactive = [option for option in self.KeyConf.options('KEYS') if self.KeyConf.get('KEYS', option) == "1"]
216 # ucis_txt, ucis_paras_txt = self.corpus.start_analyse(self.parent, dlg = dlg, cmd = self.cmd)
220 # dlg.Update(5, '%i ucis - Construction des uces' % len(ucis_paras_txt))
221 # self.corpus.make_ucis_paras_uces(ucis_paras_txt, make_uce = True)
224 # if self.corpus.para_coords != [[] for val in self.corpus.para_coords] :
225 # self.corpus.parametre['para'] = True
227 # self.corpus.parametre['para'] = False
228 # self.corpus.make_etoiles(self.corpus.para_coords)
229 # print 'len(ucis_paras_uces)', len(self.corpus.ucis_paras_uces)
232 # dlg.Update(6, u'Dictionnaires')
233 # uces, self.orderuces = self.corpus.make_forms_and_uces()
234 # self.corpus.ucenb = len(uces)
235 # self.corpus.make_lems(self.parent.lexique)
237 # self.corpus.make_var_actives()
238 # self.corpus.make_var_supp()
239 # self.corpus.lems_eff = self.corpus.make_lem_eff()
241 # #variables = treat_var_mod(listet)
243 # #self.corpus.write_etoiles(self.dictpathout['etoiles'])
247 # def make_table(self) :
248 # if 'orderuces' not in dir(self) :
249 # self.orderuces = [(i,j,k) for i, uci in enumerate(self.corpus.ucis_paras_uces) for j, para in enumerate(uci) for k, uce in enumerate(para)]
250 # self.orderuces = dict([[val, i] for i, val in enumerate(self.orderuces)])
251 # self.corpus.ucenb = len(self.orderuces)
252 # #tabuc1 = self.corpus.make_table_with_uce(self.orderuces)
253 # #tabuc1.insert(0,self.corpus.actives)
254 # #tabuc1 = self.corpus.make_sparse_matrix_with_uce(self.orderuces)
255 # #self.corpus.write_sparse_matrix(self.dictpathout['mat01'], tabuc1, self.corpus.ucenb, len(self.corpus.actives))
256 # if self.corpus.actives is None :
257 # self.corpus.typeactive = [option for option in self.KeyConf.options('KEYS') if self.KeyConf.get('KEYS', option) == "1"]
258 # self.corpus.min_eff_formes()
259 # self.corpus.make_var_actives()
260 # self.corpus.make_and_write_sparse_matrix_from_uce(self.orderuces, self.dictpathout['mat01'])
261 # #self.corpus.write_tab(tabuc1,self.dictpathout['mat01'])
263 # def make_simi(self) :
264 # self.tableau = Tableau(self.parent, '')
265 # self.tableau.listactives = self.corpus.actives
266 # self.tableau.parametre['fromtxt'] = True
267 # if 'lems_eff' not in dir(self.corpus) :
268 # self.corpus.lems_eff = self.corpus.make_lem_eff()
269 # #print('ATTENTION ETOILES')
270 # #self.paramsimi['bystar'] = True
271 # self.listet = self.corpus.get_unique_etoiles()
273 # self.tableau.listet = copy(self.listet)
274 # self.paramsimi['stars'] = copy(self.listet)
275 # #self.paramsimi['cexfromchi'] = True
276 # self.paramsimi['sfromchi'] = False
277 # #self.paramsimi['vlabcolor'] = True
278 # self.tableau.actives = dict([[lem, self.corpus.lems_eff[lem]] for lem in self.corpus.actives])
279 # self.corpus.save_corpus(self.dictpathout['corpus'])
280 # DoSimi(self, fromprof = self.dictpathout['mat01'], param = self.paramsimi, pathout = self.pathout)