1 # -*- coding: utf-8 -*-
2 #Author: Pierre Ratinaud
3 #Copyright (c) 2008-2011 Pierre Ratinaud
6 from chemins import ffr, simipath
7 #from corpus import Corpus
9 from analysetxt import AnalyseText
10 from ConfigParser import RawConfigParser
11 from guifunct import getPage, getCorpus
12 from dialog import StatDialog
13 from guifunct import SelectColumn, PrepSimi
14 from functions import indices_simi, progressbar, treat_var_mod, read_list_file, print_liste
15 from tableau import Tableau
16 from tabsimi import DoSimi
17 from PrintRScript import PrintSimiScript
# Module-level logger registered under the name 'iramuteq.textsimi'.
# NOTE(review): no `import logging` line is visible in this excerpt (the
# embedded numbering skips from 17 to 23) -- confirm it sits in that gap.
23 logger = logging.getLogger('iramuteq.textsimi')
# SimiTxt: AnalyseText subclass for the 'simitxt' analysis type. The visible
# methods pick the active forms, write the analysis input files, merge the
# similarity parameters into self.parametres and hand a PrintSimiScript
# output to self.doR().
# NOTE(review): this excerpt has lost its indentation and several lines (the
# embedded line numbers have gaps); comments below describe only the
# statements that are visible.
25 class SimiTxt(AnalyseText):
# NOTE(review): the `def` line that opens this method is not visible (the
# embedded numbering jumps from 25 to 27); the statements down to the
# print_liste() call appear to form its body -- confirm against the full file.
# Tag the analysis type, pass simipath to pathout.basefiles() and keep a
# reference to the indices_simi list imported from `functions`.
27 self.parametres['type'] = 'simitxt'
28 self.pathout.basefiles(simipath)
29 self.indices = indices_simi
# Ask the corpus for its active forms with a limit of 3 (presumably a
# minimum frequency -- TODO confirm in Corpus.make_actives_limit).
32 self.actives = self.corpus.make_actives_limit(3)
# Map each position in self.actives to [form, corpus.getlemeff(form)].
33 dictcol = dict([[i, [act, self.corpus.getlemeff(act)]] for i, act in enumerate(self.actives)])
# SelectColumn receives the mapping, the active list and the
# 'selected.csv' output path, plus the current dialog.
34 SelectColumn(self.ira, dictcol, self.actives, self.pathout['selected.csv'], dlg = self.dlg)
# PrepSimi is built from the current parameters; whatever it leaves in
# prep.parametres replaces self.parametres.
37 prep = PrepSimi(self.ira, self.parametres, indices_simi)
38 self.parametres = prep.parametres
# Build the script object for this analysis and pass its scriptout to doR().
39 script = PrintSimiScript(self)
41 self.doR(script.scriptout)
# For type_graph == 1, record [script file basename, script.txtgraph] in the
# 'liste_graph' file: appended to the list read from that file when it
# already exists on disk.
42 if self.parametres['type_graph'] == 1:
43 if os.path.exists(self.pathout['liste_graph']):
44 graph_simi = read_list_file(self.pathout['liste_graph'])
45 graph_simi.append([os.path.basename(script.filename), script.txtgraph])
# NOTE(review): embedded line 46 is missing here -- presumably the `else :`
# branch that starts a fresh one-entry list when the file does not exist.
47 graph_simi = [[os.path.basename(script.filename), script.txtgraph]]
48 print_liste(self.pathout['liste_graph'], graph_simi)
# preferences: show a StatDialog modally and store the 'lem' setting in
# self.parametres, which is then returned.
# NOTE(review): the lines that test `val`, assign `lem` and handle any other
# outcome of the dialog are not visible in this excerpt (embedded lines
# 52, 54, 56-58, 60 and 62-65 are missing).
50 def preferences(self) :
51 dial = StatDialog(self, self.parent)
53 val = dial.ShowModal()
# Selection 0 of the radio_lem control is the case tested here; the value
# given to `lem` in each branch is in the missing lines.
55 if dial.radio_lem.GetSelection() == 0 :
59 self.parametres['lem'] = lem
61 return self.parametres
# makesimiparam: build the self.paramsimi dictionary and merge it into
# self.parametres with dict.update().
# NOTE(review): only the 'coeff' and 'cola' entries of the literal are
# visible; the remaining entries and the closing brace are in missing lines.
66 def makesimiparam(self) :
67 self.paramsimi = {'coeff' : 0,
89 'cola' : (200,200,200),
97 #'ira' : self.pathout['Analyse.ira']
99 self.parametres.update(self.paramsimi)
# makefiles: record bookkeeping parameters and write the analysis input
# files for the current self.actives.
#   lim -- value stored under parametres['eff_min_forme'] (default 3).
101 def makefiles(self, lim=3) :
102 #self.actives, lim = self.corpus.make_actives_nb(self.parametres.get('max_actives',1500), 1)
103 self.parametres['eff_min_forme'] = lim
104 self.parametres['nbactives'] = len(self.actives)
105 self.parametres['fromprof'] = True
# The corpus call receives the active forms and the 'mat01.csv' and
# 'listeuce1.csv' output paths.
106 self.corpus.make_and_write_sparse_matrix_from_uces(self.actives, self.pathout['mat01.csv'], self.pathout['listeuce1.csv'])
# Write the active forms, newline-joined and encoded with ira.syscoding, to
# 'actives.csv'.
# NOTE(review): writing an encoded string to a file opened in 'w' mode is a
# Python 2 idiom (the file also imports the Python 2 `ConfigParser` module);
# it would need changing for Python 3.
107 with open(self.pathout['actives.csv'], 'w') as f :
108 f.write('\n'.join(self.actives).encode(self.ira.syscoding))
# Keep the result of corpus.make_etoiles() on the instance and store a copy
# of it under parametres['stars'].
110 self.listet = self.corpus.make_etoiles()
112 self.parametres['stars'] = copy(self.listet)
113 self.parametres['sfromchi'] = False
117 # self.tableau = Tableau(self.parent, '')
118 # self.tableau.listactives = self.actives
119 # self.tableau.parametre['fromtxt'] = True
120 # self.corpus.lems_eff = dict([[lem,[self.corpus.lems[lem].freq]] for lem in self.actives])
121 # #print('ATTENTION ETOILES')
122 # #self.paramsimi['bystar'] = True
123 # self.tableau.listet = copy(self.listet)
124 # #self.paramsimi['cexfromchi'] = True
125 # #self.paramsimi['vlabcolor'] = True
126 # self.tableau.actives = copy(self.corpus.lems_eff)
127 # DoSimi(self, fromprof = self.pathout['mat01.csv'], param = self.paramsimi, pathout = self.pathout.dirout)
130 # def __init__(self, parent, cmd = False, param = None):
131 # self.parent = parent
133 # self.ConfigPath = parent.ConfigPath
134 # self.DictPath = parent.DictPath
135 # self.KeyConf = RawConfigParser()
136 # self.KeyConf.read(self.ConfigPath['key'])
137 # self.indices = indices_simi
138 # self.paramsimi = {'coeff' : 0,
149 # 'coeff_temax' : 10,
153 # 'cexfromchi' : False,
159 # 'cols' : (255,0,0),
160 # 'cola' : (200,200,200),
165 # 'keep_coord' : True,
169 # page = getPage(self.parent)
170 # if page is not None :
171 # self.corpus = getCorpus(page)
172 # if self.corpus is not None :
173 # self.pathout = ConstructPathOut(self.corpus.parametre['openpath'], 'simitxt')
174 # self.dictpathout = construct_simipath(self.pathout)
175 # self.val = wx.ID_OK
179 # self.corpus = Corpus(parent)
180 # self.corpus.content = self.parent.content
181 # self.corpus.parametre['encodage'] = parent.corpus_encodage
182 # self.corpus.parametre['lang'] = parent.corpus_lang
183 # self.corpus.parametre['filename'] = parent.filename
184 # self.corpus.parametre['eff_min_uce'] = None
185 # self.pathout = ConstructPathOut(self.corpus.parametre['filename'], 'simitxt')
186 # self.dictpathout = construct_simipath(self.pathout)
187 # dial = StatDialog(self, self.parent)
188 # dial.check_uce.SetValue(True)
189 # dial.check_uce.Enable(False)
190 # dial.OnCheckUce(wx.EVT_MENU)
191 # self.val = dial.ShowModal()
192 # if self.val == wx.ID_OK :
193 # with open(self.parent.ConfigPath['key'], 'w') as f:
194 # self.KeyConf.write(f)
195 # if dial.radio_lem.GetSelection() == 0 : lem = True
197 # if dial.exp.GetSelection() == 0 : exp = True
200 # self.corpus.parametre['lem'] = lem
201 # self.corpus.parametre['expressions'] = exp
202 # self.corpus.parametre['nbforme_uce'] = dial.spin_ctrl_4.GetValue()
203 # self.corpus.parametre['eff_min_forme'] = dial.spin_ctrl_5.GetValue()
204 # self.corpus.parametre['max_actives'] = dial.spin_max_actives.GetValue()
211 # def make_corpus(self) :
212 # print 'make corpus'
214 # dlg = progressbar(self, maxi = 6)
215 # self.corpus.supplementaires = [option for option in self.KeyConf.options('KEYS') if self.KeyConf.get('KEYS', option) == "2"]
216 # self.corpus.typeactive = [option for option in self.KeyConf.options('KEYS') if self.KeyConf.get('KEYS', option) == "1"]
217 # ucis_txt, ucis_paras_txt = self.corpus.start_analyse(self.parent, dlg = dlg, cmd = self.cmd)
221 # dlg.Update(5, '%i ucis - Construction des uces' % len(ucis_paras_txt))
222 # self.corpus.make_ucis_paras_uces(ucis_paras_txt, make_uce = True)
225 # if self.corpus.para_coords != [[] for val in self.corpus.para_coords] :
226 # self.corpus.parametre['para'] = True
228 # self.corpus.parametre['para'] = False
229 # self.corpus.make_etoiles(self.corpus.para_coords)
230 # print 'len(ucis_paras_uces)', len(self.corpus.ucis_paras_uces)
233 # dlg.Update(6, u'Dictionnaires')
234 # uces, self.orderuces = self.corpus.make_forms_and_uces()
235 # self.corpus.ucenb = len(uces)
236 # self.corpus.make_lems(self.parent.lexique)
238 # self.corpus.make_var_actives()
239 # self.corpus.make_var_supp()
240 # self.corpus.lems_eff = self.corpus.make_lem_eff()
242 # #variables = treat_var_mod(listet)
244 # #self.corpus.write_etoiles(self.dictpathout['etoiles'])
248 # def make_table(self) :
249 # if 'orderuces' not in dir(self) :
250 # self.orderuces = [(i,j,k) for i, uci in enumerate(self.corpus.ucis_paras_uces) for j, para in enumerate(uci) for k, uce in enumerate(para)]
251 # self.orderuces = dict([[val, i] for i, val in enumerate(self.orderuces)])
252 # self.corpus.ucenb = len(self.orderuces)
253 # #tabuc1 = self.corpus.make_table_with_uce(self.orderuces)
254 # #tabuc1.insert(0,self.corpus.actives)
255 # #tabuc1 = self.corpus.make_sparse_matrix_with_uce(self.orderuces)
256 # #self.corpus.write_sparse_matrix(self.dictpathout['mat01'], tabuc1, self.corpus.ucenb, len(self.corpus.actives))
257 # if self.corpus.actives is None :
258 # self.corpus.typeactive = [option for option in self.KeyConf.options('KEYS') if self.KeyConf.get('KEYS', option) == "1"]
259 # self.corpus.min_eff_formes()
260 # self.corpus.make_var_actives()
261 # self.corpus.make_and_write_sparse_matrix_from_uce(self.orderuces, self.dictpathout['mat01'])
262 # #self.corpus.write_tab(tabuc1,self.dictpathout['mat01'])
264 # def make_simi(self) :
265 # self.tableau = Tableau(self.parent, '')
266 # self.tableau.listactives = self.corpus.actives
267 # self.tableau.parametre['fromtxt'] = True
268 # if 'lems_eff' not in dir(self.corpus) :
269 # self.corpus.lems_eff = self.corpus.make_lem_eff()
270 # #print('ATTENTION ETOILES')
271 # #self.paramsimi['bystar'] = True
272 # self.listet = self.corpus.get_unique_etoiles()
274 # self.tableau.listet = copy(self.listet)
275 # self.paramsimi['stars'] = copy(self.listet)
276 # #self.paramsimi['cexfromchi'] = True
277 # self.paramsimi['sfromchi'] = False
278 # #self.paramsimi['vlabcolor'] = True
279 # self.tableau.actives = dict([[lem, self.corpus.lems_eff[lem]] for lem in self.corpus.actives])
280 # self.corpus.save_corpus(self.dictpathout['corpus'])
281 # DoSimi(self, fromprof = self.dictpathout['mat01'], param = self.paramsimi, pathout = self.pathout)