2 # -*- coding: utf-8 -*-
3 #Author: Pierre Ratinaud
4 #Copyright (c) 2008-2009 Pierre Ratinaud
7 from chemins import ConstructPathOut, ChdTxtPathOut
8 from OptionAlceste import OptionAlc
11 from ConfigParser import RawConfigParser
13 from functions import sortedby, print_liste, CreateIraFile, exec_rcode, ReadDicoAsDico, check_Rresult
14 from dialog import PrefGraph
15 from layout import OpenCHDS, PrintRapport
16 from PrintRScript import RchdTxt, AlcesteTxtProf
17 from openanalyse import OpenAnalyse
18 from corpus import Corpus
19 from guifunct import getPage, getCorpus
20 from time import time, sleep
22 class AnalyseAlceste :
# Constructor: builds the Corpus object from the parent's settings, loads the
# 'alceste' and 'key' configuration files, shows the OptionAlc dialog and, when
# the dialog is validated, prepares the output paths and analysis parameters.
# parent : object providing corpus_encodage, corpus_lang, filename,
#          ConfigPath and DictPath (all read below).
# cmd, big : flags defaulting to False.
# NOTE(review): several statements of this method are not visible in this view
# (including the `try` matching the `except` below and whatever stores
# parent/cmd/big on self) - confirm against the full file.
23 def __init__(self,parent, cmd = False, big = False):
29 #page = getPage(self.parent)
30 #if page is not None :
31 # self.corpus = getCorpus(page)
32 # print self.corpus.parametre
# The corpus parameters are seeded from values held by the parent.
35 self.corpus = Corpus(parent)
36 self.corpus.parametre['encodage'] = parent.corpus_encodage
37 self.corpus.parametre['lang'] = parent.corpus_lang
38 self.corpus.parametre['filename'] = parent.filename
39 self.ConfigPath = parent.ConfigPath
40 self.DictPath = parent.DictPath
# Two separate parsers: one for the 'alceste' file, one for the 'key' file.
41 self.AlcesteConf = RawConfigParser()
42 self.AlcesteConf.read(self.ConfigPath['alceste'])
43 self.KeyConf = RawConfigParser()
44 self.KeyConf.read(self.ConfigPath['key'])
# Show the options dialog modally; self.val holds the value returned by ShowModal().
46 self.dial = OptionAlc(self, parent)
47 self.dial.CenterOnParent()
48 self.val = self.dial.ShowModal()
# Writes the dialog values into the configuration files (see print_alceste_parametre).
49 self.print_alceste_parametre()
# Dialog validated with OK: build output paths, then load the parameters just saved.
53 if self.val == wx.ID_OK :
54 pathout = ConstructPathOut(self.corpus.parametre['filename'], 'alceste')
55 self.dictpathout = ChdTxtPathOut(pathout)
56 self.read_alceste_parametre()
# The key file is read again into the same parser before it is queried.
57 self.KeyConf.read(self.ConfigPath['key'])
# Options of section KEYS whose value is "2" go to supplementaires, those with "1" to typeactive.
58 self.corpus.supplementaires = [option for option in self.KeyConf.options('KEYS') if self.KeyConf.get('KEYS', option) == "2"]
59 self.corpus.typeactive = [option for option in self.KeyConf.options('KEYS') if self.KeyConf.get('KEYS', option) == "1"]
# On AttributeError a message box (French text) tells the user that a corpus must be loaded first.
61 except AttributeError :
62 wx.MessageBox(u'Vous devez charger un corpus\nFichier -> Ouvrir un texte', u'Pas de corpus', wx.OK|wx.ICON_INFORMATION)
65 def load_corpus(self):
66 self.corpus.read_formes()
67 self.corpus.read_lems()
68 self.corpus.read_ucis_paras_uces()
69 self.corpus.read_parametre()
71 def print_alceste_parametre(self):
72 self.conf = RawConfigParser()
73 self.conf.read(self.parent.ConfigPath['alceste'])
74 lem = self.dial.radio_1.GetSelection()
75 expressions = self.dial.radio_exp.GetSelection()
76 if lem == 0 : lem = 'True'
78 if expressions == 0 : expressions = 'True'
79 else : expressions = 'False'
80 self.AlcesteConf.set('ALCESTE','lem',lem)
81 self.AlcesteConf.set('ALCESTE', 'expressions', expressions)
82 self.AlcesteConf.set('ALCESTE', 'classif_mode', str(self.dial.radio_box_2.GetSelection()))
83 self.AlcesteConf.set('ALCESTE', 'tailleuc1', str(self.dial.spin_ctrl_1.GetValue()))
84 self.AlcesteConf.set('ALCESTE', 'tailleuc2', str(self.dial.spin_ctrl_2.GetValue()))
85 self.AlcesteConf.set('ALCESTE', 'nbforme_uce', str(self.dial.spin_ctrl_3.GetValue()))
86 self.AlcesteConf.set('ALCESTE', 'mincl', str(self.dial.spin_ctrl_4.GetValue()))
87 self.AlcesteConf.set('ALCESTE', 'minforme', str(self.dial.spin_ctrl_5.GetValue()))
88 self.AlcesteConf.set('ALCESTE', 'nbcl_p1', str(self.dial.spin_nbcl.GetValue()))
89 self.AlcesteConf.set('ALCESTE', 'max_actives', str(self.dial.spin_max_actives.GetValue()))
90 with open(self.parent.ConfigPath['alceste'], 'w') as f:
91 self.AlcesteConf.write(f)
92 with open(self.parent.ConfigPath['key'], 'w') as f:
95 def read_alceste_parametre(self) :
96 self.conf = RawConfigParser()
97 self.conf.read(self.parent.ConfigPath['alceste'])
98 for option in self.conf.options('ALCESTE') :
99 if self.conf.get('ALCESTE', option).isdigit() :
100 self.corpus.parametre[option] = int(self.conf.get('ALCESTE', option))
102 self.corpus.parametre[option] = self.conf.get('ALCESTE', option)
104 list_bool = ['lem', 'expressions']
105 for var in list_bool :
106 if self.corpus.parametre[var] == 'True' :
107 self.corpus.parametre[var] = True
109 self.corpus.parametre[var] = False
# Runs the whole analysis: builds the text units of the corpus, writes the
# input tables, generates and launches the R scripts (classification, then
# profile graphs), writes the report and output files, and finally opens the
# result with OpenAnalyse.
# NOTE(review): a number of statements of this method are not visible in this
# view (e.g. where `temps` is computed, the default value of `R_max_mem`, and
# some branch lines) - confirm against the full file before changing anything.
111 def make_analyse(self) :
114 self.corpus.content = self.parent.content
# Progress dialog; its style makes it application-modal, auto-hiding,
# showing elapsed time and abortable (wx.PD_CAN_ABORT).
116 self.dlg = wx.ProgressDialog("Traitements",
117 "Veuillez patienter...",
120 style=wx.PD_APP_MODAL | wx.PD_AUTO_HIDE | wx.PD_ELAPSED_TIME | wx.PD_CAN_ABORT
# start_analyse returns two values, bound here to ucis_txt and ucis_paras_txt.
125 ucis_txt, ucis_paras_txt = self.corpus.start_analyse(self.parent, dlg = self.dlg, cmd = self.cmd)
126 # #print('ATTENTION PHRASE')
127 # #ucis_paras_txt = self.corpus.make_ucis_paras_txt_phrases(para_coords, ucis_lines, ucis_txt)
131 self.corpus.make_len_uce(self.corpus.get_tot_occ_from_ucis_txt(ucis_txt))
135 self.dlg.Update(5, '%i ucis - Construction des uces' % len(self.corpus.ucis))
# classif_mode 0 or 1: the units are built with make_uce = True.
# NOTE(review): the make_uce = False call below presumably belongs to the
# other mode; the line introducing that branch is not visible here.
137 if self.corpus.parametre['classif_mode'] in [0,1] :
138 self.corpus.make_ucis_paras_uces(ucis_paras_txt, make_uce = True)
139 #self.corpus.make_ucis_paras_uces_sentences(ucis_paras_txt, make_uce = True)
141 self.corpus.make_ucis_paras_uces(ucis_paras_txt, make_uce = False)
144 print 'len(ucis_paras_uces', len(self.corpus.ucis_paras_uces)
147 self.dlg.Update(6, u'Dictionnaires')
148 uces, orderuces = self.corpus.make_forms_and_uces()
# The number of uces is stored on the corpus.
152 self.corpus.ucenb = len(uces)
153 print 'len(uces)',len(uces)
154 #ucis_paras_uces_lems = self.corpus.make_ucis_paras_uces_lems(lexique)
155 self.corpus.make_lems(self.parent.lexique)
157 #----------------------------------------------
158 #self.corpus.make_eff_min_forme()
159 #self.corpus.parametre['max_actives'] = 1500
160 self.corpus.min_eff_formes()
161 #----------------------------------------------
163 self.corpus.make_var_actives()
165 self.corpus.make_var_supp()
168 self.dlg.Update(7, u'Creation des tableaux')
# classif_mode 1: the table is written from the uces; listeuce1 maps each
# index in range(len(uces)) to itself.
169 if self.corpus.parametre['classif_mode'] == 1 :
170 #tabuc1 = self.corpus.make_sparse_matrix_with_uce(orderuces)
171 #self.corpus.write_sparse_matrix(self.dictpathout['TableUc1'], tabuc1, len(orderuces), len(self.corpus.actives))
174 self.corpus.make_and_write_sparse_matrix_from_uce(orderuces, self.dictpathout['TableUc1'])
177 #tabuc1 = self.corpus.make_table_with_uce(orderuces)
# uces1: [uce, orderuces[uce]] pairs, passed through sortedby(..., 1)
# (presumably a sort on the second element - verify in functions.sortedby).
179 uces1 = [[uce, orderuces[uce]] for uce in orderuces]
180 uces1 = sortedby(uces1, 1)
181 self.corpus.lenuc1 = len(uces1)
183 self.corpus.write_tab([[u'uce',u'uc']] + [[i, i] for i in range(0,len(uces))], self.dictpathout['listeuce1'])
# classif_mode 0: uces are grouped with make_uc using parameter 'tailleuc1';
# listeuce1 then maps each position to the group value of the sorted pairs.
184 elif self.corpus.parametre['classif_mode'] == 0 :
185 self.corpus.lenuc1, uces_uc1 = self.corpus.make_uc(uces, orderuces, self.corpus.parametre['tailleuc1'])
186 print 'len uc1 : ', self.corpus.lenuc1
187 uces1 = [[uce,uces_uc1[uce]] for uce in orderuces]
188 uces1 = sortedby(uces1, 1)
189 self.corpus.write_tab([[u'uce',u'uc']] + [[i, val[1]] for i, val in enumerate(uces1)], self.dictpathout['listeuce1'])
190 #tabuc1 = self.corpus.make_tab_uc(uces_uc1, uc1)
191 self.corpus.make_and_write_sparse_matrix_from_uc(uces_uc1, self.dictpathout['TableUc1'])
# classif_mode 2: the table is written from the ucis; listeuce1 maps each
# index in range(len(orderuces)) to itself.
192 elif self.corpus.parametre['classif_mode'] == 2 :
193 #tabuc1 = self.corpus.make_table_with_uci()
194 self.corpus.make_and_write_sparse_matrix_from_uci(self.dictpathout['TableUc1'])
196 uces1 = [[uce, orderuces[uce]] for uce in orderuces]
197 uces1 = sortedby(uces1, 1)
198 self.corpus.lenuc1 = len(uces1)
199 self.corpus.write_tab([[u'uce',u'uc']] + [[i, i] for i in range(0,len(orderuces))], self.dictpathout['listeuce1'])
201 #self.corpus.write_sparse_matrix(self.dictpathout['TableUc1'], tabuc1, len(orderuces), len(self.corpus.actives))
202 #self.corpus.write_tab(tabuc1,self.dictpathout['TableUc1'])
# classif_mode 0 only: a second grouping with parameter 'tailleuc2' produces
# TableUc2 and listeuce2.
204 if self.corpus.parametre['classif_mode'] == 0 :
205 self.corpus.lenuc2, uces_uc2 = self.corpus.make_uc(uces, orderuces, self.corpus.parametre['tailleuc2'])
206 print 'len uc2 :', self.corpus.lenuc2
207 #tabuc2 = self.corpus.make_tab_uc(uces_uc2, uc2)
208 self.corpus.make_and_write_sparse_matrix_from_uc(uces_uc2, self.dictpathout['TableUc2'])
209 #self.corpus.write_tab(tabuc2, self.dictpathout['TableUc2'])
210 uces2 = [[uce, uces_uc2[uce]] for uce in orderuces]
211 uces2 = sortedby(uces2, 1)
212 self.corpus.write_tab([[u'uce',u'uc']] + [[i, val[1]] for i, val in enumerate(uces2)], self.dictpathout['listeuce2'])
# On win32, when preference 'R_mem' is enabled, R_max_mem is read from the
# preferences. NOTE(review): the value used otherwise is set on lines not
# visible here - confirm.
214 if sys.platform == 'win32' :
216 if self.parent.pref.getboolean('iramuteq', 'R_mem') :
217 R_max_mem = self.parent.pref.getint('iramuteq','R_max_mem')
# Generate the R script for the classification; nbt is 'nbcl_p1' minus one.
224 RchdTxt(self.dictpathout, self.parent.RscriptsPath, self.corpus.parametre['mincl'], self.corpus.parametre['classif_mode'], nbt = self.corpus.parametre['nbcl_p1'] - 1, libsvdc = self.parent.pref.getboolean('iramuteq','libsvdc'), libsvdc_path = self.parent.pref.get('iramuteq','libsvdc_path'), R_max_mem = R_max_mem)
# The script is started without waiting; the dialog is pulsed for as long as
# pid.poll() returns None, then the result is checked.
226 pid = exec_rcode(self.parent.RPath,self.dictpathout['Rchdtxt'],wait = False)
227 while pid.poll() == None :
229 self.dlg.Pulse(u'CHD...')
233 check_Rresult(self.parent, pid)
235 self.dlg.Update(7, u'Profils')
# ucecl is iterated as (uce, class) pairs read from the R output.
# ucecl0: classes different from 0; clnb: number of distinct non-zero
# classes; classes: class of every pair.
236 ucecl = self.corpus.read_uce_from_R(self.dictpathout['uce'])
237 ucecl0 = [cl for uce,cl in ucecl if cl != 0]
238 clnb = len(list(set(ucecl0)))
239 classes = [cl for uce, cl in ucecl]
# Keep only the first element of each pair built in the classif_mode
# branches above (uces1 is only assigned there, for modes 0, 1 and 2).
240 uces1 = [val for val, i in uces1]
242 self.corpus.make_lc(uces1, classes, clnb)
# Profiles are built for the active forms, then for the supplementary ones.
243 self.corpus.build_profile(clnb, classes, self.corpus.actives, self.dictpathout['Contout'])
245 #passives = [lem for lem in self.corpus.lems if lem not in self.corpus.actives]
246 self.corpus.build_profile(clnb, classes, self.corpus.supp, self.dictpathout['ContSupOut'])
248 self.corpus.make_etoiles(self.corpus.para_coords)
250 self.corpus.build_profile_et(clnb, classes, uces1, self.dictpathout['ContEtOut'])
# Second R script (profile graphs), run with the same poll/pulse loop.
252 AlcesteTxtProf(self.dictpathout, self.parent.RscriptsPath, clnb, '0.9')
254 pid = exec_rcode(self.parent.RPath, self.dictpathout['RTxtProfGraph'],wait = False)
255 while pid.poll() == None :
257 self.dlg.Pulse(u'AFC...')
261 check_Rresult(self.parent, pid)
263 self.corpus.dictpathout = self.dictpathout
266 self.dlg.Update(8, u'rapport')
# `temps` (assigned on a line not visible in this view; presumably the elapsed
# time in seconds) is split into hours, minutes and seconds with divmod.
269 self.corpus.minutes, self.corpus.seconds = divmod(temps, 60)
270 self.corpus.hours, self.corpus.minutes = divmod(self.corpus.minutes, 60)
272 PrintRapport(self.corpus, 'txt')
274 CreateIraFile(self.dictpathout, clnb, os.path.basename(self.corpus.parametre['filename']))
276 self.corpus.save_corpus(self.dictpathout['db'])
# Lists of [file basename, caption] pairs describing the generated graphs;
# they are written with print_liste below.
278 afc_graph_list = [[os.path.basename(self.dictpathout['AFC2DL_OUT']), u'Variables actives - coordonnées - facteurs 1 / 2'],
279 [os.path.basename(self.dictpathout['AFC2DSL_OUT']), u'variables supplémentaires - coordonnées - facteurs 1 / 2'],
280 [os.path.basename(self.dictpathout['AFC2DEL_OUT']), u'Variables illustratives - Coordonnées - facteur 1 / 2'],
281 [os.path.basename(self.dictpathout['AFC2DCL_OUT']), u'Classes - Coordonnées - facteur 1 / 2'],
282 [os.path.basename(self.dictpathout['AFC2DCoul']), u'Variables actives - Corrélation - facteur 1 / 2'],
283 [os.path.basename(self.dictpathout['AFC2DCoulSup']), u'Variables supplémentaires - Corrélation - facteur 1 / 2'],
284 [os.path.basename(self.dictpathout['AFC2DCoulEt']), u'Variables illustratives - Corrélations - facteur 1 / 2'],
285 [os.path.basename(self.dictpathout['AFC2DCoulCl']), u'Classes - Corrélations - facteurs 1 / 2'],]
# The second dendrogram and tree are listed only for classif_mode 0, the
# mode in which the second table is produced above.
286 chd_graph_list = [[os.path.basename(self.dictpathout['dendro1']), u'dendrogramme à partir de chd1']]
287 if self.corpus.parametre['classif_mode'] == 0 :
288 chd_graph_list.append([os.path.basename(self.dictpathout['dendro2']), u'dendrogramme à partir de chd2'])
289 chd_graph_list.append([os.path.basename(self.dictpathout['arbre1']), u'chd1'])
290 if self.corpus.parametre['classif_mode'] == 0 :
291 chd_graph_list.append([os.path.basename(self.dictpathout['arbre2']), u'chd2'])
292 print_liste(self.dictpathout['liste_graph_afc'],afc_graph_list)
293 print_liste(self.dictpathout['liste_graph_chd'],chd_graph_list)
295 self.dlg.Update(9, u'fin')
# Open the result file in the application.
297 OpenAnalyse(self.parent, self.dictpathout['ira'])
# NOTE(review): the statement governing this call is not visible in this
# view; presumably the alternative path taken when `big` is requested - confirm.
299 self.corpus.make_big()