1 # -*- coding: utf-8 -*-
2 #Author: Pierre Ratinaud
4 #copyright : 2012-2013 (c) Pierre Ratinaud
7 from chemins import PathOut, ChdTxtPathOut
8 from functions import exec_rcode, check_Rresult, DoConf, print_liste
9 from time import time, sleep
10 from uuid import uuid4
12 from PrintRScript import RchdTxt, AlcesteTxtProf
13 from OptionAlceste import OptionAlc
14 from layout import PrintRapport
15 from openanalyse import OpenAnalyse
16 from dialog import StatDialog
19 log = logging.getLogger('iramuteq.analyse')
22 def __init__(self, ira, corpus, parametres = None, dlg = False) :
28 self.parametres = parametres
30 if not 'pathout' in self.parametres :
31 self.pathout = PathOut(corpus.parametres['originalpath'], analyse_type = parametres['type'], dirout = corpus.parametres['pathout'])
33 self.pathout = PathOut(filename = corpus.parametres['originalpath'], dirout = self.parametres['pathout'], analyse_type = self.parametres['name'])
34 self.parametres = self.lemparam()
35 if self.parametres is not None :
36 self.parametres = self.make_config(parametres)
37 log.info(self.pathout.dirout)
38 if self.parametres is not None :
39 self.keys = DoConf(self.ira.ConfigPath['key']).getoptions()
40 gramact = [k for k in keys if keys[k] == 1]
41 gramsup = [k for k in keys if keys[k] == 2]
42 self.parametres['pathout'] = self.pathout.mkdirout()
43 self.pathout = PathOut(dirout = self.parametres['pathout'])
44 self.pathout.createdir(self.parametres['pathout'])
45 self.parametres['corpus'] = self.corpus.parametres['uuid']
46 self.parametres['uuid'] = str(uuid4())
47 self.parametres['name'] = os.path.split(self.parametres['pathout'])[1]
48 self.parametres['type'] = parametres['type']
49 self.parametres['encoding'] = self.ira.syscoding
51 self.corpus.make_lems(lem = self.parametres['lem'])
52 corpus.parse_active(gramact, gramsup)
53 result_analyse = self.doanalyse()
54 if result_analyse is None :
55 self.time = time() - self.t1
56 minutes, seconds = divmod(self.time, 60)
57 hours, minutes = divmod(minutes, 60)
58 self.parametres['time'] = '%.0fh %.0fm %.0fs' % (hours, minutes, seconds)
59 self.parametres['ira'] = self.pathout['Analyse.ira']
60 DoConf().makeoptions([self.parametres['type']], [self.parametres], self.pathout['Analyse.ira'])
61 self.ira.history.add(self.parametres)
64 OpenAnalyse(self.parent, self.parametres['ira'])
65 self.ira.tree.AddAnalyse(self.parametres)
81 dial = StatDialog(self, self.parent)
83 val = dial.ShowModal()
85 if dial.radio_lem.GetSelection() == 0 :
89 self.parametres['lem'] = lem
91 return self.parametres
96 return self.parametres
98 def make_config(self, config) :
99 if config is not None :
103 return self.preferences()
107 def readconfig(self, config) :
def preferences(self):
    """Return the analysis parameters currently stored on the instance.

    This default implementation performs no user interaction: it hands
    back ``self.parametres`` unchanged.
    """
    current = self.parametres
    return current
113 def printRscript(self) :
116 def doR(self, Rscript, wait = False, dlg = None, message = '') :
117 log.info('R code...')
118 pid = exec_rcode(self.ira.RPath, Rscript, wait = wait)
119 while pid.poll() is None :
121 self.dlg.Pulse(message)
125 return check_Rresult(self.ira, pid)
129 class Alceste(AnalyseText) :
def doanalyse(self):
    """Run the 'alceste' analysis pipeline on ``self.corpus``.

    Steps, in order:

    1. mark the parameters as type ``'alceste'`` and register the
       ``ChdTxtPathOut`` file names on ``self.pathout``;
    2. compute ``self.actives`` and write the sparse matrix that matches
       ``parametres['classif_mode']`` (0, 1 or 2);
    3. run the script returned by ``printRscript`` (message 'CHD...');
    4. read the classes back and write the three profile tables;
    5. run the script returned by ``printRscript2``;
    6. record the elapsed time and write the graph lists / report.

    The method has no explicit return value (it returns ``None``).
    Results are stored in ``self.parametres``, ``self.actives``,
    ``self.sup``, ``self.clnb`` and ``self.time``.
    """
    params = self.parametres
    out = self.pathout
    corpus = self.corpus

    params['type'] = 'alceste'
    out.basefiles(ChdTxtPathOut)

    # First selection (second argument 1); the limit returned alongside
    # the list is kept in the parameters as 'eff_min_forme'.
    self.actives, limit = corpus.make_actives_nb(params['max_actives'], 1)
    params['eff_min_forme'] = limit
    params['nbactives'] = len(self.actives)

    # Matrix construction depends on the classification mode; any other
    # value than 0, 1 or 2 writes nothing here.
    mode = params['classif_mode']
    if mode == 0:
        lenuc1, lenuc2 = corpus.make_and_write_sparse_matrix_from_uc(
            self.actives,
            params['tailleuc1'],
            params['tailleuc2'],
            out['TableUc1'],
            out['TableUc2'],
            out['listeuce1'],
            out['listeuce2'])
        params['lenuc1'] = lenuc1
        params['lenuc2'] = lenuc2
    elif mode == 1:
        corpus.make_and_write_sparse_matrix_from_uces(
            self.actives, out['TableUc1'], out['listeuce1'])
    elif mode == 2:
        corpus.make_and_write_sparse_matrix_from_uci(
            self.actives, out['TableUc1'], out['listeuce1'])

    # First R pass.
    chd_script = self.printRscript()
    self.doR(chd_script, dlg = self.dlg, message = 'CHD...')

    # Profiles are computed from the classes read back from the R output.
    corpus.make_ucecl_from_R(out['uce'])
    corpus.make_and_write_profile(self.actives, corpus.lc, out['Contout'])
    # Second selection (second argument 2); its limit is not used.
    self.sup, _unused_limit = corpus.make_actives_nb(params['max_actives'], 2)
    corpus.make_and_write_profile(self.sup, corpus.lc, out['ContSupOut'])
    corpus.make_and_write_profile_et(corpus.lc, out['ContEtOut'])

    self.clnb = len(corpus.lc)
    params['clnb'] = self.clnb

    # Second R pass.
    profile_script = self.printRscript2()
    self.doR(profile_script, dlg = self.dlg, message = 'profils et A.F.C. ...')

    # Elapsed time since self.t1, formatted as hours / minutes / seconds.
    self.time = time() - self.t1
    total_minutes, seconds = divmod(self.time, 60)
    hours, minutes = divmod(total_minutes, 60)
    params['time'] = '%.0fh %.0fm %.0fs' % (hours, minutes, seconds)

    self.print_graph_files()
161 def preferences(self) :
162 parametres = DoConf(self.parent.ConfigPath['alceste']).getoptions('ALCESTE')
163 parametres['corpus'] = self.corpus
164 parametres['pathout'] = self.pathout
165 self.dial = OptionAlc(self.parent, parametres)
166 self.dial.CenterOnParent()
167 self.dialok = self.dial.ShowModal()
168 if self.dialok == 5100 :
169 parametres['classif_mode'] = self.dial.radio_box_2.GetSelection()
170 parametres['tailleuc1'] = self.dial.spin_ctrl_1.GetValue()
171 parametres['tailleuc2'] = self.dial.spin_ctrl_2.GetValue()
172 parametres['mincl'] = self.dial.spin_ctrl_4.GetValue()
173 parametres['minforme'] = self.dial.spin_ctrl_5.GetValue()
174 parametres['nbcl_p1'] = self.dial.spin_nbcl.GetValue()
175 parametres['max_actives'] = self.dial.spin_max_actives.GetValue()
176 parametres['corpus'] = ''
177 parametres['svdmethod'] = self.dial.svdmethod[self.dial.choicesvd.GetSelection()]
178 parametres['pathout'] = self.pathout.dirout
179 parametres['mode.patate'] = self.dial.check_patate.GetValue()
180 DoConf(self.parent.ConfigPath['alceste']).makeoptions(['ALCESTE'], [parametres])
def printRscript(self):
    """Call ``RchdTxt`` with the current parameters and return the script path.

    The value returned is ``self.pathout['Rchdtxt']``; ``doanalyse`` passes
    it to ``doR`` for the first R pass.
    """
    params = self.parametres
    prefs = self.parent.pref
    RchdTxt(
        self.pathout,
        self.parent.RscriptsPath,
        params['mincl'],
        params['classif_mode'],
        # The option stored as 'nbcl_p1' is passed to R minus one.
        nbt = params['nbcl_p1'] - 1,
        svdmethod = params['svdmethod'],
        libsvdc = prefs.getboolean('iramuteq', 'libsvdc'),
        libsvdc_path = prefs.get('iramuteq', 'libsvdc_path'),
        R_max_mem = False,
        mode_patate = params['mode.patate'])
    return self.pathout['Rchdtxt']
def printRscript2(self):
    """Call ``AlcesteTxtProf`` and return ``self.pathout['RTxtProfGraph']``.

    ``doanalyse`` passes the returned value to ``doR`` for the second R
    pass. ``self.clnb`` must already be set when this is called.
    """
    scripts_dir = self.parent.RscriptsPath
    # NOTE(review): meaning of the literal 0.9 is not visible here — confirm
    # against AlcesteTxtProf's signature.
    AlcesteTxtProf(self.pathout, scripts_dir, self.clnb, 0.9)
    script_path = self.pathout['RTxtProfGraph']
    return script_path
def print_graph_files(self):
    """Write the AFC and CHD graph lists, then build the report.

    Each list entry is ``[file base name, caption]``. The CHD list gains
    the 'dendro2' and 'arbre2' entries only when
    ``parametres['classif_mode']`` is 0. Both lists are written with
    ``print_liste`` and ``PrintRapport`` is called last.
    """
    out = self.pathout
    basename = os.path.basename

    mess_afc = u"La position des points n'est peut être pas exacte"
    afc_entries = (
        ('AFC2DL_OUT', u'Variables actives - coordonnées - 30 points par classes - facteurs 1 / 2 - %s' % mess_afc),
        ('AFC2DSL_OUT', u'variables supplémentaires - coordonnées - 30 points par classes - facteurs 1 / 2 - %s' % mess_afc),
        ('AFC2DEL_OUT', u'Variables illustratives - Coordonnées - 30 points par classes - facteur 1 / 2 - %s' % mess_afc),
        ('AFC2DCL_OUT', u'Classes - Coordonnées - facteur 1 / 2'),
    )
    afc_graph_list = [[basename(out[key]), caption] for key, caption in afc_entries]

    # Order matters for the output file: dendro1, [dendro2], arbre1, [arbre2].
    chd_graph_list = [[basename(out['dendro1']), u'dendrogramme à partir de chd1']]
    with_second_chd = self.parametres['classif_mode'] == 0
    if with_second_chd:
        chd_graph_list.append([basename(out['dendro2']), u'dendrogramme à partir de chd2'])
    chd_graph_list.append([basename(out['arbre1']), u'chd1'])
    if with_second_chd:
        chd_graph_list.append([basename(out['arbre2']), u'chd2'])

    print_liste(out['liste_graph_afc'], afc_graph_list)
    print_liste(out['liste_graph_chd'], chd_graph_list)
    PrintRapport(self, self.corpus, self.parametres)
213 keys = {'art_def' : 2,
242 gramact = [k for k in keys if keys[k] == 1]
243 gramsup = [k for k in keys if keys[k] == 2]