1 # -*- coding: utf-8 -*-
2 #Author: Pierre Ratinaud
3 #from corpusNG import Corpus
5 from chemins import PathOut, ChdTxtPathOut
6 from functions import exec_rcode, check_Rresult, DoConf, print_liste
7 from time import time, sleep
11 from PrintRScript import RchdTxt, AlcesteTxtProf
12 from OptionAlceste import OptionAlc
13 from layout import PrintRapport
14 from openanalyse import OpenAnalyse
16 ######################################
# Debug banner written to stdout each time this module is imported (Python 2 print statement).
17 print '#######LOGGING TEST###########'
# Module-wide logger; the methods below report progress through log.info().
# NOTE(review): no `import logging` is visible in this listing -- confirm it sits in the elided import lines.
18 log = logging.getLogger('iramuteq.analyse')
# Disabled experiment: a StreamHandler with a timestamped formatter and a forced INFO level.
19 #formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
20 #ch = logging.StreamHandler()
21 #ch.setFormatter(formatter)
23 #log.setLevel(logging.INFO)
24 #######################################
26 #def make_ucecl_from_R(filein) :
27 # with open(filein, 'rU') as f :
32 # line = line.replace('\n', '').replace('"', '').split(';')
33 # ucecl.append([int(line[0]) - 1, int(line[1])])
34 # classesl = [val[1] for val in ucecl]
35 # clnb = max(classesl)
36 # ucecl = sorted(ucecl, key=itemgetter(1))
37 # ucecl = [[uce[0] for uce in ucecl if uce[1] == i] for i in range(clnb+1)]
40 #def make_lc(self, uces, classes, clnb) :
41 # self.lc = [[] for classe in range(0,clnb)]
42 # for i in range(0,clnb):
43 # self.lc[i] = [uce for j, uce in enumerate(uces) if i+1 == classes[j]]
44 # self.lc0 = [uce for j, uce in enumerate(uces) if 0 == classes[j]]
# Constructor: prepares the output location and the settings dict, builds the
# lemmatised / active-form views of the corpus, calls doanalyse() and, when that
# returns None, records the finished analysis (Analyse.ira file, history, tree, result view).
#   ira        -- application object; read below for ConfigPath, syscoding, history and tree
#   corpus     -- corpus to analyse; must expose `parametres`, make_lems() and parse_active()
#   parametres -- settings dict; parametres['type'] is dereferenced right below
#   dlg        -- not read in the lines visible here
# NOTE(review): several original lines of this method are elided from the listing.
# self.ira, self.corpus, self.parent and self.t1 are read below but never assigned in
# the visible lines -- presumably they are set in the elided lines after the def; confirm.
48 def __init__(self, ira, corpus, parametres = None, dlg = False) :
54 self.parametres = parametres
# NOTE(review): parametres['type'] is evaluated before the None check further down, so the
# None default would raise here unless an elided line replaces it first -- confirm.
55 self.pathout = PathOut(corpus.parametres['originalpath'], analyse_type = parametres['type'], dirout = corpus.parametres['pathout'])
# make_config() yields the effective settings; None skips everything guarded by the test below.
56 self.parametres = self.make_config(parametres)
57 log.info(self.pathout.dirout)
58 if self.parametres is not None :
59 self.keys = DoConf(self.ira.ConfigPath['key']).getoptions()
# NOTE(review): the two comprehensions read the bare name `keys`, not the self.keys loaded
# just above; a `keys = {...}` assignment appears near the end of this file. If the
# configured options were meant to be used, this should probably be self.keys -- confirm.
60 gramact = [k for k in keys if keys[k] == 1]
61 gramsup = [k for k in keys if keys[k] == 2]
# Default 'lem' to 1 when the settings do not provide it.
63 if not 'lem' in self.parametres :
64 self.parametres['lem'] = 1
# The directory returned by mkdirout() becomes the output dir; PathOut is rebuilt around it.
65 self.parametres['pathout'] = self.pathout.mkdirout()
66 self.pathout = PathOut(dirout = self.parametres['pathout'])
67 self.pathout.createdir(self.parametres['pathout'])
# Metadata stored with the analysis: owning corpus uuid, a fresh uuid for this run,
# a name taken from the last component of the output path, the type and the encoding.
68 self.parametres['corpus'] = self.corpus.parametres['uuid']
69 self.parametres['uuid'] = str(uuid4())
70 self.parametres['name'] = os.path.split(self.parametres['pathout'])[1]
71 self.parametres['type'] = parametres['type']
72 self.parametres['encoding'] = self.ira.syscoding
# The guard on corpus.lems is commented out, so make_lems() is not conditioned on it.
74 #if self.corpus.lems is None :
75 self.corpus.make_lems(lem = self.parametres['lem'])
76 corpus.parse_active(gramact, gramsup)
77 result_analyse = self.doanalyse()
# A None return from doanalyse() is the case handled here (what happens otherwise is elided).
78 if result_analyse is None :
# Elapsed time since self.t1, split into hours / minutes / seconds for display.
79 self.time = time() - self.t1
80 minutes, seconds = divmod(self.time, 60)
81 hours, minutes = divmod(minutes, 60)
82 self.parametres['time'] = '%.0fh %.0fm %.0fs' % (hours, minutes, seconds)
# Write the settings to the Analyse.ira file and register the run in the history.
83 self.parametres['ira'] = self.pathout['Analyse.ira']
84 DoConf().makeoptions([self.parametres['type']], [self.parametres], self.pathout['Analyse.ira'])
85 self.ira.history.add(self.parametres)
# Hand the saved analysis to OpenAnalyse and add it to the application's tree.
88 OpenAnalyse(self.parent, self.parametres['ira'])
89 self.ira.tree.AddAnalyse(self.parametres)
# Runs the analysis itself; __init__ calls it and handles a None return value.
# The body is not visible in this listing; the Alceste class defines its own doanalyse().
100 def doanalyse(self) :
# Produces the settings that __init__ stores in self.parametres from the `config` it received.
# Only part of the body is visible: the statements following the `config is not None`
# test are elided, and at least one path returns self.preferences().
# NOTE(review): which condition guards the preferences() call cannot be determined from
# the visible lines -- check the full source.
103 def make_config(self, config) :
104 if config is not None :
108 return self.preferences()
# Body not visible in this listing.
# NOTE(review): presumably derives the settings from an existing `config` -- confirm against the full source.
110 def readconfig(self, config) :
# Supplies the settings returned by make_config(); body not visible in this listing.
# The Alceste class defines its own, dialog-based preferences().
113 def preferences(self) :
# Body not visible in this listing. The Alceste version of this method writes an R
# script through RchdTxt and returns the script's path.
116 def printRscript(self) :
# Launches an R script through exec_rcode() and polls the returned process object
# until poll() stops returning None, then hands it to check_Rresult().
#   Rscript -- script passed to exec_rcode together with self.ira.RPath
#   wait    -- forwarded unchanged to exec_rcode
#   dlg     -- not read in the visible lines (the loop uses self.dlg)
#   message -- text passed to self.dlg.Pulse() while waiting
# NOTE(review): some lines of the polling loop are elided from this listing, so the
# exact condition around Pulse() and any sleep between polls cannot be seen here.
119 def doR(self, Rscript, wait = False, dlg = None, message = '') :
120 log.info('R code...')
121 pid = exec_rcode(self.ira.RPath, Rscript, wait = wait)
122 while pid.poll() is None :
124 self.dlg.Pulse(message)
128 check_Rresult(self.ira, pid)
# Analysis of type 'alceste' built on AnalyseText: writes sparse matrices for the
# active forms, generates R scripts (RchdTxt, AlcesteTxtProf), writes class profiles
# and finally the graph lists and the report.
# NOTE(review): this listing has lost its indentation and elides several lines of the
# class (including, presumably, the calls that actually run the generated R scripts).
132 class Alceste(AnalyseText) :
# Full analysis pipeline; no return statement is visible, and AnalyseText.__init__
# handles a None result from doanalyse().
133 def doanalyse(self) :
134 #self.pathout = PathOut(self.corpus.parametres['filename'], 'alceste')
135 self.parametres['type'] = 'alceste'
136 self.pathout.basefiles(ChdTxtPathOut)
# make_actives_nb() returns the selected forms plus a second value stored as 'eff_min_forme'.
# presumably the second argument (1 here, 2 further down) selects active vs supplementary forms -- confirm
137 self.actives, lim = self.corpus.make_actives_nb(self.parametres['max_actives'], 1)
138 self.parametres['eff_min_forme'] = lim
139 self.parametres['nbactives'] = len(self.actives)
# classif_mode picks the matrix builder: 0 -> *_from_uc (two tables, sizes tailleuc1/tailleuc2),
# 1 -> *_from_uces, 2 -> *_from_uci (a single table each).
140 if self.parametres['classif_mode'] == 0 :
141 lenuc1, lenuc2 = self.corpus.make_and_write_sparse_matrix_from_uc(self.actives, self.parametres['tailleuc1'], self.parametres['tailleuc2'], self.pathout['TableUc1'], self.pathout['TableUc2'], self.pathout['listeuce1'], self.pathout['listeuce2'])
142 self.parametres['lenuc1'] = lenuc1
143 self.parametres['lenuc2'] = lenuc2
144 elif self.parametres['classif_mode'] == 1 :
145 self.corpus.make_and_write_sparse_matrix_from_uces(self.actives, self.pathout['TableUc1'], self.pathout['listeuce1'])
146 elif self.parametres['classif_mode'] == 2 :
147 self.corpus.make_and_write_sparse_matrix_from_uci(self.actives, self.pathout['TableUc1'], self.pathout['listeuce1'])
# First R script; the line that follows in the original is elided (presumably it runs the script).
148 Rscript = self.printRscript()
150 #self.lc = make_ucecl_from_R(self.pathout['uce'])
151 #self.lc0 = self.lc.pop(0)
# presumably fills self.corpus.lc from the 'uce' file; self.corpus.lc is read by the calls below
152 self.corpus.make_ucecl_from_R(self.pathout['uce'])
# Profiles are written to 'Contout' (self.actives), 'ContSupOut' (self.sup) and 'ContEtOut'.
153 self.corpus.make_and_write_profile(self.actives, self.corpus.lc, self.pathout['Contout'])
154 self.sup, lim = self.corpus.make_actives_nb(self.parametres['max_actives'], 2)
155 self.corpus.make_and_write_profile(self.sup, self.corpus.lc, self.pathout['ContSupOut'])
156 self.corpus.make_and_write_profile_et(self.corpus.lc, self.pathout['ContEtOut'])
# Number of classes = length of corpus.lc; printRscript2() needs it.
157 self.clnb = len(self.corpus.lc)
158 self.parametres['clnb'] = self.clnb
# Second R script; again the following original line is elided.
159 Rscript = self.printRscript2()
# Elapsed time since self.t1, formatted as hours / minutes / seconds.
161 self.time = time() - self.t1
162 minutes, seconds = divmod(self.time, 60)
163 hours, minutes = divmod(minutes, 60)
164 self.parametres['time'] = '%.0fh %.0fm %.0fs' % (hours, minutes, seconds)
165 self.print_graph_files()
# Loads the 'ALCESTE' options from the configuration file, shows the OptionAlc dialog
# modally and copies the dialog values back into the options dict.
# NOTE(review): the lines assigning `lem` and everything after makeoptions() (including
# the return value) are elided from this listing.
167 def preferences(self) :
168 parametres = DoConf(self.parent.ConfigPath['alceste']).getoptions('ALCESTE')
# The dialog receives the live corpus and PathOut objects through the dict.
169 parametres['corpus'] = self.corpus
170 parametres['pathout'] = self.pathout
171 self.dial = OptionAlc(self.parent, parametres)
172 self.dial.CenterOnParent()
173 self.dialok = self.dial.ShowModal()
# presumably 5100 is the toolkit's "OK" return code (wx.ID_OK) -- confirm
174 if self.dialok == 5100 :
175 if self.dial.radio_1.GetSelection() == 0 :
179 parametres['lem'] = lem
180 parametres['classif_mode'] = self.dial.radio_box_2.GetSelection()
181 parametres['tailleuc1'] = self.dial.spin_ctrl_1.GetValue()
182 parametres['tailleuc2'] = self.dial.spin_ctrl_2.GetValue()
183 parametres['mincl'] = self.dial.spin_ctrl_4.GetValue()
184 parametres['minforme'] = self.dial.spin_ctrl_5.GetValue()
185 parametres['nbcl_p1'] = self.dial.spin_nbcl.GetValue()
186 parametres['max_actives'] = self.dial.spin_max_actives.GetValue()
# The two objects set above are replaced by plain values ('' and the dirout attribute)
# before the options are written out -- presumably so the dict can be serialised.
187 parametres['corpus'] = ''
188 parametres['pathout'] = self.pathout.dirout
# Debug dump of every option to stdout.
189 for val in parametres :
190 print val, parametres[val]
# Store the chosen options in the 'ALCESTE' section of the configuration file.
191 DoConf(self.parent.ConfigPath['alceste']).makeoptions(['ALCESTE'], [parametres])
# Generates the first R script through RchdTxt and returns the path stored under 'Rchdtxt'.
# nbt is 'nbcl_p1' minus one; the libsvdc switch and path come from the application
# preferences; R_max_mem is always False.
198 def printRscript(self) :
199 RchdTxt(self.pathout, self.parent.RscriptsPath, self.parametres['mincl'], self.parametres['classif_mode'], nbt = self.parametres['nbcl_p1'] - 1, libsvdc = self.parent.pref.getboolean('iramuteq','libsvdc'), libsvdc_path = self.parent.pref.get('iramuteq','libsvdc_path'), R_max_mem = False)
200 return self.pathout['Rchdtxt']
# Generates the second R script through AlcesteTxtProf for self.clnb classes and returns
# the path stored under 'RTxtProfGraph'.
# NOTE(review): the meaning of the 0.9 argument is not visible here -- see AlcesteTxtProf.
202 def printRscript2(self) :
203 AlcesteTxtProf(self.pathout, self.parent.RscriptsPath, self.clnb, 0.9)
204 return self.pathout['RTxtProfGraph']
# Writes two lists of [image file basename, caption] pairs with print_liste()
# ('liste_graph_afc' and 'liste_graph_chd'), then calls PrintRapport.
# The captions are runtime strings and stay in French.
# NOTE(review): no `import os` is visible in this listing -- confirm it sits in the elided import lines.
206 def print_graph_files(self) :
207 mess_afc = u"La position des points n'est peut être pas exacte"
208 afc_graph_list = [[os.path.basename(self.pathout['AFC2DL_OUT']), u'Variables actives - coordonnées - 30 points par classes - facteurs 1 / 2 - %s' % mess_afc],
209 [os.path.basename(self.pathout['AFC2DSL_OUT']), u'variables supplémentaires - coordonnées - 30 points par classes - facteurs 1 / 2 - %s' % mess_afc],
210 [os.path.basename(self.pathout['AFC2DEL_OUT']), u'Variables illustratives - Coordonnées - 30 points par classes - facteur 1 / 2 - %s' % mess_afc],
211 [os.path.basename(self.pathout['AFC2DCL_OUT']), u'Classes - Coordonnées - facteur 1 / 2']]
# Four further entries (the AFC2DCoul* files) are disabled below.
212 #[os.path.basename(self.pathout['AFC2DCoul']), u'Variables actives - Corrélation - facteur 1 / 2'],
213 #[os.path.basename(self.pathout['AFC2DCoulSup']), u'Variables supplémentaires - Corrélation - facteur 1 / 2'],
214 #[os.path.basename(self.pathout['AFC2DCoulEt']), u'Variables illustratives - Corrélations - facteur 1 / 2'],
215 #[os.path.basename(self.pathout['AFC2DCoulCl']), u'Classes - Corrélations - facteurs 1 / 2'],]
# The 'dendro2' and 'arbre2' entries are added only when classif_mode == 0, the mode in
# which doanalyse() writes two tables; presumably 'arbre1' is appended unconditionally
# (the indentation is lost in this listing).
216 chd_graph_list = [[os.path.basename(self.pathout['dendro1']), u'dendrogramme à partir de chd1']]
217 if self.parametres['classif_mode'] == 0 :
218 chd_graph_list.append([os.path.basename(self.pathout['dendro2']), u'dendrogramme à partir de chd2'])
219 chd_graph_list.append([os.path.basename(self.pathout['arbre1']), u'chd1'])
220 if self.parametres['classif_mode'] == 0 :
221 chd_graph_list.append([os.path.basename(self.pathout['arbre2']), u'chd2'])
222 print_liste(self.pathout['liste_graph_afc'],afc_graph_list)
223 print_liste(self.pathout['liste_graph_chd'],chd_graph_list)
224 PrintRapport(self, self.corpus, self.parametres)
# Hard-coded table mapping a grammatical-category code (e.g. 'art_def') to an integer flag.
# The rest of the dict literal is elided from this listing.
227 keys = {'art_def' : 2,
# Split the codes by flag: 1 -> gramact, 2 -> gramsup. AnalyseText.__init__ builds the
# same two lists and passes them to corpus.parse_active().
256 gramact = [k for k in keys if keys[k] == 1]
257 gramsup = [k for k in keys if keys[k] == 2]
259 #corpus = Corpus('', {'filename': '/home/pierre/workspace/iramuteq/dev/testcorpus.txt','formesdb':'formes.db', 'ucesdb': 'uces.db', 'corpusdb' : 'corpus.db', 'syscoding' : 'utf-8'})
260 #corpus.read_corpus()
261 #corpus.parse_active(gramact, gramsup)
262 #Alceste(corpus).doanalyse()