X-Git-Url: http://iramuteq.org/git?p=iramuteq;a=blobdiff_plain;f=analysetxt.py;h=201e1772c5fd2044f60caa12c9b569661309b113;hp=ab19b3cdbc8637c5bed69938c92638d0d0878711;hb=148fe710bf14981c45e865e8b4ddb68333e62f7c;hpb=69b6d701d4298a125c51cd0ac8e884359f93a6ad diff --git a/analysetxt.py b/analysetxt.py index ab19b3c..201e177 100644 --- a/analysetxt.py +++ b/analysetxt.py @@ -4,14 +4,12 @@ # copyright : 2012-2013 (c) Pierre Ratinaud import logging -from chemins import PathOut, ChdTxtPathOut -from functions import exec_rcode, check_Rresult, DoConf, print_liste +from chemins import PathOut +from functions import exec_rcode, check_Rresult, DoConf, ReadDicoAsDico, progressbar +from shutil import copy from time import time, sleep from uuid import uuid4 import os -from PrintRScript import RchdTxt, AlcesteTxtProf -from OptionAlceste import OptionAlc -from layout import PrintRapport from openanalyse import OpenAnalyse from dialog import StatDialog @@ -49,8 +47,19 @@ class AnalyseText : self.parametres['type'] = parametres['type'] self.parametres['encoding'] = self.ira.syscoding self.t1 = time() - self.corpus.make_lems(lem=self.parametres['lem']) + if not self.parametres.get('dictionary', False) : + self.corpus.make_lems(lem=self.parametres['lem']) + else : + print 'read new dico' + dico = ReadDicoAsDico(self.parametres['dictionary']) + self.corpus.make_lems_from_dict(dico, dolem=self.parametres['lem']) + dictname = os.path.basename(self.parametres['dictionary']) + dictpath = os.path.join(self.pathout.dirout, dictname) + copy(self.parametres['dictionary'], dictpath) + self.parametres['dictionary'] = dictpath self.corpus.parse_active(gramact, gramsup) + if dlg : + self.dlg = progressbar(self.ira, dlg) result_analyse = self.doanalyse() if result_analyse is None : self.time = time() - self.t1 @@ -61,17 +70,22 @@ class AnalyseText : DoConf().makeoptions([self.parametres['type']], [self.parametres], self.pathout['Analyse.ira']) self.ira.history.add(self.parametres) if dlg : - dlg.Destroy() + if not isinstance(dlg, int) : + dlg.Destroy() + self.dlg.Destroy() OpenAnalyse(self.parent, self.parametres['ira']) self.ira.tree.AddAnalyse(self.parametres) self.val = 5100 else : self.val = False if dlg : - dlg.Destroy() + try : + self.dlg.Destroy() + except : + pass else : - if dlg : - dlg.Destroy() + #if isinstance(dlg, wx.ProgressDialog) : + # self.dlg.Destroy() self.val = False def doanalyse(self) : @@ -79,7 +93,7 @@ class AnalyseText : def lemparam(self) : if self.dlg and self.lemdial: - dial = StatDialog(self, self.parent) + dial = StatDialog(self.parent, self.keys) dial.CenterOnParent() val = dial.ShowModal() if val == 5100 : @@ -88,6 +102,8 @@ class AnalyseText : else : lem = 0 self.parametres['lem'] = lem + if dial.radio_dictchoice.GetSelection() == 1 : + self.parametres['dictionary'] = dial.dictpath.GetValue() dial.Destroy() return self.parametres else : @@ -124,124 +140,3 @@ class AnalyseText : else : sleep(0.2) return check_Rresult(self.ira, pid) - - - -class Alceste(AnalyseText) : - def doanalyse(self) : - self.parametres['type'] = 'alceste' - self.pathout.basefiles(ChdTxtPathOut) - self.actives, lim = self.corpus.make_actives_nb(self.parametres['max_actives'], 1) - self.parametres['eff_min_forme'] = lim - self.parametres['nbactives'] = len(self.actives) - uci = False - if self.parametres['classif_mode'] == 0 : - lenuc1, lenuc2 = self.corpus.make_and_write_sparse_matrix_from_uc(self.actives, self.parametres['tailleuc1'], self.parametres['tailleuc2'], self.pathout['TableUc1'], self.pathout['TableUc2'], self.pathout['listeuce1'], self.pathout['listeuce2']) - self.parametres['lenuc1'] = lenuc1 - self.parametres['lenuc2'] = lenuc2 - elif self.parametres['classif_mode'] == 1 : - self.corpus.make_and_write_sparse_matrix_from_uces(self.actives, self.pathout['TableUc1'], self.pathout['listeuce1']) - elif self.parametres['classif_mode'] == 2 : - self.corpus.make_and_write_sparse_matrix_from_uci(self.actives, self.pathout['TableUc1'], self.pathout['listeuce1']) - uci = True - Rscript = self.printRscript() - self.doR(Rscript, dlg=self.dlg, message='CHD...') - - self.corpus.make_ucecl_from_R(self.pathout['uce']) - self.corpus.make_and_write_profile(self.actives, self.corpus.lc, self.pathout['Contout'], uci = uci) - self.sup, lim = self.corpus.make_actives_nb(self.parametres['max_actives'], 2) - self.corpus.make_and_write_profile(self.sup, self.corpus.lc, self.pathout['ContSupOut'], uci = uci) - self.corpus.make_and_write_profile_et(self.corpus.lc, self.pathout['ContEtOut'], uci = uci) - self.clnb = len(self.corpus.lc) - self.parametres['clnb'] = self.clnb - Rscript = self.printRscript2() - self.doR(Rscript, dlg=self.dlg, message='profils et A.F.C. ...') - self.time = time() - self.t1 - minutes, seconds = divmod(self.time, 60) - hours, minutes = divmod(minutes, 60) - self.parametres['time'] = '%.0fh %.0fm %.0fs' % (hours, minutes, seconds) - self.print_graph_files() - - def preferences(self) : - parametres = DoConf(self.parent.ConfigPath['alceste']).getoptions('ALCESTE') - parametres['corpus'] = self.corpus - parametres['pathout'] = self.pathout - self.dial = OptionAlc(self.parent, parametres) - self.dial.CenterOnParent() - self.dialok = self.dial.ShowModal() - if self.dialok == 5100 : - parametres['classif_mode'] = self.dial.radio_box_2.GetSelection() - parametres['tailleuc1'] = self.dial.spin_ctrl_1.GetValue() - parametres['tailleuc2'] = self.dial.spin_ctrl_2.GetValue() - parametres['mincl'] = self.dial.spin_ctrl_4.GetValue() - parametres['minforme'] = self.dial.spin_ctrl_5.GetValue() - parametres['nbcl_p1'] = self.dial.spin_nbcl.GetValue() - parametres['max_actives'] = self.dial.spin_max_actives.GetValue() - parametres['corpus'] = '' - parametres['svdmethod'] = self.dial.svdmethod[self.dial.choicesvd.GetSelection()] - parametres['pathout'] = self.pathout.dirout - parametres['mode.patate'] = self.dial.check_patate.GetValue() - DoConf(self.parent.ConfigPath['alceste']).makeoptions(['ALCESTE'], [parametres]) - self.dial.Destroy() - print parametres - return parametres - else : - self.dial.Destroy() - return None - - def printRscript(self) : - RchdTxt(self.pathout, self.parent.RscriptsPath, self.parametres['mincl'], self.parametres['classif_mode'], nbt=self.parametres['nbcl_p1'] - 1, svdmethod=self.parametres['svdmethod'], libsvdc=self.parent.pref.getboolean('iramuteq', 'libsvdc'), libsvdc_path=self.parent.pref.get('iramuteq', 'libsvdc_path'), R_max_mem=False, mode_patate=self.parametres['mode.patate']) - return self.pathout['Rchdtxt'] - - def printRscript2(self) : - AlcesteTxtProf(self.pathout, self.parent.RscriptsPath, self.clnb, 0.9) - return self.pathout['RTxtProfGraph'] - - def print_graph_files(self) : - mess_afc = u"La position des points n'est peut être pas exacte" - afc_graph_list = [[os.path.basename(self.pathout['AFC2DL_OUT']), u'Variables actives - coordonnées - 30 points par classes - facteurs 1 / 2 - %s' % mess_afc], - [os.path.basename(self.pathout['AFC2DSL_OUT']), u'variables supplémentaires - coordonnées - 30 points par classes - facteurs 1 / 2 - %s' % mess_afc], - [os.path.basename(self.pathout['AFC2DEL_OUT']), u'Variables illustratives - Coordonnées - 30 points par classes - facteur 1 / 2 - %s' % mess_afc], - [os.path.basename(self.pathout['AFC2DCL_OUT']), u'Classes - Coordonnées - facteur 1 / 2']] - chd_graph_list = [[os.path.basename(self.pathout['dendro1']), u'dendrogramme à partir de chd1']] - if self.parametres['classif_mode'] == 0 : - chd_graph_list.append([os.path.basename(self.pathout['dendro2']), u'dendrogramme à partir de chd2']) - chd_graph_list.append([os.path.basename(self.pathout['arbre1']), u'chd1']) - if self.parametres['classif_mode'] == 0 : - chd_graph_list.append([os.path.basename(self.pathout['arbre2']), u'chd2']) - print_liste(self.pathout['liste_graph_afc'], afc_graph_list) - print_liste(self.pathout['liste_graph_chd'], chd_graph_list) - PrintRapport(self, self.corpus, self.parametres) - - -# keys = {'art_def' : 2, -# 'pre' : 2, -# 'adj_dem' : 2, -# 'ono' : 2, -# 'pro_per' : 2, -# 'ver_sup' : 2, -# 'adv' : 1, -# 'ver' : 1, -# 'adj_ind' : 2, -# 'adj_pos' : 2, -# 'aux' : 2, -# 'adj_int' : 2, -# 'pro_ind' : 2, -# 'adj' : 1, -# 'pro_dem' : 2, -# 'nom' : 1, -# 'art_ind' : 2, -# 'pro_pos' : 2, -# 'nom_sup' : 2, -# 'adv_sup' : 2, -# 'adj_sup' : 2, -# 'adj_num' : 2, -# 'pro_rel' : 2, -# 'con' : 2, -# 'num' : 2, -# 'nr' : 1, -# 'sw' : 2, -# } -# -# gramact = [k for k in keys if keys[k] == 1] -# gramsup = [k for k in keys if keys[k] == 2]