X-Git-Url: http://iramuteq.org/git?p=iramuteq;a=blobdiff_plain;f=textdist.py;h=33dd2ced2c678bae50760b8972606ac364d3d3d6;hp=c7fc1337b3843db8e1c114ec912d89cf70ced446;hb=9b78e6210e7fc88a7e77d178c4090aabb23580d9;hpb=8fa853a25a9d62b1446e1bc543e5a3a4d0e03dcf diff --git a/textdist.py b/textdist.py index c7fc133..33dd2ce 100644 --- a/textdist.py +++ b/textdist.py @@ -2,54 +2,66 @@ # -*- coding: utf-8 -*- #Author: Pierre Ratinaud #Copyright (c) 2008-2009 Pierre Ratinaud -#Lisense: GNU/GPL +#License: GNU/GPL from chemins import ConstructPathOut, ConstructAfcUciPath, ChdTxtPathOut from corpus import Corpus from OptionAlceste import OptionPam +from analysetxt import AnalyseText import wx import os from ConfigParser import * import sys from functions import print_liste, exec_rcode, CreateIraFile, progressbar, check_Rresult, BugDialog from layout import PrintRapport -from PrintRScript import AlcesteTxtProf, RPamTxt +from PrintRScript import ReinertTxtProf, RPamTxt from openanalyse import OpenAnalyse from time import time, sleep -class AnalysePam: - def __init__(self, parent, corpus, cmd = False): - t1 = time() - self.parent = parent - self.corpus = corpus - self.cmd = cmd +class AnalysePam(AnalyseText) : +# def __init__(self, parent, corpus, cmd = False): + def doanalyse(self) : + self.parametres['type'] = 'pamtxt' + self.pathout.basefiles(ChdTxtPathOut) + self.actives, lim = self.corpus.make_actives_nb(self.parametres['max_actives'], 1) + self.parametres['eff_min_forme'] = lim + self.parametres['nbactives'] = len(self.actives) + if self.parametres['classif_mode'] == 0 : + self.corpus.make_and_write_sparse_matrix_from_uces(self.actives, self.pathout['TableUc1'], self.pathout['listeuce1']) + elif self.parametres['classif_mode'] == 1 : + self.corpus.make_and_write_sparse_matrix_from_uci(self.actives, self.pathout['TableUc1'], self.pathout['listeuce1']) + RPamTxt(self.corpus, self.parent.RscriptsPath) + #t1 = time() + #self.parent = parent + #self.corpus = corpus + #self.cmd = cmd if not self.cmd : self.dlg = progressbar(self, 9) - else : - self.dlg = None - ucis_txt, ucis_paras_txt = self.corpus.start_analyse(self.parent, dlg = self.dlg, cmd = self.cmd) - self.corpus.make_len_uce(self.corpus.get_tot_occ_from_ucis_txt(ucis_txt)) - del ucis_txt - if not self.cmd : - self.dlg.Update(5, '%i ucis - Construction des uces' % len(self.corpus.ucis)) - if self.corpus.parametre['type'] == 0 : - self.corpus.make_ucis_paras_uces(ucis_paras_txt, make_uce = True) - else : - self.corpus.make_ucis_paras_uces(ucis_paras_txt, make_uce = False) - del ucis_paras_txt + #else : + # self.dlg = None + #ucis_txt, ucis_paras_txt = self.corpus.start_analyse(self.parent, dlg = self.dlg, cmd = self.cmd) + #self.corpus.make_len_uce(self.corpus.get_tot_occ_from_ucis_txt(ucis_txt)) + #del ucis_txt + #if not self.cmd : + # self.dlg.Update(5, '%i ucis - Construction des uces' % len(self.corpus.ucis)) + #if self.corpus.parametre['type'] == 0 : + # self.corpus.make_ucis_paras_uces(ucis_paras_txt, make_uce = True) + #else : + # self.corpus.make_ucis_paras_uces(ucis_paras_txt, make_uce = False) + #del ucis_paras_txt - if not self.cmd : - self.dlg.Update(6, u'Dictionnaires') - uces, orderuces = self.corpus.make_forms_and_uces() - self.corpus.ucenb = len(uces) - self.corpus.make_lems(self.parent.lexique) - self.corpus.min_eff_formes() - self.corpus.make_var_actives() - self.corpus.make_var_supp() + #if not self.cmd : + # self.dlg.Update(6, u'Dictionnaires') + #uces, orderuces = self.corpus.make_forms_and_uces() + #self.corpus.ucenb = len(uces) + #self.corpus.make_lems(self.parent.lexique) + #self.corpus.min_eff_formes() + #self.corpus.make_var_actives() + #self.corpus.make_var_supp() - if not self.cmd : - self.dlg.Update(7, u'Creation des tableaux') + #if not self.cmd : + # self.dlg.Update(7, u'Creation des tableaux') if self.corpus.parametre['type'] == 0: tabuc1 = self.corpus.make_table_with_uce(orderuces) uc1 = None @@ -63,58 +75,61 @@ class AnalysePam: self.corpus.write_tab(tabuc1,self.corpus.dictpathout['TableUc1']) self.corpus.lenuc1 = len(tabuc1) del tabuc1, uc1 - RPamTxt(self.corpus, self.parent.RscriptsPath) - pid = exec_rcode(self.parent.RPath,self.corpus.dictpathout['Rchdtxt'], wait = False) - while pid.poll() == None : - if not self.cmd : - self.dlg.Pulse(u'CHD...') - sleep(0.2) - else : - pass - check_Rresult(self.parent, pid) - ucecl = self.corpus.read_uce_from_R(self.corpus.dictpathout['uce']) - ucecl0 = [cl for uce,cl in ucecl if cl != 0] - clnb = len(list(set(ucecl0))) - classes = [cl for uce, cl in ucecl] - uces1 = [val for val, i in uces1] - self.corpus.make_lc(uces1, classes, clnb) - self.corpus.build_profile(clnb, classes, self.corpus.actives, self.corpus.dictpathout['Contout']) + RPamTxt(self, self.parent.RscriptsPath) + self.DoR(self.pathout['Rchdtxt'], dlg = self.dlg, message = 'R...') + #pid = exec_rcode(self.parent.RPath,self.pathout['Rchdtxt'], wait = False) + #while pid.poll() == None : + # if not self.cmd : + # self.dlg.Pulse(u'CHD...') + # sleep(0.2) + # else : + # pass + #check_Rresult(self.parent, pid) + self.corpus.make_ucecl_from_R(self.pathout['uce']) + #ucecl0 = [cl for uce,cl in ucecl if cl != 0] + #clnb = len(list(set(ucecl0))) + #classes = [cl for uce, cl in ucecl] + #uces1 = [val for val, i in uces1] + #self.corpus.make_lc(uces1, classes, clnb) + #self.corpus.build_profile(clnb, classes, self.corpus.actives, self.corpus.dictpathout['Contout']) + self.corpus.make_and_write_profile(self.actives, self.corpus.lc, self.pathout['Contout']) + self.sup, lim = self.corpus.make_actives_nb(self.parametres['max_actives'], 2) + self.corpus.make_and_write_profile(self.sup, self.corpus.lc, self.pathout['ContSupOut']) + self.corpus.make_and_write_profile_et(self.corpus.lc, self.pathout['ContEtOut']) + self.clnb = len(self.corpus.lc) + self.parametres['clnb'] = self.clnb #passives = [lem for lem in self.corpus.lems if lem not in self.corpus.actives] - self.corpus.build_profile(clnb, classes, self.corpus.supp, self.corpus.dictpathout['ContSupOut']) - self.corpus.make_etoiles(self.corpus.para_coords) - self.corpus.build_profile_et(clnb, classes, uces1, self.corpus.dictpathout['ContEtOut']) - AlcesteTxtProf(self.corpus.dictpathout, self.parent.RscriptsPath, clnb, '0.9') - pid = exec_rcode(self.parent.RPath, self.corpus.dictpathout['RTxtProfGraph'], wait = False) - while pid.poll() == None : - if not self.cmd : - self.dlg.Pulse(u'AFC...') - sleep(0.2) - else : - pass - check_Rresult(self.parent, pid) - temps = time() - t1 - self.corpus.minutes, self.corpus.seconds = divmod(temps, 60) - self.corpus.hours, self.corpus.minutes = divmod(self.corpus.minutes, 60) - PrintRapport(self.corpus, 'txt') - CreateIraFile(self.corpus.dictpathout, clnb, os.path.basename(self.corpus.parametre['filename'])) + #self.corpus.build_profile(clnb, classes, self.corpus.supp, self.corpus.dictpathout['ContSupOut']) + #self.corpus.make_etoiles(self.corpus.para_coords) + #self.corpus.build_profile_et(clnb, classes, uces1, self.corpus.dictpathout['ContEtOut']) + AlcesteTxtProf(self.pathout, self.parent.RscriptsPath, clnb, '0.9') + self.doR(self.pathout['RTxtProfGraph'], dlg = self.dlg, message = 'profils et A.F.C. ...') + #pid = exec_rcode(self.parent.RPath, self.corpus.dictpathout['RTxtProfGraph'], wait = False) + #while pid.poll() == None : + # if not self.cmd : + # self.dlg.Pulse(u'AFC...') + # sleep(0.2) + # else : + # pass + #check_Rresult(self.parent, pid) + #temps = time() - t1 + #self.corpus.minutes, self.corpus.seconds = divmod(temps, 60) + #self.corpus.hours, self.corpus.minutes = divmod(self.corpus.minutes, 60) + PrintRapport(self, self.corpus, self.parametres) + #CreateIraFile(self.corpus.dictpathout, clnb, os.path.basename(self.corpus.parametre['filename'])) self.corpus.save_corpus(self.corpus.dictpathout['db']) afc_graph_list = [[os.path.basename(self.corpus.dictpathout['AFC2DL_OUT']), u'Variables actives - coordonnées - facteurs 1 / 2'], [os.path.basename(self.corpus.dictpathout['AFC2DSL_OUT']), u'variables supplémentaires - coordonnées - facteurs 1 / 2'], [os.path.basename(self.corpus.dictpathout['AFC2DEL_OUT']), u'Variables illustratives - Coordonnées - facteur 1 / 2'], - [os.path.basename(self.corpus.dictpathout['AFC2DCL_OUT']), u'Classes - Coordonnées - facteur 1 / 2'], - [os.path.basename(self.corpus.dictpathout['AFC2DCoul']), u'Variables actives - Corrélation - facteur 1/2'], - [os.path.basename(self.corpus.dictpathout['AFC2DCoulSup']), u'Variables supplémentaires - Corrélation - facteur 1 / 2'], - [os.path.basename(self.corpus.dictpathout['AFC2DCoulEt']), u'Variables illustratives - Corrélations - facteur 1 / 2'], - [os.path.basename(self.corpus.dictpathout['AFC2DCoulCl']), u'Classes - Corrélations - facteurs 1 / 2'],] + [os.path.basename(self.corpus.dictpathout['AFC2DCL_OUT']), u'Classes - Coordonnées - facteur 1 / 2']] chd_graph_list = [[os.path.basename(self.corpus.dictpathout['arbre1']), u'résultats de la classification']] - print_liste(self.corpus.dictpathout['liste_graph_afc'],afc_graph_list) - print_liste(self.corpus.dictpathout['liste_graph_chd'],chd_graph_list) - if not self.cmd : - self.dlg.Update(9, u'fin') - self.dlg.Destroy() - OpenAnalyse(self.parent, self.corpus.dictpathout['ira']) - print 'fini' + print_liste(self.pathout['liste_graph_afc'],afc_graph_list) + print_liste(self.pathout['liste_graph_chd'],chd_graph_list) + #if not self.cmd : + # self.dlg.Update(9, u'fin') + # self.dlg.Destroy() + #print 'fini'