X-Git-Url: http://iramuteq.org/git?p=iramuteq;a=blobdiff_plain;f=analysetxt.py;h=2811a7ed48907ccb3eb7e679b22f790ac4bbca2e;hp=0402cdc753564c6715f1162867fbaf47d0f2c38e;hb=9d5358d31d7438dfa92c9112adb2ae471ad95aae;hpb=0bb1e9556fdbb07e171b663ffcea149692a8a49f diff --git a/analysetxt.py b/analysetxt.py index 0402cdc..2811a7e 100644 --- a/analysetxt.py +++ b/analysetxt.py @@ -1,67 +1,45 @@ # -*- coding: utf-8 -*- #Author: Pierre Ratinaud -#from corpusNG import Corpus +#lisence : GNU GPL +#copyright : 2012-2013 (c) Pierre Ratinaud + import logging from chemins import PathOut, ChdTxtPathOut from functions import exec_rcode, check_Rresult, DoConf, print_liste from time import time, sleep from uuid import uuid4 import os -#ALCESTE from PrintRScript import RchdTxt, AlcesteTxtProf from OptionAlceste import OptionAlc from layout import PrintRapport from openanalyse import OpenAnalyse +from dialog import StatDialog from time import time -###################################### -print '#######LOGGING TEST###########' -log = logging.getLogger('iramuteq.analyse') -#formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') -#ch = logging.StreamHandler() -#ch.setFormatter(formatter) -#log.addHandler(ch) -#log.setLevel(logging.INFO) -####################################### - -#def make_ucecl_from_R(filein) : -# with open(filein, 'rU') as f : -# c = f.readlines() -# c.pop(0) -# ucecl = [] -# for line in c : -# line = line.replace('\n', '').replace('"', '').split(';') -# ucecl.append([int(line[0]) - 1, int(line[1])]) -# classesl = [val[1] for val in ucecl] -# clnb = max(classesl) -# ucecl = sorted(ucecl, key=itemgetter(1)) -# ucecl = [[uce[0] for uce in ucecl if uce[1] == i] for i in range(clnb+1)] -# return ucecl -# -#def make_lc(self, uces, classes, clnb) : -# self.lc = [[] for classe in range(0,clnb)] -# for i in range(0,clnb): -# self.lc[i] = [uce for j, uce in enumerate(uces) if i+1 == classes[j]] -# self.lc0 = [uce for j, uce in enumerate(uces) if 0 == classes[j]] +log = logging.getLogger('iramuteq.analyse') class AnalyseText : - def __init__(self, ira, corpus, parametres = None, dlg = False) : + def __init__(self, ira, corpus, parametres = None, dlg = False, lemdial = True) : self.corpus = corpus self.ira = ira self.parent = ira self.dlg = dlg self.dialok = True self.parametres = parametres - self.pathout = PathOut(corpus.parametres['originalpath'], analyse_type = parametres['type'], dirout = corpus.parametres['pathout']) - self.parametres = self.make_config(parametres) + self.lemdial = lemdial + self.val = False + if not 'pathout' in self.parametres : + self.pathout = PathOut(corpus.parametres['originalpath'], analyse_type = parametres['type'], dirout = corpus.parametres['pathout']) + else : + self.pathout = PathOut(filename = corpus.parametres['originalpath'], dirout = self.parametres['pathout'], analyse_type = self.parametres['type']) + self.parametres = self.lemparam() + if self.parametres is not None : + self.parametres = self.make_config(parametres) log.info(self.pathout.dirout) if self.parametres is not None : self.keys = DoConf(self.ira.ConfigPath['key']).getoptions() gramact = [k for k in keys if keys[k] == 1] gramsup = [k for k in keys if keys[k] == 2] - #FIXME - if not 'lem' in self.parametres : - self.parametres['lem'] = 1 self.parametres['pathout'] = self.pathout.mkdirout() self.pathout = PathOut(dirout = self.parametres['pathout']) self.pathout.createdir(self.parametres['pathout']) @@ -71,22 +49,26 @@ class AnalyseText : self.parametres['type'] = parametres['type'] self.parametres['encoding'] = self.ira.syscoding self.t1 = time() - #if self.corpus.lems is None : self.corpus.make_lems(lem = self.parametres['lem']) corpus.parse_active(gramact, gramsup) - self.doanalyse() - self.time = time() - self.t1 - minutes, seconds = divmod(self.time, 60) - hours, minutes = divmod(minutes, 60) - self.parametres['time'] = '%.0fh %.0fm %.0fs' % (hours, minutes, seconds) - self.parametres['ira'] = self.pathout['Analyse.ira'] - DoConf().makeoptions([self.parametres['type']], [self.parametres], self.pathout['Analyse.ira']) - self.ira.history.add(self.parametres) - if dlg : - dlg.Destroy() - OpenAnalyse(self.parent, self.parametres['ira']) - self.ira.tree.AddAnalyse(self.parametres) - self.val = 5100 + result_analyse = self.doanalyse() + if result_analyse is None : + self.time = time() - self.t1 + minutes, seconds = divmod(self.time, 60) + hours, minutes = divmod(minutes, 60) + self.parametres['time'] = '%.0fh %.0fm %.0fs' % (hours, minutes, seconds) + self.parametres['ira'] = self.pathout['Analyse.ira'] + DoConf().makeoptions([self.parametres['type']], [self.parametres], self.pathout['Analyse.ira']) + self.ira.history.add(self.parametres) + if dlg : + dlg.Destroy() + OpenAnalyse(self.parent, self.parametres['ira']) + self.ira.tree.AddAnalyse(self.parametres) + self.val = 5100 + else : + self.val = False + if dlg : + dlg.Destroy() else : if dlg : dlg.Destroy() @@ -95,18 +77,39 @@ class AnalyseText : def doanalyse(self) : pass + def lemparam(self) : + if self.dlg and self.lemdial: + dial = StatDialog(self, self.parent) + dial.CenterOnParent() + val = dial.ShowModal() + if val == 5100 : + if dial.radio_lem.GetSelection() == 0 : + lem = 1 + else : + lem = 0 + self.parametres['lem'] = lem + dial.Destroy() + return self.parametres + else : + dial.Destroy() + return None + else : + return self.parametres + def make_config(self, config) : if config is not None : if not self.dlg : return config else : return self.preferences() + else : + return None def readconfig(self, config) : return config def preferences(self) : - return {} + return self.parametres def printRscript(self) : pass @@ -115,18 +118,17 @@ class AnalyseText : log.info('R code...') pid = exec_rcode(self.ira.RPath, Rscript, wait = wait) while pid.poll() is None : - if dlg is not None : + if dlg : self.dlg.Pulse(message) sleep(0.2) else : sleep(0.2) - check_Rresult(self.ira, pid) + return check_Rresult(self.ira, pid) class Alceste(AnalyseText) : def doanalyse(self) : - #self.pathout = PathOut(self.corpus.parametres['filename'], 'alceste') self.parametres['type'] = 'alceste' self.pathout.basefiles(ChdTxtPathOut) self.actives, lim = self.corpus.make_actives_nb(self.parametres['max_actives'], 1) @@ -141,9 +143,8 @@ class Alceste(AnalyseText) : elif self.parametres['classif_mode'] == 2 : self.corpus.make_and_write_sparse_matrix_from_uci(self.actives, self.pathout['TableUc1'], self.pathout['listeuce1']) Rscript = self.printRscript() - self.doR(Rscript) - #self.lc = make_ucecl_from_R(self.pathout['uce']) - #self.lc0 = self.lc.pop(0) + self.doR(Rscript, dlg = self.dlg, message = 'CHD...') + self.corpus.make_ucecl_from_R(self.pathout['uce']) self.corpus.make_and_write_profile(self.actives, self.corpus.lc, self.pathout['Contout']) self.sup, lim = self.corpus.make_actives_nb(self.parametres['max_actives'], 2) @@ -152,7 +153,7 @@ class Alceste(AnalyseText) : self.clnb = len(self.corpus.lc) self.parametres['clnb'] = self.clnb Rscript = self.printRscript2() - self.doR(Rscript) + self.doR(Rscript, dlg = self.dlg, message = 'profils et A.F.C. ...') self.time = time() - self.t1 minutes, seconds = divmod(self.time, 60) hours, minutes = divmod(minutes, 60) @@ -167,11 +168,6 @@ class Alceste(AnalyseText) : self.dial.CenterOnParent() self.dialok = self.dial.ShowModal() if self.dialok == 5100 : - if self.dial.radio_1.GetSelection() == 0 : - lem = 1 - else : - lem = 0 - parametres['lem'] = lem parametres['classif_mode'] = self.dial.radio_box_2.GetSelection() parametres['tailleuc1'] = self.dial.spin_ctrl_1.GetValue() parametres['tailleuc2'] = self.dial.spin_ctrl_2.GetValue() @@ -180,18 +176,19 @@ class Alceste(AnalyseText) : parametres['nbcl_p1'] = self.dial.spin_nbcl.GetValue() parametres['max_actives'] = self.dial.spin_max_actives.GetValue() parametres['corpus'] = '' + parametres['svdmethod'] = self.dial.svdmethod[self.dial.choicesvd.GetSelection()] parametres['pathout'] = self.pathout.dirout - for val in parametres : - print val, parametres[val] + parametres['mode.patate'] = self.dial.check_patate.GetValue() DoConf(self.parent.ConfigPath['alceste']).makeoptions(['ALCESTE'], [parametres]) self.dial.Destroy() + print parametres return parametres else : self.dial.Destroy() return None def printRscript(self) : - RchdTxt(self.pathout, self.parent.RscriptsPath, self.parametres['mincl'], self.parametres['classif_mode'], nbt = self.parametres['nbcl_p1'] - 1, libsvdc = self.parent.pref.getboolean('iramuteq','libsvdc'), libsvdc_path = self.parent.pref.get('iramuteq','libsvdc_path'), R_max_mem = False) + RchdTxt(self.pathout, self.parent.RscriptsPath, self.parametres['mincl'], self.parametres['classif_mode'], nbt = self.parametres['nbcl_p1'] - 1, svdmethod = self.parametres['svdmethod'], libsvdc = self.parent.pref.getboolean('iramuteq','libsvdc'), libsvdc_path = self.parent.pref.get('iramuteq','libsvdc_path'), R_max_mem = False, mode_patate = self.parametres['mode.patate']) return self.pathout['Rchdtxt'] def printRscript2(self) : @@ -199,14 +196,11 @@ class Alceste(AnalyseText) : return self.pathout['RTxtProfGraph'] def print_graph_files(self) : - afc_graph_list = [[os.path.basename(self.pathout['AFC2DL_OUT']), u'Variables actives - coordonnées - facteurs 1 / 2'], - [os.path.basename(self.pathout['AFC2DSL_OUT']), u'variables supplémentaires - coordonnées - facteurs 1 / 2'], - [os.path.basename(self.pathout['AFC2DEL_OUT']), u'Variables illustratives - Coordonnées - facteur 1 / 2'], - [os.path.basename(self.pathout['AFC2DCL_OUT']), u'Classes - Coordonnées - facteur 1 / 2'], - [os.path.basename(self.pathout['AFC2DCoul']), u'Variables actives - Corrélation - facteur 1 / 2'], - [os.path.basename(self.pathout['AFC2DCoulSup']), u'Variables supplémentaires - Corrélation - facteur 1 / 2'], - [os.path.basename(self.pathout['AFC2DCoulEt']), u'Variables illustratives - Corrélations - facteur 1 / 2'], - [os.path.basename(self.pathout['AFC2DCoulCl']), u'Classes - Corrélations - facteurs 1 / 2'],] + mess_afc = u"La position des points n'est peut être pas exacte" + afc_graph_list = [[os.path.basename(self.pathout['AFC2DL_OUT']), u'Variables actives - coordonnées - 30 points par classes - facteurs 1 / 2 - %s' % mess_afc], + [os.path.basename(self.pathout['AFC2DSL_OUT']), u'variables supplémentaires - coordonnées - 30 points par classes - facteurs 1 / 2 - %s' % mess_afc], + [os.path.basename(self.pathout['AFC2DEL_OUT']), u'Variables illustratives - Coordonnées - 30 points par classes - facteur 1 / 2 - %s' % mess_afc], + [os.path.basename(self.pathout['AFC2DCL_OUT']), u'Classes - Coordonnées - facteur 1 / 2']] chd_graph_list = [[os.path.basename(self.pathout['dendro1']), u'dendrogramme à partir de chd1']] if self.parametres['classif_mode'] == 0 : chd_graph_list.append([os.path.basename(self.pathout['dendro2']), u'dendrogramme à partir de chd2']) @@ -249,8 +243,3 @@ keys = {'art_def' : 2, gramact = [k for k in keys if keys[k] == 1] gramsup = [k for k in keys if keys[k] == 2] - -#corpus = Corpus('', {'filename': '/home/pierre/workspace/iramuteq/dev/testcorpus.txt','formesdb':'formes.db', 'ucesdb': 'uces.db', 'corpusdb' : 'corpus.db', 'syscoding' : 'utf-8'}) -#corpus.read_corpus() -#corpus.parse_active(gramact, gramsup) -#Alceste(corpus).doanalyse()