X-Git-Url: http://iramuteq.org/git?p=iramuteq;a=blobdiff_plain;f=textsimi.py;h=ea1d05a28a9dba2f7a80f0e96c8cc631302066d5;hp=0bebb958d5f3e120e11b0b50bb35aea94b1372ff;hb=148fe710bf14981c45e865e8b4ddb68333e62f7c;hpb=ad8fe486b34f1cee918ea8564bf909e30cd25328 diff --git a/textsimi.py b/textsimi.py index 0bebb95..ea1d05a 100644 --- a/textsimi.py +++ b/textsimi.py @@ -1,23 +1,18 @@ # -*- coding: utf-8 -*- #Author: Pierre Ratinaud #Copyright (c) 2008-2013 Pierre Ratinaud -#Lisense: GNU/GPL +#License: GNU/GPL from chemins import ffr, simipath -#from corpus import Corpus import os from analysetxt import AnalyseText -#from ConfigParser import RawConfigParser -#from guifunct import getPage, getCorpus -from dialog import StatDialog -from guifunct import SelectColumn, PrepSimi -from functions import indices_simi, progressbar, treat_var_mod, read_list_file, print_liste -#from tableau import Tableau -#from tabsimi import DoSimi -from PrintRScript import PrintSimiScript +from guifunct import PrepSimi +from functions import indices_simi, progressbar, treat_var_mod, read_list_file, print_liste, DoConf, exec_rcode, check_Rresult +from PrintRScript import PrintSimiScript import wx from copy import copy - +from operator import itemgetter +import codecs import logging log = logging.getLogger('iramuteq.textsimi') @@ -27,23 +22,33 @@ class SimiTxt(AnalyseText): self.parametres['type'] = 'simitxt' self.pathout.basefiles(simipath) self.indices = indices_simi - self.makesimiparam() + if self.dlg : + self.makesimiparam() #FIXME self.actives = self.corpus.make_actives_limit(3) dictcol = dict([[i, [act, self.corpus.getlemeff(act)]] for i, act in enumerate(self.actives)]) continu = False if self.dlg : - #cont = SelectColumn(self.ira, dictcol, self.actives, self.pathout['selected.csv'], dlg = self.dlg) - #if cont.ok : self.listet = self.corpus.make_etoiles() self.listet.sort() self.stars = copy(self.listet) self.parametres['stars'] = copy(self.listet) self.parametres['sfromchi'] = False + self.dlg.Destroy() prep = PrepSimi(self.ira, self, self.parametres, self.pathout['selected.csv'], self.actives, indices_simi, wordlist=dictcol) if prep.val == wx.ID_OK : continu = True self.parametres = prep.parametres + self.dlg = progressbar(self.ira, 4) + else : + return False + else : + order_actives = [[i, act, self.corpus.getlemeff(act)] for i, act in enumerate(self.actives)] + order_actives = sorted(order_actives, key=itemgetter(2), reverse = True) + with open(self.pathout['selected.csv'], 'w') as f : + f.write('\n'.join([`order_actives[val][0]` for val in self.parametres['selected']])) + continu = True + if continu : self.makefiles() script = PrintSimiScript(self) @@ -52,31 +57,20 @@ class SimiTxt(AnalyseText): log.info('Problem') return False if self.parametres['type_graph'] == 1: + if self.parametres['svg'] : + filename, ext = os.path.splitext(script.filename) + fileout = filename + '.svg' + else : + fileout = script.filename if os.path.exists(self.pathout['liste_graph']): graph_simi = read_list_file(self.pathout['liste_graph']) - graph_simi.append([os.path.basename(script.filename), script.txtgraph]) + graph_simi.append([os.path.basename(fileout), script.txtgraph]) else : - graph_simi = [[os.path.basename(script.filename), script.txtgraph]] + graph_simi = [[os.path.basename(fileout), script.txtgraph]] print_liste(self.pathout['liste_graph'], graph_simi) else : return False -# def preferences(self) : -# dial = StatDialog(self, self.parent) -# dial.CenterOnParent() -# val = dial.ShowModal() -# if val == 5100 : -# if dial.radio_lem.GetSelection() == 0 : -# lem = 1 -# else : -# lem = 0 -# self.parametres['lem'] = lem -# dial.Destroy() -# return self.parametres -# else : -# dial.Destroy() -# return None - def makesimiparam(self) : self.paramsimi = {'coeff' : 0, 'layout' : 2, @@ -105,9 +99,13 @@ class SimiTxt(AnalyseText): 'height' : 1000, 'bystar' : False, 'first' : True, - 'keep_coord' : True, + 'keep_coord' : False, 'alpha' : 20, 'film': False, + 'svg' : 0, + 'com' : 0, + 'communities' : 0, + 'halo' : 0, #'ira' : self.pathout['Analyse.ira'] } self.parametres.update(self.paramsimi) @@ -122,11 +120,14 @@ class SimiTxt(AnalyseText): f.write('\n'.join(self.actives).encode(self.ira.syscoding)) class SimiFromCluster(SimiTxt) : - def __init__(self, ira, corpus, actives, numcluster, parametres = None, dlg = False) : + def __init__(self, ira, corpus, actives, lfreq, lchi, numcluster, parametres = None, dlg = False) : self.actives = actives self.numcluster = numcluster + self.lfreq = lfreq + self.lchi = lchi parametres['name'] = 'simi_classe_%i' % (numcluster + 1) - SimiTxt.__init__(self, ira, corpus, parametres, dlg, lemdial = False) + dlg.Destroy() + SimiTxt.__init__(self, ira, corpus, parametres, dlg=True, lemdial = False) def preferences(self) : return self.parametres @@ -135,17 +136,18 @@ class SimiFromCluster(SimiTxt) : self.parametres['type'] = 'clustersimitxt' self.pathout.basefiles(simipath) self.indices = indices_simi - self.makesimiparam() + if self.dlg : + self.makesimiparam() if 'bystar' in self.parametres : del self.parametres['bystar'] dictcol = dict([[i, [act, self.corpus.getlemclustereff(act, self.numcluster)]] for i, act in enumerate(self.actives)]) continu = True + #print self.dlg if self.dlg : - #self.listet = self.corpus.make_etoiles() - #self.listet.sort() + self.dlg.Destroy() self.stars = []#copy(self.listet) - self.parametres['stars'] = False#copy(self.listet) - self.parametres['sfromchi'] = True + self.parametres['stars'] = 0#copy(self.listet) + self.parametres['sfromchi'] = 1 prep = PrepSimi(self.ira, self, self.parametres, self.pathout['selected.csv'], self.actives, indices_simi, wordlist=dictcol) if prep.val == wx.ID_OK : continu = True @@ -153,17 +155,24 @@ class SimiFromCluster(SimiTxt) : else : continu = False if continu : + self.dlg = progressbar(self.parent, 3) self.makefiles() + self.parametres['type'] = 'clustersimitxt' script = PrintSimiScript(self) script.make_script() if not self.doR(script.scriptout, dlg = self.dlg, message = 'R ...') : return False if self.parametres['type_graph'] == 1: + if self.parametres['svg'] : + filename, ext = os.path.splitext(script.filename) + fileout = filename + '.svg' + else : + fileout = script.filename if os.path.exists(self.pathout['liste_graph']): graph_simi = read_list_file(self.pathout['liste_graph']) - graph_simi.append([os.path.basename(script.filename), script.txtgraph]) + graph_simi.append([os.path.basename(fileout), script.txtgraph]) else : - graph_simi = [[os.path.basename(script.filename), script.txtgraph]] + graph_simi = [[os.path.basename(fileout), script.txtgraph]] print_liste(self.pathout['liste_graph'], graph_simi) else : return False @@ -175,169 +184,11 @@ class SimiFromCluster(SimiTxt) : self.corpus.make_and_write_sparse_matrix_from_classe(self.actives, self.corpus.lc[self.numcluster], self.pathout['mat01.csv']) with open(self.pathout['actives.csv'], 'w') as f : f.write('\n'.join(self.actives).encode(self.ira.syscoding)) + with open(self.pathout['actives_nb.csv'], 'w') as f : + f.write('\n'.join([`val` for val in self.lfreq])) + with open(self.pathout['actives_chi.csv'], 'w') as f : + f.write('\n'.join([`val` for val in self.lchi])) + -# self.tableau = Tableau(self.parent, '') -# self.tableau.listactives = self.actives -# self.tableau.parametre['fromtxt'] = True -# self.corpus.lems_eff = dict([[lem,[self.corpus.lems[lem].freq]] for lem in self.actives]) -# #print('ATTENTION ETOILES') -# #self.paramsimi['bystar'] = True -# self.tableau.listet = copy(self.listet) -# #self.paramsimi['cexfromchi'] = True -# #self.paramsimi['vlabcolor'] = True -# self.tableau.actives = copy(self.corpus.lems_eff) -# DoSimi(self, fromprof = self.pathout['mat01.csv'], param = self.paramsimi, pathout = self.pathout.dirout) -#class SimiTxt : -# def __init__(self, parent, cmd = False, param = None): -# self.parent = parent -# self.cmd = cmd -# self.ConfigPath = parent.ConfigPath -# self.DictPath = parent.DictPath -# self.KeyConf = RawConfigParser() -# self.KeyConf.read(self.ConfigPath['key']) -# self.indices = indices_simi -# self.paramsimi = {'coeff' : 0, -# 'layout' : 2, -# 'type' : 1, -# 'arbremax' : 1, -# 'coeff_tv' : 1, -# 'coeff_tv_nb' : 0, -# 'tvprop' : 0, -# 'tvmin' : 5, -# 'tvmax' : 30, -# 'coeff_te' : 1, -# 'coeff_temin' : 1, -# 'coeff_temax' : 10, -# 'label_v': 1, -# 'label_e': 0, -# 'vcex' : 1, -# 'cexfromchi' : False, -# 'vcexmin' : 10, -# 'vcexmax' : 25, -# 'cex' : 10, -# 'seuil_ok' : 0, -# 'seuil' : 1, -# 'cols' : (255,0,0), -# 'cola' : (200,200,200), -# 'width' : 1000, -# 'height' : 1000, -# 'bystar' : False, -# 'first' : True, -# 'keep_coord' : True, -# 'alpha' : 20, -# 'film': False, -# } -# page = getPage(self.parent) -# if page is not None : -# self.corpus = getCorpus(page) -# if self.corpus is not None : -# self.pathout = ConstructPathOut(self.corpus.parametre['openpath'], 'simitxt') -# self.dictpathout = construct_simipath(self.pathout) -# self.val = wx.ID_OK -# self.make_table() -# self.make_simi() -# else : -# self.corpus = Corpus(parent) -# self.corpus.content = self.parent.content -# self.corpus.parametre['encodage'] = parent.corpus_encodage -# self.corpus.parametre['lang'] = parent.corpus_lang -# self.corpus.parametre['filename'] = parent.filename -# self.corpus.parametre['eff_min_uce'] = None -# self.pathout = ConstructPathOut(self.corpus.parametre['filename'], 'simitxt') -# self.dictpathout = construct_simipath(self.pathout) -# dial = StatDialog(self, self.parent) -# dial.check_uce.SetValue(True) -# dial.check_uce.Enable(False) -# dial.OnCheckUce(wx.EVT_MENU) -# self.val = dial.ShowModal() -# if self.val == wx.ID_OK : -# with open(self.parent.ConfigPath['key'], 'w') as f: -# self.KeyConf.write(f) -# if dial.radio_lem.GetSelection() == 0 : lem = True -# else : lem = False -# if dial.exp.GetSelection() == 0 : exp = True -# else : exp = False -# dial.Destroy() -# self.corpus.parametre['lem'] = lem -# self.corpus.parametre['expressions'] = exp -# self.corpus.parametre['nbforme_uce'] = dial.spin_ctrl_4.GetValue() -# self.corpus.parametre['eff_min_forme'] = dial.spin_ctrl_5.GetValue() -# self.corpus.parametre['max_actives'] = dial.spin_max_actives.GetValue() -# self.make_corpus() -# self.make_table() -# self.make_simi() -# else : -# dial.Destroy() -# -# def make_corpus(self) : -# print 'make corpus' -# if not self.cmd : -# dlg = progressbar(self, maxi = 6) -# self.corpus.supplementaires = [option for option in self.KeyConf.options('KEYS') if self.KeyConf.get('KEYS', option) == "2"] -# self.corpus.typeactive = [option for option in self.KeyConf.options('KEYS') if self.KeyConf.get('KEYS', option) == "1"] -# ucis_txt, ucis_paras_txt = self.corpus.start_analyse(self.parent, dlg = dlg, cmd = self.cmd) -# del ucis_txt -# -# if not self.cmd : -# dlg.Update(5, '%i ucis - Construction des uces' % len(ucis_paras_txt)) -# self.corpus.make_ucis_paras_uces(ucis_paras_txt, make_uce = True) -# del ucis_paras_txt -# -# if self.corpus.para_coords != [[] for val in self.corpus.para_coords] : -# self.corpus.parametre['para'] = True -# else : -# self.corpus.parametre['para'] = False -# self.corpus.make_etoiles(self.corpus.para_coords) -# print 'len(ucis_paras_uces)', len(self.corpus.ucis_paras_uces) -# -# if not self.cmd : -# dlg.Update(6, u'Dictionnaires') -# uces, self.orderuces = self.corpus.make_forms_and_uces() -# self.corpus.ucenb = len(uces) -# self.corpus.make_lems(self.parent.lexique) -# -# self.corpus.make_var_actives() -# self.corpus.make_var_supp() -# self.corpus.lems_eff = self.corpus.make_lem_eff() -# -# #variables = treat_var_mod(listet) -# #print(variables) -# #self.corpus.write_etoiles(self.dictpathout['etoiles']) -# if not self.cmd : -# dlg.Destroy() -# -# def make_table(self) : -# if 'orderuces' not in dir(self) : -# self.orderuces = [(i,j,k) for i, uci in enumerate(self.corpus.ucis_paras_uces) for j, para in enumerate(uci) for k, uce in enumerate(para)] -# self.orderuces = dict([[val, i] for i, val in enumerate(self.orderuces)]) -# self.corpus.ucenb = len(self.orderuces) -# #tabuc1 = self.corpus.make_table_with_uce(self.orderuces) -# #tabuc1.insert(0,self.corpus.actives) -# #tabuc1 = self.corpus.make_sparse_matrix_with_uce(self.orderuces) -# #self.corpus.write_sparse_matrix(self.dictpathout['mat01'], tabuc1, self.corpus.ucenb, len(self.corpus.actives)) -# if self.corpus.actives is None : -# self.corpus.typeactive = [option for option in self.KeyConf.options('KEYS') if self.KeyConf.get('KEYS', option) == "1"] -# self.corpus.min_eff_formes() -# self.corpus.make_var_actives() -# self.corpus.make_and_write_sparse_matrix_from_uce(self.orderuces, self.dictpathout['mat01']) -# #self.corpus.write_tab(tabuc1,self.dictpathout['mat01']) -# -# def make_simi(self) : -# self.tableau = Tableau(self.parent, '') -# self.tableau.listactives = self.corpus.actives -# self.tableau.parametre['fromtxt'] = True -# if 'lems_eff' not in dir(self.corpus) : -# self.corpus.lems_eff = self.corpus.make_lem_eff() -# #print('ATTENTION ETOILES') -# #self.paramsimi['bystar'] = True -# self.listet = self.corpus.get_unique_etoiles() -# self.listet.sort() -# self.tableau.listet = copy(self.listet) -# self.paramsimi['stars'] = copy(self.listet) -# #self.paramsimi['cexfromchi'] = True -# self.paramsimi['sfromchi'] = False -# #self.paramsimi['vlabcolor'] = True -# self.tableau.actives = dict([[lem, self.corpus.lems_eff[lem]] for lem in self.corpus.actives]) -# self.corpus.save_corpus(self.dictpathout['corpus']) -# DoSimi(self, fromprof = self.dictpathout['mat01'], param = self.paramsimi, pathout = self.pathout) + \ No newline at end of file