X-Git-Url: http://iramuteq.org/git?p=iramuteq;a=blobdiff_plain;f=textaslexico.py;h=b9268dede84ee3544ad09892599674d04f44817f;hp=82311cd72d4af17958b4258fb7c36cf94035cb8f;hb=9b78e6210e7fc88a7e77d178c4090aabb23580d9;hpb=8fa853a25a9d62b1446e1bc543e5a3a4d0e03dcf diff --git a/textaslexico.py b/textaslexico.py index 82311cd..b9268de 100644 --- a/textaslexico.py +++ b/textaslexico.py @@ -1,22 +1,26 @@ # -*- coding: utf-8 -*- #Author: Pierre Ratinaud #Copyright (c) 2008-2011 Pierre Ratinaud -#Lisense: GNU/GPL +#License: GNU/GPL -from chemins import ConstructPathOut, StatTxtPathOut +from chemins import ConstructPathOut, StatTxtPathOut, PathOut, ffr #from corpus import Corpus from analysetxt import AnalyseText import wx import os -import sys -from listlex import * -from functions import exec_rcode, progressbar, check_Rresult, CreateIraFile, print_liste, treat_var_mod, write_tab, DoConf -from dialog import OptLexi, StatDialog #LexDialog -from openanalyse import OpenAnalyse +#import sys +#from listlex import * +from functions import exec_rcode, progressbar, check_Rresult, CreateIraFile, print_liste, treat_var_mod, write_tab, DoConf, TGen +from dialog import OptLexi#, StatDialog +#from openanalyse import OpenAnalyse import tempfile -from ConfigParser import RawConfigParser -from guifunct import getPage, getCorpus +#from ConfigParser import RawConfigParser +#from guifunct import getPage, getCorpus +from PrintRScript import TgenSpecScript from time import sleep +import logging + +log = logging.getLogger('iramuteq.spec') class Lexico(AnalyseText) : def doanalyse(self) : @@ -29,88 +33,6 @@ class Lexico(AnalyseText) : self.dlg.Destroy() except : pass -# def __init__(self, parent, cmd = False): -# self.parent = parent -# self.cmd = False -# self.ConfigPath = parent.ConfigPath -# self.DictPath = parent.DictPath -# self.KeyConf = RawConfigParser() -# self.KeyConf.read(self.ConfigPath['key']) -# -# page = getPage(self.parent) -# if page is not None : -# self.corpus = getCorpus(page) -# if self.corpus is not None : -# pathout = ConstructPathOut(self.corpus.parametre['openpath'], 'lexico') -# self.dictpathout = StatTxtPathOut(pathout) -# self.val = wx.ID_OK -# #print self.corpus.lems -# self.make_lexico() -# -# else : -# self.corpus = Corpus(parent) -# self.corpus.content = self.parent.content -# self.corpus.parametre['encodage'] = parent.corpus_encodage -# self.corpus.parametre['lang'] = parent.corpus_lang -# self.corpus.parametre['filename'] = parent.filename -# dial = StatDialog(self, self.parent) -# dial.check_uce.SetValue(True) -# dial.check_uce.Enable(False) -# dial.OnCheckUce(wx.EVT_MENU) -# self.val = dial.ShowModal() -## dial = LexDialog(self.parent) -## dial.CenterOnParent() -## res = dial.ShowModal() -# if self.val == wx.ID_OK : -# #if dial.m_radioBox2.GetSelection() == 0 : self.corpus.parametre['lem'] = True -# if dial.radio_lem.GetSelection() == 0 : self.corpus.parametre['lem'] = True -# else : self.corpus.parametre['lem'] = False -# #if dial.m_radioBox21.GetSelection() == 0 : self.corpus.parametre['expressions'] = True -# if dial.exp.GetSelection() == 0 : self.corpus.parametre['expressions'] = True -# else : self.corpus.parametre['expressions'] = False -# self.make_uce = dial.check_uce.GetValue() -# self.corpus.parametre['nbforme_uce'] = dial.spin_ctrl_4.GetValue() -# self.corpus.parametre['max_actives'] = dial.spin_max_actives.GetValue() -# self.corpus.parametre['eff_min_uce'] = self.corpus.parametre['nbforme_uce'] -# dial.Destroy() -# pathout = ConstructPathOut(self.corpus.parametre['filename'], 'lexico') -# self.dictpathout = StatTxtPathOut(pathout) -# self.make_corpus() -# #print self.corpus.ucis -# self.make_lexico() -# -# def make_corpus(self) : -# print 'make corpus' -# if not self.cmd : -# dlg = progressbar(self, maxi = 6) -# self.corpus.supplementaires = [option for option in self.KeyConf.options('KEYS') if self.KeyConf.get('KEYS', option) == "2"] -# self.corpus.typeactive = [option for option in self.KeyConf.options('KEYS') if self.KeyConf.get('KEYS', option) == "1"] -# ucis_txt, ucis_paras_txt = self.corpus.start_analyse(self.parent, dlg = dlg, cmd = self.cmd) -# del ucis_txt -# -# if not self.cmd : -# dlg.Update(5, '%i ucis - Construction des uces' % len(ucis_paras_txt)) -# self.corpus.make_ucis_paras_uces(ucis_paras_txt, make_uce = self.make_uce) -# del ucis_paras_txt -# -# if self.corpus.para_coords != [[] for val in self.corpus.para_coords] : -# self.corpus.parametre['para'] = True -# else : -# self.corpus.parametre['para'] = False -# self.corpus.make_etoiles(self.corpus.para_coords) -# -# print 'len(ucis_paras_uces)', len(self.corpus.ucis_paras_uces) -# -# if not self.cmd : -# dlg.Update(6, u'Dictionnaires') -# uces, orderuces = self.corpus.make_forms_and_uces() -# self.corpus.make_lems(self.parent.lexique) -## if not self.corpus.parametre['lem'] : -## formes = self.corpus.formes -## else : -## formes = self.corpus.make_lem_eff() -# if not self.cmd : -# dlg.Destroy() def DoR(self): nbligne = 5 @@ -118,13 +40,16 @@ class Lexico(AnalyseText) : txt = """ source("%s") source("%s") - """ % (self.parent.RscriptsPath['chdfunct'], self.parent.RscriptsPath['Rgraph']) + """ % (ffr(self.parent.RscriptsPath['chdfunct']), ffr(self.parent.RscriptsPath['Rgraph'])) txt += """ dmf<-read.csv2("%s",row.names=1) - """ % self.dictpathout['tableafcm'] + """ % ffr(self.dictpathout['tableafcm']) txt += """ dmt<-read.csv2("%s",row.names=1) - """ % self.dictpathout['tabletypem'] + """ % ffr(self.dictpathout['tabletypem']) + txt += """ + indice <- "%s" + """ % self.parametres['indice'] if self.parametres['indice'] == 'hypergeo' : txt += """ outf <- make.spec.hypergeo(dmf) @@ -135,21 +60,33 @@ class Lexico(AnalyseText) : outf<-AsLexico2(dmf) outt<-AsLexico2(dmt) """ - + txt += """ + if (indice == 'hypergeo') { + banseuil <- 2 + } else if (indice == 'chi2') { + banseuil <- 3 + } + banal <- apply(abs(outf[[1]]), 1, max) + banal <- which(banal < banseuil) + banalfreq <- rowSums(dmf[banal,]) + banalspec <- specf<-outf[[1]][banal,] + banal <- cbind(banalfreq, banalspec) + write.csv2(banal,file="%s") + """ % ffr(self.pathout['banalites.csv']) txt += """ specf<-outf[[1]] spect<-outt[[1]] write.csv2(specf,file="%s") - """ % self.dictpathout['tablespecf'] + """ % ffr(self.dictpathout['tablespecf']) txt += """ write.csv2(spect,file="%s") - """ % self.dictpathout['tablespect'] + """ % ffr(self.dictpathout['tablespect']) txt += """ write.csv2(outf[[3]],file="%s") - """ % self.dictpathout['eff_relatif_forme'] + """ % ffr(self.dictpathout['eff_relatif_forme']) txt += """ write.csv2(outt[[3]],file="%s") - """ % self.dictpathout['eff_relatif_type'] + """ % ffr(self.dictpathout['eff_relatif_type']) if self.parametres['clnb'] > 2 : txt += """ library(ca) @@ -196,19 +133,16 @@ class Lexico(AnalyseText) : debsup <- NULL debet <- NULL clnb <- ncol(specf) - """ % (self.dictpathout['afcf_row'], self.dictpathout['afcf_col'], self.dictpathout['afct_row'], self.dictpathout['afct_col'], self.dictpathout['afcf_facteur_csv'], self.dictpathout['afcf_col_csv'], self.dictpathout['afcf_row_csv'], self.dictpathout['afct_facteur_csv'], self.dictpathout['afct_col_csv'], self.dictpathout['afct_row_csv']) + """ % (ffr(self.dictpathout['afcf_row']), ffr(self.dictpathout['afcf_col']), ffr(self.dictpathout['afct_row']), ffr(self.dictpathout['afct_col']), ffr(self.dictpathout['afcf_facteur_csv']), ffr(self.dictpathout['afcf_col_csv']), ffr(self.dictpathout['afcf_row_csv']), ffr(self.dictpathout['afct_facteur_csv']), ffr(self.dictpathout['afct_col_csv']), ffr(self.dictpathout['afct_row_csv'])) txt += """ save.image("%s") - """ % self.dictpathout['RData'] + """ % ffr(self.dictpathout['RData']) tmpfile = tempfile.mktemp(dir=self.parent.TEMPDIR) tmpscript = open(tmpfile, 'w') tmpscript.write(txt) tmpscript.close() - pid = exec_rcode(self.parent.RPath, tmpfile, wait = False) - while pid.poll() == None : - sleep(0.2) - check_Rresult(self.parent, pid) + self.doR(tmpfile, dlg = self.dlg, message = 'R...') def preferences(self) : listet = self.corpus.make_etoiles() @@ -228,16 +162,14 @@ class Lexico(AnalyseText) : else : ListEt = variables[var[dial.list_box_1.GetSelections()[0]]] self.listet = ListEt + self.listet.sort() self.parametres['mineff'] = dial.spin.GetValue() if dial.choice_indice.GetSelection() == 0 : self.parametres['indice'] = 'hypergeo' else : self.parametres['indice'] = 'chi2' + self.parametres['typeformes'] = dial.typeformes.GetSelection() self.parametres['clnb'] = len(ListEt) - if dial.checklem.GetValue() : - self.parametres['lem'] = 1 - else : - self.parametres['lem'] = 0 dial.Destroy() return self.parametres else : @@ -245,50 +177,57 @@ class Lexico(AnalyseText) : return None def make_lexico(self) : -# listet = self.corpus.make_etoiles() -# listet.sort() -# variables = treat_var_mod(listet) -# var = [v for v in variables] -# if self.dlg : -# dial = OptLexi(self.parent) -# dial.listet = listet -# dial.variables = var -# for et in var : -# dial.list_box_1.Append(et) -# dial.CenterOnParent() -# val = dial.ShowModal() -# if val == wx.ID_OK : -# if dial.choice.GetSelection() == 1 : -# ListEt = [listet[i] for i in dial.list_box_1.GetSelections()] -# else : -# ListEt = variables[var[dial.list_box_1.GetSelections()[0]]] -# mineff = dial.spin.GetValue() -# if dial.choice_indice.GetSelection() == 0 : -# indice = 'hypergeo' -# else : -# indice = 'chi2' -# self.parametres = {'indice' : indice} -# dial.Destroy() -# else : -# dial.Destroy() -# else : mineff = self.parametres['mineff'] #dlg = progressbar(self, maxi = 3) - tabout = self.corpus.make_lexitable(mineff, self.listet) + tabout = self.corpus.make_lexitable(mineff, self.listet, gram = self.parametres['typeformes']) + #log.warning('Fmax a 200') + #Fmax = [line for line in tabout[1:] if sum(line[1:]) > 199] + #formesmax = [line[0] for line in Fmax + #Fmax = [line[1:] for line in Fmax] + #summax = [sum(col) for col in zip(*Fmax)] + #tabout.append(['Fmax'] + summax) + #tabout = [line for line in tabout if line[0] not in formesmax] + #log.warning('ATTENTION : hapax par etoile') + #tabout.append(['hapax'] + self.corpus.gethapaxbyet(self.listet)) write_tab(tabout, self.dictpathout['tableafcm']) + + #log.warning('ATTENTION : gethapaxuces') + #self.corpus.gethapaxuces() + tabout = self.corpus.make_efftype_from_etoiles(self.listet) write_tab(tabout, self.dictpathout['tabletypem']) - #dlg.Update(2, u'R...') + if self.dlg : + self.dlg.Update(2, u'R...') self.DoR() - #dlg.Update(3, u'Chargement...') + if self.dlg : + self.dlg.Update(3, u'Chargement...') afcf_graph_list = [[os.path.basename(self.dictpathout['afcf_row']), u'lignes'],\ [os.path.basename(self.dictpathout['afcf_col']), u'colonnes']] afct_graph_list = [[os.path.basename(self.dictpathout['afct_row']), u'lignes'],\ [os.path.basename(self.dictpathout['afct_col']), u'colonnes']] print_liste(self.dictpathout['liste_graph_afcf'],afcf_graph_list) print_liste(self.dictpathout['liste_graph_afct'],afct_graph_list) - #CreateIraFile(self.dictpathout, 0, corpname = os.path.basename(self.corpus.parametre['filename']), section = 'lexico') - DoConf().makeoptions(['spec'],[self.parametres], self.dictpathout['ira']) - #OpenAnalyse(self.parent, self.dictpathout['ira']) - #dolayout(self) - #dlg.Destroy() + #DoConf().makeoptions(['spec'],[self.parametres], self.dictpathout['ira']) + +class TgenSpec(AnalyseText): + def __init__(self, ira, corpus, parametres): + self.ira = ira + self.corpus = corpus + self.parametres = parametres + self.pathout = PathOut(dirout = self.parametres['pathout']) + self.doanalyse() + + def doanalyse(self): + self.tgen = TGen(path = self.parametres['tgenpath'], encoding = self.ira.syscoding) + self.tgen.read(self.tgen.path) + self.parametres['etoiles'].sort() + tgenocc, totocc = self.corpus.make_tgen_table(self.tgen, self.parametres['etoiles']) + self.parametres['tgeneff'] = os.path.join(self.parametres['pathout'], 'tgeneff.csv') + self.tgen.writetable(self.parametres['tgeneff'], tgenocc, totocc) + self.parametres['tgenspec'] = os.path.join(self.parametres['pathout'], 'tgenspec.csv') + self.Rscript = TgenSpecScript(self) + self.Rscript.make_script() + self.Rscript.write() + self.doR(self.Rscript.scriptout, dlg = False, message = 'R...') + + \ No newline at end of file