X-Git-Url: http://iramuteq.org/git?p=iramuteq;a=blobdiff_plain;f=textaslexico.py;h=35007b2f50b2be433b5320ce45195ef99af13a77;hp=de0f1819eb473de99bc0ec82ef0949341a1d2ac0;hb=f1c8bb143f403d37ccad639907bb519fbdd43451;hpb=12b4b71c8c8feb6154abc04dfa9bb93a521ef789 diff --git a/textaslexico.py b/textaslexico.py index de0f181..35007b2 100644 --- a/textaslexico.py +++ b/textaslexico.py @@ -1,21 +1,22 @@ # -*- coding: utf-8 -*- #Author: Pierre Ratinaud #Copyright (c) 2008-2011 Pierre Ratinaud -#Lisense: GNU/GPL +#License: GNU/GPL -from chemins import ConstructPathOut, StatTxtPathOut +from chemins import ConstructPathOut, StatTxtPathOut, PathOut #from corpus import Corpus from analysetxt import AnalyseText import wx import os -import sys -from listlex import * -from functions import exec_rcode, progressbar, check_Rresult, CreateIraFile, print_liste, treat_var_mod, write_tab, DoConf -from dialog import OptLexi, StatDialog #LexDialog -from openanalyse import OpenAnalyse +#import sys +#from listlex import * +from functions import exec_rcode, progressbar, check_Rresult, CreateIraFile, print_liste, treat_var_mod, write_tab, DoConf, TGen +from dialog import OptLexi#, StatDialog +#from openanalyse import OpenAnalyse import tempfile -from ConfigParser import RawConfigParser -from guifunct import getPage, getCorpus +#from ConfigParser import RawConfigParser +#from guifunct import getPage, getCorpus +from PrintRScript import TgenSpecScript from time import sleep import logging @@ -46,6 +47,9 @@ class Lexico(AnalyseText) : txt += """ dmt<-read.csv2("%s",row.names=1) """ % self.dictpathout['tabletypem'] + txt += """ + indice <- "%s" + """ % self.parametres['indice'] if self.parametres['indice'] == 'hypergeo' : txt += """ outf <- make.spec.hypergeo(dmf) @@ -56,7 +60,19 @@ class Lexico(AnalyseText) : outf<-AsLexico2(dmf) outt<-AsLexico2(dmt) """ - + txt += """ + if (indice == 'hypergeo') { + banseuil <- 2 + } else if (indice == 'chi2') { + banseuil <- 3 + } + banal <- apply(abs(outf[[1]]), 1, max) + banal <- which(banal < banseuil) + banalfreq <- rowSums(dmf[banal,]) + banalspec <- specf<-outf[[1]][banal,] + banal <- cbind(banalfreq, banalspec) + write.csv2(banal,file="%s") + """ % self.pathout['banalites.csv'] txt += """ specf<-outf[[1]] spect<-outt[[1]] @@ -151,6 +167,7 @@ class Lexico(AnalyseText) : self.parametres['indice'] = 'hypergeo' else : self.parametres['indice'] = 'chi2' + self.parametres['typeformes'] = dial.typeformes.GetSelection() self.parametres['clnb'] = len(ListEt) dial.Destroy() return self.parametres @@ -161,10 +178,10 @@ class Lexico(AnalyseText) : def make_lexico(self) : mineff = self.parametres['mineff'] #dlg = progressbar(self, maxi = 3) - tabout = self.corpus.make_lexitable(mineff, self.listet) + tabout = self.corpus.make_lexitable(mineff, self.listet, gram = self.parametres['typeformes']) #log.warning('Fmax a 200') #Fmax = [line for line in tabout[1:] if sum(line[1:]) > 199] - #formesmax = [line[0] for line in Fmax] + #formesmax = [line[0] for line in Fmax #Fmax = [line[1:] for line in Fmax] #summax = [sum(col) for col in zip(*Fmax)] #tabout.append(['Fmax'] + summax) @@ -189,4 +206,26 @@ class Lexico(AnalyseText) : [os.path.basename(self.dictpathout['afct_col']), u'colonnes']] print_liste(self.dictpathout['liste_graph_afcf'],afcf_graph_list) print_liste(self.dictpathout['liste_graph_afct'],afct_graph_list) - DoConf().makeoptions(['spec'],[self.parametres], self.dictpathout['ira']) + #DoConf().makeoptions(['spec'],[self.parametres], self.dictpathout['ira']) + +class TgenSpec(AnalyseText): + def __init__(self, ira, corpus, parametres): + self.ira = ira + self.corpus = corpus + self.parametres = parametres + self.pathout = PathOut(dirout = self.parametres['pathout']) + self.doanalyse() + + def doanalyse(self): + self.tgen = TGen(path = self.parametres['tgenpath'], encoding = self.ira.syscoding) + self.tgen.read(self.tgen.path) + tgenocc, totocc = self.corpus.make_tgen_table(self.tgen, self.parametres['etoiles']) + self.parametres['tgeneff'] = os.path.join(self.parametres['pathout'], 'tgeneff.csv') + self.tgen.writetable(self.parametres['tgeneff'], tgenocc, totocc) + self.parametres['tgenspec'] = os.path.join(self.parametres['pathout'], 'tgenspec.csv') + self.Rscript = TgenSpecScript(self) + self.Rscript.make_script() + self.Rscript.write() + self.doR(self.Rscript.scriptout, dlg = False, message = 'R...') + + \ No newline at end of file