From b0333175cc68917ceb33589b0b354bf931fec245 Mon Sep 17 00:00:00 2001 From: Pierre Date: Thu, 30 Aug 2012 17:26:35 +0200 Subject: [PATCH] ... --- PrintRScript.py | 1 + analysetxt.py | 4 +- corpusNG.py | 65 +++++- dictionnaires/expression_fr.txt | 1 - functions.py | 15 ++ iracmd.py | 3 + iramuteq.py | 13 +- openanalyse.py | 13 +- tabsimi.py | 5 + textaslexico.py | 16 ++ textsimi.py | 446 ++++++++++++++++++++++++++++++---------- textstat.py | 4 +- tree.py | 37 ++-- 13 files changed, 484 insertions(+), 139 deletions(-) diff --git a/PrintRScript.py b/PrintRScript.py index 4d0fb40..6089847 100644 --- a/PrintRScript.py +++ b/PrintRScript.py @@ -17,6 +17,7 @@ class PrintRScript : log.info('Rscript') self.pathout = analyse.pathout self.analyse = analyse + self.parametres = analyse.parametres self.scriptout = self.pathout['temp'] self.script = u"#Script genere par IRaMuTeQ - %s" % datetime.now().ctime() diff --git a/analysetxt.py b/analysetxt.py index f5d9273..666a062 100644 --- a/analysetxt.py +++ b/analysetxt.py @@ -78,8 +78,8 @@ class AnalyseText : minutes, seconds = divmod(self.time, 60) hours, minutes = divmod(minutes, 60) self.parametres['time'] = '%.0fh %.0fm %.0fs' % (hours, minutes, seconds) - self.parametres['ira'] = self.pathout['ira'] - DoConf().makeoptions([self.parametres['type']], [self.parametres], self.pathout['ira']) + self.parametres['ira'] = self.pathout['Analyse.ira'] + DoConf().makeoptions([self.parametres['type']], [self.parametres], self.pathout['Analyse.ira']) self.ira.history.add(self.parametres) if dlg : dlg.Destroy() diff --git a/corpusNG.py b/corpusNG.py index f1c8451..eb55b08 100644 --- a/corpusNG.py +++ b/corpusNG.py @@ -54,6 +54,7 @@ class Corpus : self.idformesuces = {} self.iduces = None self.idformes = None + self.uceuci = None if read : self.pathout = PathOut(dirout = parametres['pathout']) self.read_corpus() @@ -221,6 +222,10 @@ class Corpus : # else : # self.lems[self.formes[forme].lem] = {self.formes[forme].ident : 0} + def getetbyuceid(self, uceid) : + if self.uceuci is None : self.uceuci = dict([[uce.ident,uci.ident] for uci in self.ucis for uce in uci.uces]) + return self.ucis[self.uceuci[uceid]].etoiles + def make_lems(self, lem = True) : log.info('make lems') self.lems = {} @@ -527,6 +532,64 @@ class Corpus : self.lc0 = self.lc.pop(0) #return ucecl + def gethapaxbyet(self, etoiles) : + hapaxuces = [self.getlemuces(forme)[0] for forme in self.lems if self.lems[forme].freq == 1] + hucesdict = {} + for uce in hapaxuces : + if uce in hucesdict : + hucesdict[uce] += 1 + else : + hucesdict[uce] = 1 + etuces = [[] for et in etoiles] + for uci in self.ucis : + get = list(set(uci.etoiles).intersection(etoiles)) + if len(get) > 1 : + return '2 variables sur la meme ligne' + elif get != [] : + etuces[etoiles.index(get[0])] += [uce.ident for uce in uci.uces] + etuces = [set(val) for val in etuces] + return [sum([hucesdict[uce] for uce in list(etuce.intersection(hapaxuces))]) for etuce in etuces] + + def gethapaxuces(self) : + hapaxuces = [self.getlemuces(forme)[0] for forme in self.lems if self.lems[forme].freq == 1] + hapax = [forme for forme in self.lems if self.lems[forme].freq == 1] + hucesdict = {} + for i,uce in enumerate(hapaxuces) : + if uce in hucesdict : + hucesdict[uce][0] += 1 + hucesdict[uce][1].append(hapax[i]) + else : + hucesdict[uce] = [1,[hapax[i]]] + huces = {} + for uce in hucesdict : + if hucesdict[uce][0] in huces : + huces[hucesdict[uce][0]].append(uce) + else : + huces[hucesdict[uce][0]] = [uce] + huces = zip(huces, huces.values()) + huces.sort(reverse=True) + txt = """ + + """ + for nb in huces[0:4] : + txt += "

%i hapax par uce

\n" % nb[0] + for uce in nb[1] : + res = self.getconcorde([uce]) + for row in res : + ucetxt = ' ' + row[1] + ' ' + uceid = row[0] + for hap in hucesdict[uce][1] : + laforme = self.getforme([forme for forme in self.lems[hap].formes][0]).forme + ucetxt = ucetxt.replace(' '+laforme+' ', ' '+laforme+' ') + txt += '

' + ' '.join(self.getetbyuceid(uceid)) + '

' + txt += '

'+ucetxt+'

\n' + txt += """ + + """ + with open('/tmp/testhapxuce.html','w') as f : + f.write(txt) + + class Uci : def __init__(self, iduci, line, paraset = None) : self.ident = iduci @@ -630,7 +693,7 @@ def prep_txtcharact(txt) : class BuildCorpus : """ - Class for building a corpora + Class for building a corpus """ def __init__(self, infile, parametres_corpus, lexique = None, expressions = None, dlg = None) : log.info('begin building corpus...') diff --git a/dictionnaires/expression_fr.txt b/dictionnaires/expression_fr.txt index ca3ce2d..ecbb0b7 100644 --- a/dictionnaires/expression_fr.txt +++ b/dictionnaires/expression_fr.txt @@ -3,7 +3,6 @@ a cappella a_cappella ADV 0.04 0.07 0.04 0.07 a contrario a_contrario ADV 0 0.27 0 0.27 a fortiori a_fortiori ADV 0.04 0.88 0.04 0.88 a giorno a_giorno ADV 0 0.27 0 0.27 -a jeun à_jeun ADV 1.45 3.85 0.18 0 a l'instar a_l_instar PRE 0.26 0 0.26 0 a posteriori a_posteriori ADV 0.05 0.2 0.01 0.14 ab absurdo ab_absurdo ADV 0 0.07 0 0.07 diff --git a/functions.py b/functions.py index 3922fd5..2165f33 100644 --- a/functions.py +++ b/functions.py @@ -71,6 +71,7 @@ class History : sections = self.corpora.keys() + self.analyses.keys() parametres = [self.corpora[key] for key in self.corpora.keys()] + [self.analyses[key] for key in self.analyses.keys()] self.conf.makeoptions(sections, parametres) + log.info('write history') def add(self, analyse) : if 'corpus' in analyse : @@ -87,6 +88,19 @@ class History : self.corpora[analyse['uuid']] = analyse self.write() + def delete(self, uuid, corpus = False) : + if corpus : + del self.corpora[uuid] + self.conf.conf.remove_section(uuid) + for analyse in self.history[uuid].get('analyses', [False]) : + if analyse : + del self.analyses[analyse['uuid']] + self.conf.conf.remove_section(analyse['uuid']) + else : + del self.analyses[uuid] + self.conf.conf.remove_section(uuid) + self.write() + def addtab(self, analyse) : self.opened[analyse['uuid']] = analyse @@ -133,6 +147,7 @@ class DoConf : self.conf.set(section, option, parametres[i][option].encode('utf8')) if outfile is None : outfile = self.configfile + print outfile with codecs.open(outfile, 'w', 'utf8') as f : self.conf.write(f) diff --git a/iracmd.py b/iracmd.py index deba7cf..19b0880 100644 --- a/iracmd.py +++ b/iracmd.py @@ -100,7 +100,10 @@ class CmdLine : corpus.conn_all() + corpus.make_lems() corpus.parse_active(gramact, gramsup) + log.warning('ATTENTION gethapaxuces') + corpus.gethapaxuces() # self.content = f.read() #self.content = self.content.replace('\r','') if options.type_analyse == 'alceste' : diff --git a/iramuteq.py b/iramuteq.py index 0abc480..db25305 100644 --- a/iramuteq.py +++ b/iramuteq.py @@ -844,14 +844,15 @@ Voulez-vous fermer quand même ?""" except: BugReport(self) - def OnSimiTxt(self, evt) : - # print 'PLUS DE BUG SUR SIMITXT' - try : - self.Text = SimiTxt(self) + def OnSimiTxt(self, evt, corpus = None) : + print 'PLUS DE BUG SUR SIMITXT' + #try : + #self.Text = SimiTxt(self) + self.Text = SimiTxt(self, corpus, parametres = {'type': 'simitxt'}, dlg = progressbar(self, 3)) if self.Text.val == wx.ID_OK : PlaySound(self) - except : - BugReport(self) + #except : + # BugReport(self) def OnWordCloud(self, evt) : # print 'PLUS DE BUG SUR WORDCLOUD' diff --git a/openanalyse.py b/openanalyse.py index da4a75d..1cf1a03 100644 --- a/openanalyse.py +++ b/openanalyse.py @@ -60,7 +60,18 @@ class OpenAnalyse(): elif self.conf['type'] == 'alceste' : self.parent.ShowMenu(_("Text analysis")) OpenCHDS(self.parent, corpus, self.conf, Alceste = True) - elif self.conf['type'] == 'simi' : + elif self.conf['type'] == 'simitxt' : + self.tableau = Tableau(self.parent, self.conf['ira']) + self.DictPathOut=construct_simipath(self.conf['pathout']) + self.tableau.dictpathout = self.DictPathOut + self.tableau.read_tableau(self.tableau.dictpathout['db']) + if self.tableau.parametre.get('corpus', False) : + self.corpus=corpus + #self.corpus.read_corpus_from_shelves(self.DictPathOut['corpus']) + self.corpus.parametres['openpath'] = self.conf['pathout'] + self.parent.ShowMenu(_("Text analysis")) + DoSimi(self.parent, self.conf, isopen = True, filename = self.conf['ira'], gparent = self, openfromprof=False) + print 'simi' # try : # #if self.conf['type'] in ['analyse','lexico','stat','wordcloud'] : diff --git a/tabsimi.py b/tabsimi.py index faf8631..53974aa 100644 --- a/tabsimi.py +++ b/tabsimi.py @@ -16,6 +16,8 @@ import tempfile import datetime from ConfigParser import RawConfigParser from time import sleep +from uuid import uuid4 + class DoSimi(): def __init__(self, parent, param = None, isopen = False, fromprof = False, pathout = False, filename ='', gparent = False, wordgraph = False, listactives = False, actives = False, cmd = False, openfromprof=False): @@ -226,6 +228,9 @@ class DoSimi(): if not 'simi' in conf.sections() : conf.add_section('simi') date = datetime.datetime.now().ctime() + if self.fromprof : + conf.set('simi', 'corpus', self.Source.corpus.parametres['uuid']) + conf.set('simi', 'uuid', str(uuid4())) conf.set('simi', 'date', str(date)) conf.set('simi', 'indice', self.paramsimi['coeff']) conf.set('simi','layout', self.paramsimi['layout']) diff --git a/textaslexico.py b/textaslexico.py index 82311cd..d78c8fd 100644 --- a/textaslexico.py +++ b/textaslexico.py @@ -17,6 +17,9 @@ import tempfile from ConfigParser import RawConfigParser from guifunct import getPage, getCorpus from time import sleep +import logging + +log = logging.getLogger('iramuteq.spec') class Lexico(AnalyseText) : def doanalyse(self) : @@ -275,7 +278,20 @@ class Lexico(AnalyseText) : mineff = self.parametres['mineff'] #dlg = progressbar(self, maxi = 3) tabout = self.corpus.make_lexitable(mineff, self.listet) + log.warning('Fmax a 200') + Fmax = [line for line in tabout[1:] if sum(line[1:]) > 199] + formesmax = [line[0] for line in Fmax] + Fmax = [line[1:] for line in Fmax] + summax = [sum(col) for col in zip(*Fmax)] + tabout.append(['Fmax'] + summax) + tabout = [line for line in tabout if line[0] not in formesmax] + log.warning('ATTENTION : hapax par etoile') + tabout.append(['hapax'] + self.corpus.gethapaxbyet(self.listet)) write_tab(tabout, self.dictpathout['tableafcm']) + + #log.warning('ATTENTION : gethapaxuces') + #self.corpus.gethapaxuces() + tabout = self.corpus.make_efftype_from_etoiles(self.listet) write_tab(tabout, self.dictpathout['tabletypem']) #dlg.Update(2, u'R...') diff --git a/textsimi.py b/textsimi.py index 1340276..efd3a67 100644 --- a/textsimi.py +++ b/textsimi.py @@ -6,24 +6,50 @@ from chemins import ConstructPathOut, construct_simipath from corpus import Corpus import os +from analysetxt import AnalyseText from ConfigParser import RawConfigParser from guifunct import getPage, getCorpus from dialog import StatDialog from functions import indices_simi, progressbar, treat_var_mod from tableau import Tableau from tabsimi import DoSimi +from PrintRScript import PrintRScript import wx from copy import copy -class SimiTxt : - def __init__(self, parent, cmd = False, param = None): - self.parent = parent - self.cmd = cmd - self.ConfigPath = parent.ConfigPath - self.DictPath = parent.DictPath - self.KeyConf = RawConfigParser() - self.KeyConf.read(self.ConfigPath['key']) +import logging + +logger = logging.getLogger('iramuteq.textsimi') + + + +class SimiTxt(AnalyseText): + def doanalyse(self) : self.indices = indices_simi + self.makesimiparam() + self.makefiles() + prep = PrepSimi(self.ira, self.parametres, indices_simi) + self.parametres = prep.parametres + script = PrintSimScript(self) + + + def preferences(self) : + dial = StatDialog(self, self.parent) + dial.CenterOnParent() + val = dial.ShowModal() + if val == 5100 : + if dial.radio_lem.GetSelection() == 0 : + lem = 1 + else : + lem = 0 + self.parametres['lem'] = lem + dial.Destroy() + return self.parametres + else : + dial.Destroy() + return None + + def makesimiparam(self) : self.paramsimi = {'coeff' : 0, 'layout' : 2, 'type' : 1, @@ -54,117 +80,313 @@ class SimiTxt : 'keep_coord' : True, 'alpha' : 20, 'film': False, + #'ira' : self.pathout['Analyse.ira'] } - page = getPage(self.parent) - if page is not None : - self.corpus = getCorpus(page) - if self.corpus is not None : - self.pathout = ConstructPathOut(self.corpus.parametre['openpath'], 'simitxt') - self.dictpathout = construct_simipath(self.pathout) - self.val = wx.ID_OK - self.make_table() - self.make_simi() + self.parametres.update(self.paramsimi) + + def makefiles(self) : + self.actives, lim = self.corpus.make_actives_nb(self.parametres.get('max_actives',1500), 1) + self.parametres['eff_min_forme'] = lim + self.parametres['nbactives'] = len(self.actives) + self.parametres['fromprof'] = True + self.corpus.make_and_write_sparse_matrix_from_uces(self.actives, self.pathout['mat01.csv'], self.pathout['listeuce1.csv']) + with open(self.pathout['actives.csv'], 'w') as f : + f.write('\n'.join(self.actives).encode(self.ira.syscoding)) + + self.listet = self.corpus.make_etoiles() + self.listet.sort() + self.parametres['stars'] = copy(self.listet) + self.parametres['sfromchi'] = False + +class PrepSimi : + def _init_(self, parent, parametres, indices_simi) : + self.parametres = parametres + self.dial = PrefSimi(parent, -1, self.parametres, indices_simi) + self.dial.CenterOnParent() + self.val = self.dial.ShowModal() + if self.val == wx.ID_OK : + self.make_param() + + def make_param(self) : + self.select = self.dial.check_colch.GetValue() + param = {'coeff' : self.dial.choice1.GetSelection(), + 'layout' : self.dial.choice2.GetSelection(), + 'type' : self.dial.choice3.GetSelection(), + 'arbremax' : self.dial.check1.GetValue(), + 'coeff_tv' : self.dial.check_s_size.GetValue(), + 'coeff_tv_nb' : self.dial.spin_tv.GetValue(), + 'tvprop' : self.dial.check2.GetValue(), + 'tvmin' : self.dial.spin_tvmin.GetValue(), + 'tvmax' : self.dial.spin_tvmax.GetValue(), + 'coeff_te' : self.dial.check3.GetValue(), + 'coeff_temin' : self.dial.spin_temin.GetValue(), + 'coeff_temax' : self.dial.spin_temax.GetValue(), + 'label_e' : self.dial.check_elab.GetValue(), + 'label_v' : self.dial.check_vlab.GetValue(), + 'vcex' : self.dial.check_vcex.GetValue(), + 'vcexmin' : self.dial.spin_vcexmin.GetValue(), + 'vcexmax' : self.dial.spin_vcexmax.GetValue(), + 'cex' : self.dial.spin_cex.GetValue(), + 'seuil_ok' : self.dial.check_seuil.GetValue(), + 'seuil' : self.dial.spin_seuil.GetValue(), + 'cols' : self.dial.cols.GetColour(), + 'cola' : self.dial.cola.GetColour(), + 'width' : self.dial.spin_width.GetValue(), + 'height' : self.dial.spin_height.GetValue(), + 'first' : False, + 'keep_coord' : keep_coord, + 'alpha' : self.dial.slider_sphere.GetValue(), + 'film' : self.dial.film.GetValue() + } + if 'cexfromchi' in self.parametres : + param['cexfromchi'] = self.dial.checkit.GetValue() + if 'sfromchi' in self.parametres : + param['sfromchi'] = self.dial.checki.GetValue() + if 'vlabcolor' in self.parametres : + param['vlabcolor'] = self.parametres['vlabcolor'] + if 'check_bystar' in dir(self.dial) : + param['bystar'] = self.dial.check_bystar.GetValue() + param['stars'] = self.parametres['stars'] + self.parametres.update(param) + +class PrintSimiScript(PrintRScript) : + def make_script(self) : + self.load(['igraph', 'proxy', 'Matrix']) + self.source([self.analyse.parent.RscriptsPath['simi'], self.analyse.parent.RscriptsPath['Rgraph']]) + txt = """ + dm.path <- "%s" + cn.path <- "%s" + selected.col <- "%s" + """ % (self.pathout['mat01.csv'], self.pathout['actives.csv'], self.pathout['selected.csv']) + + txt += """ + dm <- dm[, selected.col+1] + """ + if self.parametres['coeff'] == 0 : + method = 'cooc' + txt += """ + method <- 'cooc' + mat <- make.a(dm) + """ else : - self.corpus = Corpus(parent) - self.corpus.content = self.parent.content - self.corpus.parametre['encodage'] = parent.corpus_encodage - self.corpus.parametre['lang'] = parent.corpus_lang - self.corpus.parametre['filename'] = parent.filename - self.corpus.parametre['eff_min_uce'] = None - self.pathout = ConstructPathOut(self.corpus.parametre['filename'], 'simitxt') - self.dictpathout = construct_simipath(self.pathout) - dial = StatDialog(self, self.parent) - dial.check_uce.SetValue(True) - dial.check_uce.Enable(False) - dial.OnCheckUce(wx.EVT_MENU) - self.val = dial.ShowModal() - if self.val == wx.ID_OK : - with open(self.parent.ConfigPath['key'], 'w') as f: - self.KeyConf.write(f) - if dial.radio_lem.GetSelection() == 0 : lem = True - else : lem = False - if dial.exp.GetSelection() == 0 : exp = True - else : exp = False - dial.Destroy() - self.corpus.parametre['lem'] = lem - self.corpus.parametre['expressions'] = exp - self.corpus.parametre['nbforme_uce'] = dial.spin_ctrl_4.GetValue() - self.corpus.parametre['eff_min_forme'] = dial.spin_ctrl_5.GetValue() - self.corpus.parametre['max_actives'] = dial.spin_max_actives.GetValue() - self.make_corpus() - self.make_table() - self.make_simi() - else : - dial.Destroy() - - def make_corpus(self) : - print 'make corpus' - if not self.cmd : - dlg = progressbar(self, maxi = 6) - self.corpus.supplementaires = [option for option in self.KeyConf.options('KEYS') if self.KeyConf.get('KEYS', option) == "2"] - self.corpus.typeactive = [option for option in self.KeyConf.options('KEYS') if self.KeyConf.get('KEYS', option) == "1"] - ucis_txt, ucis_paras_txt = self.corpus.start_analyse(self.parent, dlg = dlg, cmd = self.cmd) - del ucis_txt - - if not self.cmd : - dlg.Update(5, '%i ucis - Construction des uces' % len(ucis_paras_txt)) - self.corpus.make_ucis_paras_uces(ucis_paras_txt, make_uce = True) - del ucis_paras_txt - - if self.corpus.para_coords != [[] for val in self.corpus.para_coords] : - self.corpus.parametre['para'] = True + txt += """ + dm <- as.matrix(dm) + """ + if self.parametres['coeff'] == 1 : + method = 'prcooc' + txt += """ + method <- 'Russel' + mat <- simil(dm, method = 'Russel', diag = TRUE, upper = TRUE, by_rows = FALSE) + """ + elif self.analyses.indices[self.parametres['coeff']] == 'binomial' : + method = 'binomial' + txt += """ + method <- 'binomial' + mat <- binom.sim(dm) + """ else : - self.corpus.parametre['para'] = False - self.corpus.make_etoiles(self.corpus.para_coords) - print 'len(ucis_paras_uces)', len(self.corpus.ucis_paras_uces) - - if not self.cmd : - dlg.Update(6, u'Dictionnaires') - uces, self.orderuces = self.corpus.make_forms_and_uces() - self.corpus.ucenb = len(uces) - self.corpus.make_lems(self.parent.lexique) - - self.corpus.make_var_actives() - self.corpus.make_var_supp() - self.corpus.lems_eff = self.corpus.make_lem_eff() - - #variables = treat_var_mod(listet) - #print(variables) - #self.corpus.write_etoiles(self.dictpathout['etoiles']) - if not self.cmd : - dlg.Destroy() - - def make_table(self) : - if 'orderuces' not in dir(self) : - self.orderuces = [(i,j,k) for i, uci in enumerate(self.corpus.ucis_paras_uces) for j, para in enumerate(uci) for k, uce in enumerate(para)] - self.orderuces = dict([[val, i] for i, val in enumerate(self.orderuces)]) - self.corpus.ucenb = len(self.orderuces) - #tabuc1 = self.corpus.make_table_with_uce(self.orderuces) - #tabuc1.insert(0,self.corpus.actives) - #tabuc1 = self.corpus.make_sparse_matrix_with_uce(self.orderuces) - #self.corpus.write_sparse_matrix(self.dictpathout['mat01'], tabuc1, self.corpus.ucenb, len(self.corpus.actives)) - if self.corpus.actives is None : - self.corpus.typeactive = [option for option in self.KeyConf.options('KEYS') if self.KeyConf.get('KEYS', option) == "1"] - self.corpus.min_eff_formes() - self.corpus.make_var_actives() - self.corpus.make_and_write_sparse_matrix_from_uce(self.orderuces, self.dictpathout['mat01']) - #self.corpus.write_tab(tabuc1,self.dictpathout['mat01']) - - def make_simi(self) : + method = self.types[self.paramsimi['coeff']] + txt += """ + method <-"%s" + mat <- simil(dm, method = method, diag = TRUE, upper = TRUE, by_rows = FALSE) + """ % self.analyse.indices[self.parametres['coeff']] + txt += """ + mat <- as.matrix(stats::as.dist(mat,diag=TRUE,upper=TRUE)) + mat[is.na(mat)] <- 0 + mat[is.infinite(mat)] <- 0 + """ + if self.parametres['layout'] == 0 : layout = 'random' + if self.parametres['layout'] == 1 : layout = 'circle' + if self.parametres['layout'] == 2 : layout = 'frutch' + if self.parametres['layout'] == 3 : layout = 'kawa' + if self.parametres['layout'] == 4 : layout = 'graphopt' + + txt += """ + eff <- colSums(dm) + g.ori <- graph.adjacency(mat, mode='lower', weighted = TRUE) + w.ori <- E(g.ori)$weight + if (max.tree) { + if (method == 'cooc') { + E(g.ori)$weight <- 1 / w.ori + } else { + E(g.ori)$weigth <- 1 - w.ori + } + g.max <- minimum.spanning.tree(g.ori) + if (method == 'cooc') { + E(g.max)$weight <- 1 / E(g.max)$weight + } else { + E(g.max)$weight <- 1 - E(g.max)$weight + } + g.toplot <- g.max + } else { + g.toplot <- g.ori + } + """ + + + self.tableau = Tableau(self.parent, '') - self.tableau.listactives = self.corpus.actives + self.tableau.listactives = self.actives self.tableau.parametre['fromtxt'] = True - if 'lems_eff' not in dir(self.corpus) : - self.corpus.lems_eff = self.corpus.make_lem_eff() + self.corpus.lems_eff = dict([[lem,[self.corpus.lems[lem].freq]] for lem in self.actives]) #print('ATTENTION ETOILES') #self.paramsimi['bystar'] = True - self.listet = self.corpus.get_unique_etoiles() - self.listet.sort() self.tableau.listet = copy(self.listet) - self.paramsimi['stars'] = copy(self.listet) #self.paramsimi['cexfromchi'] = True - self.paramsimi['sfromchi'] = False #self.paramsimi['vlabcolor'] = True - self.tableau.actives = dict([[lem, self.corpus.lems_eff[lem]] for lem in self.corpus.actives]) - self.corpus.save_corpus(self.dictpathout['corpus']) - DoSimi(self, fromprof = self.dictpathout['mat01'], param = self.paramsimi, pathout = self.pathout) + self.tableau.actives = copy(self.corpus.lems_eff) + DoSimi(self, fromprof = self.pathout['mat01.csv'], param = self.paramsimi, pathout = self.pathout.dirout) + +#class SimiTxt : +# def __init__(self, parent, cmd = False, param = None): +# self.parent = parent +# self.cmd = cmd +# self.ConfigPath = parent.ConfigPath +# self.DictPath = parent.DictPath +# self.KeyConf = RawConfigParser() +# self.KeyConf.read(self.ConfigPath['key']) +# self.indices = indices_simi +# self.paramsimi = {'coeff' : 0, +# 'layout' : 2, +# 'type' : 1, +# 'arbremax' : 1, +# 'coeff_tv' : 1, +# 'coeff_tv_nb' : 0, +# 'tvprop' : 0, +# 'tvmin' : 5, +# 'tvmax' : 30, +# 'coeff_te' : 1, +# 'coeff_temin' : 1, +# 'coeff_temax' : 10, +# 'label_v': 1, +# 'label_e': 0, +# 'vcex' : 1, +# 'cexfromchi' : False, +# 'vcexmin' : 10, +# 'vcexmax' : 25, +# 'cex' : 10, +# 'seuil_ok' : 0, +# 'seuil' : 1, +# 'cols' : (255,0,0), +# 'cola' : (200,200,200), +# 'width' : 1000, +# 'height' : 1000, +# 'bystar' : False, +# 'first' : True, +# 'keep_coord' : True, +# 'alpha' : 20, +# 'film': False, +# } +# page = getPage(self.parent) +# if page is not None : +# self.corpus = getCorpus(page) +# if self.corpus is not None : +# self.pathout = ConstructPathOut(self.corpus.parametre['openpath'], 'simitxt') +# self.dictpathout = construct_simipath(self.pathout) +# self.val = wx.ID_OK +# self.make_table() +# self.make_simi() +# else : +# self.corpus = Corpus(parent) +# self.corpus.content = self.parent.content +# self.corpus.parametre['encodage'] = parent.corpus_encodage +# self.corpus.parametre['lang'] = parent.corpus_lang +# self.corpus.parametre['filename'] = parent.filename +# self.corpus.parametre['eff_min_uce'] = None +# self.pathout = ConstructPathOut(self.corpus.parametre['filename'], 'simitxt') +# self.dictpathout = construct_simipath(self.pathout) +# dial = StatDialog(self, self.parent) +# dial.check_uce.SetValue(True) +# dial.check_uce.Enable(False) +# dial.OnCheckUce(wx.EVT_MENU) +# self.val = dial.ShowModal() +# if self.val == wx.ID_OK : +# with open(self.parent.ConfigPath['key'], 'w') as f: +# self.KeyConf.write(f) +# if dial.radio_lem.GetSelection() == 0 : lem = True +# else : lem = False +# if dial.exp.GetSelection() == 0 : exp = True +# else : exp = False +# dial.Destroy() +# self.corpus.parametre['lem'] = lem +# self.corpus.parametre['expressions'] = exp +# self.corpus.parametre['nbforme_uce'] = dial.spin_ctrl_4.GetValue() +# self.corpus.parametre['eff_min_forme'] = dial.spin_ctrl_5.GetValue() +# self.corpus.parametre['max_actives'] = dial.spin_max_actives.GetValue() +# self.make_corpus() +# self.make_table() +# self.make_simi() +# else : +# dial.Destroy() +# +# def make_corpus(self) : +# print 'make corpus' +# if not self.cmd : +# dlg = progressbar(self, maxi = 6) +# self.corpus.supplementaires = [option for option in self.KeyConf.options('KEYS') if self.KeyConf.get('KEYS', option) == "2"] +# self.corpus.typeactive = [option for option in self.KeyConf.options('KEYS') if self.KeyConf.get('KEYS', option) == "1"] +# ucis_txt, ucis_paras_txt = self.corpus.start_analyse(self.parent, dlg = dlg, cmd = self.cmd) +# del ucis_txt +# +# if not self.cmd : +# dlg.Update(5, '%i ucis - Construction des uces' % len(ucis_paras_txt)) +# self.corpus.make_ucis_paras_uces(ucis_paras_txt, make_uce = True) +# del ucis_paras_txt +# +# if self.corpus.para_coords != [[] for val in self.corpus.para_coords] : +# self.corpus.parametre['para'] = True +# else : +# self.corpus.parametre['para'] = False +# self.corpus.make_etoiles(self.corpus.para_coords) +# print 'len(ucis_paras_uces)', len(self.corpus.ucis_paras_uces) +# +# if not self.cmd : +# dlg.Update(6, u'Dictionnaires') +# uces, self.orderuces = self.corpus.make_forms_and_uces() +# self.corpus.ucenb = len(uces) +# self.corpus.make_lems(self.parent.lexique) +# +# self.corpus.make_var_actives() +# self.corpus.make_var_supp() +# self.corpus.lems_eff = self.corpus.make_lem_eff() +# +# #variables = treat_var_mod(listet) +# #print(variables) +# #self.corpus.write_etoiles(self.dictpathout['etoiles']) +# if not self.cmd : +# dlg.Destroy() +# +# def make_table(self) : +# if 'orderuces' not in dir(self) : +# self.orderuces = [(i,j,k) for i, uci in enumerate(self.corpus.ucis_paras_uces) for j, para in enumerate(uci) for k, uce in enumerate(para)] +# self.orderuces = dict([[val, i] for i, val in enumerate(self.orderuces)]) +# self.corpus.ucenb = len(self.orderuces) +# #tabuc1 = self.corpus.make_table_with_uce(self.orderuces) +# #tabuc1.insert(0,self.corpus.actives) +# #tabuc1 = self.corpus.make_sparse_matrix_with_uce(self.orderuces) +# #self.corpus.write_sparse_matrix(self.dictpathout['mat01'], tabuc1, self.corpus.ucenb, len(self.corpus.actives)) +# if self.corpus.actives is None : +# self.corpus.typeactive = [option for option in self.KeyConf.options('KEYS') if self.KeyConf.get('KEYS', option) == "1"] +# self.corpus.min_eff_formes() +# self.corpus.make_var_actives() +# self.corpus.make_and_write_sparse_matrix_from_uce(self.orderuces, self.dictpathout['mat01']) +# #self.corpus.write_tab(tabuc1,self.dictpathout['mat01']) +# +# def make_simi(self) : +# self.tableau = Tableau(self.parent, '') +# self.tableau.listactives = self.corpus.actives +# self.tableau.parametre['fromtxt'] = True +# if 'lems_eff' not in dir(self.corpus) : +# self.corpus.lems_eff = self.corpus.make_lem_eff() +# #print('ATTENTION ETOILES') +# #self.paramsimi['bystar'] = True +# self.listet = self.corpus.get_unique_etoiles() +# self.listet.sort() +# self.tableau.listet = copy(self.listet) +# self.paramsimi['stars'] = copy(self.listet) +# #self.paramsimi['cexfromchi'] = True +# self.paramsimi['sfromchi'] = False +# #self.paramsimi['vlabcolor'] = True +# self.tableau.actives = dict([[lem, self.corpus.lems_eff[lem]] for lem in self.corpus.actives]) +# self.corpus.save_corpus(self.dictpathout['corpus']) +# DoSimi(self, fromprof = self.dictpathout['mat01'], param = self.paramsimi, pathout = self.pathout) diff --git a/textstat.py b/textstat.py index 4d67879..bf28af1 100644 --- a/textstat.py +++ b/textstat.py @@ -1,7 +1,7 @@ #!/bin/env python # -*- coding: utf-8 -*- #Author: Pierre Ratinaud -#Copyright (c) 2008-2009 Pierre Ratinaud +#Copyright (c) 2008-2012 Pierre Ratinaud #Lisense: GNU/GPL #from chemins import ConstructPathOut, StatTxtPathOut, ffr @@ -67,6 +67,8 @@ class Stat(AnalyseText) : supp = [[forme, formes[forme].freq, formes[forme].gram] for forme in formes if formes[forme].act == 2] supp = sortedby(supp, 2, 1) + #print self.corpus.gethapaxbyuci() + supp = [[i, val] for i, val in enumerate(supp)] #self.corpus.pathout = self.dictpathout #self.corpus.make_type_tot() diff --git a/tree.py b/tree.py index 41769f5..aac691a 100644 --- a/tree.py +++ b/tree.py @@ -50,10 +50,6 @@ class LeftTree(CT.CustomTreeCtrl): self.count = 0 self.log = log - # NOTE: For some reason tree items have to have a data object in - # order to be sorted. Since our compare just uses the labels - # we don't need any real data, so we'll just use None below for - # the item data. self.history = parent.history self.h = self.history.history self.root = self.AddRoot("Iramuteq") @@ -319,15 +315,17 @@ class LeftTree(CT.CustomTreeCtrl): alceste = classification.Append(wx.ID_ANY, u"Méthode ALCESTE") pam = classification.Append(wx.ID_ANY, u"Par matrice des distances") menu.AppendMenu(-1, u"Classification", classification) + simi = menu.Append(wx.ID_ANY, u"Analyse de similitude") menu.AppendSeparator() self.Bind(wx.EVT_MENU, self.OnAlceste, alceste) self.Bind(wx.EVT_MENU, self.OnPam, pam) self.Bind(wx.EVT_MENU, self.OnStat, stat) self.Bind(wx.EVT_MENU, self.OnSpec, spec) + self.Bind(wx.EVT_MENU, self.OnSimiTxt, simi) - item10 = menu.Append(wx.ID_ANY, "Supprimer de l'historique") + itemdelete = menu.Append(wx.ID_ANY, "Supprimer de l'historique") if item == self.GetRootItem(): - item10.Enable(False) + itemdelete.Enable(False) #item11 = menu.Append(wx.ID_ANY, "Prepend An Item") #item12 = menu.Append(wx.ID_ANY, "Append An Item") @@ -340,7 +338,7 @@ class LeftTree(CT.CustomTreeCtrl): #self.Bind(wx.EVT_MENU, self.OnDisableItem, item7) #self.Bind(wx.EVT_MENU, self.OnItemIcons, item8) self.Bind(wx.EVT_MENU, self.OnItemInfo, info) - #self.Bind(wx.EVT_MENU, self.OnItemDelete, item10) + self.Bind(wx.EVT_MENU, self.OnItemDelete, itemdelete) #self.Bind(wx.EVT_MENU, self.OnItemPrepend, item11) #self.Bind(wx.EVT_MENU, self.OnItemAppend, item12) @@ -365,6 +363,9 @@ class LeftTree(CT.CustomTreeCtrl): def OnPam(self, evt) : print 'rien' + def OnSimiTxt(self, evt) : + self.parent.OnSimiTxt(evt, self.getcorpus(self.itemdict)) + def OnItemBackground(self, event): colourdata = wx.ColourData() @@ -495,7 +496,12 @@ class LeftTree(CT.CustomTreeCtrl): return dlg.Destroy() - + + pydata = self.itemdict['pydata'] + if 'corpus_name' in pydata : + self.history.delete(pydata['uuid'], True) + else : + self.history.delete(pydata['uuid']) self.DeleteChildren(self.current) self.Delete(self.current) self.current = None @@ -647,13 +653,14 @@ class LeftTree(CT.CustomTreeCtrl): def OnSelChanged(self, event): item = event.GetItem() pydata = self.GetPyData(item) - if pydata['uuid'] in self.parent.history.opened : - for i in range(self.parent.nb.GetPageCount()) : - page = self.parent.nb.GetPage(i) - if 'parametres' in dir(page) : - if page.parametres['uuid'] == pydata['uuid'] : - self.parent.nb.SetSelection(i) - break + if pydata is not None : + if pydata['uuid'] in self.parent.history.opened : + for i in range(self.parent.nb.GetPageCount()) : + page = self.parent.nb.GetPage(i) + if 'parametres' in dir(page) : + if page.parametres['uuid'] == pydata['uuid'] : + self.parent.nb.SetSelection(i) + break #self.item = event.GetItem() #if self.item: -- 2.7.4