X-Git-Url: http://iramuteq.org/git?p=iramuteq;a=blobdiff_plain;f=corpus.py;h=69539d3b8c53e7dfbcf4a434b50913b9b2c6174f;hp=cbf8794292c9d7d93fb103c60f85200c5a659bd7;hb=4efa1d6083096de61aed08a9b64bae41f48e13aa;hpb=e84160b7f61eb5b05cc12339e44a61d67b499e15 diff --git a/corpus.py b/corpus.py index cbf8794..69539d3 100644 --- a/corpus.py +++ b/corpus.py @@ -17,6 +17,7 @@ from operator import itemgetter from uuid import uuid4 from chemins import PathOut from dialog import CorpusPref, SubTextFromMetaDial +from copy import copy from colors import colors import datetime @@ -33,6 +34,16 @@ def copycorpus(corpus) : copy_corpus.conn_all() return copy_corpus +def CopyUce(uce) : + return Uce(uce.ident, uce.para, uce.uci) + + +def CopyUci(uci): + nuci = Uci(uci.ident, '') + nuci.etoiles = copy(uci.etoiles) + nuci.uces = [CopyUce(uce) for uce in uci.uces] + return nuci + class Corpus : @@ -94,9 +105,7 @@ class Corpus : else : self.idformesuces[self.formes[word.forme].ident] = {stident: 1} else : - self.formes[word.forme] = word - self.formes[word.forme].ident = len(self.formes) - self.formes[word.forme].freq = 1 + self.formes[word.forme] = Word(word.forme, word.gram, len(self.formes), word.lem) self.idformesuces[self.formes[word.forme].ident] = {stident : 1} def conn_all(self): @@ -1300,11 +1309,12 @@ class BuildCorpus : class BuildSubCorpus(BuildCorpus): def __init__(self, corpus, parametres, dlg = None) : + print parametres log.info('begin subcorpus...') self.dlg = dlg self.ori = corpus self.infile = None - self.corpus = Corpus(self, corpus.parametres) + self.corpus = Corpus(self, {'type' : 'corpus', 'originalpath' : corpus.parametres['originalpath'], 'encoding' : corpus.parametres['encoding']}) self.last = 0 self.encoding = corpus.parametres['encoding'] self.corpus.parametres['corpus_name'] = parametres['corpus_name'] @@ -1315,7 +1325,7 @@ class BuildSubCorpus(BuildCorpus): self.corpus.parametres['uuid'] = str(uuid4()) if parametres.get('frommeta', False) : print 'make subtexts' - self.corpus.ucis = [uci for uci in self.ori.ucis if set(parametres['meta']).intersection(uci.etoiles) != set()] + self.corpus.ucis = [CopyUci(uci) for uci in self.ori.ucis if set(parametres['meta']).intersection(uci.etoiles) != set()] elif parametres.get('fromtheme', False) : print 'make subtexts from theme' idpara = 0 @@ -1325,17 +1335,45 @@ class BuildSubCorpus(BuildCorpus): newpara = [] for et in uci.paras : if et in parametres['meta'] : - newuce += [uce for uce in uci.uces if uce.para == idpara] + newuce += [CopyUce(uce) for uce in uci.uces if uce.para == idpara] newpara.append(et) idpara += 1 if newuce != [] : - uci.uces = newuce - uci.paras = newpara - self.corpus.ucis.append(uci) + nuci = CopyUci(uci) + nuci.uces = newuce + nuci.paras = newpara + self.corpus.ucis.append(nuci) else : idpara += 1 elif parametres.get('fromcluster', False) : pass + elif parametres.get('fromuceids', False) : + print 'fromuceids' + dictucekeep = dict(zip(parametres['uceids'], parametres['uceids'])) + idpara = 0 + for uci in self.ori.ucis : + if uci.paras == [] : + keepuces = [CopyUce(uce) for uce in uci.uces if uce.ident in dictucekeep] + if keepuces != [] : + nuci = CopyUci(uci) + nuci.uces = keepuces + self.corpus.ucis.append(nuci) + idpara += 1 + else : + newuces = [] + newpara = [] + for et in uci.paras : + keepuces = [CopyUce(uce) for uce in uci.uces if uce.ident in dictucekeep] + idpara += 1 + if keepuces != [] : + newuces += keepuces + newpara.append(et) + if newuces != [] : + nuci = CopyUci(uci) + nuci.uces = newuces + nuci.paras = newpara + self.corpus.ucis.append(nuci) + #create database self.connect() self.dobuild() @@ -1520,12 +1558,19 @@ class SubBuilder : self.ori = corpus self.dlg = dlg corpus_name = 'Sub' + corpus.parametres['corpus_name'] + if dlg is not None : + busy = wx.BusyInfo(_("Please wait...").decode('utf8'), self) + wx.SafeYield() parametres['corpus_name'] = corpus_name if parametres.get('frommeta', False) : parametres['meta'] = corpus.make_etoiles() elif parametres.get('fromtheme', False) : parametres['meta'] = corpus.make_themes() + else : + parametres['meta'] = [] parametres['meta'].sort() + if dlg is not None : + del busy dial = SubTextFromMetaDial(parent, parametres) self.res = dial.ShowModal() if self.res == 5100 :