X-Git-Url: http://iramuteq.org/git?p=iramuteq;a=blobdiff_plain;f=corpus.py;h=e34233adcdcf493d21cd65f83471e0ac7aa367e9;hp=42b3d72aaf5b513382aa73aa23df4f2cfeb765e0;hb=004429fae3db0a606add39137070dd3926934281;hpb=6523e3596df7ec9b7b94b875593953d4b34b6117 diff --git a/corpus.py b/corpus.py index 42b3d72..e34233a 100644 --- a/corpus.py +++ b/corpus.py @@ -214,7 +214,7 @@ class Corpus : return [len(uce[1].split()) for uce in res] def getconcorde(self, uces) : - return self.cuces.execute('select * from uces where id IN (%s);' % ', '.join([`i` for i in uces])) + return self.cuces.execute('select * from uces where id IN (%s) ORDER BY id;' % ', '.join([`i` for i in uces])) def getuciconcorde(self, ucis) : uces = [[val,[uce.ident for uce in self.ucis[val].uces]] for val in ucis] @@ -1130,7 +1130,7 @@ class BuildCorpus : self.corpus.pathout = PathOut(filename = parametres_corpus['originalpath'], dirout = parametres_corpus['pathout']) self.corpus.pathout.createdir(parametres_corpus['pathout']) self.corpus.parametres['uuid'] = str(uuid4()) - self.corpus.parametres['corpus_name'] = os.path.split(self.corpus.parametres['pathout'])[1] + self.corpus.parametres['corpus_name'] = parametres_corpus['corpus_name']#os.path.split(self.corpus.parametres['pathout'])[1] self.corpus.parametres['type'] = 'corpus' if self.corpus.parametres['keep_ponct'] : self.ponctuation_espace = [' ', ''] @@ -1304,7 +1304,7 @@ class BuildSubCorpus(BuildCorpus): self.dlg = dlg self.ori = corpus self.infile = None - self.corpus = Corpus(self, corpus.parametres) + self.corpus = Corpus(self, {'type' : 'corpus', 'originalpath' : corpus.parametres['originalpath'], 'encoding' : corpus.parametres['encoding']}) self.last = 0 self.encoding = corpus.parametres['encoding'] self.corpus.parametres['corpus_name'] = parametres['corpus_name'] @@ -1336,6 +1336,31 @@ class BuildSubCorpus(BuildCorpus): idpara += 1 elif parametres.get('fromcluster', False) : pass + elif parametres.get('fromuceids', False) : + print 'fromuceids' + dictucekeep = dict(zip(parametres['uceids'], parametres['uceids'])) + idpara = 0 + for uci in self.ori.ucis : + if uci.paras == [] : + keepuces = [uce for uce in uci.uces if uce.ident in dictucekeep] + if keepuces != [] : + uci.uces = keepuces + self.corpus.ucis.append(uci) + idpara += 1 + else : + newuces = [] + newpara = [] + for et in uci.paras : + keepuces = [uce for uce in uci.uces if uce.para == idpara] + idpara += 1 + if keepuces != [] : + newuces += keepuces + newpara.append(et) + if newuces != [] : + uci.uces = newuces + uci.paras = newpara + self.corpus.ucis.append(uci) + #create database self.connect() self.dobuild() @@ -1490,6 +1515,7 @@ class Builder : self.dlg = dlg parametres = DoConf(os.path.join(self.parent.UserConfigPath,'corpus.cfg')).getoptions('corpus') parametres['pathout'] = PathOut(parent.filename, 'corpus').mkdirout() + parametres['corpus_name'] = os.path.split(parametres['pathout'])[1] dial = CorpusPref(parent, parametres) dial.CenterOnParent() dial.txtpath.SetLabel(parent.filename) @@ -1524,6 +1550,8 @@ class SubBuilder : parametres['meta'] = corpus.make_etoiles() elif parametres.get('fromtheme', False) : parametres['meta'] = corpus.make_themes() + else : + parametres['meta'] = [] parametres['meta'].sort() dial = SubTextFromMetaDial(parent, parametres) self.res = dial.ShowModal()