X-Git-Url: http://iramuteq.org/git?p=iramuteq;a=blobdiff_plain;f=corpusNG.py;h=e0437077ff0fc5642a68f89a82ca2b5777da7ea3;hp=f6468b3b5f7eb79c9e521e62bb6ebd0373e22247;hb=40ae5805c4286656d9c6c17e3231d0ea24cdcaa7;hpb=be2de22859c8f59abe9e1082981e4c3a62e9b7ca diff --git a/corpusNG.py b/corpusNG.py index f6468b3..e043707 100644 --- a/corpusNG.py +++ b/corpusNG.py @@ -196,18 +196,6 @@ class Corpus : res = self.getalluces() return [len(uce[1].split()) for uce in res] -# def getlemseff(self) : -# if self.idformes is None : -# self.make_idformes() -# return dict([[lem, sum([self.idformes[forme].freq for forme in self.lems[lem]])] for lem in self.lems]) - -# def getlemsefftype(self) : -# if self.idformes is None : -# self.make_idformes() -# if self.lems is None : -# self.make_lems() -# return dict([[lem, [sum([self.idformes[forme].freq for forme in self.lems[lem]]), '', self.idformes[self.lems[lem].keys()[0]].gram]] for lem in self.lems]) - def getconcorde(self, uces) : return self.cuces.execute('select * from uces where id IN (%s);' % ', '.join([`i` for i in uces])) @@ -1156,13 +1144,6 @@ class BuildFromAlceste(BuildCorpus) : for word in uce : self.last += 1 self.corpus.add_word(word) - #if self.dlg is not None : - # if self.limitshow > self.count : - # self.dlg.Pulse('textes : %i - segments : %i' % (iduci + 1, iduce +1)) - # self.count += 1 - # self.limitshow = 0 - # else : - # self.limitshow = self.last / 100000 log.debug(' '.join([`iduci`,`idpara`,`iduce`])) if self.last > self.lim : self.backup_uce() @@ -1174,27 +1155,13 @@ class BuildFromAlceste(BuildCorpus) : if douce : out = [] reste, texte_uce, suite = self.decouper(self.prep_txt(txt), self.ucesize + 15, self.ucesize) -# print 'reste' -# print reste -# print 'texte_uce' -# print texte_uce -# print 'suite' -# print suite while reste : uce = ' '.join([val for val in texte_uce if val not in self.ponctuation_espace]) if uce != '' : out.append(uce) reste, texte_uce, suite = self.decouper(suite, self.ucesize + 15, self.ucesize) -# print 'reste' -# print reste -# print 'texte_uce' -# print texte_uce -# print 'suite' -# print suite - uce = ' '.join([val for val in texte_uce if val not in self.ponctuation_espace]) if uce != '' : - #print 'RESTEE UUCEEEEEEEEEEEEE', uce out.append(uce) return out else :