X-Git-Url: http://iramuteq.org/git?p=iramuteq;a=blobdiff_plain;f=corpusNG.py;h=fa13a8b916b3762c806c9096fe0ff21722c76337;hp=50f90e7bb7689e8d6d99933fd60620f2737f25f9;hb=1fb687c23b19ae4cc88146acf393041356c1df3a;hpb=5d8a0a0e99d9075adc28f2525fe0aba8e14c2b0a diff --git a/corpusNG.py b/corpusNG.py index 50f90e7..fa13a8b 100644 --- a/corpusNG.py +++ b/corpusNG.py @@ -163,6 +163,9 @@ class Corpus : lemuceeff[uce] = lemuceeff.get(uce, 0) + eff[i] return lemuceeff + def getlemclustereff(self, lem, cluster) : + return len(list(set(self.lc[cluster]).intersection(self.getlemuces(lem)))) + def getlemeff(self, lem) : return self.lems[lem].freq @@ -526,7 +529,8 @@ class Corpus : lim -= 1 else : stop = nbmax - 1 - log.info('nb actives = %i - eff min = %i ' % (stop, lim)) + lim = effs[stop] + log.info('nb actives = %i - eff min = %i ' % (stop + 1, lim)) return [val[1] for val in allactives[0:stop + 1]], lim def make_and_write_profile(self, actives, ucecl, fileout) : @@ -1152,13 +1156,6 @@ class BuildFromAlceste(BuildCorpus) : for word in uce : self.last += 1 self.corpus.add_word(word) - #if self.dlg is not None : - # if self.limitshow > self.count : - # self.dlg.Pulse('textes : %i - segments : %i' % (iduci + 1, iduce +1)) - # self.count += 1 - # self.limitshow = 0 - # else : - # self.limitshow = self.last / 100000 log.debug(' '.join([`iduci`,`idpara`,`iduce`])) if self.last > self.lim : self.backup_uce() @@ -1170,27 +1167,13 @@ class BuildFromAlceste(BuildCorpus) : if douce : out = [] reste, texte_uce, suite = self.decouper(self.prep_txt(txt), self.ucesize + 15, self.ucesize) -# print 'reste' -# print reste -# print 'texte_uce' -# print texte_uce -# print 'suite' -# print suite while reste : uce = ' '.join([val for val in texte_uce if val not in self.ponctuation_espace]) if uce != '' : out.append(uce) reste, texte_uce, suite = self.decouper(suite, self.ucesize + 15, self.ucesize) -# print 'reste' -# print reste -# print 'texte_uce' -# print texte_uce -# print 'suite' -# print suite - uce = ' '.join([val for val in texte_uce if val not in self.ponctuation_espace]) if uce != '' : - #print 'RESTEE UUCEEEEEEEEEEEEE', uce out.append(uce) return out else : @@ -1219,6 +1202,9 @@ class Builder : ReadLexique(self.parent, lang = parametres['lang']) self.parent.expressions = ReadDicoAsDico(self.parent.DictPath.get(parametres['lang']+'_exp', 'french_exp')) self.parametres = parametres + else : + if self.dlg is not None : + self.dlg.Destroy() dial.Destroy() def doanalyse(self) :