projects
/
iramuteq
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
svg for afc
[iramuteq]
/
corpusNG.py
diff --git
a/corpusNG.py
b/corpusNG.py
index
cbaefa3
..
fa13a8b
100644
(file)
--- a/
corpusNG.py
+++ b/
corpusNG.py
@@
-163,6
+163,9
@@
class Corpus :
lemuceeff[uce] = lemuceeff.get(uce, 0) + eff[i]
return lemuceeff
lemuceeff[uce] = lemuceeff.get(uce, 0) + eff[i]
return lemuceeff
+ def getlemclustereff(self, lem, cluster) :
+ return len(list(set(self.lc[cluster]).intersection(self.getlemuces(lem))))
+
def getlemeff(self, lem) :
return self.lems[lem].freq
def getlemeff(self, lem) :
return self.lems[lem].freq
@@
-1153,13
+1156,6
@@
class BuildFromAlceste(BuildCorpus) :
for word in uce :
self.last += 1
self.corpus.add_word(word)
for word in uce :
self.last += 1
self.corpus.add_word(word)
- #if self.dlg is not None :
- # if self.limitshow > self.count :
- # self.dlg.Pulse('textes : %i - segments : %i' % (iduci + 1, iduce +1))
- # self.count += 1
- # self.limitshow = 0
- # else :
- # self.limitshow = self.last / 100000
log.debug(' '.join([`iduci`,`idpara`,`iduce`]))
if self.last > self.lim :
self.backup_uce()
log.debug(' '.join([`iduci`,`idpara`,`iduce`]))
if self.last > self.lim :
self.backup_uce()
@@
-1171,27
+1167,13
@@
class BuildFromAlceste(BuildCorpus) :
if douce :
out = []
reste, texte_uce, suite = self.decouper(self.prep_txt(txt), self.ucesize + 15, self.ucesize)
if douce :
out = []
reste, texte_uce, suite = self.decouper(self.prep_txt(txt), self.ucesize + 15, self.ucesize)
-# print 'reste'
-# print reste
-# print 'texte_uce'
-# print texte_uce
-# print 'suite'
-# print suite
while reste :
uce = ' '.join([val for val in texte_uce if val not in self.ponctuation_espace])
if uce != '' :
out.append(uce)
reste, texte_uce, suite = self.decouper(suite, self.ucesize + 15, self.ucesize)
while reste :
uce = ' '.join([val for val in texte_uce if val not in self.ponctuation_espace])
if uce != '' :
out.append(uce)
reste, texte_uce, suite = self.decouper(suite, self.ucesize + 15, self.ucesize)
-# print 'reste'
-# print reste
-# print 'texte_uce'
-# print texte_uce
-# print 'suite'
-# print suite
-
uce = ' '.join([val for val in texte_uce if val not in self.ponctuation_espace])
if uce != '' :
uce = ' '.join([val for val in texte_uce if val not in self.ponctuation_espace])
if uce != '' :
- #print 'RESTEE UUCEEEEEEEEEEEEE', uce
out.append(uce)
return out
else :
out.append(uce)
return out
else :
@@
-1220,6
+1202,9
@@
class Builder :
ReadLexique(self.parent, lang = parametres['lang'])
self.parent.expressions = ReadDicoAsDico(self.parent.DictPath.get(parametres['lang']+'_exp', 'french_exp'))
self.parametres = parametres
ReadLexique(self.parent, lang = parametres['lang'])
self.parent.expressions = ReadDicoAsDico(self.parent.DictPath.get(parametres['lang']+'_exp', 'french_exp'))
self.parametres = parametres
+ else :
+ if self.dlg is not None :
+ self.dlg.Destroy()
dial.Destroy()
def doanalyse(self) :
dial.Destroy()
def doanalyse(self) :