gramtype = self.parent.lexique[word][1]
lem = self.parent.lexique[word][0]
elif word.isdigit() :
- gramtype = 'num'
+ gramtype = u'num'
lem = word
else :
- gramtype = 'nr'
+ gramtype = u'nr'
lem = word
self.formes[word] = Word(word, gramtype, len(self.formes), lem)
self.idformesuces[self.formes[word].ident] = {self.ucis[-1].uces[-1].ident : 1}
if self.iduces is None :
self.iduces = dict([[uce.ident, uce] for uci in self.ucis for uce in uci.uces])
- def make_lexitable(self, mineff, etoiles) :
- tokeep = [lem for lem in self.lems if self.lems[lem].freq >= mineff]
+ def make_lexitable(self, mineff, etoiles, gram = 0) :
+ if gram == 0 :
+ grams = {1:'', 2:''}
+ else :
+ grams = {gram :''}
+ tokeep = [lem for lem in self.lems if self.lems[lem].freq >= mineff and self.lems[lem].act in grams]
etuces = [[] for et in etoiles]
for uci in self.ucis :
get = list(set(uci.etoiles).intersection(etoiles))
f.write(guce.encode(self.parametres['syscoding']) + '\n\n')
def export_classe(self, outf, classe, lem = False) :
- sts = self.lc[classe]
+ sts = self.lc[classe - 1]
res = self.getconcorde(sts)
self.make_iduces()
with open(outf, 'w') as f :
table_uce[uces[uce]][i] = 1
table_uce.insert(0, list_act)
return table_uce
+
def make_pondtable_with_classe(self, uces, list_act) :
    """Build a weighted uce-by-form table for the given uces.

    Parameters:
        uces: sequence of uce identifiers; one table row is produced per
            entry, in the same order.
        list_act: sequence of forms (lemmas); one table column is produced
            per entry, in the same order.

    Returns:
        A list of rows. The first row is ``list_act`` itself (the same
        object, used as a header line). Each following row holds, for the
        matching uce, the value that ``self.getlemuceseff(form)`` reports
        for that uce, or 0 when the uce is absent from that mapping.

    NOTE(review): ``getlemuceseff`` is assumed to return a dict-like
    mapping of uce identifier -> count; confirm against its definition.
    """
    # Row index of every requested uce (for a duplicated uce the last
    # position wins, the other rows stay at 0).
    row_of = dict((uce, row) for row, uce in enumerate(uces))
    table_uce = [[0 for form in list_act] for uce in uces]
    for col, lem in enumerate(list_act) :
        uceseff = self.getlemuceseff(lem)
        # Keep only the uces that were asked for; others are ignored.
        for uce, eff in uceseff.items() :
            row = row_of.get(uce)
            if row is not None :
                table_uce[row][col] = eff
    table_uce.insert(0, list_act)
    return table_uce
def parse_active(self, gramact, gramsup = None) :
log.info('parse actives')
self.lems[lem].act = 2
elif self.lems[lem].gram in gramact :
self.lems[lem].act = 1
- elif gramsup is not None :
+ elif gramsup is not None and self.lems[lem].gram not in gramact:
if self.lems[lem].gram in gramsup :
self.lems[lem].act = 2
else :
def firstclean(self, txt) :
    """Normalise typographic characters and isolate punctuation in txt.

    The typographic apostrophe becomes a plain ``'`` and the ligature
    ``œ`` becomes ``oe``. Every ``? . ! , ; :`` is then surrounded by
    spaces, and an ellipsis (three dots or the single ``…`` character) is
    replaced by the marker `` £$£ ``.

    Parameters:
        txt: the text to clean (a unicode string).

    Returns:
        The cleaned text.
    """
    txt = txt.replace(u'’', u"'")
    txt = txt.replace(u'œ', u'oe')
    # Order matters: '...' has to be rewritten before the single '.',
    # otherwise each dot of an ellipsis would be spaced out on its own.
    # Every literal is unicode so that the non-ASCII marker is never
    # implicitly decoded as ASCII when txt is a unicode string.
    replacements = (
        (u'...', u' £$£ '),
        (u'?', u' ? '),
        (u'.', u' . '),
        (u'!', u' ! '),
        (u',', u' , '),
        (u';', u' ; '),
        (u':', u' : '),
        (u'…', u' £$£ '),
    )
    for old, new in replacements :
        txt = txt.replace(old, new)
    return txt
def make_cleans(self, txt) :
for clean in self.cleans :