from uuid import uuid4
import datetime
from copy import copy
#------ spaCy test ------------
#import spacy
#nlp = spacy.load("fr_core_news_lg")
#------------------------------------
# project-local imports
def getucisize(self):
    """Return the total size of every uci that owns at least one uce.

    For each uci in ``self.ucis``, the sizes returned by
    ``self.getucesize()`` are summed from the ident of the uci's first uce
    to the ident of its last uce (inclusive). Ucis with no uces are
    skipped, so the result may be shorter than ``self.ucis``.

    NOTE(review): this assumes uce idents are positions in the
    ``getucesize()`` list and are contiguous inside a uci -- confirm.

    Returns:
        list[int]: one summed size per non-empty uci, in ``self.ucis`` order.
    """
    ucesize = self.getucesize()
    return [
        sum(ucesize[uci.uces[0].ident:uci.uces[-1].ident + 1])
        for uci in self.ucis
        # An empty uci has no first/last uce to index; leave it out.
        if uci.uces
    ]
def getucesize(self):
    """Return the number of whitespace-separated tokens of every uce.

    Each item yielded by ``self.getalluces()`` is indexed at position 1 to
    obtain its text, which is split on whitespace and counted.

    Returns:
        list[int]: one token count per item, in ``getalluces()`` order.
    """
    return [len(row[1].split()) for row in self.getalluces()]
def getconcorde(self, uces):
    """Query the ``uces`` table for the rows whose id is listed in *uces*.

    Args:
        uces: iterable of ids; each one is rendered with ``repr`` into the
            SQL ``IN (...)`` list.

    Returns:
        Whatever ``self.cuces.execute`` returns for the query; rows are
        requested in ascending ``id`` order.
    """
    # NOTE(review): the ids are interpolated into the SQL text rather than
    # bound as parameters. That is only safe while callers pass internally
    # generated ids, never user-supplied strings.
    id_list = ', '.join(repr(i) for i in uces)
    return self.cuces.execute('select * from uces where id IN (%s) ORDER BY id;' % id_list)
def getuciconcorde(self, ucis) :
# Pair every requested value with the idents of the uces of self.ucis[value],
# giving [[value, [ident, ...]], ...]; values in `ucis` are used as positions
# in self.ucis.
# NOTE(review): the rest of this method is not visible in this excerpt.
uces = [[val,[uce.ident for uce in self.ucis[val].uces]] for val in ucis]
def getucesfrometoile(self, etoile):
    """Return the idents of the uces of every uci tagged with *etoile*.

    Args:
        etoile: value looked up with ``in`` against each ``uci.etoiles``.

    Returns:
        list: uce idents, in ``self.ucis`` order then ``uci.uces`` order.
    """
    return [
        uce.ident
        for uci in self.ucis
        # Test the uci once, before walking its uces.
        if etoile in uci.etoiles
        for uce in uci.uces
    ]
def getucisfrometoile(self, etoile):
    """Return the distinct ``uci`` values of the uces tagged with *etoile*.

    The idents of all uces belonging to a uci whose ``etoiles`` contain
    *etoile* are collected, each ident is resolved with
    ``self.getucefromid`` and the ``uci`` attribute of the result is kept.

    Args:
        etoile: value looked up with ``in`` against each ``uci.etoiles``.

    Returns:
        list: de-duplicated ``uci`` values. The order comes from a set and
        is therefore unspecified. A tagged uci without any uce contributes
        nothing.
    """
    uce_ids = [
        uce.ident
        for uci in self.ucis
        if etoile in uci.etoiles
        for uce in uci.uces
    ]
    return list({self.getucefromid(ident).uci for ident in uce_ids})
+
+
def getetoileuces(self) :
# Logs the start of the computation and creates the (initially empty) result
# dictionary.
# NOTE(review): the remainder of this method is outside this excerpt.
log.info('get uces etoiles')
etoileuces = {}
# NOTE(review): fragment -- the enclosing function header and the `if` that
# pairs with the `else` below are not visible in this excerpt.
# The mapping comes from getetoileuces() in one branch and getetoileucis() in
# the other.
etoileuces = self.getetoileuces()
else :
etoileuces = self.getetoileucis()
# Filter the mapping by entry size: the removed line required more than one
# element per etoile, the added line keeps every non-empty entry.
- etoileuces = dict([[et, etoileuces[et]] for et in etoileuces if len(etoileuces[et]) > 1])
+ etoileuces = dict([[et, etoileuces[et]] for et in etoileuces if len(etoileuces[et]) > 0])
# Write one ';'-separated line per etoile: the etoile, then for each class in
# ucecl the number of elements shared between that class and the etoile's
# entry. The file is written as UTF-8.
with open(fileout, 'w', encoding='utf8') as f :
f.write('\n'.join([';'.join([et] + [repr(len(set(etoileuces[et]).intersection(classe))) for classe in ucecl]) for et in etoileuces])) #.encode(self.parametres['syscoding'])
# NOTE(review): fragment -- the enclosing function and the loop that provides
# `uci`, `idpara` and `dictucekeep` are not visible in this excerpt.
#etoiles = self.make_etoiles()
newuces = []
newpara = []
# One pass per entry of uci.paras; idpara is advanced on every pass.
for et in uci.paras :
# Copy the uces of this uci that are to be kept. The added line also requires
# the uce to belong to the current paragraph (uce.para == idpara).
# NOTE(review): `dictucekeepand and` on the added line looks like a typo for
# `dictucekeep and` (the removed line uses `dictucekeep`); as written it
# would reference an undefined name -- confirm against the real file.
- keepuces = [CopyUce(uce) for uce in uci.uces if uce.ident in dictucekeep]
+ keepuces = [CopyUce(uce) for uce in uci.uces if uce.ident in dictucekeepand and uce.para == idpara]
idpara += 1
# Only non-empty selections are appended to the new uce list.
if keepuces != [] :
newuces += keepuces
# BuildCorpus subclass whose read_corpus() consumes `infile`.
# NOTE(review): only parts of read_corpus are visible in this excerpt; the
# loop that yields `line` and the initialisation of iduci/iduce/idpara/txt are
# not shown.
class BuildFromAlceste(BuildCorpus) :
def read_corpus(self, infile) :
+
# When a dialog is attached, show the initial progress counters.
if self.dlg is not None :
self.dlg.Pulse('textes : 0 - segments : 0')
self.limitshow = 0
# A line accepted by testuci() starts a new uci.
if self.testuci(line) :
iduci += 1
# Flush the text accumulated so far. iduci was already incremented, so the
# pending text is attributed to the previous uci (iduci - 1).
if txt != [] :
+ #doc = nlp(' '.join(txt))
+ #print([[word, word.pos_, word.lemma_] for word in doc])
iduce, idpara = self.treattxt(txt, iduce, idpara, iduci - 1)
txt = []
# Register the new uci, built from its id and the line that opened it.
self.corpus.ucis.append(Uci(iduci, line))