# Prepend a synthetic header row to `metas`, then write `metas` to `f` as
# tab-separated text.
# NOTE(review): `metas` and `f` are defined outside this excerpt. The code
# needs every row of `metas` to be a sequence of strings ('\t'.join), and
# max() raises ValueError if `metas` is empty.
# The longest row decides how many 'column_<i>' names are generated.
longueur_max = max([len(val) for val in metas])
first = ['column_%i' % i for i in range(longueur_max)]
metas.insert(0, first)
# NOTE(review): the three statements above are repeated verbatim here. If both
# copies really execute, a second header row is inserted in front of the
# first one - confirm whether the repetition is an extraction artifact.
longueur_max = max([len(val) for val in metas])
first = ['column_%i' % i for i in range(longueur_max)]
metas.insert(0, first)
# Cells are joined with tabs and rows with newlines; no trailing newline.
f.write('\n'.join(['\t'.join(line) for line in metas]))
# NOTE(review): only the signature and a few statements of
# export_corpus_classes are visible here, indentation has been lost and the
# lines are repeated. The comments below describe the visible statements only.
def export_corpus_classes(self, outf, alc = True, lem = False, uci = False) :
# NOTE(review): this write of `metas` is identical to the one that precedes
# this definition; it presumably belongs to the preceding code - confirm.
f.write('\n'.join(['\t'.join(line) for line in metas]))
def export_corpus_classes(self, outf, alc = True, lem = False, uci = False) :
# Resolve the output file name relative to `rep` (defined outside this excerpt).
outf = os.path.join(rep, outf)
if lem :
# Replace each whitespace-separated form of `guce` by the `.lem` attribute of
# its entry in self.formes, then re-join with single spaces.
guce = ' '.join([self.formes[forme].lem for forme in guce.split()])
# NOTE(review): verbatim repetition of the three statements above.
outf = os.path.join(rep, outf)
if lem :
guce = ' '.join([self.formes[forme].lem for forme in guce.split()])
# The text is written as-is; the cp1252 encoding call is commented out.
f.write(guce) #.encode('cp1252', errors = 'replace'))
# NOTE(review): only the signature of export_tropes is visible in this excerpt
# (twice); its body is not shown, so its behaviour cannot be documented here.
def export_tropes(self, fileout, classe, lem = False, uci = False) :
# NOTE(review): identical to the write that precedes this definition;
# presumably a repeated line from the preceding code - confirm.
f.write(guce) #.encode('cp1252', errors = 'replace'))
def export_tropes(self, fileout, classe, lem = False, uci = False) :
# NOTE(review): the `if` matching this `else` and the enclosing definition are
# outside this excerpt; `actives` and `ucecl` are defined there as well.
else :
# One row per lemma in `actives`: the lemma followed, for each class in `ucecl`,
# by the number of distinct items of self.getlemuces(lem) present in that class.
tab = [[lem] + [len(set(self.getlemuces(lem)).intersection(classe)) for classe in ucecl] for lem in actives]
# Keep only rows whose counts sum to at least 3, and turn the counts into
# their repr() strings so the row can be joined as text.
tab = [[line[0]] + [repr(val) for val in line[1:]] for line in tab if sum(line[1:]) >= 3]
# NOTE(review): verbatim repetition of the statements above.
else :
tab = [[lem] + [len(set(self.getlemuces(lem)).intersection(classe)) for classe in ucecl] for lem in actives]
tab = [[line[0]] + [repr(val) for val in line[1:]] for line in tab if sum(line[1:]) >= 3]
# NOTE(review): the `if` matching this `else` and the enclosing definition are
# outside this excerpt; `ucecl` and `f` are defined there as well.
else :
# self.getetoileucis() is used as a mapping: iterated for its keys and indexed
# by them.
etoileuces = self.getetoileucis()
# Drop every key whose associated collection has one element or fewer.
etoileuces = dict([[et, etoileuces[et]] for et in etoileuces if len(etoileuces[et]) > 1])
# NOTE(review): verbatim repetition of the statements above.
else :
etoileuces = self.getetoileucis()
etoileuces = dict([[et, etoileuces[et]] for et in etoileuces if len(etoileuces[et]) > 1])
# One ';'-separated line per key: the key, then for each class in `ucecl` the
# repr() of the number of that key's items found in the class. The encoding
# call at the end of the line is commented out.
f.write('\n'.join([';'.join([et] + [repr(len(set(etoileuces[et]).intersection(classe))) for classe in ucecl]) for et in etoileuces])) #.encode(self.parametres['syscoding'])
# Disabled code kept from the original.
#etoiles = self.make_etoiles()
#with open(fileout, 'w') as f :
# NOTE(review): verbatim repetition of the write and disabled lines above.
f.write('\n'.join([';'.join([et] + [repr(len(set(etoileuces[et]).intersection(classe))) for classe in ucecl]) for et in etoileuces])) #.encode(self.parametres['syscoding'])
#etoiles = self.make_etoiles()
#with open(fileout, 'w') as f :
# NOTE(review): the enclosing definition is outside this excerpt; `lenmin`,
# `lenmax`, `uce`, `d`, `b`, `effmin` and `f` are defined there.
# Segment lengths run from lenmin up to lenmax - 1 (range excludes lenmax).
for taille_segment in range(lenmin,lenmax) :
# Build every contiguous window of `taille_segment` items of `uce`, joined with
# spaces, and hand the list to self.count_from_list_cl together with `d`, `b`
# and the number of entries in self.lc; the call's result replaces `d`.
d =self.count_from_list_cl([' '.join(uce[i:i+taille_segment]) for i in range(len(uce)-(taille_segment - 1))], d, b, len(self.lc))
# Keep the segments whose values in `d` sum to at least `effmin`; each row is
# the segment followed by its values converted with str().
result = [[seg] + [str(val) for val in d[seg]] for seg in d if sum(d[seg]) >= effmin]
# NOTE(review): verbatim repetition of the statements above.
for taille_segment in range(lenmin,lenmax) :
d =self.count_from_list_cl([' '.join(uce[i:i+taille_segment]) for i in range(len(uce)-(taille_segment - 1))], d, b, len(self.lc))
result = [[seg] + [str(val) for val in d[seg]] for seg in d if sum(d[seg]) >= effmin]
# Rows are written ';'-separated, one per line, without a trailing newline.
f.write('\n'.join([';'.join(line) for line in result]))
# NOTE(review): only the signature and the last statements of make_proftype
# are visible here (indentation lost, lines repeated); the code that creates
# `res`, `gram`, `i`, `lemuceeff`, `concern` and `f` is outside this excerpt.
def make_proftype(self, outf) :
# NOTE(review): identical to the write that precedes this definition;
# presumably a repeated line from the preceding code - confirm.
f.write('\n'.join([';'.join(line) for line in result]))
def make_proftype(self, outf) :
# Add to slot `i` of res[gram] the sum of lemuceeff[uce] over the items of
# `concern`.
res[gram][i] += sum([lemuceeff[uce] for uce in concern])
# Turn the mapping into rows: the key followed by the repr() of each value,
# then sort the rows (list comparison, so by key first).
res = [[gram] + [repr(val) for val in res[gram]] for gram in res]
res.sort()
# NOTE(review): verbatim repetition of the three statements above. Once `res`
# has been rebound to a list of rows, re-running them as written would not
# work; confirm the repetition is an extraction artifact.
res[gram][i] += sum([lemuceeff[uce] for uce in concern])
res = [[gram] + [repr(val) for val in res[gram]] for gram in res]
res.sort()
# Rows are written ';'-separated, one per line, without a trailing newline.
f.write('\n'.join([';'.join(line) for line in res]))
# NOTE(review): only the signature of make_ucecl_from_R is visible (twice).
# The statements that follow use `outf`, which is not a parameter of this
# signature, so they presumably belong to a different definition whose header
# is outside this excerpt - confirm.
def make_ucecl_from_R(self, filein) :
# NOTE(review): identical to the write that precedes this definition;
# presumably a repeated line from the preceding code - confirm.
f.write('\n'.join([';'.join(line) for line in res]))
def make_ucecl_from_R(self, filein) :
# Log the time elapsed since `t1` (set outside this excerpt) as a float.
log.info('%f' % (time() - t1))
if outf is not None :
# One ';'-separated line per key `i` of `occurrences`: the key, then the
# entries of occurrences, formescl, hapaxcl and lenclasses for that key, then
# the ratio hapaxcl[i] / formescl[i]; every field is passed through repr().
# The division raises ZeroDivisionError when formescl[i] is 0.
toprint = '\n'.join([';'.join([repr(i), repr(occurrences[i]), repr(formescl[i]), repr(hapaxcl[i]), repr(lenclasses[i]), repr(float(hapaxcl[i])/float(formescl[i]))]) for i in occurrences])
# NOTE(review): verbatim repetition of the three statements above.
log.info('%f' % (time() - t1))
if outf is not None :
toprint = '\n'.join([';'.join([repr(i), repr(occurrences[i]), repr(formescl[i]), repr(hapaxcl[i]), repr(lenclasses[i]), repr(float(hapaxcl[i])/float(formescl[i]))]) for i in occurrences])
f.write(toprint)
else :
# Without an output file the same figures are returned as a list of rows;
# unlike the written form, the rows do not start with the key `i`.
return [[repr(occurrences[i]), repr(formescl[i]), repr(hapaxcl[i]), repr(lenclasses[i]), repr(float(hapaxcl[i])/float(formescl[i]))] for i in occurrences]
# NOTE(review): verbatim repetition of the write / else / return above.
f.write(toprint)
else :
return [[repr(occurrences[i]), repr(formescl[i]), repr(hapaxcl[i]), repr(lenclasses[i]), repr(float(hapaxcl[i])/float(formescl[i]))] for i in occurrences]
# NOTE(review): `txt` and `f` are defined outside this excerpt; this write
# presumably ends the definition that precedes export_dictionary - confirm.
f.write(txt)
# NOTE(review): indentation is lost and the lines are repeated in this
# excerpt; the code that opens `f` is not visible, and `syscoding` is not used
# by any of the visible statements.
def export_dictionary(self, fileout, syscoding) :
# One entry per key of self.formes: [freq, form key, lem, gram], with the
# frequency first so that it drives the sort.
listformes = [[self.formes[forme].freq, forme, self.formes[forme].lem, self.formes[forme].gram] for forme in self.formes]
# Descending order; on equal frequency the remaining fields break the tie
# (list comparison).
listformes.sort(reverse = True)
# Move the frequency to the end of each row, as its repr() string.
listformes = [forme[1:] + [repr(forme[0])] for forme in listformes]
# NOTE(review): identical to the write that precedes this definition;
# presumably a repeated line from the preceding code - confirm.
f.write(txt)
# NOTE(review): verbatim repetition of the signature and statements above.
def export_dictionary(self, fileout, syscoding) :
listformes = [[self.formes[forme].freq, forme, self.formes[forme].lem, self.formes[forme].gram] for forme in self.formes]
listformes.sort(reverse = True)
listformes = [forme[1:] + [repr(forme[0])] for forme in listformes]
# Rows are written tab-separated, one per line, without a trailing newline;
# '\t'.join needs every remaining field (form key, lem, gram) to be a string.
f.write('\n'.join(['\t'.join(forme) for forme in listformes]))
# NOTE(review): indentation is lost, the lines are repeated and the definition
# continues past the end of this excerpt; `syscoding` and `fileout` are not
# used by any of the visible statements.
def export_lems(self, fileout, syscoding) :
# Called before self.idformes is read below; presumably it fills that
# attribute - confirm against make_idformes.
self.make_idformes()
# One [lem, text] pair per key of self.lems. The text is a tab-separated run of
# "<form> <value>" pairs, one per key of self.lems[lem].formes: the form comes
# from self.idformes[key].forme and the value is the repr() of the mapped item.
listlem = [[lem, '\t'.join(['\t'.join([self.idformes[forme].forme, repr(self.lems[lem].formes[forme])]) for forme in self.lems[lem].formes])] for lem in self.lems]
# Sort the pairs (list comparison, so by lem first).
listlem.sort()
# NOTE(review): identical to the write that ends the export_dictionary lines;
# presumably a repeated line from the preceding code - confirm.
f.write('\n'.join(['\t'.join(forme) for forme in listformes]))
# NOTE(review): verbatim repetition of the signature and statements above.
def export_lems(self, fileout, syscoding) :
self.make_idformes()
listlem = [[lem, '\t'.join(['\t'.join([self.idformes[forme].forme, repr(self.lems[lem].formes[forme])]) for forme in self.lems[lem].formes])] for lem in self.lems]
listlem.sort()