X-Git-Url: http://iramuteq.org/git?a=blobdiff_plain;f=corpus.py;h=489d4f18de1c71b1ce80f3699985b5cfefd62b21;hb=d12b217aded82bd913cf8f6bc2771d24e9b65727;hp=9b417885ac77f2f97bf6e2cdeb86f4b95211d99a;hpb=c038ef7892cf106654fcd0d35389584513b2ec1d;p=iramuteq diff --git a/corpus.py b/corpus.py index 9b41788..489d4f1 100644 --- a/corpus.py +++ b/corpus.py @@ -1,25 +1,39 @@ # -*- coding: utf-8 -*- #Author: Pierre Ratinaud +#Copyright (c) 2008-2020 Pierre Ratinaud +#modification pour python 3 : Laurent Mérat, 6x7 - mai 2020 +#License: GNU/GPL +#------------------------------------ +# import des modules python +#------------------------------------ import codecs import os -import gettext -_ = gettext.gettext import locale import sys from time import time -from functions import decoupercharact, ReadDicoAsDico, DoConf, ReadLexique, progressbar import re import sqlite3 import itertools import logging from operator import itemgetter from uuid import uuid4 +import datetime +from copy import copy +#------test spacy------------ +#import spacy +#nlp = spacy.load("fr_core_news_lg") + +#------------------------------------ +# import des fichiers du projet +#------------------------------------ +from functions import decoupercharact, ReadDicoAsDico, DoConf, ReadLexique, progressbar from chemins import PathOut from dialog import CorpusPref, SubTextFromMetaDial, MergeClusterFrame -from copy import copy from colors import colors -import datetime + +import langue +langue.run() log = logging.getLogger('iramuteq.corpus') @@ -36,7 +50,6 @@ def copycorpus(corpus) : def CopyUce(uce) : return Uce(uce.ident, uce.para, uce.uci) - def CopyUci(uci): nuci = Uci(uci.ident, '') @@ -44,10 +57,10 @@ def CopyUci(uci): nuci.uces = [CopyUce(uce) for uce in uci.uces] nuci.paras = copy(uci.paras) return nuci - class Corpus : + """Corpus class list of text """ @@ -87,10 +100,10 @@ class Corpus : gramtype = self.parent.lexique[word][1] lem = self.parent.lexique[word][0] elif word.isdigit() : - gramtype = u'num' + gramtype = 'num' lem = word else : - gramtype = u'nr' + gramtype = 'nr' lem = word self.formes[word] = Word(word, gramtype, len(self.formes), lem) self.idformesuces[self.formes[word].ident] = {self.ucis[-1].uces[-1].ident : 1} @@ -131,13 +144,13 @@ class Corpus : def read_corpus(self) : log.info('read corpus') - self.parametres['syscoding'] = sys.getdefaultencoding() + self.parametres['syscoding'] = 'utf8' if self.conncorpus is None : self.conn_all() res = self.ccorpus.execute('SELECT * FROM etoiles;') for row in res : self.ucis.append(Uci(row[0], row[1], row[2])) - uces = self.conncorpus.cursor().execute('SELECT * FROM luces where uci=?;',(`self.ucis[-1].ident`,)) + uces = self.conncorpus.cursor().execute('SELECT * FROM luces where uci=?;',(repr(self.ucis[-1].ident),)) for uce in uces: self.ucis[-1].uces.append(Uce(uce[2], uce[1], uce[0])) res = self.ccorpus.execute('SELECT * FROM formes;') @@ -145,9 +158,9 @@ class Corpus : self.ccorpus.close() def getworduces(self, wordid) : - if isinstance(wordid, basestring) : + if isinstance(wordid, str) : wordid = self.formes[wordid].ident - res = self.cformes.execute('SELECT uces FROM uces where id=? ORDER BY id;', (`wordid`,)) + res = self.cformes.execute('SELECT uces FROM uces where id=? ORDER BY id;', (repr(wordid),)) return list(itertools.chain(*[[int(val) for val in row[0].split()] if not isinstance(row[0], int) else [row[0]] for row in res])) def getworducis(self, wordid) : @@ -155,9 +168,9 @@ class Corpus : return list(set([self.getucefromid(uce).uci for uce in res])) def getformeuceseff(self, formeid) : - if isinstance(formeid, basestring) : + if isinstance(formeid, str) : formeid = self.formes[formeid].ident - res = self.cformes.execute('SELECT uces FROM uces where id=? ORDER BY id;', (`formeid`,)) + res = self.cformes.execute('SELECT uces FROM uces where id=? ORDER BY id;', (repr(formeid),)) uces = list(itertools.chain(*[[int(val) for val in row[0].split()] if not isinstance(row[0], int) else [row[0]] for row in res])) query = 'SELECT eff FROM eff where id=%i ORDER BY id' % formeid res = self.cformes.execute(query) @@ -168,7 +181,7 @@ class Corpus : return formeuceeff def getlemuces(self, lem) : - formesid = ', '.join([`val` for val in self.lems[lem].formes]) + formesid = ', '.join([repr(val) for val in self.lems[lem].formes]) query = 'SELECT uces FROM uces where id IN (%s) ORDER BY id' % formesid res = self.cformes.execute(query) return list(set(list(itertools.chain(*[[int(val) for val in row[0].split()] if not isinstance(row[0], int) else [row[0]] for row in res])))) @@ -179,7 +192,7 @@ class Corpus : if lem in self.lems : formesid += self.lems[lem].formes else : - print 'abscent : %s' % lem + print('abscent : %s' % lem) query = 'SELECT uces FROM uces where id IN %s ORDER BY id' % str(tuple(formesid)) res = self.cformes.execute(query) return list(set(list(itertools.chain(*[[int(val) for val in row[0].split()] if not isinstance(row[0], int) else [row[0]] for row in res])))) @@ -194,7 +207,7 @@ class Corpus : self.tgenlem[lem] = [0] * clnb self.tgenlem[lem][i] = len(set(lemst).intersection(classe)) else : - print 'abscent: ',lem + print('abscent: ',lem) return list(set(tgenst)) def gettgentxt(self, tgen): @@ -206,7 +219,7 @@ class Corpus : return list(set([self.getucefromid(val).uci for val in uces])) def getlemuceseff(self, lem, luces = None) : - formesid = ', '.join([`val` for val in self.lems[lem].formes]) + formesid = ', '.join([repr(val) for val in self.lems[lem].formes]) query = 'SELECT uces FROM uces where id IN (%s) ORDER BY id' % formesid res = self.cformes.execute(query) uces = list(itertools.chain(*[[int(val) for val in row[0].split()] if not isinstance(row[0], int) else [row[0]] for row in res])) @@ -252,13 +265,18 @@ class Corpus : return [len(uce[1].split()) for uce in res] def getconcorde(self, uces) : - return self.cuces.execute('select * from uces where id IN (%s) ORDER BY id;' % ', '.join([`i` for i in uces])) + return self.cuces.execute('select * from uces where id IN (%s) ORDER BY id;' % ', '.join([repr(i) for i in uces])) def getuciconcorde(self, ucis) : uces = [[val,[uce.ident for uce in self.ucis[val].uces]] for val in ucis] uces = [[val[0], '\n'.join([row[1] for row in self.getconcorde(val[1])])] for val in uces] return uces + def getuciconcorde_uces(self, uciid, uceid) : + uces = [uce.ident for uce in self.ucis[uciid].uces] + uces = [row for row in self.getconcorde(uces)] + return uces + def getwordconcorde(self, word) : return self.getconcorde(self.getworduces(word)) @@ -271,7 +289,7 @@ class Corpus : def getallucis(self): uces = [row[1] for row in self.getalluces()] return [[uci.ident, '\n'.join([uces[uce.ident] for uce in uci.uces])] for uci in self.ucis] - + def getucesfrometoile(self, etoile) : return [uce.ident for uci in self.ucis for uce in uci.uces if etoile in uci.etoiles] @@ -296,7 +314,7 @@ class Corpus : else : idpara += 1 return etoileuces - + def getetoileucis(self): etoileuces = {} for uci in self.ucis : @@ -317,6 +335,8 @@ class Corpus : def getactivesnb(self, key) : return len([lem for lem in self.lems if self.lems[lem].act == key]) + +# fonction inactive mais avec une incertitude concernant l'indentation sur le dernier else # def make_lems(self, lem = True) : # log.info('make lems') # self.lems = {} @@ -343,7 +363,7 @@ class Corpus : self.lems[self.formes[forme].lem] = Lem(self, self.formes[forme]) else : self.lems = dict([[forme, Lem(self, self.formes[forme])] for forme in self.formes]) - + def make_lems_from_dict(self, dictionnaire, dolem = True) : log.info('make lems from dict') self.lems = {} @@ -352,10 +372,10 @@ class Corpus : lem = dictionnaire[forme][0] gram = dictionnaire[forme][1] elif forme.isdigit() : - gram = u'num' + gram = 'num' lem = forme else : - gram = u'nr' + gram = 'nr' lem = forme self.formes[forme].lem = lem self.formes[forme].gram = gram @@ -367,7 +387,7 @@ class Corpus : self.lems[self.formes[forme].lem] = Lem(self, self.formes[forme]) else : self.lems[forme] = Lem(self, self.formes[forme]) - + def make_idformes(self) : self.idformes = dict([[self.formes[forme].ident, self.formes[forme]] for forme in self.formes]) @@ -392,13 +412,13 @@ class Corpus : tab = [] for lem in tokeep : deff = self.getlemuceseff(lem) - ucesk = deff.keys() + ucesk = list(deff.keys()) line = [lem] + [sum([deff[uce] for uce in et.intersection(ucesk)]) for et in etuces] if sum(line[1:]) >= mineff : tab.append(line) tab.insert(0, [''] + etoiles) return tab - + def make_tgen_table(self, tgen, etoiles, tot = None): lclasses = [self.getucesfrometoile(etoile) for etoile in etoiles] sets = [set(cl) for cl in lclasses] @@ -407,7 +427,7 @@ class Corpus : for forme in self.formes : formeuceeff = self.getformeuceseff(forme) for i, classe in enumerate(lclasses) : - concern = sets[i].intersection(formeuceeff.keys()) + concern = sets[i].intersection(list(formeuceeff.keys())) if len(concern) : totoccurrences[etoiles[i]] += sum([formeuceeff[uce] for uce in concern]) #tgenoccurrences = dict([[val, 0] for val in etoiles]) @@ -417,11 +437,11 @@ class Corpus : for lem in tgen[t] : lemuceeff = self.getlemuceseff(lem) for i, classe in enumerate(lclasses) : - concern = sets[i].intersection(lemuceeff.keys()) + concern = sets[i].intersection(list(lemuceeff.keys())) if len(concern) : tgenoccurrences[t][etoiles[i]] += sum([lemuceeff[uce] for uce in concern]) return tgenoccurrences, totoccurrences - + def make_tgen_profile(self, tgen, ucecl, uci = False) : log.info('tgen/classes') self.tgenlem = {} @@ -433,6 +453,7 @@ class Corpus : tab = [[lem] + [len(set(self.gettgenstprof(tgen[lem], classe, i, clnb)).intersection(classe)) for i, classe in enumerate(ucecl)] for lem in tgen] tab = [[line[0]] + [val for val in line[1:]] for line in tab if sum(line[1:]) >= 3] return tab + #i = 0 #nam = 'total' #while nam + `i` in tgen : @@ -444,7 +465,7 @@ class Corpus : #tab = [line0] + tab #with open(fileout, 'w') as f : # f.write('\n'.join(['\t'.join(line) for line in tab]).encode(self.parametres['syscoding'])) - + def make_efftype_from_etoiles(self, etoiles) : dtype = {} etuces = [[] for et in etoiles] @@ -457,7 +478,7 @@ class Corpus : etuces = [set(val) for val in etuces] for lem in self.lems : deff = self.getlemuceseff(lem) - ucesk = deff.keys() + ucesk = list(deff.keys()) gram = self.lems[lem].gram if gram in dtype : dtype[gram] = [i + j for i, j in zip(dtype[gram], [sum([deff[uce] for uce in et.intersection(ucesk)]) for et in etuces])] @@ -510,8 +531,8 @@ class Corpus : log.info('taille uc1 : %i - taille uc2 : %i' % (len(uc1), len(uc2))) self.write_ucmatrix(uc1, actives, uc1out) self.write_ucmatrix(uc2, actives, uc2out) - listuce1 = [['uce', 'uc']] + [[`uce`, `i`] for i, ucl in enumerate(uc1) for uce in ucl] - listuce2 = [['uce', 'uc']] + [[`uce`, `i`] for i, ucl in enumerate(uc2) for uce in ucl] + listuce1 = [['uce', 'uc']] + [[repr(uce), repr(i)] for i, ucl in enumerate(uc1) for uce in ucl] + listuce2 = [['uce', 'uc']] + [[repr(uce), repr(i)] for i, ucl in enumerate(uc2) for uce in ucl] with open(listuce1out, 'w') as f : f.write('\n'.join([';'.join(line) for line in listuce1])) with open(listuce2out, 'w') as f : @@ -528,7 +549,7 @@ class Corpus : for uce in self.getlemuces(lem): if (uces_uc[uce], i) not in deja_la : nbl += 1 - f.write(''.join([' '.join([`uces_uc[uce]+1`,`i+1`,`1`]),'\n'])) + f.write(''.join([' '.join([repr(uces_uc[uce]+1),repr(i+1),repr(1)]),'\n'])) deja_la[(uces_uc[uce], i)] = 0 f.seek(0) with open(fileout, 'w') as ffin : @@ -545,32 +566,32 @@ class Corpus : self.make_iduces() actuci = '' actpara = False - with open(outf,'w') as f : + with open(outf,'w', encoding='utf8') as f : for uce in res : if self.iduces[uce[0]].uci == actuci and self.iduces[uce[0]].para == actpara : - f.write(uce[1].encode(self.parametres['syscoding']) + '\n') + f.write(uce[1] + '\n') elif self.iduces[uce[0]].uci != actuci : actuci = self.iduces[uce[0]].uci if self.ucis[self.iduces[uce[0]].uci].paras == [] : actpara = self.iduces[uce[0]].para - f.write('\n' + ' '.join(self.ucis[self.iduces[uce[0]].uci].etoiles).encode(self.parametres['syscoding']) + '\n' + uce[1].encode(self.parametres['syscoding']) + '\n') + f.write('\n' + ' '.join(self.ucis[self.iduces[uce[0]].uci].etoiles) + '\n' + uce[1] + '\n') else : ident = 0 actpara = self.iduces[uce[0]].para - f.write('\n'.join([' '.join(self.ucis[self.iduces[uce[0]].uci].etoiles).encode(self.parametres['syscoding']), self.ucis[self.iduces[uce[0]].uci].paras[ident].encode(self.parametres['syscoding']), uce[1].encode(self.parametres['syscoding'])]) + '\n') + f.write('\n'.join([' '.join(self.ucis[self.iduces[uce[0]].uci].etoiles), self.ucis[self.iduces[uce[0]].uci].paras[ident], uce[1]] + '\n')) elif self.iduces[uce[0]].para != actpara : actpara = self.iduces[uce[0]].para ident += 1 - f.write('\n'.join([self.ucis[self.iduces[uce[0]].uci].paras[ident].encode(self.parametres['syscoding']), uce[1].encode(self.parametres['syscoding'])]) + '\n') - + f.write('\n'.join([self.ucis[self.iduces[uce[0]].uci].paras[ident], uce[1]]) + '\n') + def export_meta_table(self, outf) : - metas = [[`i`] + text.etoiles[1:] for i, text in enumerate(self.ucis)] + metas = [[repr(i)] + text.etoiles[1:] for i, text in enumerate(self.ucis)] longueur_max = max([len(val) for val in metas]) first = ['column_%i' % i for i in range(longueur_max)] metas.insert(0, first) - with open(outf, 'w') as f : - f.write('\n'.join(['\t'.join(line) for line in metas]).encode(self.parametres['syscoding'])) - + with open(outf, 'w', encoding='utf8') as f : + f.write('\n'.join(['\t'.join(line) for line in metas])) + def export_corpus_classes(self, outf, alc = True, lem = False, uci = False) : ucecl = {} for i, lc in enumerate(self.lc) : @@ -583,7 +604,7 @@ class Corpus : self.make_iduces() else : res = self.getallucis() - with open(outf, 'w') as f : + with open(outf, 'w', encoding='utf8') as f : for uce in res : guce = uce[1] if not uci : @@ -596,27 +617,27 @@ class Corpus : etline = ' '.join(self.ucis[actuci].etoiles + ['*classe_%i' % ucecl[uce[0]]]) else : etline = ' '.join(['<' + '='.join(et.split('_')) + '>' for et in self.ucis[actuci].etoiles[1:]]) - f.write(etline.encode(self.parametres['syscoding']) + '\n') - f.write(guce.encode(self.parametres['syscoding']) + '\n\n') + f.write(etline + '\n') + f.write(guce + '\n\n') def export_classe(self, outf, classe, lem = False, uci = False) : - sts = self.lc[classe - 1] + sts = self.lc[classe - 1] if not uci : res = self.getconcorde(sts) self.make_iduces() else : res = self.getuciconcorde(sts) - with open(outf, 'w') as f : + with open(outf, 'w', encoding='utf8') as f : for uce in res : guce = uce[1] if not uci : - f.write(' '.join(self.ucis[self.iduces[uce[0]].uci].etoiles).encode(self.parametres['syscoding']) + '\n') + f.write(' '.join(self.ucis[self.iduces[uce[0]].uci].etoiles) + '\n') else : - f.write(' '.join(self.ucis[uce[0]].etoiles).encode(self.parametres['syscoding']) + '\n') + f.write(' '.join(self.ucis[uce[0]].etoiles) + '\n') if lem : guce = ' '.join([self.formes[forme].lem for forme in guce.split()]) - f.write(guce.encode(self.parametres['syscoding']) + '\n\n') - + f.write(guce + '\n\n') + def export_owledge(self, rep, classe, lem = False, uci = False) : sts = self.lc[classe - 1] if not uci : @@ -627,12 +648,12 @@ class Corpus : for uce in res : ident = uce[0] guce = uce[1] - outf = '.'.join([`ident`, 'txt']) + outf = '.'.join([repr(ident), 'txt']) outf = os.path.join(rep, outf) if lem : guce = ' '.join([self.formes[forme].lem for forme in guce.split()]) - with open(outf, 'w') as f : - f.write(guce.encode('cp1252', errors = 'replace')) + with open(outf, 'w', encoding='utf8') as f : + f.write(guce) #.encode('cp1252', errors = 'replace')) def export_tropes(self, fileout, classe, lem = False, uci = False) : sts = self.lc[classe - 1] @@ -641,12 +662,12 @@ class Corpus : self.make_iduces() else : res = self.getuciconcorde(sts) - with open(fileout, 'w') as f : + with open(fileout, 'w', encoding='utf8') as f : for uce in res : guce = uce[1] if lem : guce = ' '.join([self.formes[forme].lem for forme in guce.split()]) - f.write(guce.encode('cp1252', errors = 'replace')) + f.write(guce) #.encode('cp1252', errors = 'replace')) f.write('\n') def make_and_write_sparse_matrix_from_uces(self, actives, outfile, listuce = False) : @@ -656,16 +677,16 @@ class Corpus : for i, lem in enumerate(actives) : for uce in sorted(self.getlemuces(lem)) : nbl += 1 - f.write(''.join([' '.join([`uce+1`, `i+1`,`1`]),'\n'])) + f.write(''.join([' '.join([repr(uce+1), repr(i+1),repr(1)]),'\n'])) f.seek(0) - with open(outfile, 'w') as ffin : + with open(outfile, 'w') as ffin : ffin.write("%%%%MatrixMarket matrix coordinate integer general\n%i %i %i\n" % (self.getucenb(), len(actives), nbl)) for line in f : ffin.write(line) os.remove(outfile + '~') if listuce : with open(listuce, 'w') as f : - f.write('\n'.join(['uce;uc'] + [';'.join([`i`,`i`]) for i in range(0, self.getucenb())])) + f.write('\n'.join(['uce;uc'] + [';'.join([repr(i),repr(i)]) for i in range(0, self.getucenb())])) def make_and_write_sparse_matrix_from_uci(self, actives, outfile, listuci = False) : log.info('make_and_write_sparse_matrix_from_ucis %s' % outfile) @@ -674,16 +695,16 @@ class Corpus : for i, lem in enumerate(actives) : for uci in sorted(self.getlemucis(lem)) : nbl += 1 - f.write(''.join([' '.join([`uci+1`, `i+1`,`1`]),'\n'])) + f.write(''.join([' '.join([repr(uci+1), repr(i+1),repr(1)]),'\n'])) f.seek(0) - with open(outfile, 'w') as ffin : + with open(outfile, 'w') as ffin : ffin.write("%%%%MatrixMarket matrix coordinate integer general\n%i %i %i\n" % (self.getucinb(), len(actives), nbl)) for line in f : ffin.write(line) os.remove(outfile + '~') if listuci : with open(listuci, 'w') as f : - f.write('\n'.join(['uci;uc'] + [';'.join([`i`,`i`]) for i in range(0, self.getucinb())])) + f.write('\n'.join(['uci;uc'] + [';'.join([repr(i),repr(i)]) for i in range(0, self.getucinb())])) def make_and_write_sparse_matrix_from_classe(self, actives, uces, outfile) : log.info('make_and_write_sparse_matrix_from_classe %s' % outfile) @@ -693,14 +714,14 @@ class Corpus : for i, lem in enumerate(actives) : uces_ok = list(set(self.getlemuces(lem)).intersection(uces)) for uce in uces_ok : - f.write(''.join([' '.join([`duces[uce]+1`,`i+1`,`1`]),'\n'])) + f.write(''.join([' '.join([repr(duces[uce]+1),repr(i+1),repr(1)]),'\n'])) f.seek(0) - with open(outfile, 'w') as ffin : + with open(outfile, 'w') as ffin : ffin.write("%%%%MatrixMarket matrix coordinate integer general\n%i %i %i\n" % (len(uces), len(actives), nbl)) for line in f : ffin.write(line) os.remove(outfile + '~') - + def make_table_with_classe(self, uces, list_act, uci = False) : table_uce = [[0 for val in list_act] for line in range(0,len(uces))] uces = dict([[uce, i] for i, uce in enumerate(uces)]) @@ -713,8 +734,8 @@ class Corpus : for uce in lemuces : table_uce[uces[uce]][i] = 1 table_uce.insert(0, list_act) - return table_uce - + return table_uce + def make_pondtable_with_classe(self, uces, list_act) : table_uce = [[0 for val in list_act] for line in range(0,len(uces))] uces = dict([[uce, i] for i, uce in enumerate(uces)]) @@ -724,7 +745,7 @@ class Corpus : for uce in lemuces : table_uce[uces[uce]][i] = uceseff[uce] table_uce.insert(0, list_act) - return table_uce + return table_uce def parse_active(self, gramact, gramsup = None) : log.info('parse actives') @@ -781,16 +802,16 @@ class Corpus : tab = [[lem] + [len(set(self.getlemucis(lem)).intersection(classe)) for classe in ucecl] for lem in actives] else : tab = [[lem] + [len(set(self.getlemuces(lem)).intersection(classe)) for classe in ucecl] for lem in actives] - tab = [[line[0]] + [`val` for val in line[1:]] for line in tab if sum(line[1:]) >= 3] - with open(fileout, 'w') as f : - f.write('\n'.join([';'.join(line) for line in tab]).encode(self.parametres['syscoding'])) + tab = [[line[0]] + [repr(val) for val in line[1:]] for line in tab if sum(line[1:]) >= 3] + with open(fileout, 'w', encoding='utf8') as f : + f.write('\n'.join([';'.join(line) for line in tab])) def make_etoiles(self) : etoiles = set([]) for uci in self.ucis : etoiles.update(uci.etoiles[1:]) return list(etoiles) - + def make_themes(self): themes = set([]) for uci in self.ucis : @@ -818,7 +839,7 @@ class Corpus : except IndexError : det[et[0]] = 1 return det - + def make_theme_dict(self): themes = [val for uci in self.ucis for val in uci.paras] det = {} @@ -858,8 +879,8 @@ class Corpus : else : etoileuces = self.getetoileucis() etoileuces = dict([[et, etoileuces[et]] for et in etoileuces if len(etoileuces[et]) > 1]) - with open(fileout, 'w') as f : - f.write('\n'.join([';'.join([et] + [`len(set(etoileuces[et]).intersection(classe))` for classe in ucecl]) for et in etoileuces]).encode(self.parametres['syscoding'])) + with open(fileout, 'w', encoding='utf8') as f : + f.write('\n'.join([';'.join([et] + [repr(len(set(etoileuces[et]).intersection(classe))) for classe in ucecl]) for et in etoileuces])) #.encode(self.parametres['syscoding']) #etoiles = self.make_etoiles() #with open(fileout, 'w') as f : # f.write('\n'.join([';'.join([etoile] + [`len(set(self.getucesfrometoile(etoile)).intersection(classe))` for classe in ucecl]) for etoile in etoiles]).encode(self.parametres['syscoding'])) @@ -873,9 +894,9 @@ class Corpus : ucecl[uce] = 0 color = ['black'] + colors[len(self.lc) - 1] txt = ''' - +
-''' % sys.getdefaultencoding() +''' if not uci : res = self.getalluces() self.make_iduces() @@ -900,6 +921,32 @@ class Corpus : txt += '' % (color[ucecl[uce[0]]]) + uce[1] + '