From 65ba8b58b5d95ec80c4bb990fad06b5c75869d8f Mon Sep 17 00:00:00 2001
From: pierre
Date: Tue, 29 Sep 2020 10:46:45 +0200
Subject: [PATCH] error on decode utf8

gettranslation(): pass errors='replace' to decode() when cleaning the
translated words (the keyword is `errors`, not `error`).

History: after the shelve is closed, dump self.history and self.matrix
as indented JSON to /home/pierre/hystory.json.

doconcorde(): add an `et` keyword (default False). When it is true,
`mots` is used directly as the list of forms instead of being expanded
from lemmas through corpus.getlems() / corpus.getforme().

translateprofile(): lower the default `maxword` from 50 to 20.
---
Review notes (this section is dropped by `git am`):
 * The JSON dump in History writes to a hard-coded absolute path and
   looks like debug output; confirm it is meant to be committed.
 * Leading whitespace of the hunks was reconstructed; check the
   indentation against functions.py before applying.

 functions.py | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/functions.py b/functions.py
index 8c0c66c..c74f352 100755
--- a/functions.py
+++ b/functions.py
@@ -114,6 +114,11 @@ class History :
         self.matrixanalyse = dict([[mat['uuid'], mat] for mat in self.matrix])
         self.ordermatrix = dict([[matrix['uuid'], i] for i, matrix in enumerate(self.matrix)])
         d.close()
+        d = {}
+        d['history'] = self.history
+        d['matrix'] = self.matrix
+        with open('/home/pierre/hystory.json', 'w') as f :
+            f.write(json.dumps(d, indent=4, default=str))
 
     def write(self) :
         d = shelve.open(self.filein)
@@ -805,7 +810,7 @@ def treat_var_mod(variables) :
 #                     var_mod[var].append(variable)
     return var_mod
 
-def doconcorde(corpus, uces, mots, uci = False) :
+def doconcorde(corpus, uces, mots, uci = False, et = False) :
     if not uci :
         ucestxt1 = [row for row in corpus.getconcorde(uces)]
     else :
@@ -813,8 +818,11 @@ def doconcorde(corpus, uces, mots, uci = False) :
     ucestxt1 = dict(ucestxt1)
     ucestxt = []
     ucis_txt = []
-    listmot = [corpus.getlems()[lem].formes for lem in mots]
-    listmot = [corpus.getforme(fid).forme for lem in listmot for fid in lem]
+    if not et :
+        listmot = [corpus.getlems()[lem].formes for lem in mots]
+        listmot = [corpus.getforme(fid).forme for lem in listmot for fid in lem]
+    else :
+        listmot = mots
     mothtml = ['%s' % mot for mot in listmot]
     dmots = dict(zip(listmot, mothtml))
     for uce in uces :
@@ -883,7 +891,7 @@ def gettranslation(words, lf, lt) :
     request = urllib2.Request(link, headers=agent)
     raw_data = urllib2.urlopen(request).read()
     data = json.loads(raw_data)
-    return [line[0].decode('utf8').replace(u"'", u'_').replace(u' | ', u'|').replace(u' ', u'_').replace(u'-',u'_').replace(u'\n','') for line in data[0]]
+    return [line[0].decode('utf8', errors='replace').replace(u"'", u'_').replace(u' | ', u'|').replace(u' ', u'_').replace(u'-',u'_').replace(u'\n','') for line in data[0]]
 
 def makenprof(prof, trans, deb=0) :
     nprof=[]
@@ -901,7 +909,7 @@ def treatempty(val) :
     else :
         return val
 
-def translateprofile(corpus, dictprofile, lf='it', lt='fr', maxword = 50) :
+def translateprofile(corpus, dictprofile, lf='it', lt='fr', maxword = 20) :
     nprof = {}
     lems = {}
     for i in range(len(dictprofile)) :
-- 
2.7.4