From bca66db798a88d8fcc3d702643916b507107de01 Mon Sep 17 00:00:00 2001 From: pierre Date: Sat, 7 Sep 2024 18:31:45 +0200 Subject: [PATCH] encoding --- PrintRScript.py | 8 ++++---- ProfList.py | 12 ++++++------ functions.py | 8 ++++---- graph_to_json.py | 12 +++++------- import_txm.py | 6 +++--- iramuteq.py | 2 +- listlex.py | 5 +---- parse_europress.py | 2 +- parse_factiva_txt.py | 6 +++--- tabchdalc.py | 4 ++-- tabchi2mcnemar.py | 10 +++++----- textstat.py | 2 +- 12 files changed, 36 insertions(+), 41 deletions(-) diff --git a/PrintRScript.py b/PrintRScript.py index 86f4379..0ce0bfc 100755 --- a/PrintRScript.py +++ b/PrintRScript.py @@ -619,7 +619,7 @@ def barplot(table, parametres, intxt = False): coordinf <- coord[toinf] valinf <- di[toinf] text(x=coordinf, y=valinf - 0.1, 'i') - } + } c <- colMeans(coord) c1 <- c[-1] c2 <- c[-length(c)] @@ -639,15 +639,15 @@ def barplot(table, parametres, intxt = False): mn <- round(min(di)) mx <- round(max(di)) for (i in mn:mx) { - if ((i/d) == (i%%/%%d)) { + if ((i/d) == (i%%/%%d)) { abline(h=i,lty=3) } } par(mar=c(0,0,0,0)) plot(0, axes = FALSE, pch = '') - legend(x = 'center' , rownames(di), fill = color) + legend(x = 'center' , rownames(di) , fill = color) dev.off() - """ % (ffr(parametres['rgraph']), parametres['width'], parametres['height'], ffr(parametres['tmpgraph']), parametres['svg']) + """ % (ffr(parametres['rgraph']), parametres['width'], parametres['height'], ffr(parametres['tmpgraph']), parametres['svg']) else: txt += """ load("%s") diff --git a/ProfList.py b/ProfList.py index e4629b1..cf4618c 100644 --- a/ProfList.py +++ b/ProfList.py @@ -432,7 +432,7 @@ class ProfListctrlPanel(wx.ListCtrl, listmix.ListCtrlAutoWidthMixin, listmix.Col if self.var_mod == {} : self.var_mod = treat_var_mod([val for val in corpus.actives] + [val for val in corpus.sups]) var_mod = self.var_mod - with codecs.open(self.Source.pathout['chisqtable'], 'r', corpus.parametres['syscoding']) as f : + with 
open(self.Source.pathout['chisqtable'], 'r', encoding='utf8') as f : chistable = [line.replace('\n','').replace('\r','').replace('"','').replace(',','.').split(';') for line in f] title = chistable[0] title.pop(0) @@ -540,7 +540,7 @@ class ProfListctrlPanel(wx.ListCtrl, listmix.ListCtrlAutoWidthMixin, listmix.Col pathout = ConstructPathOut(self.Source.pathout.dirout, 'simi_classe_%i' %self.cl) if self.tmpchi is None : self.tmpchi = os.path.join(pathout,'chi.csv') - with open(self.tmpchi, 'w') as f: + with open(self.tmpchi, 'w', encoding='utf8') as f: f.write('\n'.join([str(val) for val in self.lchi])) self.filename = os.path.join(pathout,'mat01.csv') tableau.printtable(self.filename, tab) @@ -599,7 +599,7 @@ class ProfListctrlPanel(wx.ListCtrl, listmix.ListCtrlAutoWidthMixin, listmix.Col word = self.getColumnText(self.GetFirstSelected(), 6) if self.tmpchi is None : self.tmpchi = os.path.join(self.Source.parametres['pathout'],'chi_%i.csv' % self.cl) - with open(self.tmpchi, 'w') as f: + with open(self.tmpchi, 'w', encoding='utf8') as f: f.write('\n'.join([str(val) for val in self.lchi])) index = self.la.index(word) parametres = {'type' : 'clustersimitxt', @@ -615,7 +615,7 @@ class ProfListctrlPanel(wx.ListCtrl, listmix.ListCtrlAutoWidthMixin, listmix.Col def on_graph(self, evt): if self.tmpchi is None : self.tmpchi = os.path.join(self.Source.parametres['pathout'],'chi_%i.csv' % self.cl) - with open(self.tmpchi, 'w') as f: + with open(self.tmpchi, 'w', encoding='utf8') as f: f.write('\n'.join([str(val) for val in self.lchi])) parametres = {'type' : 'clustersimitxt', 'pathout' : self.Source.parametres['pathout'], @@ -690,7 +690,7 @@ class ProfListctrlPanel(wx.ListCtrl, listmix.ListCtrlAutoWidthMixin, listmix.Col corpus = self.Source.corpus else : corpus = self.Source.tableau - with codecs.open(self.Source.pathout['chisqtable'], 'r', corpus.parametres['syscoding']) as f : + with open(self.Source.pathout['chisqtable'], 'r', encoding='utf8') as f : chistable = 
[line.replace('\n','').replace('\r','').replace('"','').replace(',','.').split(';') for line in f] title = chistable[0] title.pop(0) @@ -709,7 +709,7 @@ class ProfListctrlPanel(wx.ListCtrl, listmix.ListCtrlAutoWidthMixin, listmix.Col corpus = self.Source.corpus else : corpus = self.Source.tableau - with codecs.open(self.Source.pathout['chisqtable'], 'r', corpus.parametres['syscoding']) as f : + with open(self.Source.pathout['chisqtable'], 'r', encoding='utf8') as f : chistable = [line.replace('\n','').replace('\r','').replace('"','').replace(',','.').split(';') for line in f] title = chistable[0] title.pop(0) diff --git a/functions.py b/functions.py index 4fed542..72e050b 100755 --- a/functions.py +++ b/functions.py @@ -117,7 +117,7 @@ class History : self.read() def read(self) : - with open(self.filein, 'r') as fjson : + with open(self.filein, 'r', encoding='utf8') as fjson : d = json.load(fjson) # d = shelve.open(self.filein, protocol=1) self.history = d.get('history', []) @@ -133,7 +133,7 @@ class History : d = {} d['history'] = self.history d['matrix'] = self.matrix - with open(self.filein, 'w') as f : + with open(self.filein, 'w', encoding='utf8') as f : f.write(json.dumps(d, indent=4, default=str)) #d = shelve.open(self.filein, protocol=1) #d.close() @@ -892,7 +892,7 @@ def getallstcarac(corpus, analyse) : print(profils) def read_chd(filein, fileout): - with open(filein, 'r') as f : + with open(filein, 'r', encoding='utf8') as f : content = f.read() #content = [line[3:].replace('"',"").replace(' ','') for line in content.splitlines()] content = [line.split('\t') for line in content.splitlines()] @@ -913,7 +913,7 @@ def read_chd(filein, fileout): mere[line[0]]['children'] = [{'name': line[1],'size' : content[i+1][0]}, {'name':line[2], 'size': content[i+1][1]}] mere[line[1]] = mere[line[0]]['children'][-2] mere[line[2]] = mere[line[0]]['children'][-1] - with open(fileout, 'w') as f : + with open(fileout, 'w', encoding='utf8') as f : f.write(json.dumps(chd)) 
diff --git a/graph_to_json.py b/graph_to_json.py index 2ccc766..8e2f9cd 100644 --- a/graph_to_json.py +++ b/graph_to_json.py @@ -26,7 +26,7 @@ class GraphToJson : edges = [line.split('\t') for line in content] except : edges = None - + with codecs.open(nodesfile, 'r', 'utf8') as f : content = f.read() content = content.replace('"','') @@ -53,17 +53,15 @@ class GraphToJson : nodes = [line.split('\t') for line in content] graph = {'edges': [], 'nodes' : {}} - + we = titles_edges.index('weight') if edges is not None : for edge in edges : graph['edges'].append({'source' : edge[0], 'target' : edge[1], 'weight' : edge[we]}) - - + coefcoord = parametres.get('coefcoord', 1) coefweight = parametres.get('coefweight', 1) - - + for node in nodes : if zr is not None : graph['nodes'][node[ni]] = {"location" : [float(node[xr])*coefcoord, float(node[yr])*coefcoord, float(node[zr])*coefcoord], 'weight' : float(node[wr])/coefweight, 'color': (int(node[r]),int(node[g]),int(node[b]))} @@ -73,5 +71,5 @@ class GraphToJson : x = randint(-150,150) graph['nodes'][node[ni]] = {"location" : [ x, float(node[xr]), float(node[yr])], 'weight' : float(node[wr]), 'color': (int(node[r]),int(node[g]),int(node[b]))} - with open(jsonout, 'w') as f : + with open(jsonout, 'w', encoding='utf8') as f : json.dump(graph, f) diff --git a/import_txm.py b/import_txm.py index 37ffa2e..be440ce 100644 --- a/import_txm.py +++ b/import_txm.py @@ -36,7 +36,7 @@ class TXMParser(xml.sax.ContentHandler) : if name == 'p' : self.printsent() self.fileout.write('\n') - + def characters(self, content) : if self.name == 'txm:form' : if content not in ['', ' ', '\n', '\r'] : @@ -64,9 +64,9 @@ def TXM2IRA(pathin, fileout, encodage_in, encodage_out) : files = glob.glob(os.path.join(pathin,'*.xml')) if len(files) == 0 : return 'nofile' - with open(fileout, 'w') as fout : + with open(fileout, 'w', encoding='utf8') as fout : parser.setContentHandler(TXMParser(fout, encodage_out)) for f in files : - parser.parse(open(f, 'r')) + 
parser.parse(open(f, 'r', encoding='utf8')) fout.write('\n\n') return None diff --git a/iramuteq.py b/iramuteq.py index 7ca049f..fe7a629 100755 --- a/iramuteq.py +++ b/iramuteq.py @@ -503,7 +503,7 @@ class IraFrame(wx.Frame): # fichier d'historique de Iramuteq #------------------------------------------------------------------------------------------------ if not os.path.exists(os.path.join(UserConfigPath, 'history.db')) : - with open(os.path.join(UserConfigPath, 'history.db'), 'w') as f : + with open(os.path.join(UserConfigPath, 'history.db'), 'w', encoding='utf8') as f : f.write('{}') self.history = History(os.path.join(UserConfigPath, 'history.db')) # l'extension ".db" est ajoutée automatiquement par le module diff --git a/listlex.py b/listlex.py index d090937..a831dbe 100644 --- a/listlex.py +++ b/listlex.py @@ -368,7 +368,7 @@ class ListForSpec(wx.ListCtrl, listmix.ListCtrlAutoWidthMixin, listmix.ColumnSor txt = barplot('', '', '', self.parent.RscriptsPath['Rgraph'], tmpgraph, intxt = intxt) # ecriture du script dans un fichier tmpscript = tempfile.mktemp(dir=self.parent.TEMPDIR) - with open(tmpscript, 'w') as f : + with open(tmpscript, 'w', encoding='utf8') as f : f.write(txt) # excution du script exec_rcode(self.parent.RPath, tmpscript, wait = True) @@ -388,10 +388,7 @@ class ListForSpec(wx.ListCtrl, listmix.ListCtrlAutoWidthMixin, listmix.ColumnSor ira = wx.GetApp().GetTopWindow() item=self.getColumnText(self.GetFirstSelected(), 0) wordlist = [val for val in self.tgens[item] if val in corpus.lems] - print(wordlist) wordlist = dict(list(zip(wordlist,wordlist))) - print(wordlist) - print(self.tgenlem) res = dict([[val, self.tgenlem[val]] for val in self.tgenlem if self.tgenlem[val][0] in wordlist]) win = ListLexFrame(self, ira, corpus, res, self.etoiles) win.Show() diff --git a/parse_europress.py b/parse_europress.py index b882de5..a4c7e23 100755 --- a/parse_europress.py +++ b/parse_europress.py @@ -163,7 +163,7 @@ def ParseEuropress(txtdir, fileout, 
encodage_in, encodage_out) : files.append(txtdir) tot = 0 parser = MyHTMLParser() - with open(fileout,'w') as outf : + with open(fileout,'w', encoding='utf8') as outf : for f in files : print(f) parser.doinit(outf) diff --git a/parse_factiva_txt.py b/parse_factiva_txt.py index e45c41f..6f12b75 100644 --- a/parse_factiva_txt.py +++ b/parse_factiva_txt.py @@ -100,11 +100,11 @@ class ParseFactivaPaste : nf.sort() files += nf tot = 0 - with open(fileout,'w') as outf : + with open(fileout,'w', encoding='utf8') as outf : for f in files : print(f) - with codecs.open(f, 'r', encodage_in) as infile : - content = infile.read() + with codecs.open(f, 'r', encodage_in) as infile : + content = infile.read() ucis = parsetxtpaste(content) print_ucis(ucis, outf, encodage_out) tot += len(ucis) diff --git a/tabchdalc.py b/tabchdalc.py index e4c8fc5..bacfc9e 100644 --- a/tabchdalc.py +++ b/tabchdalc.py @@ -217,7 +217,7 @@ class AnalyseQuest(AnalyseMatrix): save.image(file="%s") """ % ffr(self.pathout['RData']) tmpfile = tempfile.mktemp(dir=self.parent.TEMPDIR) - tmpscript = open(tmpfile, 'w') + tmpscript = open(tmpfile, 'w', encoding='utf8') tmpscript.write(txt) tmpscript.close() pid = exec_rcode(self.parent.RPath, tmpfile, wait = False) @@ -226,4 +226,4 @@ class AnalyseQuest(AnalyseMatrix): check_Rresult(self.parent, pid) temps = time.time() - self.t1 self.minutes, self.seconds = divmod(temps, 60) - self.hours, self.minutes = divmod(self.minutes, 60) + self.hours, self.minutes = divmod(self.minutes, 60) diff --git a/tabchi2mcnemar.py b/tabchi2mcnemar.py index cff3167..439aa93 100644 --- a/tabchi2mcnemar.py +++ b/tabchi2mcnemar.py @@ -313,7 +313,7 @@ class McNemar(AnalyseMatrix): """ % (ffr(self.parametres['pathout']),ffr(self.OutFrame)) tmpfile=tempfile.mktemp(dir=self.TEMPDIR) print(tmpfile) - tmpscript=open(tmpfile,'w') + tmpscript=open(tmpfile,'w', encoding='utf8') tmpscript.write(txt) tmpscript.close() pid = exec_rcode(self.RPath, tmpfile, wait = False) @@ -340,7 +340,7 @@ class 
McNemar(AnalyseMatrix): def dolayout(self, option): ListFile=[False] - file=open(self.OutFrame,'r') + file=open(self.OutFrame,'r', encoding='utf8') content=file.readlines() file.close() lcont = [line.replace('"','').replace('\n','').split(';') for line in content] @@ -363,7 +363,7 @@ class McNemar(AnalyseMatrix): if option['contrib'] : allcoord.append([i for i,chi in enumerate(lcont) if chi[1]=='*contrib*']) names.append('Contributions a posteriori') - if option['pourcent'] : + if option['pourcent'] : allcoord.append([i for i,chi in enumerate(lcont) if chi[1]=='*pr*']) names.append('Pourcentages') if option['pourcentl'] : @@ -402,7 +402,7 @@ class McNemar(AnalyseMatrix): txt = '


\n'.join(['

'.join([tab[i] for tab in allhtml]) for i,val in enumerate(res)]) txt = header + pretxt + txt + '\n' fileout=os.path.join(self.parametres['pathout'],'resultats-chi2.html') - with open(fileout, 'w') as f : + with open(fileout, 'w', encoding='utf8') as f : f.write(txt) - ListFile.append(fileout) + ListFile.append(fileout) return ListFile diff --git a/textstat.py b/textstat.py index 56e0942..424c751 100755 --- a/textstat.py +++ b/textstat.py @@ -51,7 +51,7 @@ class Stat(AnalyseText) : supp = sortedby(supp, 2, 1) supp = [[i, val] for i, val in enumerate(supp)] ucesize = self.corpus.getucesize() - with open(self.pathout['stsize.csv'], 'w') as f : + with open(self.pathout['stsize.csv'], 'w', encoding='utf8') as f : f.write('\n'.join([repr(val) for val in ucesize])) self.result = {'total' : dict(tot), 'formes_actives' : dict(act), 'formes_supplémentaires' : dict(supp), 'hapax' : dict(hapax), 'glob' : ''} occurrences = sum([val[1][1] for val in tot]) + len(hapax) -- 2.7.4