From 41491d62bfccd2f335064b067f6d1b03a09887eb Mon Sep 17 00:00:00 2001
From: pierre
Date: Wed, 9 Oct 2024 14:42:01 +0200
Subject: [PATCH] stat in spec

---
Reviewer note (text between '---' and the diffstat is ignored by git am):
the line breaks and the Python indentation below were reconstructed from a
whitespace-flattened copy of this patch. Indentation is assumed to be
4 spaces per level and the hunk line counts were not recomputed, so check
the result against the tree; if the patch is rejected, try
`git apply --recount --ignore-whitespace`.

 corpus.py       |  8 ++++++++
 functions.py    | 31 ++++++++++++++++++++++++++++---
 layout.py       | 10 +++++++++-
 textaslexico.py |  1 +
 4 files changed, 46 insertions(+), 4 deletions(-)

diff --git a/corpus.py b/corpus.py
index 58944e8..64f99d1 100644
--- a/corpus.py
+++ b/corpus.py
@@ -1071,6 +1071,14 @@ class Corpus :
         lclasses = [self.getucesfrometoile(etoile) for etoile in etoiles]
         stats = self.get_stat_by_cluster(None, lclasses)
         stats = [[etoiles[i]] + val for i, val in enumerate(stats)]
+        first = [_('variable'), _('occurences'), _('formes'), _('hapax'), _('segments'), _('hapax/formes')]
+        if outf is not None :
+            toprint = '\t'.join(first) + "\n"
+            toprint += '\n'.join(['\t'.join(line) for line in stats])
+            with open(outf, 'w', encoding='utf8') as f :
+                f.write(toprint)
+        else :
+            return stats
 
     def gethapaxbyet(self, etoiles) :
         hapaxuces = [self.getlemuces(forme)[0] for forme in self.lems if self.lems[forme].freq == 1]
diff --git a/functions.py b/functions.py
index 01916d4..5385806 100755
--- a/functions.py
+++ b/functions.py
@@ -701,7 +701,7 @@ def ReadList(filein, encoding = 'utf8', sep = ';'):
         content = f.read()
     content = [line.replace('\n', '').replace('\r','').replace('\"', '').replace(',', '.').split(sep) for line in content.splitlines()]
     first = content.pop(0)
-    dict = {}
+    dico = {}
     i = 0
     for line in content:
         nline = [line[0]]
@@ -714,9 +714,34 @@ def ReadList(filein, encoding = 'utf8', sep = ';'):
                 except:
                     don = float('%.5f' % float(val))
             nline.append(don)
-        dict[i] = nline
+        dico[i] = nline
         i += 1
-    return dict, first
+    return dico, first
+
+def readliststat(filein, sep='\t') :
+    with open(filein, 'r', encoding='utf8') as f :
+        content = f.read()
+    content = [line.replace('\n', '').replace('\r','').replace('\"', '').replace(',', '.').split(sep) for line in content.splitlines()]
+    dico = {}
+    content = list(map(list, zip(*content)))
+    first = content.pop(0)
+    i=0
+    for line in content:
+        nline = [line[0]]
+        for val in line[1:]:
+            if val == 'NA' :
+                don = ''
+            else:
+                try:
+                    don = int(val)
+                except:
+                    don = float('%.5f' % float(val))
+            nline.append(don)
+        dico[i] = nline
+        i += 1
+    return dico, first
+
+
+
 
 def read_dist_list(filein, sep=';') :
     ldict = {}
diff --git a/layout.py b/layout.py
index d32cf15..81d4e50 100644
--- a/layout.py
+++ b/layout.py
@@ -31,7 +31,7 @@ from wx.lib.agw.fmresources import *
 from chemins import ConstructPathOut, ChdTxtPathOut, FFF, ffr, PathOut, StatTxtPathOut, simipath
 from configparser import ConfigParser
 from functions import ReadProfileAsDico, GetTxtProfile, read_list_file, ReadList, exec_rcode, print_liste, BugReport, DoConf,\
-    indices_simi, check_Rresult, progressbar, normpath_win32, TGen, ReadList, launchcommand, read_dist_list
+    indices_simi, check_Rresult, progressbar, normpath_win32, TGen, ReadList, launchcommand, read_dist_list, readliststat
 from ProfList import ProfListctrlPanel
 from guiparam3d import param3d, simi3d
 from PrintRScript import write_afc_graph, print_simi3d, PrintSimiScript
@@ -768,6 +768,9 @@ class dolexlayout :
         self.DictSpec, first = ReadList(self.dictpathout['tablespecf'], self.corpus.parametres['syscoding'])
         if os.path.exists(self.pathout['banalites.csv']) :
             self.dictban, firstban = ReadList(self.pathout['banalites.csv'], self.corpus.parametres['syscoding'])
+        if os.path.exists(self.pathout['statbyet.csv']) :
+            self.dictstat, first = readliststat(self.pathout['statbyet.csv'])
+
         self.DictType, firstt = ReadList(self.dictpathout['tablespect'], self.corpus.parametres['syscoding'])
         self.DictEff, firsteff = ReadList(self.dictpathout['tableafcm'], self.corpus.parametres['syscoding'])
         self.DictEffType, firstefft = ReadList(self.dictpathout['tabletypem'], self.corpus.parametres['syscoding'])
@@ -782,6 +785,9 @@ class dolexlayout :
         self.ListPan.pathout = self.pathout
         if os.path.exists(self.pathout['banalites.csv']) :
             self.listban = ListForSpec(ira, self, self.dictban, ['eff'] + self.etoiles, usefirst = True)
+        if os.path.exists(self.pathout['statbyet.csv']) :
+            self.liststat = ListForSpec(ira, self,self.dictstat, self.etoiles)
+            self.liststat.pathout = self.pathout
         #self.ListPan2 = ListForSpec(sash.rightwin1, self, self.DictSpec, first)
         self.ListPant = ListForSpec(ira, self, self.DictType, self.etoiles)
         self.ListPant.pathout = self.pathout
@@ -793,6 +799,8 @@ class dolexlayout :
         self.ListPanEffRelForme.pathout = self.pathout
         self.ListPanEffRelType = ListForSpec(ira, self, self.DictEffRelType, self.etoiles)
         self.ListPanEffRelType.pathout = self.pathout
+        if os.path.exists(self.pathout['statbyet.csv']) :
+            self.TabStat.AddPage(self.liststat, _('Statistics'))
         self.TabStat.AddPage(self.ListPan, _('Forms'))
         if os.path.exists(self.pathout['banalites.csv']) :
             self.TabStat.AddPage(self.listban, _('Banal forms'))
diff --git a/textaslexico.py b/textaslexico.py
index 2ed892a..ee3af22 100644
--- a/textaslexico.py
+++ b/textaslexico.py
@@ -189,6 +189,7 @@ class Lexico(AnalyseText) :
         mineff = self.parametres['mineff']
         #dlg = progressbar(self, maxi = 3)
         tabout = self.corpus.make_lexitable(mineff, self.listet, gram = self.parametres['typeformes'])
+        self.corpus.get_stat_by_et(self.pathout['statbyet.csv'], self.listet)
         #log.warning('Fmax a 200')
         #Fmax = [line for line in tabout[1:] if sum(line[1:]) > 199]
         #formesmax = [line[0] for line in Fmax
-- 
2.7.4