stat in spec

author pierre <ratinaud@univ-tlse2.fr>

Wed, 9 Oct 2024 12:42:01 +0000 (14:42 +0200)

committer pierre <ratinaud@univ-tlse2.fr>

Wed, 9 Oct 2024 12:42:01 +0000 (14:42 +0200)
author pierre <ratinaud@univ-tlse2.fr>
Wed, 9 Oct 2024 12:42:01 +0000 (14:42 +0200)
committer pierre <ratinaud@univ-tlse2.fr>
Wed, 9 Oct 2024 12:42:01 +0000 (14:42 +0200)
diff --git a/corpus.py b/corpus.py

index 58944e8..64f99d1 100644 (file)
--- a/corpus.py
+++ b/corpus.py
@@ -1071,6 +1071,14 @@ class Corpus :
          lclasses = [self.getucesfrometoile(etoile) for etoile in etoiles]
          stats = self.get_stat_by_cluster(None, lclasses)
          stats = [[etoiles[i]] + val for i, val in enumerate(stats)]
+        first = [_('variable'), _('occurences'), _('formes'), _('hapax'), _('segments'), _('hapax/formes')]
+        if outf is not None :
+            toprint = '\t'.join(first) + "\n"
+            toprint += '\n'.join(['\t'.join(line) for line in stats])
+            with open(outf, 'w', encoding='utf8') as f :
+                f.write(toprint)
+        else :
+            return stats
  
      def gethapaxbyet(self, etoiles) :
          hapaxuces = [self.getlemuces(forme)[0] for forme in self.lems if self.lems[forme].freq == 1]
diff --git a/functions.py b/functions.py

index 01916d4..5385806 100755 (executable)
--- a/functions.py
+++ b/functions.py
@@ -701,7 +701,7 @@ def ReadList(filein, encoding = 'utf8', sep = ';'):
          content = f.read()
      content = [line.replace('\n', '').replace('\r','').replace('\"', '').replace(',', '.').split(sep) for line in content.splitlines()]
      first = content.pop(0)
-    dict = {}
+    dico = {}
      i = 0
      for line in content:
          nline = [line[0]]
@@ -714,9 +714,34 @@ def ReadList(filein, encoding = 'utf8', sep = ';'):
                  except:
                      don = float('%.5f' % float(val))
              nline.append(don)
-        dict[i] = nline
+        dico[i] = nline
          i += 1
-    return dict, first
+    return dico, first
+
+def readliststat(filein, sep='\t') :  # Read a sep-delimited UTF-8 text table, transpose it, and return (dict of transposed rows keyed by 0-based index, first transposed row).
+    with open(filein, 'r', encoding='utf8') as f :
+        content = f.read()
+    content = [line.replace('\n', '').replace('\r','').replace('\"', '').replace(',', '.').split(sep) for line in content.splitlines()]  # per line: drop CR/LF and double quotes, turn ',' into '.', then split on sep
+    dico = {}
+    content = list(map(list, zip(*content)))  # transpose: the file's columns become rows; zip stops at the shortest line, so extra cells on longer lines are dropped
+    first = content.pop(0)  # first transposed row, i.e. the first cell of every line of the file; raises IndexError when the file has no lines
+    i=0
+    for line in content:
+        nline = [line[0]]  # the first cell of each transposed row is kept as a string, unconverted
+        for val in line[1:]:
+            if val == 'NA' :
+                don = ''  # 'NA' cells are stored as empty strings
+            else:
+                try:
+                    don = int(val)  # integers are tried first
+                except:  # bare except: any failure of int() falls through to the float conversion
+                    don = float('%.5f' % float(val))  # float rounded to 5 decimal places; a cell that is not numeric raises ValueError here
+            nline.append(don)
+        dico[i] = nline  # rows are keyed by their 0-based position after the first row was popped
+        i += 1
+    return dico, first
+
+
  
  def read_dist_list(filein, sep=';') :
      ldict = {}
diff --git a/layout.py b/layout.py

index d32cf15..81d4e50 100644 (file)
--- a/layout.py
+++ b/layout.py
@@ -31,7 +31,7 @@ from wx.lib.agw.fmresources import *
  from chemins import ConstructPathOut, ChdTxtPathOut, FFF, ffr, PathOut, StatTxtPathOut, simipath
  from configparser import ConfigParser
  from functions import ReadProfileAsDico, GetTxtProfile, read_list_file, ReadList, exec_rcode, print_liste, BugReport, DoConf,\
- indices_simi, check_Rresult, progressbar, normpath_win32, TGen, ReadList, launchcommand, read_dist_list
+ indices_simi, check_Rresult, progressbar, normpath_win32, TGen, ReadList, launchcommand, read_dist_list, readliststat
  from ProfList import ProfListctrlPanel
  from guiparam3d import param3d, simi3d
  from PrintRScript import write_afc_graph, print_simi3d, PrintSimiScript
@@ -768,6 +768,9 @@ class dolexlayout :
          self.DictSpec, first = ReadList(self.dictpathout['tablespecf'], self.corpus.parametres['syscoding'])
          if os.path.exists(self.pathout['banalites.csv']) :
              self.dictban, firstban = ReadList(self.pathout['banalites.csv'], self.corpus.parametres['syscoding'])
+        if os.path.exists(self.pathout['statbyet.csv']) :
+            self.dictstat, first = readliststat(self.pathout['statbyet.csv'])
+
          self.DictType, firstt = ReadList(self.dictpathout['tablespect'], self.corpus.parametres['syscoding'])
          self.DictEff, firsteff = ReadList(self.dictpathout['tableafcm'], self.corpus.parametres['syscoding'])
          self.DictEffType, firstefft = ReadList(self.dictpathout['tabletypem'], self.corpus.parametres['syscoding'])
@@ -782,6 +785,9 @@ class dolexlayout :
          self.ListPan.pathout = self.pathout
          if os.path.exists(self.pathout['banalites.csv']) :
              self.listban = ListForSpec(ira, self, self.dictban, ['eff'] + self.etoiles, usefirst = True)
+        if os.path.exists(self.pathout['statbyet.csv']) :
+            self.liststat =  ListForSpec(ira, self,self.dictstat, self.etoiles)
+            self.liststat.pathout = self.pathout
          #self.ListPan2 = ListForSpec(sash.rightwin1, self, self.DictSpec, first)
          self.ListPant = ListForSpec(ira, self, self.DictType, self.etoiles)
          self.ListPant.pathout = self.pathout
@@ -793,6 +799,8 @@ class dolexlayout :
          self.ListPanEffRelForme.pathout = self.pathout
          self.ListPanEffRelType = ListForSpec(ira, self, self.DictEffRelType, self.etoiles)
          self.ListPanEffRelType.pathout = self.pathout
+        if os.path.exists(self.pathout['statbyet.csv']) :
+            self.TabStat.AddPage(self.liststat, _('Statistics'))
          self.TabStat.AddPage(self.ListPan, _('Forms'))
          if os.path.exists(self.pathout['banalites.csv']) :
              self.TabStat.AddPage(self.listban, _('Banal forms'))
diff --git a/textaslexico.py b/textaslexico.py

index 2ed892a..ee3af22 100644 (file)
--- a/textaslexico.py
+++ b/textaslexico.py
@@ -189,6 +189,7 @@ class Lexico(AnalyseText) :
          mineff = self.parametres['mineff']
          #dlg = progressbar(self, maxi = 3)
          tabout = self.corpus.make_lexitable(mineff, self.listet, gram = self.parametres['typeformes'])
+        self.corpus.get_stat_by_et(self.pathout['statbyet.csv'], self.listet)
          #log.warning('Fmax a 200')
          #Fmax = [line for line in tabout[1:] if sum(line[1:]) > 199]
          #formesmax = [line[0] for line in Fmax
author	pierre <ratinaud@univ-tlse2.fr>
	Wed, 9 Oct 2024 12:42:01 +0000 (14:42 +0200)
committer	pierre <ratinaud@univ-tlse2.fr>
	Wed, 9 Oct 2024 12:42:01 +0000 (14:42 +0200)
corpus.py		patch \| blob \| history
functions.py		patch \| blob \| history
layout.py		patch \| blob \| history
textaslexico.py		patch \| blob \| history