europress parser

[iramuteq] / textaslexico.py
diff --git a/textaslexico.py b/textaslexico.py

index de0f181..b9268de 100644 (file)
--- a/textaslexico.py
+++ b/textaslexico.py
@@ -1,21 +1,22 @@
  # -*- coding: utf-8 -*-
  #Author: Pierre Ratinaud
  #Copyright (c) 2008-2011 Pierre Ratinaud
-#Lisense: GNU/GPL
+#License: GNU/GPL
  
-from chemins import ConstructPathOut, StatTxtPathOut
+from chemins import ConstructPathOut, StatTxtPathOut, PathOut, ffr
  #from corpus import Corpus
  from analysetxt import AnalyseText
  import wx
  import os
-import sys
-from listlex import *
-from functions import exec_rcode, progressbar, check_Rresult, CreateIraFile, print_liste, treat_var_mod, write_tab, DoConf
-from dialog import OptLexi, StatDialog #LexDialog
-from openanalyse import OpenAnalyse
+#import sys
+#from listlex import *
+from functions import exec_rcode, progressbar, check_Rresult, CreateIraFile, print_liste, treat_var_mod, write_tab, DoConf, TGen
+from dialog import OptLexi#, StatDialog 
+#from openanalyse import OpenAnalyse
  import tempfile
-from ConfigParser import RawConfigParser
-from guifunct import getPage, getCorpus
+#from ConfigParser import RawConfigParser
+#from guifunct import getPage, getCorpus
+from PrintRScript import TgenSpecScript
  from time import sleep
  import logging
  
@@ -39,13 +40,16 @@ class Lexico(AnalyseText) :
          txt = """
          source("%s")
          source("%s")
-        """ % (self.parent.RscriptsPath['chdfunct'], self.parent.RscriptsPath['Rgraph'])
+        """ % (ffr(self.parent.RscriptsPath['chdfunct']), ffr(self.parent.RscriptsPath['Rgraph']))
          txt += """
          dmf<-read.csv2("%s",row.names=1)
-        """ % self.dictpathout['tableafcm']
+        """ % ffr(self.dictpathout['tableafcm'])
          txt += """
          dmt<-read.csv2("%s",row.names=1)
-        """ % self.dictpathout['tabletypem']
+        """ % ffr(self.dictpathout['tabletypem'])
+        txt += """
+        indice <- "%s"
+        """ % self.parametres['indice']
          if self.parametres['indice'] == 'hypergeo' :
              txt += """
              outf <- make.spec.hypergeo(dmf)
@@ -56,21 +60,33 @@ class Lexico(AnalyseText) :
              outf<-AsLexico2(dmf)
              outt<-AsLexico2(dmt)
              """
-
+        txt += """
+        if (indice == 'hypergeo') {
+            banseuil <- 2
+        } else if (indice == 'chi2') {
+            banseuil <- 3
+        }
+        banal <- apply(abs(outf[[1]]), 1, max)
+        banal <- which(banal < banseuil)
+        banalfreq <- rowSums(dmf[banal,])
+        banalspec <- specf<-outf[[1]][banal,]
+        banal <- cbind(banalfreq, banalspec)
+        write.csv2(banal,file="%s")
+        """ % ffr(self.pathout['banalites.csv'])
          txt += """
          specf<-outf[[1]]
          spect<-outt[[1]]
          write.csv2(specf,file="%s")
-        """ % self.dictpathout['tablespecf']
+        """ % ffr(self.dictpathout['tablespecf'])
          txt += """
          write.csv2(spect,file="%s")
-        """ % self.dictpathout['tablespect']
+        """ % ffr(self.dictpathout['tablespect'])
          txt += """
          write.csv2(outf[[3]],file="%s")
-        """ % self.dictpathout['eff_relatif_forme']
+        """ % ffr(self.dictpathout['eff_relatif_forme'])
          txt += """
          write.csv2(outt[[3]],file="%s")
-        """ % self.dictpathout['eff_relatif_type']
+        """ % ffr(self.dictpathout['eff_relatif_type'])
          if self.parametres['clnb'] > 2 :
              txt += """
              library(ca)
@@ -117,11 +133,11 @@ class Lexico(AnalyseText) :
              debsup <- NULL
              debet <- NULL
              clnb <-  ncol(specf)
-            """ % (self.dictpathout['afcf_row'], self.dictpathout['afcf_col'], self.dictpathout['afct_row'], self.dictpathout['afct_col'], self.dictpathout['afcf_facteur_csv'], self.dictpathout['afcf_col_csv'], self.dictpathout['afcf_row_csv'], self.dictpathout['afct_facteur_csv'], self.dictpathout['afct_col_csv'], self.dictpathout['afct_row_csv'])
+            """ % (ffr(self.dictpathout['afcf_row']), ffr(self.dictpathout['afcf_col']), ffr(self.dictpathout['afct_row']), ffr(self.dictpathout['afct_col']), ffr(self.dictpathout['afcf_facteur_csv']), ffr(self.dictpathout['afcf_col_csv']), ffr(self.dictpathout['afcf_row_csv']), ffr(self.dictpathout['afct_facteur_csv']), ffr(self.dictpathout['afct_col_csv']), ffr(self.dictpathout['afct_row_csv']))
  
          txt += """
          save.image("%s")
-        """ % self.dictpathout['RData']
+        """ % ffr(self.dictpathout['RData'])
          tmpfile = tempfile.mktemp(dir=self.parent.TEMPDIR)
          tmpscript = open(tmpfile, 'w')
          tmpscript.write(txt)
@@ -146,11 +162,13 @@ class Lexico(AnalyseText) :
              else :
                  ListEt = variables[var[dial.list_box_1.GetSelections()[0]]]
              self.listet = ListEt
+            self.listet.sort()
              self.parametres['mineff'] = dial.spin.GetValue()
              if dial.choice_indice.GetSelection() == 0 :
                  self.parametres['indice'] = 'hypergeo'
              else :
                  self.parametres['indice'] = 'chi2'
+            self.parametres['typeformes'] = dial.typeformes.GetSelection()
              self.parametres['clnb'] = len(ListEt)
              dial.Destroy()
              return self.parametres
@@ -161,10 +179,10 @@ class Lexico(AnalyseText) :
      def make_lexico(self) :
          mineff = self.parametres['mineff']
          #dlg = progressbar(self, maxi = 3)
-        tabout = self.corpus.make_lexitable(mineff, self.listet)
+        tabout = self.corpus.make_lexitable(mineff, self.listet, gram = self.parametres['typeformes'])
          #log.warning('Fmax a 200')
          #Fmax = [line for line in tabout[1:] if sum(line[1:]) > 199]
-        #formesmax = [line[0] for line in Fmax]
+        #formesmax = [line[0] for line in Fmax]
          #Fmax = [line[1:] for line in Fmax]
          #summax = [sum(col) for col in zip(*Fmax)]
          #tabout.append(['Fmax'] + summax)
@@ -189,4 +207,27 @@ class Lexico(AnalyseText) :
                              [os.path.basename(self.dictpathout['afct_col']), u'colonnes']]
          print_liste(self.dictpathout['liste_graph_afcf'],afcf_graph_list)
          print_liste(self.dictpathout['liste_graph_afct'],afct_graph_list)
-        DoConf().makeoptions(['spec'],[self.parametres], self.dictpathout['ira'])
+        #DoConf().makeoptions(['spec'],[self.parametres], self.dictpathout['ira'])
+
+class TgenSpec(AnalyseText):  # analysis object that runs its whole pipeline from the constructor
+    def __init__(self, ira, corpus, parametres):  # NOTE(review): AnalyseText.__init__ is not called here
+        self.ira = ira  # only ira.syscoding is read by this class (see doanalyse)
+        self.corpus = corpus  # must provide make_tgen_table(tgen, etoiles)
+        self.parametres = parametres  # dict; keys read here: 'pathout', 'tgenpath', 'etoiles'
+        self.pathout = PathOut(dirout = self.parametres['pathout'])  # PathOut built on the output directory
+        self.doanalyse()  # the analysis is launched immediately on construction
+        
+    def doanalyse(self):  # build the tgen table, write it, then generate and run the R script
+        self.tgen = TGen(path = self.parametres['tgenpath'], encoding = self.ira.syscoding)
+        self.tgen.read(self.tgen.path)  # load the tgen definitions from the path given above
+        self.parametres['etoiles'].sort()  # sorts the caller's list in place
+        tgenocc, totocc = self.corpus.make_tgen_table(self.tgen, self.parametres['etoiles'])  # returns a pair
+        self.parametres['tgeneff'] = os.path.join(self.parametres['pathout'], 'tgeneff.csv')  # file written on the next line
+        self.tgen.writetable(self.parametres['tgeneff'], tgenocc, totocc)
+        self.parametres['tgenspec'] = os.path.join(self.parametres['pathout'], 'tgenspec.csv')  # path only recorded here; presumably written by the R script - TODO confirm
+        self.Rscript = TgenSpecScript(self)  # the script object receives this whole analysis instance
+        self.Rscript.make_script()
+        self.Rscript.write()
+        self.doR(self.Rscript.scriptout, dlg = False, message = 'R...')  # doR is not defined in this class; presumably inherited from AnalyseText - TODO confirm
+
+    
+\ No newline at end of file