...

author Pierre <ratinaud@univ-tlse2.fr>

Tue, 11 Feb 2014 13:14:56 +0000 (14:14 +0100)

committer Pierre <ratinaud@univ-tlse2.fr>

Tue, 11 Feb 2014 13:14:56 +0000 (14:14 +0100)
author Pierre <ratinaud@univ-tlse2.fr>
Tue, 11 Feb 2014 13:14:56 +0000 (14:14 +0100)
committer Pierre <ratinaud@univ-tlse2.fr>
Tue, 11 Feb 2014 13:14:56 +0000 (14:14 +0100)
diff --git a/textcheckcorpus.py b/textcheckcorpus.py

index 1c52739..fa4f935 100644 (file)
--- a/textcheckcorpus.py
+++ b/textcheckcorpus.py
@@ -2,7 +2,7 @@
  # -*- coding: utf-8 -*-
  #Author: Pierre Ratinaud
  #Copyright (c) 2010, Pierre Ratinaud
-#Lisense: GNU/GPL
+#License: GNU/GPL
  from corpus import Corpus
  import wx
  import wx.lib.dialogs
diff --git a/textclassechd.py b/textclassechd.py

index 5bff4eb..8cf46c8 100644 (file)
--- a/textclassechd.py
+++ b/textclassechd.py
@@ -1,7 +1,7 @@
  # -*- coding: utf-8 -*-
  #Author: Pierre Ratinaud
  #Copyright (c) 2012, Pierre Ratinaud
-#Lisense: GNU/GPL
+#License: GNU/GPL
  
  import os
  
diff --git a/textdist.py b/textdist.py

index 07818e2..8886a9e 100644 (file)
--- a/textdist.py
+++ b/textdist.py
@@ -2,7 +2,7 @@
  # -*- coding: utf-8 -*-
  #Author: Pierre Ratinaud
  #Copyright (c) 2008-2009 Pierre Ratinaud
-#Lisense: GNU/GPL
+#License: GNU/GPL
  
  from chemins import ConstructPathOut, ConstructAfcUciPath, ChdTxtPathOut
  from corpus import Corpus
diff --git a/textsimi.py b/textsimi.py

index 228598a..807442e 100644 (file)
--- a/textsimi.py
+++ b/textsimi.py
@@ -1,7 +1,7 @@
  # -*- coding: utf-8 -*-
  #Author: Pierre Ratinaud
  #Copyright (c) 2008-2013 Pierre Ratinaud
-#Lisense: GNU/GPL
+#License: GNU/GPL
  
  from chemins import ffr, simipath
  #from corpus import Corpus
@@ -9,8 +9,7 @@ import os
  from analysetxt import AnalyseText
  #from ConfigParser import RawConfigParser
  #from guifunct import getPage, getCorpus
-from dialog import StatDialog
-from guifunct import SelectColumn, PrepSimi
+from guifunct import PrepSimi
  from functions import indices_simi, progressbar, treat_var_mod, read_list_file, print_liste
  #from tableau import Tableau
  #from tabsimi import DoSimi
@@ -27,14 +26,13 @@ class SimiTxt(AnalyseText):
          self.parametres['type'] = 'simitxt'
          self.pathout.basefiles(simipath)
          self.indices = indices_simi
-        self.makesimiparam()
+        if self.dlg :
+            self.makesimiparam()
          #FIXME
          self.actives = self.corpus.make_actives_limit(3)
          dictcol = dict([[i, [act, self.corpus.getlemeff(act)]] for i, act in enumerate(self.actives)]) 
          continu = False
          if self.dlg :
-            #cont = SelectColumn(self.ira, dictcol, self.actives, self.pathout['selected.csv'], dlg = self.dlg)
-            #if cont.ok :
              self.listet = self.corpus.make_etoiles()
              self.listet.sort()
              self.stars = copy(self.listet)
@@ -44,6 +42,8 @@ class SimiTxt(AnalyseText):
              if prep.val == wx.ID_OK :
                  continu = True
                  self.parametres = prep.parametres
+        else :
+            continu = True
          if continu :
              self.makefiles()
              script = PrintSimiScript(self)
@@ -94,7 +94,7 @@ class SimiTxt(AnalyseText):
                            'height' : 1000,
                            'bystar' : False,
                            'first' : True,
-                          'keep_coord' : True,
+                          'keep_coord' : False,
                            'alpha' : 20,
                            'film': False,
                            'svg' : 0,
@@ -130,7 +130,8 @@ class SimiFromCluster(SimiTxt) :
          self.parametres['type'] = 'clustersimitxt'
          self.pathout.basefiles(simipath)
          self.indices = indices_simi
-        self.makesimiparam()
+        if self.dlg  :
+            self.makesimiparam()
          if 'bystar' in self.parametres :
              del self.parametres['bystar']
          dictcol = dict([[i, [act, self.corpus.getlemclustereff(act, self.numcluster)]] for i, act in enumerate(self.actives)]) 
@@ -139,8 +140,8 @@ class SimiFromCluster(SimiTxt) :
              #self.listet = self.corpus.make_etoiles()
              #self.listet.sort()
              self.stars = []#copy(self.listet)
-            self.parametres['stars'] = False#copy(self.listet)
-            self.parametres['sfromchi'] = True
+            self.parametres['stars'] = 0#copy(self.listet)
+            self.parametres['sfromchi'] = 1
              prep = PrepSimi(self.ira, self, self.parametres, self.pathout['selected.csv'], self.actives, indices_simi, wordlist=dictcol)
              if prep.val == wx.ID_OK :
                  continu = True
@@ -149,6 +150,7 @@ class SimiFromCluster(SimiTxt) :
                  continu = False
          if continu :
              self.makefiles()
+            self.parametres['type'] = 'clustersimitxt'
              script = PrintSimiScript(self)
              script.make_script()
              if not self.doR(script.scriptout, dlg = self.dlg, message = 'R ...') :
diff --git a/textstat.py b/textstat.py

index 09ec518..54c8b4d 100644 (file)
--- a/textstat.py
+++ b/textstat.py
@@ -2,7 +2,7 @@
  # -*- coding: utf-8 -*-
  #Author: Pierre Ratinaud
  #Copyright (c) 2008-2012 Pierre Ratinaud
-#Lisense: GNU/GPL
+#License: GNU/GPL
  
  #from chemins import ConstructPathOut, StatTxtPathOut, ffr
  from chemins import PathOut
@@ -65,13 +65,13 @@ class Stat(AnalyseText) :
          phapax_forme = (float(len(hapax)) / (float(len(formes)))) * 100
          moy_occu_mot = float(occurrences) / float(len(formes))
          txt = 'Globale\n'
-        txt += 'nombre d\'uci : %i\n' % len(self.corpus.ucis)
+        txt += 'nombre de textes : %i\n' % len(self.corpus.ucis)
          txt += 'nombre d\'occurrences : %i\n' % occurrences
          txt += 'nombre de formes : %i\n' % (len(formes))
          txt += 'moyenne d\'occurrences par forme : %.2f\n' % moy_occu_mot
          txt += 'nombre d\'hapax : %i (%.2f%% des occurrences - %.2f%% des formes)\n' % (len(hapax), phapax, phapax_forme)
          print float(occurrences), float(len(self.corpus.ucis))
-        txt += 'moyenne d\'occurrences par uci : %.2f' % (float(occurrences)/float(len(self.corpus.ucis)))
+        txt += 'moyenne d\'occurrences par texte : %.2f' % (float(occurrences)/float(len(self.corpus.ucis)))
          if self.dlg :
               self.dlg.Update(7, u'Ecriture...')
          self.result['glob'] = txt
diff --git a/textwordcloud.py b/textwordcloud.py

index bc875bc..a85866a 100644 (file)
--- a/textwordcloud.py
+++ b/textwordcloud.py
@@ -2,7 +2,7 @@
  # -*- coding: utf-8 -*-
  #Author: Pierre Ratinaud
  #Copyright (c) 2008-2009 Pierre Ratinaud
-#Lisense: GNU/GPL
+#License: GNU/GPL
  
  from analysetxt import AnalyseText
  from guifunct import getPage, getCorpus, SelectColumn
diff --git a/tools.py b/tools.py

index 7db0e9e..e35ea4b 100644 (file)
--- a/tools.py
+++ b/tools.py
@@ -2,7 +2,7 @@
  # -*- coding: utf-8 -*-
  #Author: Pierre Ratinaud
  #Copyright (c) 2008-2013, Pierre Ratinaud
-#Lisense: GNU GPL
+#License: GNU GPL
  
  import codecs
  import os
@@ -68,19 +68,19 @@ class SplitFromVar :
          keepline = False
          filedict = {}
          with codecs.open(self.filein, 'r', self.encodein) as fin :
-             for line in fin :
-                 if istext(line) :
-                     varmod = testvar(line, self.var)
-                     if varmod :
-                         keepline = True
-                         if varmod not in filedict :
-                             filename = os.path.join(self.basepath, varmod + '.txt')
-                             filedict[varmod] = open(filename, 'w')
-                         fileout = filedict[varmod]
-                     else : 
-                         keepline = False
-                 if keepline :
-                     fileout.write(line.encode(self.encodeout))
+            for line in fin :
+                if istext(line) :
+                    varmod = testvar(line, self.var)
+                    if varmod :
+                        keepline = True
+                        if varmod not in filedict :
+                            filename = os.path.join(self.basepath, varmod + '.txt')
+                            filedict[varmod] = open(filename, 'w')
+                        fileout = filedict[varmod]
+                    else : 
+                        keepline = False
+                if keepline :
+                    fileout.write(line.encode(self.encodeout))
          for f in filedict :
              filedict[f].close()
  
@@ -101,22 +101,22 @@ class ExtractMods :
          keepline = False
          filedict = {}
          with codecs.open(self.filein, 'r', self.encodein) as fin :
-             for line in fin :
-                 if istext(line) :
-                     modinline = testmod(line, self.mods)
-                     if modinline :
-                         keepline = True
-                         if not self.onefile :
+            for line in fin :
+                if istext(line) :
+                    modinline = testmod(line, self.mods)
+                    if modinline :
+                        keepline = True
+                        if not self.onefile :
                              if modinline not in filedict :
                                  filename = os.path.join(self.basepath, modinline + '.txt')
                                  filedict[modinline] = open(filename, 'w')
                              fileout = filedict[modinline]
-                         else :
-                             fileout = self.fileout
-                     else : 
-                         keepline = False
-                 if keepline :
-                     fileout.write(line.encode(self.encodeout))
+                        else :
+                            fileout = self.fileout
+                    else : 
+                        keepline = False
+                if keepline :
+                    fileout.write(line.encode(self.encodeout))
          if not self.onefile :
              for f in filedict :
                  filedict[f].close()
@@ -150,7 +150,16 @@ class SubCorpus(Corpus) :
      def getlemuces(self, lem) :
          return list(set(self.sgts).intersection(self.corpus.getlemuces(lem)))
  
-
+def converttabletocorpus(table, fileout, enc='UTF8') :
+    var = table.pop(0)
+    var = var[0:len(var)-1]
+    print var
+    et = [zip(var, line[0:len(line)-1]) for line in table]
+    et = ['**** ' + ' '.join(['*' + '_'.join(val) for val in line]) for line in et] 
+    txt = ['\n'.join([et[i], line[-1]]) for i, line in enumerate(table)]
+    print '\n'.join(txt)
+    #with open(fileout, 'w') as f :
+
author	Pierre <ratinaud@univ-tlse2.fr>
	Tue, 11 Feb 2014 13:14:56 +0000 (14:14 +0100)
committer	Pierre <ratinaud@univ-tlse2.fr>
	Tue, 11 Feb 2014 13:14:56 +0000 (14:14 +0100)
textcheckcorpus.py		patch | blob | history
textclassechd.py		patch | blob | history
textdist.py		patch | blob | history
textsimi.py		patch | blob | history
textstat.py		patch | blob | history
textwordcloud.py		patch | blob | history
tools.py		patch | blob | history