# -*- coding: utf-8 -*-
#Author: Pierre Ratinaud
#Copyright (c) 2010, Pierre Ratinaud
-#Lisense: GNU/GPL
+#License: GNU/GPL
from corpus import Corpus
import wx
import wx.lib.dialogs
# -*- coding: utf-8 -*-
#Author: Pierre Ratinaud
#Copyright (c) 2012, Pierre Ratinaud
-#Lisense: GNU/GPL
+#License: GNU/GPL
import os
# -*- coding: utf-8 -*-
#Author: Pierre Ratinaud
#Copyright (c) 2008-2009 Pierre Ratinaud
-#Lisense: GNU/GPL
+#License: GNU/GPL
from chemins import ConstructPathOut, ConstructAfcUciPath, ChdTxtPathOut
from corpus import Corpus
# -*- coding: utf-8 -*-
#Author: Pierre Ratinaud
#Copyright (c) 2008-2013 Pierre Ratinaud
-#Lisense: GNU/GPL
+#License: GNU/GPL
from chemins import ffr, simipath
#from corpus import Corpus
from analysetxt import AnalyseText
#from ConfigParser import RawConfigParser
#from guifunct import getPage, getCorpus
-from dialog import StatDialog
-from guifunct import SelectColumn, PrepSimi
+from guifunct import PrepSimi
from functions import indices_simi, progressbar, treat_var_mod, read_list_file, print_liste
#from tableau import Tableau
#from tabsimi import DoSimi
self.parametres['type'] = 'simitxt'
self.pathout.basefiles(simipath)
self.indices = indices_simi
- self.makesimiparam()
+ if self.dlg :
+ self.makesimiparam()
#FIXME
self.actives = self.corpus.make_actives_limit(3)
dictcol = dict([[i, [act, self.corpus.getlemeff(act)]] for i, act in enumerate(self.actives)])
continu = False
if self.dlg :
- #cont = SelectColumn(self.ira, dictcol, self.actives, self.pathout['selected.csv'], dlg = self.dlg)
- #if cont.ok :
self.listet = self.corpus.make_etoiles()
self.listet.sort()
self.stars = copy(self.listet)
if prep.val == wx.ID_OK :
continu = True
self.parametres = prep.parametres
+ else :
+ continu = True
if continu :
self.makefiles()
script = PrintSimiScript(self)
'height' : 1000,
'bystar' : False,
'first' : True,
- 'keep_coord' : True,
+ 'keep_coord' : False,
'alpha' : 20,
'film': False,
'svg' : 0,
self.parametres['type'] = 'clustersimitxt'
self.pathout.basefiles(simipath)
self.indices = indices_simi
- self.makesimiparam()
+ if self.dlg :
+ self.makesimiparam()
if 'bystar' in self.parametres :
del self.parametres['bystar']
dictcol = dict([[i, [act, self.corpus.getlemclustereff(act, self.numcluster)]] for i, act in enumerate(self.actives)])
#self.listet = self.corpus.make_etoiles()
#self.listet.sort()
self.stars = []#copy(self.listet)
- self.parametres['stars'] = False#copy(self.listet)
- self.parametres['sfromchi'] = True
+ self.parametres['stars'] = 0#copy(self.listet)
+ self.parametres['sfromchi'] = 1
prep = PrepSimi(self.ira, self, self.parametres, self.pathout['selected.csv'], self.actives, indices_simi, wordlist=dictcol)
if prep.val == wx.ID_OK :
continu = True
continu = False
if continu :
self.makefiles()
+ self.parametres['type'] = 'clustersimitxt'
script = PrintSimiScript(self)
script.make_script()
if not self.doR(script.scriptout, dlg = self.dlg, message = 'R ...') :
# -*- coding: utf-8 -*-
#Author: Pierre Ratinaud
#Copyright (c) 2008-2012 Pierre Ratinaud
-#Lisense: GNU/GPL
+#License: GNU/GPL
#from chemins import ConstructPathOut, StatTxtPathOut, ffr
from chemins import PathOut
phapax_forme = (float(len(hapax)) / (float(len(formes)))) * 100
moy_occu_mot = float(occurrences) / float(len(formes))
txt = 'Globale\n'
- txt += 'nombre d\'uci : %i\n' % len(self.corpus.ucis)
+ txt += 'nombre de textes : %i\n' % len(self.corpus.ucis)
txt += 'nombre d\'occurrences : %i\n' % occurrences
txt += 'nombre de formes : %i\n' % (len(formes))
txt += 'moyenne d\'occurrences par forme : %.2f\n' % moy_occu_mot
txt += 'nombre d\'hapax : %i (%.2f%% des occurrences - %.2f%% des formes)\n' % (len(hapax), phapax, phapax_forme)
print float(occurrences), float(len(self.corpus.ucis))
- txt += 'moyenne d\'occurrences par uci : %.2f' % (float(occurrences)/float(len(self.corpus.ucis)))
+ txt += 'moyenne d\'occurrences par texte : %.2f' % (float(occurrences)/float(len(self.corpus.ucis)))
if self.dlg :
self.dlg.Update(7, u'Ecriture...')
self.result['glob'] = txt
# -*- coding: utf-8 -*-
#Author: Pierre Ratinaud
#Copyright (c) 2008-2009 Pierre Ratinaud
-#Lisense: GNU/GPL
+#License: GNU/GPL
from analysetxt import AnalyseText
from guifunct import getPage, getCorpus, SelectColumn
# -*- coding: utf-8 -*-
#Author: Pierre Ratinaud
#Copyright (c) 2008-2013, Pierre Ratinaud
-#Lisense: GNU GPL
+#License: GNU GPL
import codecs
import os
keepline = False
filedict = {}
with codecs.open(self.filein, 'r', self.encodein) as fin :
- for line in fin :
- if istext(line) :
- varmod = testvar(line, self.var)
- if varmod :
- keepline = True
- if varmod not in filedict :
- filename = os.path.join(self.basepath, varmod + '.txt')
- filedict[varmod] = open(filename, 'w')
- fileout = filedict[varmod]
- else :
- keepline = False
- if keepline :
- fileout.write(line.encode(self.encodeout))
+ for line in fin :
+ if istext(line) :
+ varmod = testvar(line, self.var)
+ if varmod :
+ keepline = True
+ if varmod not in filedict :
+ filename = os.path.join(self.basepath, varmod + '.txt')
+ filedict[varmod] = open(filename, 'w')
+ fileout = filedict[varmod]
+ else :
+ keepline = False
+ if keepline :
+ fileout.write(line.encode(self.encodeout))
for f in filedict :
filedict[f].close()
keepline = False
filedict = {}
with codecs.open(self.filein, 'r', self.encodein) as fin :
- for line in fin :
- if istext(line) :
- modinline = testmod(line, self.mods)
- if modinline :
- keepline = True
- if not self.onefile :
+ for line in fin :
+ if istext(line) :
+ modinline = testmod(line, self.mods)
+ if modinline :
+ keepline = True
+ if not self.onefile :
if modinline not in filedict :
filename = os.path.join(self.basepath, modinline + '.txt')
filedict[modinline] = open(filename, 'w')
fileout = filedict[modinline]
- else :
- fileout = self.fileout
- else :
- keepline = False
- if keepline :
- fileout.write(line.encode(self.encodeout))
+ else :
+ fileout = self.fileout
+ else :
+ keepline = False
+ if keepline :
+ fileout.write(line.encode(self.encodeout))
if not self.onefile :
for f in filedict :
filedict[f].close()
def getlemuces(self, lem) :  # Return the items of self.sgts that also appear in self.corpus.getlemuces(lem).
return list(set(self.sgts).intersection(self.corpus.getlemuces(lem)))  # goes through a set: duplicates are dropped and the order of the returned list is not guaranteed
-
+def converttabletocorpus(table, fileout, enc='UTF8') :  # Turn a table (first row = column names) into '****'-headed text entries; unfinished: the result is only printed.
+ var = table.pop(0)  # first row holds the column names; NOTE: pop(0) also removes that row from the caller's list
+ var = var[0:len(var)-1]  # keep every column name except the last one
+ print var  # debug output (Python 2 print statement)
+ et = [zip(var, line[0:len(line)-1]) for line in table]  # pair each kept column name with the row's value; the last cell of each row is left out
+ et = ['**** ' + ' '.join(['*' + '_'.join(val) for val in line]) for line in et]  # one header per row: '**** *name_value *name_value ...' (join assumes names and values are strings)
+ txt = ['\n'.join([et[i], line[-1]]) for i, line in enumerate(table)]  # header line followed by the row's last cell (presumably the text body -- confirm with caller)
+ print '\n'.join(txt)  # the assembled entries go to stdout only
+ #with open(fileout, 'w') as f :  -- TODO: writing to fileout is not implemented yet; fileout and enc are currently unused
+