From: Pierre Date: Tue, 11 Feb 2014 13:14:56 +0000 (+0100) Subject: ... X-Git-Url: http://iramuteq.org/git?a=commitdiff_plain;h=eb614c725930bc65a7ad43eda1b769b504433f88;p=iramuteq ... --- diff --git a/textcheckcorpus.py b/textcheckcorpus.py index 1c52739..fa4f935 100644 --- a/textcheckcorpus.py +++ b/textcheckcorpus.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- #Author: Pierre Ratinaud #Copyright (c) 2010, Pierre Ratinaud -#Lisense: GNU/GPL +#License: GNU/GPL from corpus import Corpus import wx import wx.lib.dialogs diff --git a/textclassechd.py b/textclassechd.py index 5bff4eb..8cf46c8 100644 --- a/textclassechd.py +++ b/textclassechd.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- #Author: Pierre Ratinaud #Copyright (c) 2012, Pierre Ratinaud -#Lisense: GNU/GPL +#License: GNU/GPL import os diff --git a/textdist.py b/textdist.py index 07818e2..8886a9e 100644 --- a/textdist.py +++ b/textdist.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- #Author: Pierre Ratinaud #Copyright (c) 2008-2009 Pierre Ratinaud -#Lisense: GNU/GPL +#License: GNU/GPL from chemins import ConstructPathOut, ConstructAfcUciPath, ChdTxtPathOut from corpus import Corpus diff --git a/textsimi.py b/textsimi.py index 228598a..807442e 100644 --- a/textsimi.py +++ b/textsimi.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- #Author: Pierre Ratinaud #Copyright (c) 2008-2013 Pierre Ratinaud -#Lisense: GNU/GPL +#License: GNU/GPL from chemins import ffr, simipath #from corpus import Corpus @@ -9,8 +9,7 @@ import os from analysetxt import AnalyseText #from ConfigParser import RawConfigParser #from guifunct import getPage, getCorpus -from dialog import StatDialog -from guifunct import SelectColumn, PrepSimi +from guifunct import PrepSimi from functions import indices_simi, progressbar, treat_var_mod, read_list_file, print_liste #from tableau import Tableau #from tabsimi import DoSimi @@ -27,14 +26,13 @@ class SimiTxt(AnalyseText): self.parametres['type'] = 'simitxt' self.pathout.basefiles(simipath) self.indices = indices_simi - self.makesimiparam() + if self.dlg : + self.makesimiparam() #FIXME self.actives = self.corpus.make_actives_limit(3) dictcol = dict([[i, [act, self.corpus.getlemeff(act)]] for i, act in enumerate(self.actives)]) continu = False if self.dlg : - #cont = SelectColumn(self.ira, dictcol, self.actives, self.pathout['selected.csv'], dlg = self.dlg) - #if cont.ok : self.listet = self.corpus.make_etoiles() self.listet.sort() self.stars = copy(self.listet) @@ -44,6 +42,8 @@ class SimiTxt(AnalyseText): if prep.val == wx.ID_OK : continu = True self.parametres = prep.parametres + else : + continu = True if continu : self.makefiles() script = PrintSimiScript(self) @@ -94,7 +94,7 @@ class SimiTxt(AnalyseText): 'height' : 1000, 'bystar' : False, 'first' : True, - 'keep_coord' : True, + 'keep_coord' : False, 'alpha' : 20, 'film': False, 'svg' : 0, @@ -130,7 +130,8 @@ class SimiFromCluster(SimiTxt) : self.parametres['type'] = 'clustersimitxt' self.pathout.basefiles(simipath) self.indices = indices_simi - self.makesimiparam() + if self.dlg : + self.makesimiparam() if 'bystar' in self.parametres : del self.parametres['bystar'] dictcol = dict([[i, [act, self.corpus.getlemclustereff(act, self.numcluster)]] for i, act in enumerate(self.actives)]) @@ -139,8 +140,8 @@ class SimiFromCluster(SimiTxt) : #self.listet = self.corpus.make_etoiles() #self.listet.sort() self.stars = []#copy(self.listet) - self.parametres['stars'] = False#copy(self.listet) - self.parametres['sfromchi'] = True + self.parametres['stars'] = 0#copy(self.listet) + self.parametres['sfromchi'] = 1 prep = PrepSimi(self.ira, self, self.parametres, self.pathout['selected.csv'], self.actives, indices_simi, wordlist=dictcol) if prep.val == wx.ID_OK : continu = True @@ -149,6 +150,7 @@ class SimiFromCluster(SimiTxt) : continu = False if continu : self.makefiles() + self.parametres['type'] = 'clustersimitxt' script = PrintSimiScript(self) script.make_script() if not self.doR(script.scriptout, dlg = self.dlg, message = 'R ...') : diff --git a/textstat.py b/textstat.py index 09ec518..54c8b4d 100644 --- a/textstat.py +++ b/textstat.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- #Author: Pierre Ratinaud #Copyright (c) 2008-2012 Pierre Ratinaud -#Lisense: GNU/GPL +#License: GNU/GPL #from chemins import ConstructPathOut, StatTxtPathOut, ffr from chemins import PathOut @@ -65,13 +65,13 @@ class Stat(AnalyseText) : phapax_forme = (float(len(hapax)) / (float(len(formes)))) * 100 moy_occu_mot = float(occurrences) / float(len(formes)) txt = 'Globale\n' - txt += 'nombre d\'uci : %i\n' % len(self.corpus.ucis) + txt += 'nombre de textes : %i\n' % len(self.corpus.ucis) txt += 'nombre d\'occurrences : %i\n' % occurrences txt += 'nombre de formes : %i\n' % (len(formes)) txt += 'moyenne d\'occurrences par forme : %.2f\n' % moy_occu_mot txt += 'nombre d\'hapax : %i (%.2f%% des occurrences - %.2f%% des formes)\n' % (len(hapax), phapax, phapax_forme) print float(occurrences), float(len(self.corpus.ucis)) - txt += 'moyenne d\'occurrences par uci : %.2f' % (float(occurrences)/float(len(self.corpus.ucis))) + txt += 'moyenne d\'occurrences par texte : %.2f' % (float(occurrences)/float(len(self.corpus.ucis))) if self.dlg : self.dlg.Update(7, u'Ecriture...') self.result['glob'] = txt diff --git a/textwordcloud.py b/textwordcloud.py index bc875bc..a85866a 100644 --- a/textwordcloud.py +++ b/textwordcloud.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- #Author: Pierre Ratinaud #Copyright (c) 2008-2009 Pierre Ratinaud -#Lisense: GNU/GPL +#License: GNU/GPL from analysetxt import AnalyseText from guifunct import getPage, getCorpus, SelectColumn diff --git a/tools.py b/tools.py index 7db0e9e..e35ea4b 100644 --- a/tools.py +++ b/tools.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- #Author: Pierre Ratinaud #Copyright (c) 2008-2013, Pierre Ratinaud -#Lisense: GNU GPL +#License: GNU GPL import codecs import os @@ -68,19 +68,19 @@ class SplitFromVar : keepline = False filedict = {} with codecs.open(self.filein, 'r', self.encodein) as fin : - for line in fin : - if istext(line) : - varmod = testvar(line, self.var) - if varmod : - keepline = True - if varmod not in filedict : - filename = os.path.join(self.basepath, varmod + '.txt') - filedict[varmod] = open(filename, 'w') - fileout = filedict[varmod] - else : - keepline = False - if keepline : - fileout.write(line.encode(self.encodeout)) + for line in fin : + if istext(line) : + varmod = testvar(line, self.var) + if varmod : + keepline = True + if varmod not in filedict : + filename = os.path.join(self.basepath, varmod + '.txt') + filedict[varmod] = open(filename, 'w') + fileout = filedict[varmod] + else : + keepline = False + if keepline : + fileout.write(line.encode(self.encodeout)) for f in filedict : filedict[f].close() @@ -101,22 +101,22 @@ class ExtractMods : keepline = False filedict = {} with codecs.open(self.filein, 'r', self.encodein) as fin : - for line in fin : - if istext(line) : - modinline = testmod(line, self.mods) - if modinline : - keepline = True - if not self.onefile : + for line in fin : + if istext(line) : + modinline = testmod(line, self.mods) + if modinline : + keepline = True + if not self.onefile : if modinline not in filedict : filename = os.path.join(self.basepath, modinline + '.txt') filedict[modinline] = open(filename, 'w') fileout = filedict[modinline] - else : - fileout = self.fileout - else : - keepline = False - if keepline : - fileout.write(line.encode(self.encodeout)) + else : + fileout = self.fileout + else : + keepline = False + if keepline : + fileout.write(line.encode(self.encodeout)) if not self.onefile : for f in filedict : filedict[f].close() @@ -150,7 +150,16 @@ class SubCorpus(Corpus) : def getlemuces(self, lem) : return list(set(self.sgts).intersection(self.corpus.getlemuces(lem))) - +def converttabletocorpus(table, fileout, enc='UTF8') : + var = table.pop(0) + var = var[0:len(var)-1] + print var + et = [zip(var, line[0:len(line)-1]) for line in table] + et = ['**** ' + ' '.join(['*' + '_'.join(val) for val in line]) for line in et] + txt = ['\n'.join([et[i], line[-1]]) for i, line in enumerate(table)] + print '\n'.join(txt) + #with open(fileout, 'w') as f : +