X-Git-Url: http://iramuteq.org/git?p=iramuteq;a=blobdiff_plain;f=textstat.py;h=ee0e4ef4ae7a10dd7892f3bda67789a0f9320dae;hp=4d67879403b9d7885d55988540eda29d1d60416c;hb=3563d5ffbef160a381af784d9d8cce2ce26e11d9;hpb=8fa853a25a9d62b1446e1bc543e5a3a4d0e03dcf diff --git a/textstat.py b/textstat.py index 4d67879..ee0e4ef 100644 --- a/textstat.py +++ b/textstat.py @@ -1,8 +1,8 @@ #!/bin/env python # -*- coding: utf-8 -*- #Author: Pierre Ratinaud -#Copyright (c) 2008-2009 Pierre Ratinaud -#Lisense: GNU/GPL +#Copyright (c) 2008-2012 Pierre Ratinaud +#License: GNU/GPL #from chemins import ConstructPathOut, StatTxtPathOut, ffr from chemins import PathOut @@ -19,7 +19,6 @@ from time import sleep import wx import os -print 'TEST LOGGING' import logging logger = logging.getLogger('iramuteq.textstat') @@ -31,20 +30,7 @@ class Stat(AnalyseText) : self.make_stats() def preferences(self) : - dial = StatDialog(self, self.parent) - dial.CenterOnParent() - val = dial.ShowModal() - if val == 5100 : - if dial.radio_lem.GetSelection() == 0 : - lem = 1 - else : - lem = 0 - self.parametres['lem'] = lem - dial.Destroy() - return self.parametres - else : - dial.Destroy() - return None + return self.parametres def make_stats(self): if self.dlg : @@ -67,6 +53,8 @@ class Stat(AnalyseText) : supp = [[forme, formes[forme].freq, formes[forme].gram] for forme in formes if formes[forme].act == 2] supp = sortedby(supp, 2, 1) + #print self.corpus.gethapaxbyuci() + supp = [[i, val] for i, val in enumerate(supp)] #self.corpus.pathout = self.dictpathout #self.corpus.make_type_tot() @@ -77,13 +65,13 @@ class Stat(AnalyseText) : phapax_forme = (float(len(hapax)) / (float(len(formes)))) * 100 moy_occu_mot = float(occurrences) / float(len(formes)) txt = 'Globale\n' - txt += 'nombre d\'uci : %i\n' % len(self.corpus.ucis) + txt += 'nombre de textes : %i\n' % len(self.corpus.ucis) txt += 'nombre d\'occurrences : %i\n' % occurrences txt += 'nombre de formes : %i\n' % (len(formes)) txt += 'moyenne d\'occurrences par forme : %.2f\n' % moy_occu_mot txt += 'nombre d\'hapax : %i (%.2f%% des occurrences - %.2f%% des formes)\n' % (len(hapax), phapax, phapax_forme) print float(occurrences), float(len(self.corpus.ucis)) - txt += 'moyenne d\'occurrences par uci : %.2f' % (float(occurrences)/float(len(self.corpus.ucis))) + txt += 'moyenne d\'occurrences par texte : %.2f' % (float(occurrences)/float(len(self.corpus.ucis))) if self.dlg : self.dlg.Update(7, u'Ecriture...') self.result['glob'] = txt @@ -92,12 +80,17 @@ class Stat(AnalyseText) : txt = """ source("%s") tot <- read.csv2("%s", header = FALSE, row.names = 1) - hapax <- read.csv2("%s", header = FALSE, row.names = 1) - tot <- rbind(tot, hapax) + """ % (self.parent.RscriptsPath['Rgraph'], self.pathout['total.csv']) + if len(hapax) : + txt += """ + hapax <- read.csv2("%s", header = FALSE, row.names = 1) + tot <- rbind(tot, hapax) + """ % self.pathout['hapax.csv'] + txt += """ open_file_graph("%s", width = 400, height = 400) - plot(log(tot[,1]), log = 'x', xlab='log(rangs)', ylab = 'log(frequences)', col = 'red', pch=16) + plot(tot[,1], log = 'xy', xlab='log(rangs)', ylab = 'log(frequences)', col = 'red', pch=16) dev.off() - """ % (self.parent.RscriptsPath['Rgraph'], self.pathout['total.csv'], self.pathout['hapax.csv'], self.pathout['zipf.png']) + """ % (self.pathout['zipf.png']) tmpscript = tempfile.mktemp(dir=self.parent.TEMPDIR) with open(tmpscript, 'w') as f : f.write(txt) @@ -122,8 +115,8 @@ class Stat(AnalyseText) : else : with open(self.pathout['%s.txt' % 'glob'], 'w') as f : f.write(self.result['glob'].encode(self.parent.syscoding)) - self.parametres['pathout'] = self.pathout['Analyse.ira'] - DoConf().makeoptions(['stat'],[self.parametres], self.pathout['Analyse.ira']) + #self.parametres['pathout'] = self.pathout['Analyse.ira'] + #DoConf().makeoptions(['stat'],[self.parametres], self.pathout['Analyse.ira']) #class Stat():