X-Git-Url: http://iramuteq.org/git?p=iramuteq;a=blobdiff_plain;f=textstat.py;h=c25b9dcea30647650b437659f22f85903056131d;hp=f8f3735ad1a50e659fd68828de86c3dee81256a6;hb=9b78e6210e7fc88a7e77d178c4090aabb23580d9;hpb=889c0c759bd1a27a90cbf0a1bbb3f080ab293aaf diff --git a/textstat.py b/textstat.py index f8f3735..c25b9dc 100644 --- a/textstat.py +++ b/textstat.py @@ -4,21 +4,11 @@ #Copyright (c) 2008-2012 Pierre Ratinaud #License: GNU/GPL -#from chemins import ConstructPathOut, StatTxtPathOut, ffr -from chemins import PathOut +from chemins import ffr from analysetxt import AnalyseText -#from corpus import Corpus -from guifunct import getPage, getCorpus -from ConfigParser import RawConfigParser -from functions import sortedby, progressbar, CreateIraFile, exec_rcode, check_Rresult, DoConf -from dialog import StatDialog -from openanalyse import OpenAnalyse -#from ttparser import * +from functions import sortedby, progressbar, exec_rcode, check_Rresult import tempfile from time import sleep -import wx -import os - import logging logger = logging.getLogger('iramuteq.textstat') @@ -57,14 +47,13 @@ class Stat(AnalyseText) : phapax = (float(len(hapax)) / float(occurrences)) * 100 phapax_forme = (float(len(hapax)) / (float(len(formes)))) * 100 moy_occu_mot = float(occurrences) / float(len(formes)) - txt = 'Globale\n' - txt += 'nombre de textes : %i\n' % len(self.corpus.ucis) - txt += 'nombre d\'occurrences : %i\n' % occurrences - txt += 'nombre de formes : %i\n' % (len(formes)) - txt += 'moyenne d\'occurrences par forme : %.2f\n' % moy_occu_mot - txt += 'nombre d\'hapax : %i (%.2f%% des occurrences - %.2f%% des formes)\n' % (len(hapax), phapax, phapax_forme) - print float(occurrences), float(len(self.corpus.ucis)) - txt += 'moyenne d\'occurrences par texte : %.2f' % (float(occurrences)/float(len(self.corpus.ucis))) + txt = ''.join([_(u'Abstract').decode('utf8'), '\n']) + txt += ''.join([_(u'Number of texts').decode('utf8'),' : ', '%i\n' % len(self.corpus.ucis)]) + txt += ''.join([_(u"Number of occurrences").decode('utf8'),' : %i\n' % occurrences]) + txt += ''.join([_(u'Number of forms').decode('utf8'), ' : %i\n' % (len(formes))]) + txt += ''.join([_(u"Number of hapax").decode('utf8'),' : %i (%.2f%%' % (len(hapax),phapax), _(u'of occurrences').decode('utf8'), ' - %.2f%% ' % phapax_forme, _(u'of forms').decode('utf8'), ')\n']) + #print float(occurrences), float(len(self.corpus.ucis)) + txt += ''.join([_(u"Mean of occurrences by text").decode('utf8'), ' : %.2f' % (float(occurrences)/float(len(self.corpus.ucis)))]) if self.dlg : self.dlg.Update(7, u'Ecriture...') self.result['glob'] = txt @@ -73,17 +62,17 @@ class Stat(AnalyseText) : txt = """ source("%s") tot <- read.csv2("%s", header = FALSE, row.names = 1) - """ % (self.parent.RscriptsPath['Rgraph'], self.pathout['total.csv']) + """ % (ffr(self.parent.RscriptsPath['Rgraph']), ffr(self.pathout['total.csv'])) if len(hapax) : txt += """ hapax <- read.csv2("%s", header = FALSE, row.names = 1) tot <- rbind(tot, hapax) - """ % self.pathout['hapax.csv'] + """ % ffr(self.pathout['hapax.csv']) txt += """ open_file_graph("%s", width = 400, height = 400) plot(tot[,1], log = 'xy', xlab='log(rangs)', ylab = 'log(frequences)', col = 'red', pch=16) dev.off() - """ % (self.pathout['zipf.png']) + """ % (ffr(self.pathout['zipf.png'])) tmpscript = tempfile.mktemp(dir=self.parent.TEMPDIR) with open(tmpscript, 'w') as f : f.write(txt)