X-Git-Url: http://iramuteq.org/git?a=blobdiff_plain;ds=sidebyside;f=tabfrequence.py;h=376c8bf557946b59534f6aaa1be59f3d50333765;hb=2301b7f97349d29b07fe4f51d30af2721280687a;hp=4eb42ee20a54c0e7f4730c5fb40e2861fc046b3e;hpb=42a67a41b64a6e0cc3fd2a63a0749e9aa4b9374c;p=iramuteq
diff --git a/tabfrequence.py b/tabfrequence.py
index 4eb42ee..376c8bf 100644
--- a/tabfrequence.py
+++ b/tabfrequence.py
@@ -2,187 +2,202 @@
# -*- coding: utf-8 -*-
#Author: Pierre Ratinaud
#Copyright (c) 2008 Pierre Ratinaud
-#Lisense: GNU/GPL
+#License: GNU/GPL
#from __future__ import division
import os
-import sys
import wx
-import wx.html
from chemins import ffr, FFF
import tempfile
from time import sleep
+from analysematrix import AnalyseMatrix
from functions import exec_rcode, check_Rresult
from dialog import FreqDialog
+from PrintRScript import PrintRScript, FreqMultiScript
+from operator import itemgetter
-
-class Frequences():
- def __init__(self, parent):
- #self.Filename = parent.filename
- self.fileforR = parent.tableau.parametre['csvfile']
- self.TEMPDIR = parent.TEMPDIR
- self.num = parent.FreqNum
- self.DICTFILE = {}
- self.RPath = parent.PathPath.get('PATHS', 'rpath')
- self.parent=parent
- self.tableau = parent.tableau
- dlg = FreqDialog(parent, -1, self.tableau.get_colnames(), u"Fréquences", size=(350, 200))
- dlg.CenterOnParent()
- val = dlg.ShowModal()
- if val == wx.ID_OK :
- ColSel = dlg.list_box_1.GetSelections()
- self.header=dlg.header
- dlg.Destroy()
- listfileout = self.ShowFreq(ColSel)
- parent.FreqNum += 1
- parent.DictTab[u"Fréquences_%s*" % parent.FreqNum] = listfileout
- parent.FileTabList.append(listfileout)
- parent.newtab = wx.html.HtmlWindow(parent.nb, -1)
- if "gtk2" in wx.PlatformInfo:
- parent.newtab.SetStandardFonts()
- parent.newtab.LoadPage(listfileout[len(listfileout) - 1])
- parent.nb.AddPage(parent.newtab, u"Fréquences_%s*" % parent.FreqNum)
- parent.nb.SetSelection(parent.nb.GetPageCount() - 1)
- parent.ShowAPane("Tab_content")
- parent.DisEnSaveTabAs(True)
+class Frequences(AnalyseMatrix) :
+ def doparametres(self, dlg=None) :
+ if dlg is None :
+ return
else :
- dlg.Destroy()
-
- def ShowFreq(self, select):
- listfile = []
- listfile.append(False)
- self.ListFileForR = []
- self.ListTitre = []
- self.OutFrame = tempfile.mktemp(dir=self.TEMPDIR)
- if self.parent.g_id: rownames = '1'
- else: rownames = 'NULL'
- if self.parent.g_header : header = 'TRUE'
- else : header = 'FALSE'
- self.ListTitre = [self.header[i] for i in select]
- self.ListFileForR = [ffr(os.path.join(self.TEMPDIR, 'freq%s_%s.jpeg' % (str(self.num), i))) for i in range(len(select))]
- listfile = [os.path.join(self.TEMPDIR, 'freq%s_%s.jpeg' % (str(self.num), i)) for i in range(len(select))]
+ dial = FreqDialog(self.parent, self.tableau.get_colnames(), u"Fréquences")
+ dial.CenterOnParent()
+ val = dial.ShowModal()
+ if val == wx.ID_OK :
+ self.parametres['colsel'] = dial.m_listBox1.GetSelections()
+ self.parametres['header'] = dial.header
+ self.parametres['NA'] = dial.includeNA.GetValue()
+ else :
+ self.parametres = None
+ dial.Destroy()
+
+ def doanalyse(self):
+ self.pathout.createdir(self.parametres['pathout'])
+ header = self.tableau.get_colnames()
+ select = self.parametres['colsel']
+ self.listtitre = [header[i] for i in select]
+ b, self.outframe = tempfile.mkstemp()
+ self.fileforR = [ffr(os.path.join(self.pathout.dirout, 'freq_%i.png' % i)) for i in range(len(select))]
+ self.Rscript = PrintRScript(self)
sel = 'c(' + ','.join([str(val + 1) for val in select]) + ')'
- listfiles = 'c("' + '","'.join(self.ListFileForR) + '")'
- titles = 'c("' + '","'.join(self.ListTitre) + '")'
+ listfiles = 'c("' + '","'.join(self.fileforR) + '")'
+ titles = 'c("' + '","'.join(self.listtitre) + '")'
txt = """
- source("%s")
- """ % self.parent.RscriptsPath['Rfunct']
-
+ filein <- "%s"
+ encoding <- '%s'
+ dm <- read.csv2(filein, encoding = encoding, header = TRUE, row.names = 1, sep='\\t', quote = '"', na.string = '')
+ """ %(ffr(self.tableau.parametres['csvfile']), self.tableau.parametres['syscoding'])
txt += """
- datadm <- ReadData("%s", encoding="%s", header = TRUE, sep = ";",quote = "\\%s", na.strings = "%s",rownames=1)
- """ % (ffr(self.fileforR), self.parent.encode, self.parent.tableau.parametre['txtsep'], self.parent.nastrings)
- txt += """
- outframe<-data.frame(cbind('***','****'))
- colnames(outframe)<-c('effectif','pourcentage')
+ outframe <- data.frame(cbind('***','****','****'))
+ colnames(outframe)<-c('effectif','pourcentage', 'labels')
select <- %s
listfiles <- %s
titles <- %s
compteur <- 1
""" % (sel, listfiles, titles)
+
+ if self.parametres['NA'] :
+ txt += """
+ countNA <- TRUE
+ """
+ else :
+ txt += """
+ countNA <- FALSE
+ """
+
txt += """
for (i in select) {
- datasum<-as.matrix(summary(datadm[,i]))
- if (rownames(datasum)[1]=='Min.' && rownames(datasum)[3]=='Median') {
- dtype<-'num'
- } else if (datasum[1] == "logical") {
- dtype <- 'char'
- datasum <- as.matrix(as.integer(datasum[2]))
- rownames(datasum) <- 'NA'
- } else {
- dtype<-'char'
- }
- datasum<-as.data.frame(datasum)
- if (dtype=='char') {
- datasum[,2]<-round((datasum[,1]/sum(datasum[,1]))*100,digits=2)
+ if (countNA) {
+ freq <- table(dm[,i], useNA = 'ifany')
} else {
- datasum[,2]<-datasum[,1]
+ freq <- table(dm[,i])
}
- colnames(datasum)<-c('effectif','pourcentage')
+ sumfreq <- sum(freq)
+ pour <- prop.table(as.matrix(freq), 2) * 100
+ sumpour <- sum(pour)
+ pour <- round(pour, 2)
+ ntable <- cbind(as.matrix(freq), pour)
graphout <- listfiles[compteur]
if (Sys.info()["sysname"]=='Darwin') {
- quartz(file=graphout,type='jpeg')
+ quartz(file=graphout,type='png')
par(cex=1)
} else {
- jpeg(graphout,res=200)
+ png(graphout)
par(cex=0.3)
}
- if (max(nchar(rownames(datasum))) > 15) {
- lab.bar <- 1:nrow(datasum)
+ if (max(nchar(rownames(ntable))) > 15) {
+ lab.bar <- 1:nrow(ntable)
} else {
- lab.bar <- rownames(datasum)
+ lab.bar <- rownames(ntable)
}
- barplot(datasum[,2],border=NA,beside=TRUE,names.arg=lab.bar)
+ barplot(ntable[,2],border=NA,beside=TRUE,names.arg=lab.bar)
+ ntable <- cbind(ntable, rownames(as.matrix(freq)))
+ colnames(ntable) <- c('effectif','pourcentage', 'labels')
title(main=titles[compteur])
dev.off()
- datasum<-rbind(datasum,total=colSums(datasum))
- outframe<-rbind(outframe,c('***','****'))
- datasum[,1]<-as.character(datasum[,1])
- datasum[,2]<-as.character(datasum[,2])
- outframe<-rbind(outframe,datasum)
+ ntable<-rbind(ntable,total=c(sumfreq,sumpour,''))
+ outframe<-rbind(outframe,c('***','****','****'))
+ #datasum[,1]<-as.character(datasum[,1])
+ #datasum[,2]<-as.character(datasum[,2])
+ outframe<-rbind(outframe,ntable)
compteur <- compteur + 1
- }
- outframe<-rbind(outframe,c('***','****'))
- write.csv2(outframe,file="%s")
- """ % ffr(self.OutFrame)
- tmpfile = tempfile.mktemp(dir=self.TEMPDIR)
- tmpscript = open(tmpfile, 'w')
- tmpscript.write(txt)
- tmpscript.close()
- pid = exec_rcode(self.RPath, tmpfile, wait = False)
- while pid.poll() == None :
- sleep(0.2)
- check_Rresult(self.parent, pid)
- fileout = self.DoLayout()
- listfile.append(fileout)
- self.DICTFILE[self.num] = listfile
- return listfile
-
-
- def DoLayout(self):
+ }
+ outframe<-rbind(outframe,c('***','****','****'))
+ write.table(outframe, file="%s", sep="\\t")
+ """ % ffr(self.outframe)
+ self.Rscript.add(txt)
+ self.Rscript.write()
+ self.doR(self.Rscript.scriptout)
+ self.dolayout()
+
+ def dolayout(self):
listtab = []
tab = []
- filein = open(self.OutFrame, 'rU')
- content = filein.readlines()
- filein.close()
+ with open(self.outframe) as f :
+ content = f.read().splitlines()
content.pop(0)
content.pop(0)
+ content = ['\t'.join(line.split('\t')[1:]).replace('"','') for line in content]
+ content = '\n'.join(content)
+ content = content.split(u'***\t****\t****')
+ content = [[line.split('\t') for line in tab.splitlines() if line.split('\t') != ['']] for tab in content]
+ listtab = [tab for tab in content if tab != []]
texte = ''
- for ligne in content:
- ligne = ligne.replace('"', '').replace('\n', '')
- ligne = ligne.split(';')
- if ligne[1] == u'***' :
- if tab != []:
- listtab.append(tab)
- tab = []
- else :
- tab.append(ligne)
+ #for ligne in content:
+ # ligne = ligne.replace('"', '')
+ # ligne = ligne.split('\t')
+ # if ligne[1] == u'***' :
+ # if tab != []:
+ # listtab.append(tab)
+ # tab = []
+ # else :
+ # tab.append(ligne)
pretexte = u'''
\nFréquences
''' % self.parent.SysEncoding
for i in range(0, len(listtab)):
- pretexte += '%s
' % (str(i), self.ListTitre[i])
+ pretexte += '%s
' % (str(i), self.listtitre[i])
texte += '
\n'
texte += 'Retour
\n'
- texte += '%s
\n' % (str(i), self.ListTitre[i])
+ texte += '%s
\n' % (str(i), self.listtitre[i])
texte += '\n\n'
texte += ' | Effectifs | pourcentage | '
for line in listtab[i] :
texte += ''
texte += """
%s | %s | %s %% |
- """ % (line[0], line[1], line[2])
+ """ % (line[2], line[0], line[1])
texte += ' '
texte += '
| '
texte += """
|
\n
- """ % os.path.basename(self.ListFileForR[i])
+ """ % os.path.basename(self.fileforR[i])
texte += '\n'
- fileout = os.path.join(self.TEMPDIR, 'resultats%s-freq.html' % str(self.num))
- FILE = open(fileout, 'w')
- FILE.write(pretexte + texte)
- FILE.close()
- return fileout
-
+ fileout = os.path.join(self.pathout.dirout, 'resultats.html')
+ with open(fileout, 'w') as f :
+ f.write(pretexte + texte)
+ #return fileout
+class FreqMultiple(Frequences):
+ def doanalyse(self):
+ select = self.parametres['colsel']
+ freq = self.tableau.countmultiple(select)
+ tot = sum([freq[forme][0] for forme in freq])
+ freq = [[forme, freq[forme][0], `round((float(freq[forme][0])/tot)*100, 2)`,`len(list(set(freq[forme][1])))`, `round((float(len(list(set(freq[forme][1]))))/self.tableau.rownb)*100,2)`] for forme in freq]
+ freq = sorted(freq, key=itemgetter(1), reverse=True)
+ freq = [[line[0], `line[1]`, line[2], line[3], line[4]] for line in freq]
+ freq.insert(0, [u'mod', 'freq', 'percent of total', 'row number', 'percent of rows'])
+ self.freq = freq
+ with open(self.pathout['frequences.csv'], 'w') as f :
+ f.write('\n'.join(['\t'.join(line) for line in freq]))
+ self.rscript = FreqMultiScript(self)
+ self.rscript.make_script()
+ self.doR(self.rscript.scriptout)
+ self.dolayout()
+
+ def dolayout(self):
+ pretexte = u'''
+
+ \nFréquences
+
+ ''' % self.parent.SysEncoding
+ txt = """
+ \n\n
+
+ """
+ txt += ' | '.join([' | '.join(line) for line in self.freq]) + ' |
|
'
+ txt += ' | |
' % (os.path.basename(self.pathout['barplotfreq.png']), os.path.basename(self.pathout['barplotrow.png']))
+ txt += "\n"
+ with open(self.pathout['resultats.html'], 'w') as f :
+ f.write(pretexte + txt)
+
+
+
+
+
+
+
+
\ No newline at end of file