X-Git-Url: http://iramuteq.org/git?p=iramuteq;a=blobdiff_plain;f=tabfrequence.py;h=376c8bf557946b59534f6aaa1be59f3d50333765;hp=4eb42ee20a54c0e7f4730c5fb40e2861fc046b3e;hb=287f9e72c3e3d666b016dff0fa3dc39419adfcc2;hpb=42a67a41b64a6e0cc3fd2a63a0749e9aa4b9374c;ds=sidebyside diff --git a/tabfrequence.py b/tabfrequence.py index 4eb42ee..376c8bf 100644 --- a/tabfrequence.py +++ b/tabfrequence.py @@ -2,187 +2,202 @@ # -*- coding: utf-8 -*- #Author: Pierre Ratinaud #Copyright (c) 2008 Pierre Ratinaud -#Lisense: GNU/GPL +#License: GNU/GPL #from __future__ import division import os -import sys import wx -import wx.html from chemins import ffr, FFF import tempfile from time import sleep +from analysematrix import AnalyseMatrix from functions import exec_rcode, check_Rresult from dialog import FreqDialog +from PrintRScript import PrintRScript, FreqMultiScript +from operator import itemgetter - -class Frequences(): - def __init__(self, parent): - #self.Filename = parent.filename - self.fileforR = parent.tableau.parametre['csvfile'] - self.TEMPDIR = parent.TEMPDIR - self.num = parent.FreqNum - self.DICTFILE = {} - self.RPath = parent.PathPath.get('PATHS', 'rpath') - self.parent=parent - self.tableau = parent.tableau - dlg = FreqDialog(parent, -1, self.tableau.get_colnames(), u"Fréquences", size=(350, 200)) - dlg.CenterOnParent() - val = dlg.ShowModal() - if val == wx.ID_OK : - ColSel = dlg.list_box_1.GetSelections() - self.header=dlg.header - dlg.Destroy() - listfileout = self.ShowFreq(ColSel) - parent.FreqNum += 1 - parent.DictTab[u"Fréquences_%s*" % parent.FreqNum] = listfileout - parent.FileTabList.append(listfileout) - parent.newtab = wx.html.HtmlWindow(parent.nb, -1) - if "gtk2" in wx.PlatformInfo: - parent.newtab.SetStandardFonts() - parent.newtab.LoadPage(listfileout[len(listfileout) - 1]) - parent.nb.AddPage(parent.newtab, u"Fréquences_%s*" % parent.FreqNum) - parent.nb.SetSelection(parent.nb.GetPageCount() - 1) - parent.ShowAPane("Tab_content") - parent.DisEnSaveTabAs(True) +class Frequences(AnalyseMatrix) : + def doparametres(self, dlg=None) : + if dlg is None : + return else : - dlg.Destroy() - - def ShowFreq(self, select): - listfile = [] - listfile.append(False) - self.ListFileForR = [] - self.ListTitre = [] - self.OutFrame = tempfile.mktemp(dir=self.TEMPDIR) - if self.parent.g_id: rownames = '1' - else: rownames = 'NULL' - if self.parent.g_header : header = 'TRUE' - else : header = 'FALSE' - self.ListTitre = [self.header[i] for i in select] - self.ListFileForR = [ffr(os.path.join(self.TEMPDIR, 'freq%s_%s.jpeg' % (str(self.num), i))) for i in range(len(select))] - listfile = [os.path.join(self.TEMPDIR, 'freq%s_%s.jpeg' % (str(self.num), i)) for i in range(len(select))] + dial = FreqDialog(self.parent, self.tableau.get_colnames(), u"Fréquences") + dial.CenterOnParent() + val = dial.ShowModal() + if val == wx.ID_OK : + self.parametres['colsel'] = dial.m_listBox1.GetSelections() + self.parametres['header'] = dial.header + self.parametres['NA'] = dial.includeNA.GetValue() + else : + self.parametres = None + dial.Destroy() + + def doanalyse(self): + self.pathout.createdir(self.parametres['pathout']) + header = self.tableau.get_colnames() + select = self.parametres['colsel'] + self.listtitre = [header[i] for i in select] + b, self.outframe = tempfile.mkstemp() + self.fileforR = [ffr(os.path.join(self.pathout.dirout, 'freq_%i.png' % i)) for i in range(len(select))] + self.Rscript = PrintRScript(self) sel = 'c(' + ','.join([str(val + 1) for val in select]) + ')' - listfiles = 'c("' + '","'.join(self.ListFileForR) + '")' - titles = 'c("' + '","'.join(self.ListTitre) + '")' + listfiles = 'c("' + '","'.join(self.fileforR) + '")' + titles = 'c("' + '","'.join(self.listtitre) + '")' txt = """ - source("%s") - """ % self.parent.RscriptsPath['Rfunct'] - + filein <- "%s" + encoding <- '%s' + dm <- read.csv2(filein, encoding = encoding, header = TRUE, row.names = 1, sep='\\t', quote = '"', na.string = '') + """ %(ffr(self.tableau.parametres['csvfile']), self.tableau.parametres['syscoding']) txt += """ - datadm <- ReadData("%s", encoding="%s", header = TRUE, sep = ";",quote = "\\%s", na.strings = "%s",rownames=1) - """ % (ffr(self.fileforR), self.parent.encode, self.parent.tableau.parametre['txtsep'], self.parent.nastrings) - txt += """ - outframe<-data.frame(cbind('***','****')) - colnames(outframe)<-c('effectif','pourcentage') + outframe <- data.frame(cbind('***','****','****')) + colnames(outframe)<-c('effectif','pourcentage', 'labels') select <- %s listfiles <- %s titles <- %s compteur <- 1 """ % (sel, listfiles, titles) + + if self.parametres['NA'] : + txt += """ + countNA <- TRUE + """ + else : + txt += """ + countNA <- FALSE + """ + txt += """ for (i in select) { - datasum<-as.matrix(summary(datadm[,i])) - if (rownames(datasum)[1]=='Min.' && rownames(datasum)[3]=='Median') { - dtype<-'num' - } else if (datasum[1] == "logical") { - dtype <- 'char' - datasum <- as.matrix(as.integer(datasum[2])) - rownames(datasum) <- 'NA' - } else { - dtype<-'char' - } - datasum<-as.data.frame(datasum) - if (dtype=='char') { - datasum[,2]<-round((datasum[,1]/sum(datasum[,1]))*100,digits=2) + if (countNA) { + freq <- table(dm[,i], useNA = 'ifany') } else { - datasum[,2]<-datasum[,1] + freq <- table(dm[,i]) } - colnames(datasum)<-c('effectif','pourcentage') + sumfreq <- sum(freq) + pour <- prop.table(as.matrix(freq), 2) * 100 + sumpour <- sum(pour) + pour <- round(pour, 2) + ntable <- cbind(as.matrix(freq), pour) graphout <- listfiles[compteur] if (Sys.info()["sysname"]=='Darwin') { - quartz(file=graphout,type='jpeg') + quartz(file=graphout,type='png') par(cex=1) } else { - jpeg(graphout,res=200) + png(graphout) par(cex=0.3) } - if (max(nchar(rownames(datasum))) > 15) { - lab.bar <- 1:nrow(datasum) + if (max(nchar(rownames(ntable))) > 15) { + lab.bar <- 1:nrow(ntable) } else { - lab.bar <- rownames(datasum) + lab.bar <- rownames(ntable) } - barplot(datasum[,2],border=NA,beside=TRUE,names.arg=lab.bar) + barplot(ntable[,2],border=NA,beside=TRUE,names.arg=lab.bar) + ntable <- cbind(ntable, rownames(as.matrix(freq))) + colnames(ntable) <- c('effectif','pourcentage', 'labels') title(main=titles[compteur]) dev.off() - datasum<-rbind(datasum,total=colSums(datasum)) - outframe<-rbind(outframe,c('***','****')) - datasum[,1]<-as.character(datasum[,1]) - datasum[,2]<-as.character(datasum[,2]) - outframe<-rbind(outframe,datasum) + ntable<-rbind(ntable,total=c(sumfreq,sumpour,'')) + outframe<-rbind(outframe,c('***','****','****')) + #datasum[,1]<-as.character(datasum[,1]) + #datasum[,2]<-as.character(datasum[,2]) + outframe<-rbind(outframe,ntable) compteur <- compteur + 1 - } - outframe<-rbind(outframe,c('***','****')) - write.csv2(outframe,file="%s") - """ % ffr(self.OutFrame) - tmpfile = tempfile.mktemp(dir=self.TEMPDIR) - tmpscript = open(tmpfile, 'w') - tmpscript.write(txt) - tmpscript.close() - pid = exec_rcode(self.RPath, tmpfile, wait = False) - while pid.poll() == None : - sleep(0.2) - check_Rresult(self.parent, pid) - fileout = self.DoLayout() - listfile.append(fileout) - self.DICTFILE[self.num] = listfile - return listfile - - - def DoLayout(self): + } + outframe<-rbind(outframe,c('***','****','****')) + write.table(outframe, file="%s", sep="\\t") + """ % ffr(self.outframe) + self.Rscript.add(txt) + self.Rscript.write() + self.doR(self.Rscript.scriptout) + self.dolayout() + + def dolayout(self): listtab = [] tab = [] - filein = open(self.OutFrame, 'rU') - content = filein.readlines() - filein.close() + with open(self.outframe) as f : + content = f.read().splitlines() content.pop(0) content.pop(0) + content = ['\t'.join(line.split('\t')[1:]).replace('"','') for line in content] + content = '\n'.join(content) + content = content.split(u'***\t****\t****') + content = [[line.split('\t') for line in tab.splitlines() if line.split('\t') != ['']] for tab in content] + listtab = [tab for tab in content if tab != []] texte = '' - for ligne in content: - ligne = ligne.replace('"', '').replace('\n', '') - ligne = ligne.split(';') - if ligne[1] == u'***' : - if tab != []: - listtab.append(tab) - tab = [] - else : - tab.append(ligne) + #for ligne in content: + # ligne = ligne.replace('"', '') + # ligne = ligne.split('\t') + # if ligne[1] == u'***' : + # if tab != []: + # listtab.append(tab) + # tab = [] + # else : + # tab.append(ligne) pretexte = u''' \n

Fréquences


''' % self.parent.SysEncoding for i in range(0, len(listtab)): - pretexte += '

%s

' % (str(i), self.ListTitre[i]) + pretexte += '

%s

' % (str(i), self.listtitre[i]) texte += '
\n' texte += '

Retour

\n' - texte += '

%s

\n' % (str(i), self.ListTitre[i]) + texte += '

%s

\n' % (str(i), self.listtitre[i]) texte += '\n' texte += """
\n' texte += '' for line in listtab[i] : texte += '' texte += """ - """ % (line[0], line[1], line[2]) + """ % (line[2], line[0], line[1]) texte += '' texte += '
Effectifspourcentage
%s%s%s %%
graph
\n - """ % os.path.basename(self.ListFileForR[i]) + """ % os.path.basename(self.fileforR[i]) texte += '\n' - fileout = os.path.join(self.TEMPDIR, 'resultats%s-freq.html' % str(self.num)) - FILE = open(fileout, 'w') - FILE.write(pretexte + texte) - FILE.close() - return fileout - + fileout = os.path.join(self.pathout.dirout, 'resultats.html') + with open(fileout, 'w') as f : + f.write(pretexte + texte) + #return fileout +class FreqMultiple(Frequences): + def doanalyse(self): + select = self.parametres['colsel'] + freq = self.tableau.countmultiple(select) + tot = sum([freq[forme][0] for forme in freq]) + freq = [[forme, freq[forme][0], `round((float(freq[forme][0])/tot)*100, 2)`,`len(list(set(freq[forme][1])))`, `round((float(len(list(set(freq[forme][1]))))/self.tableau.rownb)*100,2)`] for forme in freq] + freq = sorted(freq, key=itemgetter(1), reverse=True) + freq = [[line[0], `line[1]`, line[2], line[3], line[4]] for line in freq] + freq.insert(0, [u'mod', 'freq', 'percent of total', 'row number', 'percent of rows']) + self.freq = freq + with open(self.pathout['frequences.csv'], 'w') as f : + f.write('\n'.join(['\t'.join(line) for line in freq])) + self.rscript = FreqMultiScript(self) + self.rscript.make_script() + self.doR(self.rscript.scriptout) + self.dolayout() + + def dolayout(self): + pretexte = u''' + + \n

Fréquences

+
+ ''' % self.parent.SysEncoding + txt = """ + \n' + txt += '
\n +
+ """ + txt += '
'.join([''.join(line) for line in self.freq]) + '
graphgraph
' % (os.path.basename(self.pathout['barplotfreq.png']), os.path.basename(self.pathout['barplotrow.png'])) + txt += "\n" + with open(self.pathout['resultats.html'], 'w') as f : + f.write(pretexte + txt) + + + + + + + + \ No newline at end of file