self.label_1 = wx.StaticText(self, -1, u"Lemmatisation")
self.radio_1 = wx.RadioBox(self, -1, u"", choices=['oui', 'non'], majorDimension=0, style=wx.RA_SPECIFY_ROWS)
- self.label_exp = wx.StaticText(self, -1, u"Utiliser le dict. des expressions")
- self.radio_exp = wx.RadioBox(self, -1, u"", choices=['oui', 'non'], majorDimension=0, style=wx.RA_SPECIFY_ROWS)
+ #self.label_exp = wx.StaticText(self, -1, u"Utiliser le dict. des expressions")
+ #self.radio_exp = wx.RadioBox(self, -1, u"", choices=['oui', 'non'], majorDimension=0, style=wx.RA_SPECIFY_ROWS)
txt = u"""Methode de construction
de la matrice des distances"""
self.label_12 = wx.StaticText(self, -1, txt)
else:
self.radio_1.SetSelection(1)
expressions = self.pamconf.getboolean('pam', 'expressions')
- if expressions :
- self.radio_exp.SetSelection(0)
- else :
- self.radio_exp.SetSelection(1)
+ #if expressions :
+ # self.radio_exp.SetSelection(0)
+ #else :
+ # self.radio_exp.SetSelection(1)
self.choice_1.SetSelection(self.distance.index(self.pamconf.get('pam', 'method')))
if self.pamconf.get('pam', 'cluster_type') == u'pam' :
self.radio_box_3.SetSelection(0)
""" % DicoPath['listeuce2']
txt += """
-# rm(data1)
+ rm(data1)
"""
if classif_mode == 0:
txt += """
-# rm(data2)
+ rm(data2)
"""
txt += """
chd.result <- Rchdtxt("%s",mincl=%i,classif_mode=%i, nbt = nbt)
xmax <- max(afc$rowcoord[,1], na.rm = TRUE) + (0.1 * max(afc$rowcoord[,1], na.rm = TRUE))
ymin <- min(afc$rowcoord[,2], na.rm = TRUE) + (0.1 * min(afc$rowcoord[,2], na.rm = TRUE))
ymax <- max(afc$rowcoord[,2], na.rm = TRUE) + (0.1 * max(afc$rowcoord[,2], na.rm = TRUE))
+ print(xmin)
+ print(xmax)
+ print(ymin)
+ print(ymax)
""" % taillecar
txt += """
PlotAfc2dCoul(afc, as.data.frame(chistabletot), "%s", what='coord', deb=1, fin=(debsup-1), xlab = xlab, ylab = ylab, xmin=xmin, xmax=xmax, ymin = ymin, ymax=ymax)
if (valmin >=0) {
valmin <- -2
} else {
- valmin <- valmin -2
+ valmin <- valmin - 2
}
di[tominf] <- valmin
}
# dev.off()
#}
-PlotAfc2dCoul<- function(afc,chisqrtable,filename, what='coord',col=FALSE, axetoplot=c(1,2), deb=0,fin=0, width=900, height=900, quality=100, reso=200, parcex=PARCEX, xlab = NULL, ylab = NULL, xmin=NULL, xmax=NULL, ymin=NULL, ymax=NUL) {
+PlotAfc2dCoul<- function(afc,chisqrtable,filename, what='coord',col=FALSE, axetoplot=c(1,2), deb=0,fin=0, width=900, height=900, quality=100, reso=200, parcex=PARCEX, xlab = NULL, ylab = NULL, xmin=NULL, xmax=NULL, ymin=NULL, ymax=NULL) {
if (col) {
if (what == 'coord') {
rowcoord <- as.matrix(afc$colcoord)
make_afc_graph <- function(toplot, classes, clnb, xlab, ylab, cex.txt = NULL, leg = FALSE, cmd = FALSE, black = FALSE, xminmax=NULL, yminmax=NULL) {
if (is.null(xminmax)) {
- xminmax <- c(min(toplot[,1], na.rm = TRUE) + (0.1 * min(toplot[,1], na.rm = TRUE)), max(toplot[,1], na.rm = TRUE) + (0.1 * max(toplot[,1], na.rm = TRUE)))
+ xminmax <- c(min(toplot[,1], na.rm = TRUE) + ((max(cex.txt)/10) * min(toplot[,1], na.rm = TRUE)), max(toplot[,1], na.rm = TRUE) + ((max(cex.txt)/10) * max(toplot[,1], na.rm = TRUE)))
}
if (is.null(yminmax)) {
- yminmax <- c(min(toplot[,2], na.rm = TRUE) + (0.1 * min(toplot[,2], na.rm = TRUE)), max(toplot[,2], na.rm = TRUE) + (0.1 * max(toplot[,2], na.rm = TRUE)))
+ yminmax <- c(min(toplot[,2], na.rm = TRUE) + ((max(cex.txt)/10) * min(toplot[,2], na.rm = TRUE)), max(toplot[,2], na.rm = TRUE) + ((max(cex.txt)/10) * max(toplot[,2], na.rm = TRUE)))
}
rain <- rainbow(clnb)
compt <- 1
tchi.min <- %i
tchi.max <- %i
dirout <- '%s'
+#xmin <- xmin
+#xmax <- xmax
+#ymin <- ymin
+#ymax <- ymax
xlab <- paste('facteur ', x, ' -')
ylab <- paste('facteur ', y, ' -')
}
classes <- c(1:clnb)
maxchi <- 1
- cex.par <- NULL
+ cex.par <- rep(taillecar/10, nrow(table.in))
} else {
if ( what == 0 ) table.in <- afc$rowcoord
- if ( what == 1 ) table.in <- afc$rowcrl*2
+ if ( what == 1 ) table.in <- afc$rowcrl
rownames(table.in) <- afc$rownames
tablechi <- chistabletot
rn.keep <- c()
}
}
-# if (over) {
-# rn <- rownames(table.in)
-# rownames(table.in) <- 1:nrow(table.in)
-# table.in <- unique(table.in)
-# rn.keep <- as.numeric(rownames(table.in))
-# rownames(table.in) <- rn[rn.keep]
-# tablechi <- tablechi[rn.keep,]
-# if (qui==0) {
-# cex.par <- cex.par[rn.keep]
-# } else {
-# cex.par <- NULL
-# }
-# }
if (do.select.nb) {
if (select.nb > nrow(table.in)) select.nb <- nrow(table.in)
row.keep <- select_point_nb(tablechi, select.nb)
}
classes <- apply(tablechi, 1, which.max)
maxchi <- apply(tablechi, 1, max)
-
+ infp <- which(is.infinite(maxchi) & maxchi > 0)
+ if (length(infp)) {
+ maxchi[infp] <- NA
+ valmax <- max(maxchi, na.rm = TRUE)
+ maxchi[infp] <- valmax + 2
+ }
if (cex.txt) {
#row.keep <- append(row.keep, rn.keep)
#row.keep <- unique(row.keep)
cex.par <- maxchi
cex.par <- norm.vec(cex.par, tchi.min/10, tchi.max/10)
} else {
- cex.par <- NULL
+ cex.par <- rep(taillecar/10, nrow(table.in))
}
}
elif self.parametres['classif_mode'] == 2 :
self.corpus.make_and_write_sparse_matrix_from_uci(self.actives, self.pathout['TableUc1'], self.pathout['listeuce1'])
Rscript = self.printRscript()
- self.doR(Rscript)
+ self.doR(Rscript, dlg = self.dlg, message = 'CHD...')
#self.lc = make_ucecl_from_R(self.pathout['uce'])
#self.lc0 = self.lc.pop(0)
self.corpus.make_ucecl_from_R(self.pathout['uce'])
self.clnb = len(self.corpus.lc)
self.parametres['clnb'] = self.clnb
Rscript = self.printRscript2()
- self.doR(Rscript)
+ self.doR(Rscript, dlg = self.dlg, message = 'profils et A.F.C. ...')
self.time = time() - self.t1
minutes, seconds = divmod(self.time, 60)
hours, minutes = divmod(minutes, 60)
else :
choix=[u'2D' ,u'3D']
self.choicetype = wx.Choice(self, -1, (100,50), choices=choix)
+ self.label_format = wx.StaticText(self, -1, u"Format de l'image")
+ self.choix_format = wx.Choice(self, -1, (100,50), choices = ['png', 'svg'])
self.label_1 = wx.StaticText(self, -1, u'Largeur')
self.spin1 = wx.SpinCtrl(self, -1, '',size = (100,30), min=100, max=5000)
self.label_2 = wx.StaticText(self, -1, u'Hauteur')
fsizer.Add(wx.StaticLine(self, -1), 0, wx.EXPAND, 0)
fsizer.Add(wx.StaticLine(self, -1), 0, wx.EXPAND, 0)
+ fsizer.Add(self.label_format, 0, wx.ALL | wx.ALIGN_LEFT | wx.ALIGN_CENTER_VERTICAL, 5)
+ fsizer.Add(self.choix_format, 0, wx.ALL | wx.ALIGN_LEFT | wx.ALIGN_CENTER_VERTICAL, 5)
+ fsizer.Add(wx.StaticLine(self, -1), 0, wx.EXPAND, 0)
+ fsizer.Add(wx.StaticLine(self, -1), 0, wx.EXPAND, 0)
+
fsizer.Add(self.label_what, 0, wx.ALL | wx.ALIGN_LEFT | wx.ALIGN_CENTER_VERTICAL, 5)
fsizer.Add(self.choice1, 0, wx.ALL | wx.ALIGN_LEFT | wx.ALIGN_CENTER_VERTICAL, 5)
fsizer.Add(wx.StaticLine(self, -1), 0, wx.EXPAND, 0)
tosave['corpus'] = analyse['corpus']
tosave['name'] = analyse['name']
acorpus_uuid = analyse['corpus']
- if acorpus_uuid in self.ordercorpus :
+ if acorpus_uuid in self.corpus :
if 'analyses' in self.history[self.ordercorpus[acorpus_uuid]] :
self.history[self.ordercorpus[acorpus_uuid]]['analyses'].append(tosave)
else :
if isinstance(pid, Popen) :
if pid.returncode != 0 :
error = pid.communicate()
+ print error
error = [str(error[0]), error[1]]
if error[1] is None :
error[1] = 'None'
#except :
# BugReport(parent)
else :
- return None
+ return True
else :
if pid != 0 :
#try :
#except :
# BugReport(parent)
else :
- return None
+ return True
def print_liste(filename,liste):
with open(filename,'w') as f :
'facteur' : [1,2,3],
'alpha' : 10,
'clnb' : clnb,
+ 'svg' : 0,
}
self.__set_properties()
dial.CenterOnParent()
val = dial.ShowModal()
if val == wx.ID_OK :
+ if dial.choix_format.GetSelection() == 0 :
+ svg = 0
+ else :
+ svg = 1
self.param = {'typegraph' : dial.choicetype.GetSelection(),
'width' : dial.spin1.GetValue(),
'height' : dial.spin2.GetValue(),
'facteur' : [dial.spin_f1.GetValue(),dial.spin_f2.GetValue(), dial.spin_f3.GetValue()],
'clnb' : self.clnb,
'film' : str(dial.film.GetValue()).upper(),
- 'alpha' : dial.slider_sphere.GetValue()
+ 'alpha' : dial.slider_sphere.GetValue(),
+ 'svg' : svg
}
self.nb.parent = self.ira
self.DictPathOut = self.Dict
afc <- afcf
afc_table <- afcf_table
chistabletot <- specfp
- infp <- which(is.infinite(chistabletot) & chistabletot > 0)
- infm <- which(is.infinite(chistabletot) & chistabletot < 0)
- chistabletot[infp] <- 0
- chistabletot[infm] <- 0
- chistabletot[infp] <- max(chistabletot) + 1
- chistabletot[infm] <- min(chistabletot) - 1
"""
elif self.itempath == 'liste_graph_afct' :
txt +="""
temps d'analyse : %s
###########################
""" % parametres['time']
- file = open(self.pathout['pre_rapport'], 'w')
- file.write(txt)
- file.close()
+ with open(self.pathout['pre_rapport'], 'w') as f :
+ f.write(txt)
class dolexlayout :
def __init__(self, ira, corpus, parametres):
import os
import shelve
#from ConfigParser import *
-#from tabsimi import DoSimi
+from tabsimi import DoSimi
from functions import BugReport, DoConf
import logging
if self.conf['type'] == 'corpus' :
corpus = self.opencorpus()
elif self.conf['corpus'] in self.parent.history.corpus :
+ print 'corpus in history.corpus'
+ if self.conf['uuid'] in self.parent.history.analyses :
+ intree = True
+ else :
+ intree = False
corpus = self.openanalyse()
if self.conf.get('lem',1) :
corpus.make_lems(True)
else :
corpus.make_lems(False)
self.doopen(corpus)
+ if not intree :
+ self.parent.tree.AddAnalyse(self.conf)
+ else :
+ print 'passe apr la'
+ print self.parent.tree.GiveFocus(uuid = self.conf['uuid'], bold = True)
else :
corpus = None
self.parent.history.addtab(self.conf)
if os.path.exists(self.parent.history.history[self.parent.history.ordercorpus[self.conf['corpus']]]['ira']) :
corpus = Corpus(self, parametres = DoConf(self.parent.history.history[self.parent.history.ordercorpus[self.conf['corpus']]]['ira']).getoptions('corpus'), read = self.parent.history.history[self.parent.history.ordercorpus[self.conf['corpus']]]['ira'])
self.parent.history.openedcorpus[self.conf['corpus']] = corpus
+ self.parent.history.add(self.conf)
return corpus
def doopen(self, corpus) :
elif self.conf['type'] == 'wordcloud' :
self.parent.ShowMenu(_("Text analysis"))
WordCloudLayout(self.parent, corpus, self.conf)
+
--- /dev/null
+#!/bin/env python
+# -*- coding: utf-8 -*-
+#Author: Pierre Ratinaud
+#Copyright (c) 2012 Pierre Ratinaud
+#License: GNU/GPL
+
+import os
+import codecs
+
+
+#txtdir = 'dev/factiva_txt' #repertoire des textes
+#txtdir = 'corpus/jeunesdebanlieues'
+#fileout = 'dev/factiva_txt_out.txt'
+#encodage_in = 'utf8'
+#encodage_out = 'utf8'
+
+
+def parsetxtmail(txt):
+    """
+    Parse the text of one Factiva mail export into a list of ucis.
+
+    txt : whole content of the file, as a single string.
+
+    Returns a list of two-item lists [tags, text] : tags is a list of
+    strings that starts with '****' and receives the '*source_',
+    '*ma_' and '*annee_' variables found in the 'SN ' and 'PD ' lines ;
+    text is the article text, each kept line being prefixed by a
+    newline. An empty or too short input returns an empty list.
+    """
+    no = ['NS','RE','IPD','CO','IN'] # tags that mark the end of the text
+    textstart = ['LP', 'TD'] # tags that mark the start of the text
+    lines = txt.splitlines()[1:] # the first line is not used (slice is safe on empty input)
+    lines = lines[:-10] # the last ten lines are not used either
+    keepline = False
+    ucis = []
+    for line in lines : # for each line of the text...
+        if line.startswith('---------------------------------------------------------------') :
+            ucis.append([['****'],'']) # a separator opens a new uci
+            keepline = False
+            continue
+        if not ucis :
+            # nothing before the first separator can be attached to a uci
+            continue
+        if line.startswith('SN ') : # source
+            source = '*source_' + line[4:].replace(' ','').replace('\'','').replace(u'´','').replace(u'’','').replace('-','').lower()
+            ucis[-1][0].append(source)
+        elif line.startswith('PD ') : # date
+            date = line[4:].split(' ')
+            # fields 1 and 2 are used as month and year ; a shorter date is skipped
+            if len(date) >= 3 :
+                ucis[-1][0].append(u'*ma_' + date[1] + date[2])
+                ucis[-1][0].append(u'*annee_' + date[2])
+        elif line in no or line.startswith('RF ') : # end of the text
+            keepline = False
+        elif line in textstart : # start of the text
+            keepline = True
+        # the tag lines themselves are never part of the text
+        if keepline and line not in textstart :
+            ucis[-1][1] = '\n'.join([ucis[-1][1],line])
+    return ucis
+
+
+def print_ucis(ucis, ofile, encodage) :
+    """
+    Write the non-empty ucis to ofile.
+
+    ucis : list of [tags, text] items, as built by parsetxtmail.
+    ofile : object with a write method that accepts the encoded text.
+    encodage : name of the encoding used for the output.
+
+    Ucis whose text is empty or blank are dropped. Nothing is written
+    when no uci is left.
+    """
+    ucis = [uci for uci in ucis if uci[1].strip() != '']
+    if not ucis :
+        return
+    toprint = '\n'.join(['\n'.join([' '.join(uci[0]),uci[1]]) for uci in ucis])
+    # end the chunk with a newline : without it, the '****' line of the next
+    # chunk written to the same file is glued to the last line of this one
+    ofile.write((toprint + '\n').encode(encodage))
+
+class ParseFactivaMail :
+    """
+    Convert every file of a directory of Factiva mail exports into one
+    output file. All the work is done by the constructor.
+    """
+    def __init__(self, txtdir, fileout, encodage_in, encodage_out) :
+        """
+        txtdir : directory that holds the input files.
+        fileout : path of the output file (overwritten).
+        encodage_in : encoding used to read the input files.
+        encodage_out : encoding used to write the output file.
+        """
+        outpath = os.path.abspath(fileout)
+        # os.listdir gives no guarantee on order : sort to get a reproducible output
+        files = sorted(os.listdir(txtdir)) # list of the files in txtdir
+        with open(fileout,'w') as outf : # open the output file
+            for f in files : # for each input file...
+                f = os.path.join(txtdir, f) # path of the file
+                # skip sub-directories, and the output file itself when it lives in txtdir
+                if not os.path.isfile(f) or os.path.abspath(f) == outpath :
+                    continue
+                with codecs.open(f, 'r', encodage_in) as infile : # open the file
+                    content = infile.read() # read the file
+                ucis = parsetxtmail(content)
+                print_ucis(ucis, outf, encodage_out)
+
+#for dat in ['2001','2002','2003','2004', '2005','2006','2007','2008','2009','2010','2011'] :
+# path = os.path.join(txtdir,dat)
+# outfile = os.path.join(txtdir, 'corpus_' + dat + '.txt')
+# doparse(path, outfile)
+
+
+if __name__ == '__main__' :
+    # Command line entry point. The four settings are read from the
+    # command line because this module defines no default for them.
+    import sys
+    if len(sys.argv) != 5 :
+        sys.exit('usage: %s txtdir fileout encodage_in encodage_out' % sys.argv[0])
+    ParseFactivaMail(sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4])
+    print 'fini'
import logging
-logger = logging.getLogger('iramuteq.textsimi')
+log = logging.getLogger('iramuteq.textsimi')
class SimiTxt(AnalyseText):
def doanalyse(self) :
self.makefiles()
script = PrintSimiScript(self)
script.make_script()
- if not self.doR(script.scriptout) :
+ if not self.doR(script.scriptout, dlg = self.dlg, message = 'R...') :
+ log.info('Problem')
return False
if self.parametres['type_graph'] == 1:
if os.path.exists(self.pathout['liste_graph']):
self.makefiles()
script = PrintSimiScript(self)
script.make_script()
- if self.doR(script.scriptout) :
+ if not self.doR(script.scriptout, dlg = self.dlg, message = 'R ...') :
return False
if self.parametres['type_graph'] == 1:
if os.path.exists(self.pathout['liste_graph']):
self.CloseItem(child, uuid)
child, cookie = self.GetNextChild(itemParent, cookie)
- def GiveFocus(self, itemParent = None, uuid = None) :
+ def GiveFocus(self, itemParent = None, uuid = None, bold = False) :
+ # Select the item whose pydata 'uuid' equals uuid, searching the
+ # children of itemParent (self.root when None) and their descendants.
+ # When bold is True the matching item is also set bold.
+ # Returns 'kool' when an item was found, 'pas kool' otherwise.
if itemParent is None :
itemParent = self.root
child, cookie = self.GetFirstChild(itemParent)
while child :
pydata = self.GetPyData(child)
if pydata['uuid'] == uuid :
self.SelectItem(child)
- break
+ if bold :
+ self.SetItemBold(child, True)
+ return 'kool'
- self.GiveFocus(child, uuid)
+ # pass bold down to the descendants and stop once a subtree reports a match
+ if self.GiveFocus(child, uuid, bold) == 'kool' :
+ return 'kool'
child, cookie = self.GetNextChild(itemParent, cookie)
+ return 'pas kool'
def OnRightDown(self, event):