From e4fcd29b0d143168ba43be91e3c829eceefb0dd3 Mon Sep 17 00:00:00 2001 From: Pierre Date: Tue, 15 Jan 2013 11:42:35 +0100 Subject: [PATCH] irlba --- OptionAlceste.py | 30 ++++++++++--- PrintRScript.py | 30 +++++++++---- Rscripts/CHD.R | 4 +- Rscripts/anacor.R | 11 +++-- Rscripts/chdtxt.R | 112 +++++++++++++++++++++------------------------- analysetxt.py | 5 ++- checkinstall.py | 2 +- configuration/alceste.cfg | 4 ++ functions.py | 19 ++++---- iramuteq.py | 36 ++++++++------- tabsimi.py | 2 +- 11 files changed, 144 insertions(+), 111 deletions(-) diff --git a/OptionAlceste.py b/OptionAlceste.py index 49ff0f1..ec3ab55 100755 --- a/OptionAlceste.py +++ b/OptionAlceste.py @@ -20,7 +20,9 @@ class OptionAlc(wx.Dialog): self.DictPath = parametres['pathout'] self.AlcesteConf = parametres self.choose = False - + self.svdmethod = ['svdR', 'irlba'] + if self.parent.pref.getboolean('iramuteq','libsvdc') : + self.svdmethod.append('svdlibc') #self.label_1 = wx.StaticText(self, -1, u"Lemmatisation") #self.radio_1 = wx.RadioBox(self, -1, u"", choices=['oui', 'non'], majorDimension=0, style=wx.RA_SPECIFY_ROWS) @@ -42,6 +44,10 @@ analysée (2 = automatique)""" self.spin_ctrl_5 = wx.SpinCtrl(self, -1, "",size = (100,30), min=2, max=1000) self.label_max_actives = wx.StaticText(self, -1, u"Nombre maximum de formes analysées") self.spin_max_actives = wx.SpinCtrl(self, -1, "",size = (100,30), min=20, max=10000) + self.label_svd = wx.StaticText(self, -1, u"Méthode pour svd") + self.choicesvd = wx.Choice( self, wx.ID_ANY, wx.DefaultPosition, wx.DefaultSize, self.svdmethod, 0 ) + self.label_patate = wx.StaticText(self, -1, u"Mode patate (+ rapide, - précis)") + self.check_patate = wx.CheckBox( self, wx.ID_ANY, wx.EmptyString, wx.DefaultPosition, wx.DefaultSize, 0 ) #self.label_4 = wx.StaticText(self, -1, u"Configuration \ndes clés d'analyse") #self.button_5 = wx.Button(self, wx.ID_PREFERENCES, "") self.button_1 = wx.Button(self, wx.ID_CANCEL, "") @@ -72,6 +78,15 @@ analysée (2 = automatique)""" self.spin_ctrl_5.Disable() self.spin_max_actives.SetValue(int(self.parametres['max_actives'])) self.spin_nbcl.SetValue(int(self.parametres['nbcl_p1'])) + if 'svdmethod' in self.parametres : + self.choicesvd.SetSelection(self.svdmethod.index(self.parametres['svdmethod'])) + else : + self.choicesvd.SetSelection(1) + if 'mode.patate' in self.parametres : + self.check_patate.SetValue(self.parametres['mode.patate']) + else : + self.check_patate.SetValue(False) + def __do_layout(self): sizer_1 = wx.BoxSizer(wx.VERTICAL) @@ -122,10 +137,15 @@ analysée (2 = automatique)""" grid_sizer2.Add(wx.StaticLine(self), 0, wx.EXPAND | wx.ALL, 1) grid_sizer2.Add(wx.StaticLine(self, -1), 0, wx.EXPAND | wx.ALL, 1) - #grid_sizer2.Add(self.label_4, 0, wx.ALIGN_LEFT | wx.ALIGN_CENTER_VERTICAL, 0) - #grid_sizer2.Add(self.button_5, 0, wx.ALIGN_LEFT | wx.ALIGN_CENTER_VERTICAL, 0) - #grid_sizer2.Add(wx.StaticLine(self), 0, wx.EXPAND | wx.ALL, 1) - #grid_sizer2.Add(wx.StaticLine(self, -1), 0, wx.EXPAND | wx.ALL, 1) + grid_sizer2.Add(self.label_svd, 0, wx.ALIGN_LEFT | wx.ALIGN_CENTER_VERTICAL, 0) + grid_sizer2.Add(self.choicesvd, 0, wx.ALIGN_LEFT | wx.ALIGN_CENTER_VERTICAL, 0) + grid_sizer2.Add(wx.StaticLine(self), 0, wx.EXPAND | wx.ALL, 1) + grid_sizer2.Add(wx.StaticLine(self, -1), 0, wx.EXPAND | wx.ALL, 1) + + grid_sizer2.Add(self.label_patate, 0, wx.ALIGN_LEFT | wx.ALIGN_CENTER_VERTICAL, 0) + grid_sizer2.Add(self.check_patate, 0, wx.ALIGN_LEFT | wx.ALIGN_CENTER_VERTICAL, 0) + grid_sizer2.Add(wx.StaticLine(self), 0, wx.EXPAND | wx.ALL, 1) + grid_sizer2.Add(wx.StaticLine(self, -1), 0, wx.EXPAND | wx.ALL, 1) grid_button.Add(self.button_1, 0, wx.ALIGN_CENTER_HORIZONTAL, 0) grid_button.Add(self.button_2, 0, wx.ALIGN_CENTER_HORIZONTAL, 0) diff --git a/PrintRScript.py b/PrintRScript.py index 9a92096..4987b21 100644 --- a/PrintRScript.py +++ b/PrintRScript.py @@ -109,7 +109,7 @@ class Alceste2(PrintRScript) : # -def RchdTxt(DicoPath, RscriptPath, mincl, classif_mode, nbt = 9, libsvdc = False, libsvdc_path = None, R_max_mem = False): +def RchdTxt(DicoPath, RscriptPath, mincl, classif_mode, nbt = 9, svdmethod = 'svdR', libsvdc = False, libsvdc_path = None, R_max_mem = False, mode_patate = False): txt = """ source("%s") source("%s") @@ -124,17 +124,30 @@ def RchdTxt(DicoPath, RscriptPath, mincl, classif_mode, nbt = 9, libsvdc = False txt += """ nbt <- %i """ % nbt - if libsvdc : + if svdmethod == 'svdlibc' and libsvdc : txt += """ - libsvdc <- TRUE + svd.method <- 'svdlibc' libsvdc.path <- "%s" """ % ffr(libsvdc_path) + elif svdmethod == 'irlba' : + txt += """ + library(irlba) + svd.method <- 'irlba' + libsvdc.path <- NULL + """ else : txt += """ - libsvdc <- FALSE + svd.method = 'svdR' libsvdc.path <- NULL """ - + if mode_patate : + txt += """ + mode.patate = TRUE + """ + else : + txt += """ + mode.patate = FALSE + """ txt +=""" library(Matrix) data1 <- readMM("%s") @@ -148,14 +161,13 @@ def RchdTxt(DicoPath, RscriptPath, mincl, classif_mode, nbt = 9, libsvdc = False data2 <- as(data2, "dgCMatrix") row.names(data2) <- 1:nrow(data2) """ % DicoPath['TableUc2'] - #log.info('ATTENTION ############# MODEPATATE ####################') txt += """ - chd1<-CHD(data1, x = nbt, mode.patate = FALSE, libsvdc = libsvdc, libsvdc.path = libsvdc.path) + chd1<-CHD(data1, x = nbt, mode.patate = mode.patate, svd.method = svd.method, libsvdc.path = libsvdc.path) """ if classif_mode == 0: txt += """ - chd2<-CHD(data2, x = nbt, libsvdc = libsvdc, libsvdc.path = libsvdc.path) + chd2<-CHD(data2, x = nbt, mode.patate = mode.patate, svd.method = svd.method, libsvdc.path = libsvdc.path) """ else: txt += """ @@ -694,7 +706,7 @@ class PrintSimiScript(PrintRScript) : """ % (self.pathout['mat01.csv'], self.pathout['actives.csv'], self.pathout['selected.csv']) txt += """ dm <-readMM(dm.path) - cn <- read.table(cn.path, sep=';', quote='"') + cn <- read.table(cn.path, sep='\t', quote='"') colnames(dm) <- cn[,1] sel.col <- read.csv2(selected.col) dm <- dm[, sel.col[,1] + 1] diff --git a/Rscripts/CHD.R b/Rscripts/CHD.R index 049d5c7..53fb813 100644 --- a/Rscripts/CHD.R +++ b/Rscripts/CHD.R @@ -41,7 +41,7 @@ find.max <- function(dtable, chitable, compte, rmax, maxinter, sc, TT) { res } -CHD<-function(data.in, x=9, mode.patate = FALSE, libsvdc=FALSE, libsvdc.path=NULL){ +CHD<-function(data.in, x=9, mode.patate = FALSE, svd.method, libsvdc.path=NULL){ # sink('/home/pierre/workspace/iramuteq/dev/findchi2.txt') dataori <- data.in row.names(dataori) <- rownames(data.in) @@ -78,7 +78,7 @@ CHD<-function(data.in, x=9, mode.patate = FALSE, libsvdc=FALSE, libsvdc.path=NUL #extraction du premier facteur de l'afc print('afc') pp('taille dtable dans boucle (col/row)',c(ncol(dtable),nrow(dtable))) - afc<-boostana(dtable, nd=1, libsvdc=libsvdc, libsvdc.path=libsvdc.path) + afc<-boostana(dtable, nd=1, svd.method = svd.method, libsvdc.path=libsvdc.path) pp('SV',afc$singular.values) pp('V.P.', afc$eigen.values) coordrow <- as.matrix(afc$row.scores[,1]) diff --git a/Rscripts/anacor.R b/Rscripts/anacor.R index c68df77..a1441b9 100644 --- a/Rscripts/anacor.R +++ b/Rscripts/anacor.R @@ -1,4 +1,3 @@ -print('NEW SVD') ################################################################################# #http://www.mail-archive.com/rcpp-devel@lists.r-forge.r-project.org/msg01513.html @@ -53,7 +52,7 @@ my.svd <- function(x, nu, nv, libsvdc.path=NULL, sparse.path=NULL) { ################################################################################### #from anacor package -boostana<-function (tab, ndim = 2, libsvdc = FALSE, libsvdc.path=NULL) +boostana<-function (tab, ndim = 2, svd.method = 'svdR', libsvdc.path=NULL) { #tab <- as.matrix(tab) if (ndim > min(dim(tab)) - 1) @@ -76,7 +75,7 @@ boostana<-function (tab, ndim = 2, libsvdc = FALSE, libsvdc.path=NULL) z1 <- t(tab)/sqrt(c) z2 <- tab/sqrt(r) z <- t(z1) * z2 - if (libsvdc) { + if (svd.method == 'svdlibc') { #START NEW SVD z <- as(z, "dgCMatrix") tmpmat <- tempfile(pattern='sparse') @@ -85,10 +84,14 @@ boostana<-function (tab, ndim = 2, libsvdc = FALSE, libsvdc.path=NULL) print('do svd') sv <- my.svd(z, qdim, qdim, libsvdc.path=libsvdc.path, sparse.path=tmpmat) #END NEW SVD - } else { + } else if (svd.method == 'svdR') { print('start R svd') sv <- svd(z, nu = qdim, nv = qdim) print('end svd') + } else if (svd.method == 'irlba') { + print('irlba') + sv <- irlba(z, qdim, qdim) + print('end irlba') } sigmavec <- (sv$d)[2:qdim] x <- ((sv$u)/sqrt(r))[, -1] diff --git a/Rscripts/chdtxt.R b/Rscripts/chdtxt.R index a6c2d4c..a0a9cdd 100644 --- a/Rscripts/chdtxt.R +++ b/Rscripts/chdtxt.R @@ -80,7 +80,8 @@ Rchdtxt<-function(uceout,mincl=0,classif_mode=0, nbt = 9) { } else { poids2<-poids1 } - + + print('croisement classif') croise=matrix(ncol=tcl,nrow=tcl) #production du tableau de contingence for (i in 1:ncol(classeuce1)) { @@ -91,18 +92,18 @@ Rchdtxt<-function(uceout,mincl=0,classif_mode=0, nbt = 9) { #tabcolnames<-c(tabcolnames[(length(tabcolnames)-1)],tabcolnames[length(tabcolnames)]) tabrownames<-as.numeric(rownames(tablecroise)) #tabrownames<-c(tabrownames[(length(tabrownames)-1)],tabrownames[length(tabrownames)]) - for (k in (ncol(tablecroise)-1):ncol(tablecroise)) { - for (l in (nrow(tablecroise)-1):nrow(tablecroise)) { - croise[(tabrownames[l]-1),(tabcolnames[k]-1)]<-tablecroise[l,k] + for (k in (ncol(tablecroise)-1):ncol(tablecroise)) { + for (l in (nrow(tablecroise)-1):nrow(tablecroise)) { + croise[(tabrownames[l]-1),(tabcolnames[k]-1)]<-tablecroise[l,k] + } } - } } tablecroise } if (classif_mode == 0) {ind <- (nbcl * 2)} else {ind <- nbcl} if (mincl==0){ mincl<-round(nrow(classeuce1)/ind) - }#valeur a calculer nbuce/20 + } if (mincl<3){ mincl<-3 } @@ -113,24 +114,24 @@ Rchdtxt<-function(uceout,mincl=0,classif_mode=0, nbt = 9) { chicroise<-croise for (i in 1:nrow(croise)) { for (j in 1:ncol(croise)) { - if (croise[i,j]==0) { - chicroise[i,j]<-0 - } else if (croise[i,j]1) { - maxchi<-0 - best<-NULL - for (i in 1:length(listcoordok)) { - chi<-NULL - uce<-NULL - if (nrow(listcoordok[[i]])==maxcl) { - for (j in 1:nrow(listcoordok[[i]])) { - chi<-c(chi,croise[(listcoordok[[i]][j,1]-1),(listcoordok[[i]][j,2]-1)]) - uce<-c(uce,chicroiseori[(listcoordok[[i]][j,1]-1),(listcoordok[[i]][j,2]-1)]) + maxchi<-0 + best<-NULL + for (i in 1:length(listcoordok)) { + chi<-NULL + uce<-NULL + if (nrow(listcoordok[[i]])==maxcl) { + for (j in 1:nrow(listcoordok[[i]])) { + chi<-c(chi,croise[(listcoordok[[i]][j,1]-1),(listcoordok[[i]][j,2]-1)]) + uce<-c(uce,chicroiseori[(listcoordok[[i]][j,1]-1),(listcoordok[[i]][j,2]-1)]) + } + if (maxchi < sum(chi)) { + maxchi <- sum(chi) + suce <- sum(uce) + best <- i + } } - if (maxchi < sum(chi)) { - maxchi<-sum(chi) - suce<-sum(uce) - best<-i - } } - } } print((suce/nrow(classeuce1)*100)) listcoordok[[best]] @@ -294,17 +293,6 @@ Rchdtxt<-function(uceout,mincl=0,classif_mode=0, nbt = 9) { nchd2[which(nchd1[,ncol(nchd1)]!=nchd2[,ncol(nchd2)]),] <- 0 nchd1[which(nchd2[,ncol(nchd2)]==0),] <- 0 -# for (i in 1:nrow(nchd1)) { -# if (nchd1[i,ncol(nchd1)]==0) { -# nchd2[i,]<-nchd2[i,]*0 -# } -# if (nchd1[i,ncol(nchd1)]!=nchd2[i,ncol(nchd2)]) { -# nchd2[i,]<-nchd2[i,]*0 -# } -# if (nchd2[i,ncol(nchd2)]==0) { -# nchd1[i,]<-nchd1[i,]*0 -# } -# } print('fini croise') elim<-which(nchd1[,ncol(nchd1)]==0) keep<-which(nchd1[,ncol(nchd1)]!=0) diff --git a/analysetxt.py b/analysetxt.py index 4770f6b..ff37c98 100644 --- a/analysetxt.py +++ b/analysetxt.py @@ -174,16 +174,19 @@ class Alceste(AnalyseText) : parametres['nbcl_p1'] = self.dial.spin_nbcl.GetValue() parametres['max_actives'] = self.dial.spin_max_actives.GetValue() parametres['corpus'] = '' + parametres['svdmethod'] = self.dial.svdmethod[self.dial.choicesvd.GetSelection()] parametres['pathout'] = self.pathout.dirout + parametres['mode.patate'] = self.dial.check_patate.GetValue() DoConf(self.parent.ConfigPath['alceste']).makeoptions(['ALCESTE'], [parametres]) self.dial.Destroy() + print parametres return parametres else : self.dial.Destroy() return None def printRscript(self) : - RchdTxt(self.pathout, self.parent.RscriptsPath, self.parametres['mincl'], self.parametres['classif_mode'], nbt = self.parametres['nbcl_p1'] - 1, libsvdc = self.parent.pref.getboolean('iramuteq','libsvdc'), libsvdc_path = self.parent.pref.get('iramuteq','libsvdc_path'), R_max_mem = False) + RchdTxt(self.pathout, self.parent.RscriptsPath, self.parametres['mincl'], self.parametres['classif_mode'], nbt = self.parametres['nbcl_p1'] - 1, svdmethod = self.parametres['svdmethod'], libsvdc = self.parent.pref.getboolean('iramuteq','libsvdc'), libsvdc_path = self.parent.pref.get('iramuteq','libsvdc_path'), R_max_mem = False, mode_patate = self.parametres['mode.patate']) return self.pathout['Rchdtxt'] def printRscript2(self) : diff --git a/checkinstall.py b/checkinstall.py index 385eb27..927f847 100644 --- a/checkinstall.py +++ b/checkinstall.py @@ -137,7 +137,7 @@ def install_textometrieR(self) : dlg.Destroy() def CheckRPackages(self): - listdep = ['ca', 'gee', 'ape', 'igraph','proxy', 'wordcloud', 'textometrieR'] + listdep = ['ca', 'gee', 'ape', 'igraph','proxy', 'wordcloud', 'irlba', 'textometrieR'] nolib = [] i=0 dlg = wx.ProgressDialog("Test des librairies de R", "test en cours...", maximum = len(listdep), parent=self, style=wx.PD_APP_MODAL | wx.PD_AUTO_HIDE | wx.PD_ELAPSED_TIME | wx.PD_CAN_ABORT) diff --git a/configuration/alceste.cfg b/configuration/alceste.cfg index 42891ff..6b9c954 100644 --- a/configuration/alceste.cfg +++ b/configuration/alceste.cfg @@ -21,6 +21,10 @@ minforme = 2 expressions = True #nbre de classe terminale de la phase 1 nbcl_p1 = 10 +#methode pour svd +svdmethod = irlba +#mode patate (+ rapide et - precis) +mode.patate = 0 [IMAGE] #non utilise diff --git a/functions.py b/functions.py index 8f9cf41..90b380e 100644 --- a/functions.py +++ b/functions.py @@ -546,23 +546,20 @@ def check_Rresult(parent, pid) : if error[1] is None : error[1] = 'None' parent.Rerror = '\n'.join([str(pid.returncode), '\n'.join(error)]) - #try : - #raise Exception('\n'.join([u'Erreur R', '\n'.join(error[1:])])) - BugReport(parent) + try : + raise Exception('\n'.join([u'Erreur R', '\n'.join(error[1:])])) + except : + BugReport(parent) return False - #except : - # BugReport(parent) else : return True else : if pid != 0 : - BugReport(parent) + try : + raise Exception(u'Erreur R') + except : + BugReport(parent) return False - #try : - #raise Exception(u'Erreur R') - #return False - #except : - # BugReport(parent) else : return True diff --git a/iramuteq.py b/iramuteq.py index af6c521..fd4c1ab 100644 --- a/iramuteq.py +++ b/iramuteq.py @@ -422,6 +422,22 @@ class IraFrame(wx.Frame): if self.SysEncoding == 'mac-roman' : self.SysEncoding = 'MacRoman' self.type = '' +##############################################################@ + self.DisEnSaveTabAs(False) + self.ShowMenu(_("View"), False) + self.ShowMenu(_("Spreadsheet analysis"), False) + self.ShowMenu(_("Text analysis"), False) + + self._mgr.Update() + + self.DataPop = False + self.DataTxt = False + self.Text = '' + + self.lexique = None + self.corpus = None + + def finish_init(self) : try : self.pref.read(self.ConfigPath['preferences']) if IsNew(self) : @@ -434,6 +450,9 @@ class IraFrame(wx.Frame): UpgradeConf(self) self.pref.read(self.ConfigPath['preferences']) New = True + self.sound = self.pref.getboolean('iramuteq', 'sound') + self.check_update = self.pref.getboolean('iramuteq', 'checkupdate') + self.version = ConfigGlob.get('DEFAULT', 'version') #configuration des chemins de R self.PathPath = ConfigParser() self.PathPath.read(ConfigPath['path']) @@ -467,21 +486,7 @@ vous devez signaler le chemin de l'éxecutable de R dans les préférences.""" if dlg.ShowModal() in [wx.ID_NO, wx.ID_CANCEL]: evt.Veto() dlg.Destroy() - self.DataPop = False - self.DataTxt = False - self.Text = '' - self.sound = self.pref.getboolean('iramuteq', 'sound') - self.check_update = self.pref.getboolean('iramuteq', 'checkupdate') - self.version = ConfigGlob.get('DEFAULT', 'version') - self.lexique = None - self.corpus = None -##############################################################@ - self.DisEnSaveTabAs(False) - self.ShowMenu(_("View"), False) - self.ShowMenu(_("Spreadsheet analysis"), False) - self.ShowMenu(_("Text analysis"), False) - - self._mgr.Update() + def OnVerif(self, evt) : pack = CheckRPackages(self) @@ -1148,6 +1153,7 @@ class MySplashScreen(wx.SplashScreen): def ShowMain(self): frame = IraFrame(None, -1, "IRaMuTeQ " + ConfigGlob.get('DEFAULT', 'version'), size=(1100, 800)) frame.Show() + frame.finish_init() frame.Upgrade() frame.OnOpenFromCmdl() # if self.fc.IsRunning(): diff --git a/tabsimi.py b/tabsimi.py index 8a5ca35..91d2230 100644 --- a/tabsimi.py +++ b/tabsimi.py @@ -343,7 +343,7 @@ class DoSimi(): """ if not self.paramsimi['keep_coord'] : txt += """ - cn <- read.table("%s", sep=';', quote='"') + cn <- read.csv("%s", sep='\t', quote='"', headers = FALSE) colnames(dm) <- cn[,1] #colnames(dml) <- cn[,1] """ % ffr(active_file) -- 2.7.4