self.DictPath = parametres['pathout']
self.AlcesteConf = parametres
self.choose = False
-
+ self.svdmethod = ['svdR', 'irlba']
+ if self.parent.pref.getboolean('iramuteq','libsvdc') :
+ self.svdmethod.append('svdlibc')
#self.label_1 = wx.StaticText(self, -1, u"Lemmatisation")
#self.radio_1 = wx.RadioBox(self, -1, u"", choices=['oui', 'non'], majorDimension=0, style=wx.RA_SPECIFY_ROWS)
self.spin_ctrl_5 = wx.SpinCtrl(self, -1, "",size = (100,30), min=2, max=1000)
self.label_max_actives = wx.StaticText(self, -1, u"Nombre maximum de formes analysées")
self.spin_max_actives = wx.SpinCtrl(self, -1, "",size = (100,30), min=20, max=10000)
+ self.label_svd = wx.StaticText(self, -1, u"Méthode pour svd")
+ self.choicesvd = wx.Choice( self, wx.ID_ANY, wx.DefaultPosition, wx.DefaultSize, self.svdmethod, 0 )
+ self.label_patate = wx.StaticText(self, -1, u"Mode patate (+ rapide, - précis)")
+ self.check_patate = wx.CheckBox( self, wx.ID_ANY, wx.EmptyString, wx.DefaultPosition, wx.DefaultSize, 0 )
#self.label_4 = wx.StaticText(self, -1, u"Configuration \ndes clés d'analyse")
#self.button_5 = wx.Button(self, wx.ID_PREFERENCES, "")
self.button_1 = wx.Button(self, wx.ID_CANCEL, "")
self.spin_ctrl_5.Disable()
self.spin_max_actives.SetValue(int(self.parametres['max_actives']))
self.spin_nbcl.SetValue(int(self.parametres['nbcl_p1']))
+ if 'svdmethod' in self.parametres :
+ self.choicesvd.SetSelection(self.svdmethod.index(self.parametres['svdmethod']))
+ else :
+ self.choicesvd.SetSelection(1)
+ if 'mode.patate' in self.parametres :
+ self.check_patate.SetValue(self.parametres['mode.patate'])
+ else :
+ self.check_patate.SetValue(False)
+
def __do_layout(self):
sizer_1 = wx.BoxSizer(wx.VERTICAL)
grid_sizer2.Add(wx.StaticLine(self), 0, wx.EXPAND | wx.ALL, 1)
grid_sizer2.Add(wx.StaticLine(self, -1), 0, wx.EXPAND | wx.ALL, 1)
- #grid_sizer2.Add(self.label_4, 0, wx.ALIGN_LEFT | wx.ALIGN_CENTER_VERTICAL, 0)
- #grid_sizer2.Add(self.button_5, 0, wx.ALIGN_LEFT | wx.ALIGN_CENTER_VERTICAL, 0)
- #grid_sizer2.Add(wx.StaticLine(self), 0, wx.EXPAND | wx.ALL, 1)
- #grid_sizer2.Add(wx.StaticLine(self, -1), 0, wx.EXPAND | wx.ALL, 1)
+ grid_sizer2.Add(self.label_svd, 0, wx.ALIGN_LEFT | wx.ALIGN_CENTER_VERTICAL, 0)
+ grid_sizer2.Add(self.choicesvd, 0, wx.ALIGN_LEFT | wx.ALIGN_CENTER_VERTICAL, 0)
+ grid_sizer2.Add(wx.StaticLine(self), 0, wx.EXPAND | wx.ALL, 1)
+ grid_sizer2.Add(wx.StaticLine(self, -1), 0, wx.EXPAND | wx.ALL, 1)
+
+ grid_sizer2.Add(self.label_patate, 0, wx.ALIGN_LEFT | wx.ALIGN_CENTER_VERTICAL, 0)
+ grid_sizer2.Add(self.check_patate, 0, wx.ALIGN_LEFT | wx.ALIGN_CENTER_VERTICAL, 0)
+ grid_sizer2.Add(wx.StaticLine(self), 0, wx.EXPAND | wx.ALL, 1)
+ grid_sizer2.Add(wx.StaticLine(self, -1), 0, wx.EXPAND | wx.ALL, 1)
grid_button.Add(self.button_1, 0, wx.ALIGN_CENTER_HORIZONTAL, 0)
grid_button.Add(self.button_2, 0, wx.ALIGN_CENTER_HORIZONTAL, 0)
#
-def RchdTxt(DicoPath, RscriptPath, mincl, classif_mode, nbt = 9, libsvdc = False, libsvdc_path = None, R_max_mem = False):
+def RchdTxt(DicoPath, RscriptPath, mincl, classif_mode, nbt = 9, svdmethod = 'svdR', libsvdc = False, libsvdc_path = None, R_max_mem = False, mode_patate = False):
txt = """
source("%s")
source("%s")
txt += """
nbt <- %i
""" % nbt
- if libsvdc :
+ if svdmethod == 'svdlibc' and libsvdc :
txt += """
- libsvdc <- TRUE
+ svd.method <- 'svdlibc'
libsvdc.path <- "%s"
""" % ffr(libsvdc_path)
+ elif svdmethod == 'irlba' :
+ txt += """
+ library(irlba)
+ svd.method <- 'irlba'
+ libsvdc.path <- NULL
+ """
else :
txt += """
- libsvdc <- FALSE
+ svd.method = 'svdR'
libsvdc.path <- NULL
"""
-
+ if mode_patate :
+ txt += """
+ mode.patate = TRUE
+ """
+ else :
+ txt += """
+ mode.patate = FALSE
+ """
txt +="""
library(Matrix)
data1 <- readMM("%s")
data2 <- as(data2, "dgCMatrix")
row.names(data2) <- 1:nrow(data2)
""" % DicoPath['TableUc2']
- #log.info('ATTENTION ############# MODEPATATE ####################')
txt += """
- chd1<-CHD(data1, x = nbt, mode.patate = FALSE, libsvdc = libsvdc, libsvdc.path = libsvdc.path)
+ chd1<-CHD(data1, x = nbt, mode.patate = mode.patate, svd.method = svd.method, libsvdc.path = libsvdc.path)
"""
if classif_mode == 0:
txt += """
- chd2<-CHD(data2, x = nbt, libsvdc = libsvdc, libsvdc.path = libsvdc.path)
+ chd2<-CHD(data2, x = nbt, mode.patate = mode.patate, svd.method = svd.method, libsvdc.path = libsvdc.path)
"""
else:
txt += """
""" % (self.pathout['mat01.csv'], self.pathout['actives.csv'], self.pathout['selected.csv'])
txt += """
dm <-readMM(dm.path)
- cn <- read.table(cn.path, sep=';', quote='"')
+ cn <- read.table(cn.path, sep='\t', quote='"')
colnames(dm) <- cn[,1]
sel.col <- read.csv2(selected.col)
dm <- dm[, sel.col[,1] + 1]
res
}
-CHD<-function(data.in, x=9, mode.patate = FALSE, libsvdc=FALSE, libsvdc.path=NULL){
+# svd.method defaults to 'svdR', like boostana(), so callers that do not pass it keep working.
+CHD<-function(data.in, x=9, mode.patate = FALSE, svd.method = 'svdR', libsvdc.path=NULL){
# sink('/home/pierre/workspace/iramuteq/dev/findchi2.txt')
dataori <- data.in
row.names(dataori) <- rownames(data.in)
#extraction du premier facteur de l'afc
print('afc')
pp('taille dtable dans boucle (col/row)',c(ncol(dtable),nrow(dtable)))
- afc<-boostana(dtable, nd=1, libsvdc=libsvdc, libsvdc.path=libsvdc.path)
+ afc<-boostana(dtable, nd=1, svd.method = svd.method, libsvdc.path=libsvdc.path)
pp('SV',afc$singular.values)
pp('V.P.', afc$eigen.values)
coordrow <- as.matrix(afc$row.scores[,1])
-print('NEW SVD')
#################################################################################
#http://www.mail-archive.com/rcpp-devel@lists.r-forge.r-project.org/msg01513.html
###################################################################################
#from anacor package
-boostana<-function (tab, ndim = 2, libsvdc = FALSE, libsvdc.path=NULL)
+boostana<-function (tab, ndim = 2, svd.method = 'svdR', libsvdc.path=NULL)
{
#tab <- as.matrix(tab)
if (ndim > min(dim(tab)) - 1)
z1 <- t(tab)/sqrt(c)
z2 <- tab/sqrt(r)
z <- t(z1) * z2
- if (libsvdc) {
+ if (svd.method == 'svdlibc') {
#START NEW SVD
z <- as(z, "dgCMatrix")
tmpmat <- tempfile(pattern='sparse')
print('do svd')
sv <- my.svd(z, qdim, qdim, libsvdc.path=libsvdc.path, sparse.path=tmpmat)
#END NEW SVD
- } else {
+ } else if (svd.method == 'svdR') {
print('start R svd')
sv <- svd(z, nu = qdim, nv = qdim)
print('end svd')
+ } else if (svd.method == 'irlba') {
+ print('irlba')
+ # pass nu/nv by name so the call does not depend on their positional order
+ sv <- irlba(z, nu = qdim, nv = qdim)
+ print('end irlba')
+ } else {
+ # fail early with a clear message instead of using an undefined 'sv' below
+ stop(paste('unknown svd.method:', svd.method))
}
sigmavec <- (sv$d)[2:qdim]
x <- ((sv$u)/sqrt(r))[, -1]
} else {
poids2<-poids1
}
-
+
+ print('croisement classif')
croise=matrix(ncol=tcl,nrow=tcl)
#production du tableau de contingence
for (i in 1:ncol(classeuce1)) {
#tabcolnames<-c(tabcolnames[(length(tabcolnames)-1)],tabcolnames[length(tabcolnames)])
tabrownames<-as.numeric(rownames(tablecroise))
#tabrownames<-c(tabrownames[(length(tabrownames)-1)],tabrownames[length(tabrownames)])
- for (k in (ncol(tablecroise)-1):ncol(tablecroise)) {
- for (l in (nrow(tablecroise)-1):nrow(tablecroise)) {
- croise[(tabrownames[l]-1),(tabcolnames[k]-1)]<-tablecroise[l,k]
+ for (k in (ncol(tablecroise)-1):ncol(tablecroise)) {
+ for (l in (nrow(tablecroise)-1):nrow(tablecroise)) {
+ croise[(tabrownames[l]-1),(tabcolnames[k]-1)]<-tablecroise[l,k]
+ }
}
- }
}
tablecroise
}
if (classif_mode == 0) {ind <- (nbcl * 2)} else {ind <- nbcl}
if (mincl==0){
mincl<-round(nrow(classeuce1)/ind)
- }#valeur a calculer nbuce/20
+ }
if (mincl<3){
mincl<-3
}
chicroise<-croise
for (i in 1:nrow(croise)) {
for (j in 1:ncol(croise)) {
- if (croise[i,j]==0) {
- chicroise[i,j]<-0
- } else if (croise[i,j]<mincl) {
- chicroise[i,j]<-0
- } else {
- chitable<-matrix(ncol=2,nrow=2)
- chitable[1,1]<-croise[i,j]
- chitable[1,2]<-poids1[i]-chitable[1,1]
- chitable[2,1]<-poids2[j]-chitable[1,1]
- chitable[2,2]<-nrow(classeuce1)-poids2[j]-chitable[1,2]
- chitest<-chisq.test(chitable,correct=FALSE)
- if ((chitable[1,1]-chitest$expected)<0) {
- chicroise[i,j]<--round(chitest$statistic,digits=7)
- } else {
- chicroise[i,j]<-round(chitest$statistic,digits=7)
+ if (croise[i,j]==0) {
+ chicroise[i,j]<-0
+ } else if (croise[i,j]<mincl) {
+ chicroise[i,j]<-0
+ } else {
+ chitable<-matrix(ncol=2,nrow=2)
+ chitable[1,1]<-croise[i,j]
+ chitable[1,2]<-poids1[i]-chitable[1,1]
+ chitable[2,1]<-poids2[j]-chitable[1,1]
+ chitable[2,2]<-nrow(classeuce1)-poids2[j]-chitable[1,2]
+ chitest<-chisq.test(chitable,correct=FALSE)
+ if ((chitable[1,1]-chitest$expected)<0) {
+ chicroise[i,j]<--round(chitest$statistic,digits=7)
+ } else {
+ chicroise[i,j]<-round(chitest$statistic,digits=7)
#print(chitest)
- }
- }
+ }
+ }
}
}
#print(chicroise)
}
testpres<-function(x,listcoord) {
for (i in 1:length(listcoord)) {
- if (x==listcoord[i]) {
- return(-1)
- } else {
- a<-1
- }
+ if (x==listcoord[i]) {
+ return(-1)
+ } else {
+ a<-1
+ }
}
a
}
listyp<-listy
listxp<-listx[first:length(listx)]
listxp<-c(listxp,listx[1:(first-1)])
- # listxp<-listxp[-first]
listyp<-listy[first:length(listy)]
listyp<-c(listyp,listy[1:(first-1)])
- # listyp<-listyp[-first]
for (i in 1:length(listxp)) {
- if (!(listxp[i]+1)%in%fillemere1) {
- if (!(listyp[i]+1)%in%fillemere2) {
- coordok<-rbind(coordok,c(listyp[i]+1,listxp[i]+1))
- fillemere1<-c(fillemere1,trouvefillemere(listxp[i]+1,chd2$n1))
- fillemere2<-c(fillemere2,trouvefillemere(listyp[i]+1,chd1$n1))
- }
+ if (!(listxp[i]+1)%in%fillemere1) {
+ if (!(listyp[i]+1)%in%fillemere2) {
+ coordok<-rbind(coordok,c(listyp[i]+1,listxp[i]+1))
+ fillemere1<-c(fillemere1,trouvefillemere(listxp[i]+1,chd2$n1))
+ fillemere2<-c(fillemere2,trouvefillemere(listyp[i]+1,chd1$n1))
+ }
}
}
coordok
#si plusieurs ensemble avec le meme nombre de classe, on conserve
#la liste avec le plus fort chi2
if (length(listcoordok)>1) {
- maxchi<-0
- best<-NULL
- for (i in 1:length(listcoordok)) {
- chi<-NULL
- uce<-NULL
- if (nrow(listcoordok[[i]])==maxcl) {
- for (j in 1:nrow(listcoordok[[i]])) {
- chi<-c(chi,croise[(listcoordok[[i]][j,1]-1),(listcoordok[[i]][j,2]-1)])
- uce<-c(uce,chicroiseori[(listcoordok[[i]][j,1]-1),(listcoordok[[i]][j,2]-1)])
+ maxchi<-0
+ best<-NULL
+ for (i in 1:length(listcoordok)) {
+ chi<-NULL
+ uce<-NULL
+ if (nrow(listcoordok[[i]])==maxcl) {
+ for (j in 1:nrow(listcoordok[[i]])) {
+ chi<-c(chi,croise[(listcoordok[[i]][j,1]-1),(listcoordok[[i]][j,2]-1)])
+ uce<-c(uce,chicroiseori[(listcoordok[[i]][j,1]-1),(listcoordok[[i]][j,2]-1)])
+ }
+ if (maxchi < sum(chi)) {
+ maxchi <- sum(chi)
+ suce <- sum(uce)
+ best <- i
+ }
}
- if (maxchi < sum(chi)) {
- maxchi<-sum(chi)
- suce<-sum(uce)
- best<-i
- }
}
- }
}
print((suce/nrow(classeuce1)*100))
listcoordok[[best]]
nchd2[which(nchd1[,ncol(nchd1)]!=nchd2[,ncol(nchd2)]),] <- 0
nchd1[which(nchd2[,ncol(nchd2)]==0),] <- 0
-# for (i in 1:nrow(nchd1)) {
-# if (nchd1[i,ncol(nchd1)]==0) {
-# nchd2[i,]<-nchd2[i,]*0
-# }
-# if (nchd1[i,ncol(nchd1)]!=nchd2[i,ncol(nchd2)]) {
-# nchd2[i,]<-nchd2[i,]*0
-# }
-# if (nchd2[i,ncol(nchd2)]==0) {
-# nchd1[i,]<-nchd1[i,]*0
-# }
-# }
print('fini croise')
elim<-which(nchd1[,ncol(nchd1)]==0)
keep<-which(nchd1[,ncol(nchd1)]!=0)
parametres['nbcl_p1'] = self.dial.spin_nbcl.GetValue()
parametres['max_actives'] = self.dial.spin_max_actives.GetValue()
parametres['corpus'] = ''
+ parametres['svdmethod'] = self.dial.svdmethod[self.dial.choicesvd.GetSelection()]
parametres['pathout'] = self.pathout.dirout
+ parametres['mode.patate'] = self.dial.check_patate.GetValue()
DoConf(self.parent.ConfigPath['alceste']).makeoptions(['ALCESTE'], [parametres])
self.dial.Destroy()
+ print parametres
return parametres
else :
self.dial.Destroy()
return None
def printRscript(self) :
- RchdTxt(self.pathout, self.parent.RscriptsPath, self.parametres['mincl'], self.parametres['classif_mode'], nbt = self.parametres['nbcl_p1'] - 1, libsvdc = self.parent.pref.getboolean('iramuteq','libsvdc'), libsvdc_path = self.parent.pref.get('iramuteq','libsvdc_path'), R_max_mem = False)
+ RchdTxt(self.pathout, self.parent.RscriptsPath, self.parametres['mincl'], self.parametres['classif_mode'], nbt = self.parametres['nbcl_p1'] - 1, svdmethod = self.parametres['svdmethod'], libsvdc = self.parent.pref.getboolean('iramuteq','libsvdc'), libsvdc_path = self.parent.pref.get('iramuteq','libsvdc_path'), R_max_mem = False, mode_patate = self.parametres['mode.patate'])
return self.pathout['Rchdtxt']
def printRscript2(self) :
dlg.Destroy()
def CheckRPackages(self):
- listdep = ['ca', 'gee', 'ape', 'igraph','proxy', 'wordcloud', 'textometrieR']
+ listdep = ['ca', 'gee', 'ape', 'igraph','proxy', 'wordcloud', 'irlba', 'textometrieR']
nolib = []
i=0
dlg = wx.ProgressDialog("Test des librairies de R", "test en cours...", maximum = len(listdep), parent=self, style=wx.PD_APP_MODAL | wx.PD_AUTO_HIDE | wx.PD_ELAPSED_TIME | wx.PD_CAN_ABORT)
expressions = True
#nbre de classe terminale de la phase 1
nbcl_p1 = 10
+#methode pour svd
+svdmethod = irlba
+#mode patate (+ rapide et - precis)
+mode.patate = 0
\r
[IMAGE]\r
#non utilise
if error[1] is None :
error[1] = 'None'
parent.Rerror = '\n'.join([str(pid.returncode), '\n'.join(error)])
- #try :
- #raise Exception('\n'.join([u'Erreur R', '\n'.join(error[1:])]))
- BugReport(parent)
+ try :
+ raise Exception('\n'.join([u'Erreur R', '\n'.join(error[1:])]))
+ except :
+ BugReport(parent)
return False
- #except :
- # BugReport(parent)
else :
return True
else :
if pid != 0 :
- BugReport(parent)
+ try :
+ raise Exception(u'Erreur R')
+ except :
+ BugReport(parent)
return False
- #try :
- #raise Exception(u'Erreur R')
- #return False
- #except :
- # BugReport(parent)
else :
return True
if self.SysEncoding == 'mac-roman' : self.SysEncoding = 'MacRoman'
self.type = ''
+##############################################################@
+ self.DisEnSaveTabAs(False)
+ self.ShowMenu(_("View"), False)
+ self.ShowMenu(_("Spreadsheet analysis"), False)
+ self.ShowMenu(_("Text analysis"), False)
+
+ self._mgr.Update()
+
+ self.DataPop = False
+ self.DataTxt = False
+ self.Text = ''
+
+ self.lexique = None
+ self.corpus = None
+
+ def finish_init(self) :
try :
self.pref.read(self.ConfigPath['preferences'])
if IsNew(self) :
UpgradeConf(self)
self.pref.read(self.ConfigPath['preferences'])
New = True
+ self.sound = self.pref.getboolean('iramuteq', 'sound')
+ self.check_update = self.pref.getboolean('iramuteq', 'checkupdate')
+ self.version = ConfigGlob.get('DEFAULT', 'version')
#configuration des chemins de R
self.PathPath = ConfigParser()
self.PathPath.read(ConfigPath['path'])
if dlg.ShowModal() in [wx.ID_NO, wx.ID_CANCEL]:
evt.Veto()
dlg.Destroy()
- self.DataPop = False
- self.DataTxt = False
- self.Text = ''
- self.sound = self.pref.getboolean('iramuteq', 'sound')
- self.check_update = self.pref.getboolean('iramuteq', 'checkupdate')
- self.version = ConfigGlob.get('DEFAULT', 'version')
- self.lexique = None
- self.corpus = None
-##############################################################@
- self.DisEnSaveTabAs(False)
- self.ShowMenu(_("View"), False)
- self.ShowMenu(_("Spreadsheet analysis"), False)
- self.ShowMenu(_("Text analysis"), False)
-
- self._mgr.Update()
+
def OnVerif(self, evt) :
pack = CheckRPackages(self)
def ShowMain(self):
frame = IraFrame(None, -1, "IRaMuTeQ " + ConfigGlob.get('DEFAULT', 'version'), size=(1100, 800))
frame.Show()
+ frame.finish_init()
frame.Upgrade()
frame.OnOpenFromCmdl()
# if self.fc.IsRunning():
"""
if not self.paramsimi['keep_coord'] :
txt += """
- cn <- read.table("%s", sep=';', quote='"')
+ cn <- read.csv("%s", sep='\t', quote='"', header = FALSE)
colnames(dm) <- cn[,1]
#colnames(dml) <- cn[,1]
""" % ffr(active_file)