From 13666be5de5eeffbe63774c3c0aecd407b519ac6 Mon Sep 17 00:00:00 2001 From: Pierre Date: Sun, 16 Dec 2012 18:04:57 +0100 Subject: [PATCH] AFC --- PrintRScript.py | 8 +- Rscripts/CHD.R | 206 ++++++++++++++++++++++++++------------------------- Rscripts/Rgraph.R | 21 ++++-- Rscripts/afc_graph.R | 134 +++++++++++++++++++++++++++++---- dialog.py | 57 +++++++++++--- iramuteq.py | 2 +- layout.py | 10 +++ 7 files changed, 303 insertions(+), 135 deletions(-) diff --git a/PrintRScript.py b/PrintRScript.py index f4e20a1..36fd860 100644 --- a/PrintRScript.py +++ b/PrintRScript.py @@ -148,8 +148,9 @@ def RchdTxt(DicoPath, RscriptPath, mincl, classif_mode, nbt = 9, libsvdc = False data2 <- as(data2, "dgCMatrix") row.names(data2) <- 1:nrow(data2) """ % DicoPath['TableUc2'] + log.info('ATTENTION ############# MODEPATATE ####################') txt += """ - chd1<-CHD(data1, x = nbt, libsvdc = libsvdc, libsvdc.path = libsvdc.path) + chd1<-CHD(data1, x = nbt, mode.patate = TRUE, libsvdc = libsvdc, libsvdc.path = libsvdc.path) """ if classif_mode == 0: @@ -417,6 +418,9 @@ def write_afc_graph(self): if self.param['do_select_chi'] : do_select_chi = 'TRUE' else : do_select_chi = 'FALSE' + if self.param['do_select_chi_classe'] : do_select_chi_classe = 'TRUE' + else : do_select_chi_classe = 'FALSE' + if self.param['cex_txt'] : cex_txt = 'TRUE' else : cex_txt = 'FALSE' @@ -438,6 +442,8 @@ def write_afc_graph(self): self.param['select_nb'], \ do_select_chi, \ self.param['select_chi'], \ + do_select_chi_classe, \ + self.param['nbchic'], \ cex_txt, \ self.param['txt_min'], \ self.param['txt_max'], \ diff --git a/Rscripts/CHD.R b/Rscripts/CHD.R index fcec03f..049d5c7 100644 --- a/Rscripts/CHD.R +++ b/Rscripts/CHD.R @@ -41,7 +41,7 @@ find.max <- function(dtable, chitable, compte, rmax, maxinter, sc, TT) { res } -CHD<-function(data.in, x=9, libsvdc=FALSE, libsvdc.path=NULL){ +CHD<-function(data.in, x=9, mode.patate = FALSE, libsvdc=FALSE, libsvdc.path=NULL){ # sink('/home/pierre/workspace/iramuteq/dev/findchi2.txt') dataori <- data.in row.names(dataori) <- rownames(data.in) @@ -120,107 +120,109 @@ CHD<-function(data.in, x=9, libsvdc=FALSE, libsvdc.path=NULL){ ################################################################### # reclassement des individus # ################################################################### - malcl<-1000000000000 - it<-0 - listsub<-list() - #in boucle - ln <- which(dtable==1, arr.ind=TRUE) - lnz <- list() - lnz[1:nrow(dtable)] <- 0 - - for (k in 1:nrow(ln)) {lnz[[ln[k,1]]]<-append(lnz[[ln[k,1]]],ln[k,2])} - for (k in 1:nrow(dtable)) {lnz[[k]] <- lnz[[k]][-1]} - TT<-sum(dtable) - - while (malcl!=0 & N1>=5 & N2>=5) { - it<-it+1 - listsub[[it]]<-vector() - txt <- paste('nombre iteration', it) - #pp('nombre iteration',it) - vdelta<-vector() - #dtable[,'cl']<-cl - t1<-dtable[which(cl[,1]==clnb),]#[,-ncol(dtable)] - t2<-dtable[which(cl[,1]==clnb+1),]#[,-ncol(dtable)] - ncolt<-ncol(t1) - #pp('ncolt',ncolt) - - if (N1 != 1) { - sc1<-colSums(t1) - } else { - sc1 <- t1 - } - if (N2 != 1) { - sc2<-colSums(t2) - } else { - sc2 <- t2 - } - - sc<-sc1+sc2 - chtableori<-rbind(sc1,sc2) - chtable<-chtableori - interori<-MyChiSq(chtableori,sc,TT)/TT#chisq.test(chtableori)$statistic#/TT - txt <- paste(txt, ' - interori : ',interori) - #pp('interori',interori) - - N1<-nrow(t1) - N2<-nrow(t2) - - #pp('N1',N1) - #pp('N2',N2) - txt <- paste(txt, 'N1:', N1,'-N2:',N2) - print(txt) - compte <- 0 - for (l in lnz){ - chi.in<-chtable - compte <- compte + 1 - if(cl[compte]==clnb){ - chtable[1,l]<-chtable[1,l]-1 - chtable[2,l]<-chtable[2,l]+1 - }else{ - chtable[1,l]<-chtable[1,l]+1 - chtable[2,l]<-chtable[2,l]-1 - } - interswitch<-MyChiSq(chtable,sc,TT)/TT#chisq.test(chtable)$statistic/TT - ws<-interori-interswitch - - if (ws<0){ - interori<-interswitch - if(cl[compte]==clnb){ - #sc1<-chtable[1,] - #sc2<-chtable[2,] - cl[compte]<-clnb+1 - listsub[[it]]<-append(listsub[[it]],compte) - } else { - #sc1<-chtable[1,] - #sc2<-chtable[2,] - cl[compte]<-clnb - listsub[[it]]<-append(listsub[[it]],compte) - } - vdelta<-append(vdelta,compte) - } else { - chtable<-chi.in - } - } -# for (val in vdelta) { -# if (cl[val]==clnb) { -# cl[val]<-clnb+1 -# listsub[[it]]<-append(listsub[[it]],val) -# }else { -# cl[val]<-clnb -# listsub[[it]]<-append(listsub[[it]],val) -# } -# } - print('###################################') - print('longueur < 0') - malcl<-length(vdelta) - if ((it>1)&&(!is.logical(listsub[[it]]))&&(!is.logical(listsub[[it-1]]))){ - if (listsub[[it]]==listsub[[(it-1)]]){ - malcl<-0 - } - } - print(malcl) - print('###################################') - } + if (!mode.patate) { + malcl<-1000000000000 + it<-0 + listsub<-list() + #in boucle + ln <- which(dtable==1, arr.ind=TRUE) + lnz <- list() + lnz[1:nrow(dtable)] <- 0 + + for (k in 1:nrow(ln)) {lnz[[ln[k,1]]]<-append(lnz[[ln[k,1]]],ln[k,2])} + for (k in 1:nrow(dtable)) {lnz[[k]] <- lnz[[k]][-1]} + TT<-sum(dtable) + + while (malcl!=0 & N1>=5 & N2>=5) { + it<-it+1 + listsub[[it]]<-vector() + txt <- paste('nombre iteration', it) + #pp('nombre iteration',it) + vdelta<-vector() + #dtable[,'cl']<-cl + t1<-dtable[which(cl[,1]==clnb),]#[,-ncol(dtable)] + t2<-dtable[which(cl[,1]==clnb+1),]#[,-ncol(dtable)] + ncolt<-ncol(t1) + #pp('ncolt',ncolt) + + if (N1 != 1) { + sc1<-colSums(t1) + } else { + sc1 <- t1 + } + if (N2 != 1) { + sc2<-colSums(t2) + } else { + sc2 <- t2 + } + + sc<-sc1+sc2 + chtableori<-rbind(sc1,sc2) + chtable<-chtableori + interori<-MyChiSq(chtableori,sc,TT)/TT#chisq.test(chtableori)$statistic#/TT + txt <- paste(txt, ' - interori : ',interori) + #pp('interori',interori) + + N1<-nrow(t1) + N2<-nrow(t2) + + #pp('N1',N1) + #pp('N2',N2) + txt <- paste(txt, 'N1:', N1,'-N2:',N2) + print(txt) + compte <- 0 + for (l in lnz){ + chi.in<-chtable + compte <- compte + 1 + if(cl[compte]==clnb){ + chtable[1,l]<-chtable[1,l]-1 + chtable[2,l]<-chtable[2,l]+1 + }else{ + chtable[1,l]<-chtable[1,l]+1 + chtable[2,l]<-chtable[2,l]-1 + } + interswitch<-MyChiSq(chtable,sc,TT)/TT#chisq.test(chtable)$statistic/TT + ws<-interori-interswitch + + if (ws<0){ + interori<-interswitch + if(cl[compte]==clnb){ + #sc1<-chtable[1,] + #sc2<-chtable[2,] + cl[compte]<-clnb+1 + listsub[[it]]<-append(listsub[[it]],compte) + } else { + #sc1<-chtable[1,] + #sc2<-chtable[2,] + cl[compte]<-clnb + listsub[[it]]<-append(listsub[[it]],compte) + } + vdelta<-append(vdelta,compte) + } else { + chtable<-chi.in + } + } + # for (val in vdelta) { + # if (cl[val]==clnb) { + # cl[val]<-clnb+1 + # listsub[[it]]<-append(listsub[[it]],val) + # }else { + # cl[val]<-clnb + # listsub[[it]]<-append(listsub[[it]],val) + # } + # } + print('###################################') + print('longueur < 0') + malcl<-length(vdelta) + if ((it>1)&&(!is.logical(listsub[[it]]))&&(!is.logical(listsub[[it-1]]))){ + if (listsub[[it]]==listsub[[(it-1)]]){ + malcl<-0 + } + } + print(malcl) + print('###################################') + } + } #dtable<-cbind(dtable,'cl'=as.vector(cl)) #dtable[,'cl'] <-as.vector(cl) ####################################################################### diff --git a/Rscripts/Rgraph.R b/Rscripts/Rgraph.R index 061c1dd..1caf440 100644 --- a/Rscripts/Rgraph.R +++ b/Rscripts/Rgraph.R @@ -183,6 +183,15 @@ select_point_chi <- function(tablechi, chi_limit) { row_keep } +select.chi.classe <- function(tablechi, nb) { + rowkeep <- NULL + for (i in 1:ncol(tablechi)) { + rowkeep <- append(rowkeep,order(tablechi[,i], decreasing = TRUE)[1:nb]) + } + rowkeep <- unique(rowkeep) + rowkeep +} + #from summary.ca summary.ca.dm <- function(object, scree = TRUE, ...){ obj <- object @@ -296,7 +305,7 @@ create_afc_table <- function(x) { res } -make_afc_graph <- function(toplot,classes,clnb, xlab, ylab, cex.txt = NULL, leg = FALSE, cmd = FALSE, black = FALSE) { +make_afc_graph <- function(toplot, classes, clnb, xlab, ylab, cex.txt = NULL, leg = FALSE, cmd = FALSE, black = FALSE) { rain <- rainbow(clnb) compt <- 1 tochange <- NULL @@ -320,15 +329,11 @@ make_afc_graph <- function(toplot,classes,clnb, xlab, ylab, cex.txt = NULL, leg cl.color <- 'black' } plot(toplot[,1],toplot[,2], pch='', xlab = xlab, ylab = ylab) - abline(h=0,v=0, lty = 'dashed') - #print('ATTENTION Rgraph.R : utilisation de maptools !') - #library(maptools) + abline(h=0, v=0, lty = 'dashed') if (is.null(cex.txt)) - #pointLabel(toplot[,1],toplot[,2],rownames(toplot),col=cl.color) - text(toplot[,1],toplot[,2],rownames(toplot),col=cl.color) + text(toplot[,1],toplot[,2],rownames(toplot),col=cl.color, offset=0) else - #pointLabel(toplot[,1],toplot[,2],rownames(toplot),col=cl.color, cex = cex.txt) - text(toplot[,1],toplot[,2],rownames(toplot),col=cl.color, cex = cex.txt) + text(toplot[,1],toplot[,2],rownames(toplot),col=cl.color, cex = cex.txt, offset=0) if (!cmd) { dev.off() diff --git a/Rscripts/afc_graph.R b/Rscripts/afc_graph.R index 0c47dfa..cc70625 100644 --- a/Rscripts/afc_graph.R +++ b/Rscripts/afc_graph.R @@ -16,6 +16,8 @@ do.select.nb <- %s select.nb <- %i do.select.chi <- %s select.chi <- %i +do.select.chi.classe <- %s +ptbycluster <- %i cex.txt <- %s txt.min <- %i txt.max <- %i @@ -105,19 +107,19 @@ if ( qui == 3 ) { } } - if (over) { - rn <- rownames(table.in) - rownames(table.in) <- 1:nrow(table.in) - table.in <- unique(table.in) - rn.keep <- as.numeric(rownames(table.in)) - rownames(table.in) <- rn[rn.keep] - tablechi <- tablechi[rn.keep,] - if (qui==0) { - cex.par <- cex.par[rn.keep] - } else { - cex.par <- NULL - } - } +# if (over) { +# rn <- rownames(table.in) +# rownames(table.in) <- 1:nrow(table.in) +# table.in <- unique(table.in) +# rn.keep <- as.numeric(rownames(table.in)) +# rownames(table.in) <- rn[rn.keep] +# tablechi <- tablechi[rn.keep,] +# if (qui==0) { +# cex.par <- cex.par[rn.keep] +# } else { +# cex.par <- NULL +# } +# } if (do.select.nb) { if (select.nb > nrow(table.in)) select.nb <- nrow(table.in) row.keep <- select_point_nb(tablechi, select.nb) @@ -128,6 +130,10 @@ if ( qui == 3 ) { row.keep <- select_point_chi(tablechi, select.chi) table.in <- table.in[row.keep,] tablechi <- tablechi[row.keep,] + } else if (do.select.chi.classe) { + row.keep <- select.chi.classe(tablechi, ptbycluster) + table.in <- table.in[row.keep,] + tablechi <- tablechi[row.keep,] } else { row.keep <- 1:nrow(table.in) } @@ -147,11 +153,113 @@ if ( qui == 3 ) { } } +#################################################@@ +#from wordcloud +overlap <- function(x1, y1, sw1, sh1, boxes) { + use.r.layout <- FALSE + if(!use.r.layout) + return(.overlap(x1,y1,sw1,sh1,boxes)) + s <- 0 + if (length(boxes) == 0) + return(FALSE) + for (i in c(last,1:length(boxes))) { + bnds <- boxes[[i]] + x2 <- bnds[1] + y2 <- bnds[2] + sw2 <- bnds[3] + sh2 <- bnds[4] + if (x1 < x2) + overlap <- x1 + sw1 > x2-s + else + overlap <- x2 + sw2 > x1-s + + if (y1 < y2) + overlap <- overlap && (y1 + sh1 > y2-s) + else + overlap <- overlap && (y2 + sh2 > y1-s) + if(overlap){ + last <<- i + return(TRUE) + } + } + FALSE +} + +.overlap <- function(x11,y11,sw11,sh11,boxes1){ + .Call("is_overlap",x11,y11,sw11,sh11,boxes1) +} + +stopoverlap <- function(x, cex.par = NULL) { +#from wordcloud + library(wordcloud) + tails <- "g|j|p|q|y" + rot.per <- 0 + last <- 1 + thetaStep <- .1 + rStep <- .5 + toplot <- NULL + +# plot.new() + plot(x[,1],x[,2], pch='') + + words <- rownames(x) + if (is.null(cex.par)) { + size <- rep(0.9, nrow(x)) + } else { + size <- cex.par + } + #cols <- rainbow(clnb) + boxes <- list() + for (i in 1:nrow(x)) { + rotWord <- runif(1)sqrt(.5)){ + print(paste(words[i], "could not be fit on page. It will not be plotted.")) + isOverlaped <- FALSE + } + theta <- theta+thetaStep + r <- r + rStep*thetaStep/(2*pi) + x1 <- x[i,1]+r*cos(theta) + y1 <- x[i,2]+r*sin(theta) + } + } + } + row.names(toplot) <- words[toplot[,4]] + return(toplot) +} +############################################################################### + if (typegraph == 0) { open_file_graph(fileout, width = width, height = height) parcex <- taillecar/10 par(cex = parcex) + if (over) { + table.in <- stopoverlap(table.in, cex.par=cex.par) + classes <- classes[table.in[,4]] + cex.par <- cex.par[table.in[,4]] + } make_afc_graph(table.in, classes, clnb, xlab, ylab, cex.txt = cex.par) } else { diff --git a/dialog.py b/dialog.py index 1bdeedb..e97d19e 100755 --- a/dialog.py +++ b/dialog.py @@ -589,6 +589,10 @@ class PrefGraph(wx.Dialog): self.label_4 = wx.StaticText(self, -1, txt) self.check1 = wx.CheckBox(self, -1) self.spin_nb = wx.SpinCtrl(self, -1, '', size = (100,30), min=2, max=1000) + txt = u"""Prendre les x premiers points par classe""" + self.label_chic = wx.StaticText(self, -1, txt) + self.check_chic = wx.CheckBox(self, -1) + self.spin_nbchic = wx.SpinCtrl(self, -1, '', size = (100,30), min=2, max=1000) txt = u"""Limiter le nombre de points par le chi2 de liaison aux classes""" self.label_5 = wx.StaticText(self, -1, txt) @@ -643,6 +647,7 @@ au chi2 d'association de la forme""" self.Bind(wx.EVT_CHECKBOX, self.OnCheckTchi, self.check_tchi) self.Bind(wx.EVT_CHOICE, self.On3D, self.choicetype) self.Bind(wx.EVT_CHOICE, self.OnPass, self.choice2) + self.Bind(wx.EVT_CHECKBOX, self.OnCheckChic, self.check_chic) self.__set_properties() self.OnNorm(wx.EVT_CHECKBOX) self.OnCheckTchi(wx.EVT_CHECKBOX) @@ -663,14 +668,29 @@ au chi2 d'association de la forme""" self.spin3.SetValue(self.paramgraph['taillecar']) self.spin_nb.SetValue(self.paramgraph['select_nb']) self.spin_chi.SetValue(self.paramgraph['select_chi']) + self.spin_nbchic.SetValue(self.paramgraph['nbchic']) self.check1.SetValue(self.paramgraph['do_select_nb']) self.check2.SetValue(self.paramgraph['do_select_chi']) + self.check_chic.SetValue(self.paramgraph['do_select_chi_classe']) self.check3.SetValue(self.paramgraph['over']) if self.paramgraph['do_select_nb'] : self.spin_nb.Enable(True) self.spin_chi.Enable(False) + self.spin_nbchic.Enable(False) + elif self.paramgraph['do_select_chi_classe'] : + self.spin_nb.Enable(False) + self.spin_chi.Enable(False) + self.spin_nbchic.Enable(True) + elif self.paramgraph['do_select_chi'] : + self.spin_nb.Enable(False) + self.spin_chi.Enable(True) + self.spin_nbchic.Enable(False) else : self.spin_nb.Enable(False) + self.spin_chi.Enable(False) + self.spin_nbchic.Enable(False) + + self.check4.SetValue(self.paramgraph['cex_txt']) self.spin_min.SetValue(self.paramgraph['txt_min']) self.spin_max.SetValue(self.paramgraph['txt_max']) @@ -678,11 +698,6 @@ au chi2 d'association de la forme""" self.spin_min_tchi.SetValue(self.paramgraph['tchi_min']) self.spin_max_tchi.SetValue(self.paramgraph['tchi_max']) - if self.paramgraph['do_select_chi'] : - self.spin_nb.Enable(False) - self.spin_chi.Enable(True) - else : - self.spin_chi.Enable(False) self.spin_f1.SetValue(self.paramgraph['facteur'][0]) self.spin_f2.SetValue(self.paramgraph['facteur'][1]) self.spin_f3.SetValue(self.paramgraph['facteur'][2]) @@ -739,6 +754,14 @@ au chi2 d'association de la forme""" fsizer.Add(wx.StaticLine(self, -1), 0, wx.EXPAND, 0) fsizer.Add(wx.StaticLine(self, -1), 0, wx.EXPAND, 0) + fsizer.Add(self.label_chic, 0, wx.ALL | wx.ALIGN_LEFT | wx.ALIGN_CENTER_VERTICAL, 5) + sizer_nbchic = wx.BoxSizer(wx.HORIZONTAL) + sizer_nbchic.Add(self.check_chic, 0, wx.ALL | wx.ALIGN_RIGHT | wx.ALIGN_CENTER_VERTICAL, 5) + sizer_nbchic.Add(self.spin_nbchic, 0, wx.ALL | wx.ALIGN_RIGHT | wx.ALIGN_CENTER_VERTICAL, 5) + fsizer.Add(sizer_nbchic, 0, wx.ALL | wx.ALIGN_LEFT | wx.ALIGN_CENTER_VERTICAL, 5) + fsizer.Add(wx.StaticLine(self, -1), 0, wx.EXPAND, 0) + fsizer.Add(wx.StaticLine(self, -1), 0, wx.EXPAND, 0) + fsizer.Add(self.label_5, 0, wx.ALL | wx.ALIGN_LEFT | wx.ALIGN_CENTER_VERTICAL, 5) sizer_chi = wx.BoxSizer(wx.HORIZONTAL) sizer_chi.Add(self.check2, 0, wx.ALL | wx.ALIGN_LEFT | wx.ALIGN_CENTER_VERTICAL, 5) @@ -811,22 +834,36 @@ au chi2 d'association de la forme""" sizer_2.Fit(self) self.Layout() - def OnCheck1(self,event): + def OnCheck1(self, event): if self.check1.GetValue() : self.check2.SetValue(False) + self.check_chic.SetValue(False) self.spin_chi.Enable(False) self.spin_nb.Enable(True) + self.spin_nbchic.Enable(False) else : self.spin_nb.Enable(False) - def OnCheck2(self,event): + def OnCheck2(self, event): if self.check2.GetValue() : - self.check1.SetValue(False) - self.spin_nb.Enable(False) - self.spin_chi.Enable(True) + self.check1.SetValue(False) + self.check_chic.SetValue(False) + self.spin_chi.Enable(True) + self.spin_nb.Enable(False) + self.spin_nbchic.Enable(False) else : self.spin_chi.Enable(False) + def OnCheckChic(self, event) : + if self.check_chic.GetValue() : + self.check1.SetValue(False) + self.check2.SetValue(False) + self.spin_chi.Enable(False) + self.spin_nb.Enable(False) + self.spin_nbchic.Enable(True) + else : + self.spin_nbchic.Enable(False) + def OnNorm(self, event): if not self.check4.GetValue() : self.spin_min.Disable() diff --git a/iramuteq.py b/iramuteq.py index 699bde5..afe0966 100644 --- a/iramuteq.py +++ b/iramuteq.py @@ -1096,7 +1096,7 @@ class MySplashScreen(wx.SplashScreen): bmp = wx.Image(os.path.join(ImagePath, 'splash.png')).ConvertToBitmap() wx.SplashScreen.__init__(self, bmp, wx.SPLASH_CENTRE_ON_SCREEN | wx.SPLASH_TIMEOUT, - 3000, None, -1) + 1000, None, -1) self.Bind(wx.EVT_CLOSE, self.OnClose) self.fc = wx.FutureCall(1500, self.ShowMain) diff --git a/layout.py b/layout.py index 0211668..0e89647 100644 --- a/layout.py +++ b/layout.py @@ -65,8 +65,10 @@ class GraphPanelAfc(wx.Panel): 'qui' : 0, 'do_select_nb' : 0, 'do_select_chi' : 0, + 'do_select_chi_classe' : 0, 'select_nb' : 50, 'select_chi' : 4, + 'nbchic' : 30, 'over' : 0, 'cex_txt' : 0, 'txt_min' : 5, @@ -116,8 +118,10 @@ class GraphPanelAfc(wx.Panel): 'qui' : dial.choice2.GetSelection(), 'do_select_nb' : dial.check1.GetValue(), 'do_select_chi' : dial.check2.GetValue(), + 'do_select_chi_classe' : dial.check_chic.GetValue(), 'select_nb' : dial.spin_nb.GetValue(), 'select_chi' : dial.spin_chi.GetValue(), + 'nbchic' : dial.spin_nbchic.GetValue(), 'over' : dial.check3.GetValue(), 'cex_txt' : dial.check4.GetValue(), 'txt_min' : dial.spin_min.GetValue(), @@ -142,6 +146,12 @@ class GraphPanelAfc(wx.Panel): afc <- afcf afc_table <- afcf_table chistabletot <- specfp + infp <- which(is.infinite(chistabletot) & chistabletot > 0) + infm <- which(is.infinite(chistabletot) & chistabletot < 0) + chistabletot[infp] <- 0 + chistabletot[infm] <- 0 + chistabletot[infp] <- max(chistabletot) + 1 + chistabletot[infm] <- min(chistabletot) - 1 """ elif self.itempath == 'liste_graph_afct' : txt +=""" -- 2.7.4