X-Git-Url: http://iramuteq.org/git?p=iramuteq;a=blobdiff_plain;f=Rscripts%2Fchdtxt.R;h=0a6350a1b8d3620cb783495cf9cd31f7bcabf6bc;hp=436d0ba917e1c056b642f41a1b5a519554e122d5;hb=5f12c0323a321a9fc4f09f3720d77ce0d65d1c3a;hpb=42a67a41b64a6e0cc3fd2a63a0749e9aa4b9374c diff --git a/Rscripts/chdtxt.R b/Rscripts/chdtxt.R index 436d0ba..0a6350a 100644 --- a/Rscripts/chdtxt.R +++ b/Rscripts/chdtxt.R @@ -1,6 +1,6 @@ #Author: Pierre Ratinaud #Copyright (c) 2008-2009 Pierre Ratinaud -#Lisense: GNU/GPL +#License: GNU/GPL #fonction pour la double classification @@ -60,20 +60,21 @@ nlf } getfille <- function(nlf, classe, pf) { - if (length(nlf[[classe]]) == 1) { + if (!length(nlf[[classe]])) { return(pf) } else { - pf <- c(pf, nlf[[classe]]) - c1 <- nlf[[classe]][1] - c2 <- nlf[[classe]][2] - pf <- getfille(nlf, c1, pf) - pf <- getfille(nlf, c2, pf) - } + for (cl in nlf[[classe]]) { + pf <- c(pf, cl) + if (cl <= length(nlf)) { + pf <- getfille(nlf, cl, pf) + } + } + } return(pf) } getmere <- function(list_mere, classe) { - i <- classe + i <- as.numeric(classe) pf <- NULL while (i != 1 ) { pf <- c(pf, list_mere[[i]]) @@ -86,6 +87,117 @@ getfillemere <- function(list_fille, list_mere, classe) { return(c(getfille(list_fille, classe, NULL), getmere(list_mere, classe))) } +getlength <- function(n1, clnb) { + colnb <- (clnb %/%2) + tab <- table(n1[,colnb]) + eff <- tab[which(names(tab) == as.character(clnb))] + return(eff) +} + + +find.terminales <- function(n1, list_mere, list_fille, mincl) { + tab <- table(n1[,ncol(n1)]) + clnames <- rownames(tab) + terminales <- clnames[which(tab >= mincl)] + tocheck <- setdiff(clnames, terminales) + if ("0" %in% terminales) { + terminales <- terminales[which(terminales != 0)] + } + if (length(terminales) == 0) { + return('no clusters') + } + if ("0" %in% tocheck) { + tocheck <- tocheck[which(tocheck != "0")] + } + print(terminales) + print(tocheck) + while (length(tocheck)!=0) { + for (val in tocheck) { + print(val) + mere <- list_mere[[as.numeric(val)]] + print('mere') + print(mere) + if (mere != 1) { + ln.mere <- getlength(n1, mere) + print('ln.mere') + print(ln.mere) + filles.mere <- getfille(list_fille, mere, NULL) + print('fille mere') + print(filles.mere) + filles.mere <- filles.mere[which(filles.mere != val)] + print(filles.mere) + if ((ln.mere >= mincl) & (length(intersect(filles.mere, tocheck)) == 0) & (length(intersect(filles.mere, terminales)) == 0 )) { + print('mere ok') + terminales <- c(terminales, mere) + for (f in c(filles.mere, val, mere)) { + tocheck <- tocheck[which(tocheck != f)] + } + } else if ((ln.mere >= mincl) & (length(intersect(filles.mere, terminales)) == 0) & (length(intersect(filles.mere, tocheck))!=0)){ + print('mere a checke cause fille ds tocheck') + tocheck <- tocheck[which(tocheck != val)] + tocheck <- c(mere, tocheck) + + } else { + print('pas ok on vire du check') + tocheck <- tocheck[which(tocheck != val)] + } + } else { + print('mere == 1') + tocheck <- tocheck[which(tocheck != val)] + } + print('tocheck') + print(tocheck) + } + print(tocheck) + } + terminales +} + +make.classes <- function(terminales, n1, tree, lf) { + term.n1 <- unique(n1[,ncol(n1)]) + tree.tip <- tree$tip.label + cl.n1 <- n1[,ncol(n1)] + classes <- rep(NA, nrow(n1)) + cl.names <- 1:length(terminales) + new.cl <- list() + for (i in 1:length(terminales)) { + if (terminales[i] %in% term.n1) { + classes[which(cl.n1==terminales[i])] <- cl.names[i] + new.cl[[terminales[i]]] <- cl.names[i] + tree.tip[which(tree.tip==terminales[i])] <- paste('a', cl.names[i], sep='') + } else { + filles <- getfille(lf, as.numeric(terminales[i]), NULL) + tochange <- intersect(filles, term.n1) + for (cl in tochange) { + classes[which(cl.n1==cl)] <- cl.names[i] + new.cl[[cl]] <- cl.names[i] + tree.tip[which(tree.tip==cl)] <- paste('a', cl.names[i], sep='') + } + } + } + make.tip <- function(x) { + if (substring(x,1,1)=='a') { + return(substring(x,2)) + } else { + return(0) + } + } + tree$tip.label <- tree.tip + ntree <- tree + tree$tip.label <- sapply(tree.tip, make.tip) + tovire <- sapply(tree.tip, function(x) {substring(x,1,1)!='a'}) + tovire <- which(tovire) + ntree <- drop.tip(ntree, tip=tovire) + en.double <- ntree$tip.label[duplicated(ntree$tip.label)] + en.double <- unique(en.double) + tovire <- sapply(en.double, function(x) {which(ntree$tip.label == x)[1]}) + ntree <- drop.tip(ntree, tip=tovire) + ntree$tip.label <- sapply(ntree$tip.label, function(x) {substring(x,2)}) + classes[which(is.na(classes))] <- 0 + res <- list(dendro_tot_cl = tree, tree.cl = ntree, n1=as.matrix(classes)) + res +} + #nbt nbcl = nbt+1 tcl=((nbt+1) *2) - 2 n1[,ncol(n1)], nchd1[,ncol(nchd1)] Rchdtxt<-function(uceout, chd1, chd2 = NULL, mincl=0, classif_mode=0, nbt = 9) { #FIXME: le nombre de classe peut etre inferieur @@ -95,9 +207,10 @@ Rchdtxt<-function(uceout, chd1, chd2 = NULL, mincl=0, classif_mode=0, nbt = 9) { classeuce1<-AssignClasseToUce(listuce1,chd1$n1) if (classif_mode==0) { classeuce2<-AssignClasseToUce(listuce2,chd2$n1) - } else { - classeuce2<-classeuce1 - } + } + #} else { + # classeuce2<-classeuce1 + #} #calcul des poids (effectifs) @@ -129,9 +242,9 @@ Rchdtxt<-function(uceout, chd1, chd2 = NULL, mincl=0, classif_mode=0, nbt = 9) { if (classif_mode==0) { poids2<-vector(mode='integer',length = tcl) poids2<-makepoids(classeuce2,poids2) - } else { - poids2<-poids1 - } + }# else { + # poids2<-poids1 + #} print('croisement classif') @@ -156,14 +269,19 @@ Rchdtxt<-function(uceout, chd1, chd2 = NULL, mincl=0, classif_mode=0, nbt = 9) { # } # croise # } - croise <- croiseeff( matrix(ncol=tcl,nrow=tcl), classeuce1, classeuce2) + if (classif_mode==0) { + croise <- croiseeff( matrix(ncol=tcl,nrow=tcl), classeuce1, classeuce2) + } else { + croise <- croiseeff( matrix(ncol=tcl,nrow=tcl), classeuce1, classeuce1) + } + #print(croise) if (classif_mode == 0) {ind <- (nbcl * 2)} else {ind <- nbcl} if (mincl==0){ mincl<-round(nrow(classeuce1)/ind) } - if (mincl<3){ - mincl<-3 - } + #if (mincl<3){ + # mincl<-3 + #} print(mincl) #print('table1') #print(croise) @@ -216,7 +334,39 @@ Rchdtxt<-function(uceout, chd1, chd2 = NULL, mincl=0, classif_mode=0, nbt = 9) { } chicroise } - chicroise <- dochicroise(croise, mincl) + + dochicroisesimple <- function(croise, mincl) { + chicroise <- croise + for (i in 1:nrow(croise)) { + for (j in 1:ncol(croise)) { + if (croise[i,j]==0) { + chicroise[i,j]<-0 + } else if (croise[i,j] maxcl) { +# nb <- 1 +# # listcoordok <- list() +# listcoordok[[nb]] <- coordok +# # maxcl <- nrow(coordok) +# #} else if (nrow(coordok) == maxcl) { +# nb <- nb + 1 +# # listcoordok[[nb]] <- coordok +# #} +# } +# } + #print(listcoordok) listcoordok <- unique(listcoordok) print(listcoordok) best <- 1 @@ -384,8 +582,8 @@ Rchdtxt<-function(uceout, chd1, chd2 = NULL, mincl=0, classif_mode=0, nbt = 9) { chi <- NULL uce <- NULL for (j in 1:nrow(listcoordok[[i]])) { - chi<-c(chi,croise[(listcoordok[[i]][j,1]-1),(listcoordok[[i]][j,2]-1)]) - uce<-c(uce,chicroiseori[(listcoordok[[i]][j,1]-1),(listcoordok[[i]][j,2]-1)]) + chi<-c(chi,chicroiseori[(listcoordok[[i]][j,1]-1),(listcoordok[[i]][j,2]-1)]) + uce<-c(uce,croise[(listcoordok[[i]][j,1]-1),(listcoordok[[i]][j,2]-1)]) } if (maxchi < sum(chi)) { maxchi <- sum(chi) @@ -437,7 +635,7 @@ Rchdtxt<-function(uceout, chd1, chd2 = NULL, mincl=0, classif_mode=0, nbt = 9) { } } listcoordok<-unique(listcoordok) - print(listcoordok) + #print(listcoordok) #si plusieurs ensemble avec le meme nombre de classe, on conserve #la liste avec le plus fort chi2 if (length(listcoordok)>1) { @@ -469,7 +667,8 @@ Rchdtxt<-function(uceout, chd1, chd2 = NULL, mincl=0, classif_mode=0, nbt = 9) { #findmaxclasse(listx,listy) #coordok<-trouvecoordok(1) #coordok <- oldfindbestcoord(listx, listy) - coordok <- findbestcoord(listx, listy) + print('begin bestcoord') + coordok <- findbestcoord(listx, listy, classif_mode, nbt) lfilletot<-function(classeuce,x) { @@ -481,7 +680,9 @@ Rchdtxt<-function(uceout, chd1, chd2 = NULL, mincl=0, classif_mode=0, nbt = 9) { } print('listfille') listfille1<-lfilletot(classeuce1,1) - listfille2<-lfilletot(classeuce2,2) + if (classif_mode == 0) { + listfille2<-lfilletot(classeuce2,2) + } #utiliser rownames comme coordonnees dans un tableau de 0 Assignclasse<-function(classeuce,x) { @@ -497,20 +698,24 @@ Rchdtxt<-function(uceout, chd1, chd2 = NULL, mincl=0, classif_mode=0, nbt = 9) { nchd1<-Assignclasse(classeuce1,1) if (classif_mode==0) { nchd2<-Assignclasse(classeuce2,2) - } else { - nchd2<-nchd1 - } + } print('fini assign new classe') #croisep<-matrix(ncol=nrow(coordok),nrow=nrow(coordok)) - nchd2[which(nchd1[,ncol(nchd1)]==0),] <- 0 - nchd2[which(nchd1[,ncol(nchd1)]!=nchd2[,ncol(nchd2)]),] <- 0 - nchd1[which(nchd2[,ncol(nchd2)]==0),] <- 0 + if (classif_mode==0) { + nchd2[which(nchd1[,ncol(nchd1)]==0),] <- 0 + nchd2[which(nchd1[,ncol(nchd1)]!=nchd2[,ncol(nchd2)]),] <- 0 + nchd1[which(nchd2[,ncol(nchd2)]==0),] <- 0 + } print('fini croise') elim<-which(nchd1[,ncol(nchd1)]==0) keep<-which(nchd1[,ncol(nchd1)]!=0) n1<-nchd1[nchd1[,ncol(nchd1)]!=0,] - n2<-nchd2[nchd2[,ncol(nchd2)]!=0,] + if (classif_mode==0) { + n2<-nchd2[nchd2[,ncol(nchd2)]!=0,] + } else { + classeuce2 <- NULL + } #clnb<-nrow(coordok) print('fini') write.csv2(nchd1[,ncol(nchd1)],uceout)