X-Git-Url: http://iramuteq.org/git?p=iramuteq;a=blobdiff_plain;f=Rscripts%2Fchdfunct.R;h=a5034b6649562dbfb94786dbdb8785b418d90b0c;hp=a2dc502e3bf2c5bf161af037ec1554163abe724a;hb=43c6fca06a3c58a789548c4b306d54e03a94b1c4;hpb=7fb5b2b86f6c9a0617208ee85211177c23d12f47 diff --git a/Rscripts/chdfunct.R b/Rscripts/chdfunct.R index a2dc502..a5034b6 100644 --- a/Rscripts/chdfunct.R +++ b/Rscripts/chdfunct.R @@ -49,7 +49,7 @@ PrintProfile<- function(dataclasse,profileactlist,profileetlist,antiproact,antip cltot<-as.data.frame(as.character(cltot[,ncol(cltot)])) tot<-nrow(cltot) classes<-as.data.frame(as.character(dataclasse[,ncol(dataclasse)])) - classes.s<-as.data.frame(summary(cltot[,1],maxsum=500)) + classes.s<-as.data.frame(summary(as.factor(cltot[,1]),maxsum=500)) profile<-rbind(profile,c('***','nb classes',clusternb,'***','','')) antipro<-rbind(antipro,c('***','nb classes',clusternb,'***','','')) for(i in 1:clusternb) { @@ -98,12 +98,13 @@ AddCorrelationOk<-function(afc) { rowcoord<-afc$rowcoord colcoord<-afc$colcoord factor <- ncol(rowcoord) + hypo<-function(rowcoord,ligne) { somme<-0 for (i in 1:factor) { somme<-somme+(rowcoord[ligne,i])^2 } - sqrt(somme) + sqrt(somme) } cor<-function(d,hypo) { d/hypo @@ -117,8 +118,17 @@ AddCorrelationOk<-function(afc) { } out } - afc$rowcrl<-CompCrl(rowcoord) - afc$colcrl<-CompCrl(colcoord) + + get.corr <- function(rowcol) { + sqrowcol <- rowcol^2 + sqrowcol <- sqrt(rowSums(sqrowcol)) + corr <- rowcol / sqrowcol + corr + } + #afc$rowcrl<-CompCrl(rowcoord) + afc$rowcrl <- get.corr(rowcoord) + #afc$colcrl<-CompCrl(colcoord) + afc$colcrl<-get.corr(colcoord) afc } @@ -294,8 +304,8 @@ AsLexico2<- function(mat, chip = FALSE) { } make.spec.hypergeo <- function(mat) { - library(textometrieR) - spec <- specificites(mat) + library(textometry) + spec <- specificities(mat) sumcol<-colSums(mat) eff_relatif<-round(t(apply(mat,1,function(x) {(x/t(as.matrix(sumcol))*1000)})),2) colnames(eff_relatif) <- colnames(mat) @@ -321,7 +331,105 @@ BuildProf01<-function(x,classes) { mat } +build.prof.tgen <- function(x) { + nbst <- sum(x[nrow(x),]) + totcl <- x[nrow(x),] + tottgen <- rowSums(x) + nbtgen <- nrow(x) - 1 + chi2 <- x[1:(nrow(x)-1),] + pchi2 <- chi2 + for (classe in 1:ncol(x)) { + for (tg in 1:nbtgen) { + cont <- c(x[tg, classe], tottgen[tg] - x[tg, classe], totcl[classe] - x[tg, classe], (nbst - totcl[classe]) - (tottgen[tg] - x[tg, classe])) + cont <- matrix(unlist(cont), nrow=2) + chiresult<-chisq.test(cont,correct=FALSE) + if (is.na(chiresult$p.value)) { + chiresult$p.value<-1 + chiresult$statistic<-0 + } + if (chiresult$expected[1,1] > cont[1,1]) { + chiresult$statistic <- chiresult$statistic * -1 + } + chi2[tg,classe] <- chiresult$statistic + pchi2[tg,classe] <- chiresult$p.value + } + } + res <- list(chi2 = chi2, pchi2 = pchi2) +} + + +new.build.prof <- function(x,dataclasse,clusternb,lim=2) { + cl <- dataclasse[,ncol(dataclasse)] + nst <- length(which(cl != 0)) + rs <- rowSums(x) + mod.names<-rownames(x) + lnbligne <- list() + lchi <- list() + prof <- list() + aprof <- list() + for (classe in 1:clusternb) { + lnbligne[[classe]]<-length(which(cl==classe)) + tmpprof <- data.frame() + tmpanti <- data.frame() + obs1 <- x[,classe] #1,1 + obs2 <- rs - obs1 #1,2 + obs3 <- lnbligne[[classe]] - obs1 #2,1 + obs4 <- nst - (obs1 + obs2 + obs3) #2,2 + exp1 <- ((obs1 + obs3) * (obs1 + obs2)) / nst + exp2 <- ((obs2 + obs1) * (obs2 + obs4)) / nst + exp3 <- ((obs3 + obs4) * (obs3 + obs1)) / nst + exp4 <- ((obs4 + obs3) * (obs4 + obs2)) / nst + chi1 <- ((obs1 - exp1)^2) / exp1 + chi2 <- ((obs2 - exp2)^2) / exp2 + chi3 <- ((obs3 - exp3)^2) / exp3 + chi4 <- ((obs4 - exp4)^2) / exp4 + chi <- chi1 + chi2 + chi3 + chi4 + chi[which(is.na(chi)==T)] <- 0 + tochange <- ifelse(obs1 > exp1, 1, -1) + lchi[[classe]] <- chi * tochange + tokeep <- which(lchi[[classe]] > lim) + if (length(tokeep)) { + tmpprof[1:length(tokeep),1] <- obs1[tokeep] + tmpprof[,2] <- rs[tokeep] + tmpprof[,3] <- round((obs1/rs)*100, digits=2)[tokeep] + tmpprof[,4] <- round(lchi[[classe]], digits=3)[tokeep] + tmpprof[,5] <- mod.names[tokeep] + tmpprof[,6] <- pchisq(lchi[[classe]] ,1, lower.tail=F)[tokeep] + } + prof[[classe]] <- tmpprof + toanti <- which(lchi[[classe]] < -lim) + if (length(toanti)) { + tmpanti[1:length(toanti),1] <- obs1[toanti] + tmpanti[,2] <- rs[toanti] + tmpanti[,3] <- round((obs1/rs)*100, digits=2)[toanti] + tmpanti[,4] <- round(lchi[[classe]], digits=3)[toanti] + tmpanti[,5] <- mod.names[toanti] + tmpanti[,6] <- pchisq(-lchi[[classe]] ,1, lower.tail=F)[toanti] + } + aprof[[classe]] <- tmpanti + if (length(prof[[classe]])!=0) { + prof[[classe]]<-prof[[classe]][order(prof[[classe]][,4],decreasing=TRUE),] + } + if (length(aprof[[classe]])!=0) { + aprof[[classe]]<-aprof[[classe]][order(aprof[[classe]][,4]),] + } + } + tablechi <- do.call(cbind, lchi) + tablep <- pchisq(tablechi,1, lower.tail=F) + tablep <- round(tablep, digits=3) + tablechi <- round(tablechi, digits=3) + out <- list() + out[[1]] <- tablep + out[[2]] <- tablechi + out[[3]] <- cbind(x, rowSums(x)) + out[[4]] <- prof + out[[5]] <- aprof + out +} + + BuildProf<- function(x,dataclasse,clusternb,lim=2) { + print('build prof') #### #r.names<-rownames(x) #x<-as.matrix(x) @@ -427,6 +535,7 @@ BuildProf<- function(x,dataclasse,clusternb,lim=2) { aprof[[classe]]<-aprof[[classe]][order(aprof[[classe]][,4]),] } } + print('fini build prof') output<-list() output[[1]]<-tablep output[[2]]<-tablesqr