X-Git-Url: http://iramuteq.org/git?p=iramuteq;a=blobdiff_plain;f=Rscripts%2Fchdfunct.R;h=a5034b6649562dbfb94786dbdb8785b418d90b0c;hp=2ca1eac603ae3d2af1ea109b5da4719d343f06ea;hb=43c6fca06a3c58a789548c4b306d54e03a94b1c4;hpb=8fa853a25a9d62b1446e1bc543e5a3a4d0e03dcf diff --git a/Rscripts/chdfunct.R b/Rscripts/chdfunct.R index 2ca1eac..a5034b6 100644 --- a/Rscripts/chdfunct.R +++ b/Rscripts/chdfunct.R @@ -49,7 +49,7 @@ PrintProfile<- function(dataclasse,profileactlist,profileetlist,antiproact,antip cltot<-as.data.frame(as.character(cltot[,ncol(cltot)])) tot<-nrow(cltot) classes<-as.data.frame(as.character(dataclasse[,ncol(dataclasse)])) - classes.s<-as.data.frame(summary(cltot[,1],maxsum=500)) + classes.s<-as.data.frame(summary(as.factor(cltot[,1]),maxsum=500)) profile<-rbind(profile,c('***','nb classes',clusternb,'***','','')) antipro<-rbind(antipro,c('***','nb classes',clusternb,'***','','')) for(i in 1:clusternb) { @@ -98,12 +98,13 @@ AddCorrelationOk<-function(afc) { rowcoord<-afc$rowcoord colcoord<-afc$colcoord factor <- ncol(rowcoord) + hypo<-function(rowcoord,ligne) { somme<-0 for (i in 1:factor) { somme<-somme+(rowcoord[ligne,i])^2 } - sqrt(somme) + sqrt(somme) } cor<-function(d,hypo) { d/hypo @@ -117,8 +118,17 @@ AddCorrelationOk<-function(afc) { } out } - afc$rowcrl<-CompCrl(rowcoord) - afc$colcrl<-CompCrl(colcoord) + + get.corr <- function(rowcol) { + sqrowcol <- rowcol^2 + sqrowcol <- sqrt(rowSums(sqrowcol)) + corr <- rowcol / sqrowcol + corr + } + #afc$rowcrl<-CompCrl(rowcoord) + afc$rowcrl <- get.corr(rowcoord) + #afc$colcrl<-CompCrl(colcoord) + afc$colcrl<-get.corr(colcoord) afc } @@ -293,95 +303,12 @@ AsLexico2<- function(mat, chip = FALSE) { out } - -#from textometrieR -#http://txm.sourceforge.net/doc/R/textometrieR-package.html -#Sylvain Loiseau -specificites.probabilities <- function (lexicaltable, types = NULL, parts = NULL) -{ - rowMargin <- rowSums(lexicaltable) - colMargin <- colSums(lexicaltable) - F <- sum(lexicaltable) - if (!is.null(types)) { - if (is.character(types)) { - if (is.null(rownames(lexicaltable))) - stop("The lexical table has no row names and the \"types\" argument is a character vector.") - if (!all(types %in% rownames(lexicaltable))) - stop(paste("Some requested types are not known in the lexical table: ", - paste(types[!(types %in% rownames(lexicaltable))], - collapse = " "))) - } - else { - if (any(types < 1)) - stop("The row index must be greater than 0.") - if (max(types) > nrow(lexicaltable)) - stop("Row index must be smaller than the number of rows.") - } - lexicaltable <- lexicaltable[types, , drop = FALSE] - rowMargin <- rowMargin[types] - } - if (!is.null(parts)) { - if (is.character(parts)) { - if (is.null(colnames(lexicaltable))) - stop("The lexical table has no col names and the \"parts\" argument is a character vector.") - if (!all(parts %in% colnames(lexicaltable))) - stop(paste("Some requested parts are not known in the lexical table: ", - paste(parts[!(parts %in% colnames(lexicaltable))], - collapse = " "))) - } - else { - if (max(parts) > ncol(lexicaltable)) - stop("Column index must be smaller than the number of cols.") - if (any(parts < 1)) - stop("The col index must be greater than 0.") - } - lexicaltable <- lexicaltable[, parts, drop = FALSE] - colMargin <- colMargin[parts] - } - if (nrow(lexicaltable) == 0 | ncol(lexicaltable) == 0) { - stop("The lexical table must contains at least one row and one column.") - } - specif <- matrix(0, nrow = nrow(lexicaltable), ncol = ncol(lexicaltable)) - for (i in 1:ncol(lexicaltable)) { - whiteDrawn <- lexicaltable[, i] - white <- rowMargin - black <- F - white - drawn <- colMargin[i] - independance <- (white * drawn)/F - specif_negative <- whiteDrawn < independance - specif_positive <- whiteDrawn >= independance - specif[specif_negative, i] <- phyper(whiteDrawn[specif_negative], - white[specif_negative], black[specif_negative], drawn) - specif[specif_positive, i] <- phyper(whiteDrawn[specif_positive] - - 1, white[specif_positive], black[specif_positive], - drawn) - } - dimnames(specif) <- dimnames(lexicaltable) - return(specif) -} - -#from textometrieR -#http://txm.sourceforge.net/doc/R/textometrieR-package.html -#Sylvain Loiseau -specificites <- function (lexicaltable, types = NULL, parts = NULL) -{ - spe <- specificites.probabilities(lexicaltable, types, parts) - spelog <- matrix(0, nrow = nrow(spe), ncol = ncol(spe)) - spelog[spe < 0.5] <- log10(spe[spe < 0.5]) - spelog[spe > 0.5] <- abs(log10(1 - spe[spe > 0.5])) - spelog[spe == 0.5] <- 0 - spelog[is.infinite(spe)] <- 0 - spelog <- round(spelog, digits = 4) - rownames(spelog) <- rownames(spe) - colnames(spelog) <- colnames(spe) - return(spelog) -} - make.spec.hypergeo <- function(mat) { - #library(textometrieR) - spec <- specificites(mat) + library(textometry) + spec <- specificities(mat) sumcol<-colSums(mat) eff_relatif<-round(t(apply(mat,1,function(x) {(x/t(as.matrix(sumcol))*1000)})),2) + colnames(eff_relatif) <- colnames(mat) out <-list() out[[1]]<-spec out[[3]]<-eff_relatif @@ -404,7 +331,105 @@ BuildProf01<-function(x,classes) { mat } +build.prof.tgen <- function(x) { + nbst <- sum(x[nrow(x),]) + totcl <- x[nrow(x),] + tottgen <- rowSums(x) + nbtgen <- nrow(x) - 1 + chi2 <- x[1:(nrow(x)-1),] + pchi2 <- chi2 + for (classe in 1:ncol(x)) { + for (tg in 1:nbtgen) { + cont <- c(x[tg, classe], tottgen[tg] - x[tg, classe], totcl[classe] - x[tg, classe], (nbst - totcl[classe]) - (tottgen[tg] - x[tg, classe])) + cont <- matrix(unlist(cont), nrow=2) + chiresult<-chisq.test(cont,correct=FALSE) + if (is.na(chiresult$p.value)) { + chiresult$p.value<-1 + chiresult$statistic<-0 + } + if (chiresult$expected[1,1] > cont[1,1]) { + chiresult$statistic <- chiresult$statistic * -1 + } + chi2[tg,classe] <- chiresult$statistic + pchi2[tg,classe] <- chiresult$p.value + } + } + res <- list(chi2 = chi2, pchi2 = pchi2) +} + + +new.build.prof <- function(x,dataclasse,clusternb,lim=2) { + cl <- dataclasse[,ncol(dataclasse)] + nst <- length(which(cl != 0)) + rs <- rowSums(x) + mod.names<-rownames(x) + lnbligne <- list() + lchi <- list() + prof <- list() + aprof <- list() + for (classe in 1:clusternb) { + lnbligne[[classe]]<-length(which(cl==classe)) + tmpprof <- data.frame() + tmpanti <- data.frame() + obs1 <- x[,classe] #1,1 + obs2 <- rs - obs1 #1,2 + obs3 <- lnbligne[[classe]] - obs1 #2,1 + obs4 <- nst - (obs1 + obs2 + obs3) #2,2 + exp1 <- ((obs1 + obs3) * (obs1 + obs2)) / nst + exp2 <- ((obs2 + obs1) * (obs2 + obs4)) / nst + exp3 <- ((obs3 + obs4) * (obs3 + obs1)) / nst + exp4 <- ((obs4 + obs3) * (obs4 + obs2)) / nst + chi1 <- ((obs1 - exp1)^2) / exp1 + chi2 <- ((obs2 - exp2)^2) / exp2 + chi3 <- ((obs3 - exp3)^2) / exp3 + chi4 <- ((obs4 - exp4)^2) / exp4 + chi <- chi1 + chi2 + chi3 + chi4 + chi[which(is.na(chi)==T)] <- 0 + tochange <- ifelse(obs1 > exp1, 1, -1) + lchi[[classe]] <- chi * tochange + tokeep <- which(lchi[[classe]] > lim) + if (length(tokeep)) { + tmpprof[1:length(tokeep),1] <- obs1[tokeep] + tmpprof[,2] <- rs[tokeep] + tmpprof[,3] <- round((obs1/rs)*100, digits=2)[tokeep] + tmpprof[,4] <- round(lchi[[classe]], digits=3)[tokeep] + tmpprof[,5] <- mod.names[tokeep] + tmpprof[,6] <- pchisq(lchi[[classe]] ,1, lower.tail=F)[tokeep] + } + prof[[classe]] <- tmpprof + toanti <- which(lchi[[classe]] < -lim) + if (length(toanti)) { + tmpanti[1:length(toanti),1] <- obs1[toanti] + tmpanti[,2] <- rs[toanti] + tmpanti[,3] <- round((obs1/rs)*100, digits=2)[toanti] + tmpanti[,4] <- round(lchi[[classe]], digits=3)[toanti] + tmpanti[,5] <- mod.names[toanti] + tmpanti[,6] <- pchisq(-lchi[[classe]] ,1, lower.tail=F)[toanti] + } + aprof[[classe]] <- tmpanti + if (length(prof[[classe]])!=0) { + prof[[classe]]<-prof[[classe]][order(prof[[classe]][,4],decreasing=TRUE),] + } + if (length(aprof[[classe]])!=0) { + aprof[[classe]]<-aprof[[classe]][order(aprof[[classe]][,4]),] + } + } + tablechi <- do.call(cbind, lchi) + tablep <- pchisq(tablechi,1, lower.tail=F) + tablep <- round(tablep, digits=3) + tablechi <- round(tablechi, digits=3) + out <- list() + out[[1]] <- tablep + out[[2]] <- tablechi + out[[3]] <- cbind(x, rowSums(x)) + out[[4]] <- prof + out[[5]] <- aprof + out +} + + BuildProf<- function(x,dataclasse,clusternb,lim=2) { + print('build prof') #### #r.names<-rownames(x) #x<-as.matrix(x) @@ -510,6 +535,7 @@ BuildProf<- function(x,dataclasse,clusternb,lim=2) { aprof[[classe]]<-aprof[[classe]][order(aprof[[classe]][,4]),] } } + print('fini build prof') output<-list() output[[1]]<-tablep output[[2]]<-tablesqr