X-Git-Url: http://iramuteq.org/git?p=iramuteq;a=blobdiff_plain;f=Rscripts%2Fchdfunct.R;h=a5034b6649562dbfb94786dbdb8785b418d90b0c;hp=2ca1eac603ae3d2af1ea109b5da4719d343f06ea;hb=43c6fca06a3c58a789548c4b306d54e03a94b1c4;hpb=8fa853a25a9d62b1446e1bc543e5a3a4d0e03dcf

diff --git a/Rscripts/chdfunct.R b/Rscripts/chdfunct.R
index 2ca1eac..a5034b6 100644
--- a/Rscripts/chdfunct.R
+++ b/Rscripts/chdfunct.R
@@ -49,7 +49,7 @@ PrintProfile<- function(dataclasse,profileactlist,profileetlist,antiproact,antip
 	cltot<-as.data.frame(as.character(cltot[,ncol(cltot)]))
 	tot<-nrow(cltot)
     classes<-as.data.frame(as.character(dataclasse[,ncol(dataclasse)]))
-    classes.s<-as.data.frame(summary(cltot[,1],maxsum=500))
+    classes.s<-as.data.frame(summary(as.factor(cltot[,1]),maxsum=500))
     profile<-rbind(profile,c('***','nb classes',clusternb,'***','',''))
     antipro<-rbind(antipro,c('***','nb classes',clusternb,'***','',''))
     for(i in 1:clusternb) {
@@ -98,12 +98,13 @@ AddCorrelationOk<-function(afc) {
 	rowcoord<-afc$rowcoord
 	colcoord<-afc$colcoord
 	factor <- ncol(rowcoord)
+	
 	hypo<-function(rowcoord,ligne) {
 		somme<-0
 		for (i in 1:factor) {
 			somme<-somme+(rowcoord[ligne,i])^2
 		}
-	sqrt(somme)
+		sqrt(somme)
 	}
 	cor<-function(d,hypo) {
 		d/hypo
@@ -117,8 +118,17 @@ AddCorrelationOk<-function(afc) {
 		}
 	out
 	}
-	afc$rowcrl<-CompCrl(rowcoord)
-	afc$colcrl<-CompCrl(colcoord)
+
+	get.corr <- function(rowcol) {
+		sqrowcol <- rowcol^2
+		sqrowcol <- sqrt(rowSums(sqrowcol))
+		corr <- rowcol / sqrowcol
+		corr
+	}
+	#afc$rowcrl<-CompCrl(rowcoord)
+	afc$rowcrl <- get.corr(rowcoord)
+	#afc$colcrl<-CompCrl(colcoord)
+	afc$colcrl<-get.corr(colcoord)
 	afc
 }
 
@@ -293,95 +303,12 @@ AsLexico2<- function(mat, chip = FALSE) {
     out
 }
 
-
-#from textometrieR
-#http://txm.sourceforge.net/doc/R/textometrieR-package.html
-#Sylvain Loiseau
-specificites.probabilities <- function (lexicaltable, types = NULL, parts = NULL) 
-{
-    rowMargin <- rowSums(lexicaltable)
-    colMargin <- colSums(lexicaltable)
-    F <- sum(lexicaltable)
-    if (!is.null(types)) {
-        if (is.character(types)) {
-            if (is.null(rownames(lexicaltable))) 
-                stop("The lexical table has no row names and the \"types\" argument is a character vector.")
-            if (!all(types %in% rownames(lexicaltable))) 
-                stop(paste("Some requested types are not known in the lexical table: ", 
-                  paste(types[!(types %in% rownames(lexicaltable))], 
-                    collapse = " ")))
-        }
-        else {
-            if (any(types < 1)) 
-                stop("The row index must be greater than 0.")
-            if (max(types) > nrow(lexicaltable)) 
-                stop("Row index must be smaller than the number of rows.")
-        }
-        lexicaltable <- lexicaltable[types, , drop = FALSE]
-        rowMargin <- rowMargin[types]
-    }
-    if (!is.null(parts)) {
-        if (is.character(parts)) {
-            if (is.null(colnames(lexicaltable))) 
-                stop("The lexical table has no col names and the \"parts\" argument is a character vector.")
-            if (!all(parts %in% colnames(lexicaltable))) 
-                stop(paste("Some requested parts are not known in the lexical table: ", 
-                  paste(parts[!(parts %in% colnames(lexicaltable))], 
-                    collapse = " ")))
-        }
-        else {
-            if (max(parts) > ncol(lexicaltable)) 
-                stop("Column index must be smaller than the number of cols.")
-            if (any(parts < 1)) 
-                stop("The col index must be greater than 0.")
-        }
-        lexicaltable <- lexicaltable[, parts, drop = FALSE]
-        colMargin <- colMargin[parts]
-    }
-    if (nrow(lexicaltable) == 0 | ncol(lexicaltable) == 0) {
-        stop("The lexical table must contains at least one row and one column.")
-    }
-    specif <- matrix(0, nrow = nrow(lexicaltable), ncol = ncol(lexicaltable))
-    for (i in 1:ncol(lexicaltable)) {
-        whiteDrawn <- lexicaltable[, i]
-        white <- rowMargin
-        black <- F - white
-        drawn <- colMargin[i]
-        independance <- (white * drawn)/F
-        specif_negative <- whiteDrawn < independance
-        specif_positive <- whiteDrawn >= independance
-        specif[specif_negative, i] <- phyper(whiteDrawn[specif_negative], 
-            white[specif_negative], black[specif_negative], drawn)
-        specif[specif_positive, i] <- phyper(whiteDrawn[specif_positive] - 
-            1, white[specif_positive], black[specif_positive], 
-            drawn)
-    }
-    dimnames(specif) <- dimnames(lexicaltable)
-    return(specif)
-}
-
-#from textometrieR
-#http://txm.sourceforge.net/doc/R/textometrieR-package.html
-#Sylvain Loiseau
-specificites <- function (lexicaltable, types = NULL, parts = NULL) 
-{
-    spe <- specificites.probabilities(lexicaltable, types, parts)
-    spelog <- matrix(0, nrow = nrow(spe), ncol = ncol(spe))
-    spelog[spe < 0.5] <- log10(spe[spe < 0.5])
-    spelog[spe > 0.5] <- abs(log10(1 - spe[spe > 0.5]))
-    spelog[spe == 0.5] <- 0
-    spelog[is.infinite(spe)] <- 0
-    spelog <- round(spelog, digits = 4)
-    rownames(spelog) <- rownames(spe)
-    colnames(spelog) <- colnames(spe)
-    return(spelog)
-}
-
 make.spec.hypergeo <- function(mat) {
-    #library(textometrieR)
-    spec <- specificites(mat)
+    library(textometry)
+    spec <- specificities(mat)
 	sumcol<-colSums(mat)
     eff_relatif<-round(t(apply(mat,1,function(x) {(x/t(as.matrix(sumcol))*1000)})),2)
+    colnames(eff_relatif) <- colnames(mat)
     out <-list()
     out[[1]]<-spec
     out[[3]]<-eff_relatif
@@ -404,7 +331,105 @@ BuildProf01<-function(x,classes) {
 	mat
 }
 
+build.prof.tgen <- function(x) {
+    nbst <- sum(x[nrow(x),])
+    totcl <- x[nrow(x),]
+    tottgen <- rowSums(x)
+    nbtgen <- nrow(x) - 1
+    chi2 <- x[1:(nrow(x)-1),]
+    pchi2 <- chi2
+    for (classe in 1:ncol(x)) {
+        for (tg in 1:nbtgen) {
+            cont <- c(x[tg, classe], tottgen[tg] - x[tg, classe], totcl[classe] - x[tg, classe], (nbst - totcl[classe]) - (tottgen[tg] - x[tg, classe]))
+            cont <- matrix(unlist(cont), nrow=2)
+            chiresult<-chisq.test(cont,correct=FALSE)
+            if (is.na(chiresult$p.value)) {
+                chiresult$p.value<-1
+                chiresult$statistic<-0
+            }
+            if (chiresult$expected[1,1] > cont[1,1]) {
+                chiresult$statistic <- chiresult$statistic * -1
+            }
+            chi2[tg,classe] <- chiresult$statistic
+            pchi2[tg,classe] <- chiresult$p.value
+        }
+    }
+    res <- list(chi2 = chi2, pchi2 = pchi2)
+}
+
+
+new.build.prof <- function(x,dataclasse,clusternb,lim=2) {
+	cl <- dataclasse[,ncol(dataclasse)]
+	nst <- length(which(cl != 0))
+	rs <- rowSums(x)
+	mod.names<-rownames(x)
+	lnbligne <- list()
+	lchi <- list()
+	prof <- list()
+	aprof <- list()
+	for (classe in 1:clusternb) {
+		lnbligne[[classe]]<-length(which(cl==classe))
+		tmpprof <- data.frame()
+		tmpanti <- data.frame()
+		obs1 <- x[,classe] #1,1
+		obs2 <- rs - obs1 #1,2
+	    obs3 <- lnbligne[[classe]] - obs1	#2,1
+		obs4 <- nst - (obs1 + obs2 + obs3) #2,2
+		exp1 <- ((obs1 + obs3) * (obs1 + obs2)) / nst
+		exp2 <- ((obs2 + obs1) * (obs2 + obs4)) / nst
+		exp3 <- ((obs3 + obs4) * (obs3 + obs1)) / nst
+		exp4 <- ((obs4 + obs3) * (obs4 + obs2)) / nst
+		chi1 <- ((obs1 - exp1)^2) / exp1
+		chi2 <- ((obs2 - exp2)^2) / exp2
+		chi3 <- ((obs3 - exp3)^2) / exp3
+		chi4 <- ((obs4 - exp4)^2) / exp4
+		chi <- chi1 + chi2 + chi3 + chi4	
+		chi[which(is.na(chi)==T)] <- 0
+		tochange <- ifelse(obs1 > exp1, 1, -1)
+		lchi[[classe]] <- chi * tochange
+		tokeep <- which(lchi[[classe]] > lim)
+		if (length(tokeep)) {
+			tmpprof[1:length(tokeep),1] <- obs1[tokeep]
+			tmpprof[,2] <- rs[tokeep]
+			tmpprof[,3] <- round((obs1/rs)*100, digits=2)[tokeep]
+			tmpprof[,4] <- round(lchi[[classe]], digits=3)[tokeep]
+			tmpprof[,5] <- mod.names[tokeep] 
+			tmpprof[,6] <- pchisq(lchi[[classe]] ,1, lower.tail=F)[tokeep]
+		}
+		prof[[classe]] <- tmpprof
+		toanti <- which(lchi[[classe]] < -lim)
+		if (length(toanti)) {
+			tmpanti[1:length(toanti),1] <- obs1[toanti]
+			tmpanti[,2] <- rs[toanti]
+			tmpanti[,3] <- round((obs1/rs)*100, digits=2)[toanti]
+			tmpanti[,4] <- round(lchi[[classe]], digits=3)[toanti]
+			tmpanti[,5] <- mod.names[toanti] 
+			tmpanti[,6] <- pchisq(-lchi[[classe]] ,1, lower.tail=F)[toanti]
+		}
+		aprof[[classe]] <- tmpanti
+		if (length(prof[[classe]])!=0) {
+			prof[[classe]]<-prof[[classe]][order(prof[[classe]][,4],decreasing=TRUE),]
+		}
+		if (length(aprof[[classe]])!=0) {
+			aprof[[classe]]<-aprof[[classe]][order(aprof[[classe]][,4]),]	
+		}
+	}
+    tablechi <- do.call(cbind, lchi)
+	tablep <- pchisq(tablechi,1, lower.tail=F)
+	tablep <- round(tablep, digits=3)
+	tablechi <- round(tablechi, digits=3)
+	out <- list()
+	out[[1]] <- tablep
+	out[[2]] <- tablechi
+	out[[3]] <- cbind(x, rowSums(x))
+    out[[4]] <- prof	
+	out[[5]] <- aprof
+	out
+}
+
+
 BuildProf<- function(x,dataclasse,clusternb,lim=2) {
+	print('build prof')
 	####
 	#r.names<-rownames(x)
 	#x<-as.matrix(x)
@@ -510,6 +535,7 @@ BuildProf<- function(x,dataclasse,clusternb,lim=2) {
 			aprof[[classe]]<-aprof[[classe]][order(aprof[[classe]][,4]),]	
 		}
 	}
+	print('fini build prof')
 	output<-list()
 	output[[1]]<-tablep
 	output[[2]]<-tablesqr