X-Git-Url: http://iramuteq.org/git?p=iramuteq;a=blobdiff_plain;f=Rscripts%2Fchdfunct.R;h=914278c3dd7d726e4c3f13a47ea49ea0d5fa32f0;hp=2ca1eac603ae3d2af1ea109b5da4719d343f06ea;hb=83802e662acde01994fe0bd2bf6978fef90b14f8;hpb=8fa853a25a9d62b1446e1bc543e5a3a4d0e03dcf diff --git a/Rscripts/chdfunct.R b/Rscripts/chdfunct.R index 2ca1eac..914278c 100644 --- a/Rscripts/chdfunct.R +++ b/Rscripts/chdfunct.R @@ -293,95 +293,12 @@ AsLexico2<- function(mat, chip = FALSE) { out } - -#from textometrieR -#http://txm.sourceforge.net/doc/R/textometrieR-package.html -#Sylvain Loiseau -specificites.probabilities <- function (lexicaltable, types = NULL, parts = NULL) -{ - rowMargin <- rowSums(lexicaltable) - colMargin <- colSums(lexicaltable) - F <- sum(lexicaltable) - if (!is.null(types)) { - if (is.character(types)) { - if (is.null(rownames(lexicaltable))) - stop("The lexical table has no row names and the \"types\" argument is a character vector.") - if (!all(types %in% rownames(lexicaltable))) - stop(paste("Some requested types are not known in the lexical table: ", - paste(types[!(types %in% rownames(lexicaltable))], - collapse = " "))) - } - else { - if (any(types < 1)) - stop("The row index must be greater than 0.") - if (max(types) > nrow(lexicaltable)) - stop("Row index must be smaller than the number of rows.") - } - lexicaltable <- lexicaltable[types, , drop = FALSE] - rowMargin <- rowMargin[types] - } - if (!is.null(parts)) { - if (is.character(parts)) { - if (is.null(colnames(lexicaltable))) - stop("The lexical table has no col names and the \"parts\" argument is a character vector.") - if (!all(parts %in% colnames(lexicaltable))) - stop(paste("Some requested parts are not known in the lexical table: ", - paste(parts[!(parts %in% colnames(lexicaltable))], - collapse = " "))) - } - else { - if (max(parts) > ncol(lexicaltable)) - stop("Column index must be smaller than the number of cols.") - if (any(parts < 1)) - stop("The col index must be greater than 0.") - } - lexicaltable <- lexicaltable[, parts, drop = FALSE] - colMargin <- colMargin[parts] - } - if (nrow(lexicaltable) == 0 | ncol(lexicaltable) == 0) { - stop("The lexical table must contains at least one row and one column.") - } - specif <- matrix(0, nrow = nrow(lexicaltable), ncol = ncol(lexicaltable)) - for (i in 1:ncol(lexicaltable)) { - whiteDrawn <- lexicaltable[, i] - white <- rowMargin - black <- F - white - drawn <- colMargin[i] - independance <- (white * drawn)/F - specif_negative <- whiteDrawn < independance - specif_positive <- whiteDrawn >= independance - specif[specif_negative, i] <- phyper(whiteDrawn[specif_negative], - white[specif_negative], black[specif_negative], drawn) - specif[specif_positive, i] <- phyper(whiteDrawn[specif_positive] - - 1, white[specif_positive], black[specif_positive], - drawn) - } - dimnames(specif) <- dimnames(lexicaltable) - return(specif) -} - -#from textometrieR -#http://txm.sourceforge.net/doc/R/textometrieR-package.html -#Sylvain Loiseau -specificites <- function (lexicaltable, types = NULL, parts = NULL) -{ - spe <- specificites.probabilities(lexicaltable, types, parts) - spelog <- matrix(0, nrow = nrow(spe), ncol = ncol(spe)) - spelog[spe < 0.5] <- log10(spe[spe < 0.5]) - spelog[spe > 0.5] <- abs(log10(1 - spe[spe > 0.5])) - spelog[spe == 0.5] <- 0 - spelog[is.infinite(spe)] <- 0 - spelog <- round(spelog, digits = 4) - rownames(spelog) <- rownames(spe) - colnames(spelog) <- colnames(spe) - return(spelog) -} - make.spec.hypergeo <- function(mat) { - #library(textometrieR) - spec <- specificites(mat) + library(textometry) + spec <- specificities(mat) sumcol<-colSums(mat) eff_relatif<-round(t(apply(mat,1,function(x) {(x/t(as.matrix(sumcol))*1000)})),2) + colnames(eff_relatif) <- colnames(mat) out <-list() out[[1]]<-spec out[[3]]<-eff_relatif @@ -404,6 +321,32 @@ BuildProf01<-function(x,classes) { mat } +build.prof.tgen <- function(x) { + nbst <- sum(x[nrow(x),]) + totcl <- x[nrow(x),] + tottgen <- rowSums(x) + nbtgen <- nrow(x) - 1 + chi2 <- x[1:(nrow(x)-1),] + pchi2 <- chi2 + for (classe in 1:ncol(x)) { + for (tg in 1:nbtgen) { + cont <- c(x[tg, classe], tottgen[tg] - x[tg, classe], totcl[classe] - x[tg, classe], (nbst - totcl[classe]) - (tottgen[tg] - x[tg, classe])) + cont <- matrix(unlist(cont), nrow=2) + chiresult<-chisq.test(cont,correct=FALSE) + if (is.na(chiresult$p.value)) { + chiresult$p.value<-1 + chiresult$statistic<-0 + } + if (chiresult$expected[1,1] > cont[1,1]) { + chiresult$statistic <- chiresult$statistic * -1 + } + chi2[tg,classe] <- chiresult$statistic + pchi2[tg,classe] <- chiresult$p.value + } + } + res <- list(chi2 = chi2, pchi2 = pchi2) +} + BuildProf<- function(x,dataclasse,clusternb,lim=2) { #### #r.names<-rownames(x)