# iramuteq.org Git - iramuteq/blob - Rscripts/simi.R

   1 #from proxy package
   2 #############################################################
   3 #a, b, c, and d are the counts of all (TRUE, TRUE), (TRUE, FALSE), (FALSE, TRUE), and (FALSE, FALSE)
   4 # n <- a + b + c + d = nrow(x)
   5
   6 make.a <- function(x) {
   7     a  <- t(x) %*% (x)
   8     a
   9 }
  10
  11 make.b <- function(x) {
  12     b <- t(x) %*% (1 - x)
  13     b
  14 }
  15
  16 make.c <- function(x) {
  17     c <- (1-t(x)) %*% x
  18     c
  19 }
  20
  21 make.d <- function(x, a, b, c) {
  22 #??????????? ncol ?
  23     d <- ncol(x) - a - b - c
  24     d
  25 }
  26
  27 ###########################################
  28 #x, a
  29 ###########################################
  30 my.jaccard <- function(x) {
  31     a <- make.a(x)
  32     b <- make.b(x)
  33     c <- make.c(x)
  34     d <- make.d(x, a, b, c)
  35     jac <- a / (a + b + c)
  36     jac
  37 }
  38
  39
  40 prcooc <- function(x, a) {
  41     prc <- (a / nrow(x))
  42     prc
  43 }
  44
  45 make.bin <- function(cs, a, i, j, nb) {
  46     if (a[i, j] >= 1) {
  47         ab <- a[i, j] - 1
  48         res <- binom.test(ab, nb, (cs[i]/nb) * (cs[j]/nb), "less")
  49     } else {
  50         res <- NULL
  51         res$p.value <- 0
  52     }
  53     #res <- binom.test(ab, nb, (cs[i]/nb) * (cs[j]/nb), "less")
  54     res$p.value
  55     }
  56
  57 binom.sim <- function(x) {
  58     a <- make.a(x)
  59     n <- nrow(x)
  60     cs <- colSums(x)
  61     mat <- matrix(0,ncol(x),ncol(x))
  62     colnames(mat)<-colnames(a)
  63     rownames(mat)<-rownames(a)
  64     for (i in 1:(ncol(x)-1)) {
  65         for (j in (i+1):ncol(x)) {
  66             mat[j,i] <- make.bin(cs, a, i, j , n)
  67         }
  68     }
  69 #    print(mat)
  70     mat
  71 }
  72
  73
  74 ############################################
  75 # a, b, c
  76 ############################################
  77 # jaccard a, b, c   a / (a + b + c)
  78 # Kulczynski1 a, b, c   a / (b + c)
  79 # Kulczynski2 a, b, c   [a / (a + b) + a / (a + c)] / 2
  80 # Mountford a, b, c    2a / (ab + ac + 2bc)
  81 # Fager, McGowan a, b, c   a / sqrt((a + b)(a + c)) - 1 / 2 sqrt(a + c)
  82 # Russel, Rao a (a/n)
  83 # Dice, Czekanowski, Sorensen a, b, c   2a / (2a + b + c)
  84 # Mozley, Margalef a, b, c  an / (a + b)(a + c)
  85 # Ochiai a, b, c  a / sqrt[(a + b)(a + c)]
  86 # Simpson a, b, c   a / min{(a + b), (a + c)}
  87 # Braun-Blanquet a, b, c  a / max{(a + b), (a + c)}
  88
  89 #simple matching, Sokal/Michener a, b, c, d, ((a + d) /n)
  90 # Hamman, a, b, c, d, ([a + d] - [b + c]) / n
  91 # Faith , a, b, c, d, (a + d/2) / n
  92 # Tanimoto, Rogers a, b, c, d, (a + d) / (a + 2b + 2c + d)
  93 # Phi  a, b, c, d   (ad - bc) / sqrt[(a + b)(c + d)(a + c)(b + d)]
  94 # Stiles a, b, c, d  log(n(|ad-bc| - 0.5n)^2 / [(a + b)(c + d)(a + c)(b + d)])
  95 # Michael   a, b, c, d   4(ad - bc) / [(a + d)^2 + (b + c)^2]
  96 # Yule a, b, c, d  (ad - bc) / (ad + bc)
  97 # Yule2  a, b, c, d  (sqrt(ad) - sqrt(bc)) / (sqrt(ad) + sqrt(bc))
  98
  99 BuildProf01<-function(x,classes) {
 100         #x : donnees en 0/1
 101         #classes : classes de chaque lignes de x
 102         dm<-cbind(x,cl=classes)
 103         clnb=length(summary(as.data.frame(as.character(classes)),max=100))
 104         print(clnb)
 105         print(summary(as.data.frame(as.character(classes)),max=100))
 106         mat<-matrix(0,ncol(x),clnb)
 107         rownames(mat)<-colnames(x)
 108         for (i in 1:clnb) {
 109                 dtmp<-dm[which(dm$cl==i),]
 110                 for (j in 1:(ncol(dtmp)-1)) {
 111                         mat[j,i]<-sum(dtmp[,j])
 112                 }
 113         }
 114         mat
 115 }
 116
# Build the graph and the drawing attributes of a similarity graph.
#
# Arguments:
#   x            : list read as x$mat (square similarity matrix; its lower
#                  triangle is used as a weighted adjacency matrix) and x$eff
#                  (values used for vertex sizes and label sizes).
#   method       : only compared to 'binom' here, to choose how edge labels
#                  are rounded (3 digits for 'binom', 1 otherwise).
#   seuil        : threshold; when not NULL, edges whose weight is <= seuil
#                  are removed, then vertices left without neighbours.
#   p.type       : 'rgl' asks for a 3-dimensional layout, anything else for
#                  a 2-dimensional one.
#   layout.type  : 'frutch', 'kawa', 'random', 'circle' or 'graphopt'.
#   max.tree     : when TRUE, only a spanning tree of the graph is kept.
#   coeff.vertex : returned as `eff` when minmaxeff[1] is NULL.
#   coeff.edge   : when not NULL, its two first elements are passed to
#                  norm.vec() to scale the edge widths.
#   minmaxeff    : when its first element is not NULL, its two first elements
#                  are passed to norm.vec() to scale x$eff.
#   vcexminmax   : same as minmaxeff, for the label sizes.
#   cex          : label size used when vcexminmax[1] is NULL.
#   coords       : precomputed layout; when not NULL no layout is computed.
#
# Returns a list (invisibly, because the last expression is an assignment)
# with elements graph, mat.eff, eff, mat, v.label, we.width, we.label,
# label.cex and layout.
#
# NOTE(review): norm.vec() is not defined in this part of the file.
# NOTE(review): the `tovire-1`, `0:(length(V(g.toplot))-1)` and `vec+1`
# expressions treat igraph edge/vertex ids as 0-based - confirm against the
# igraph version in use.
do.simi <- function(x, method = 'cooc',seuil = NULL, p.type = 'tkplot',layout.type = 'frutch', max.tree = TRUE, coeff.vertex=NULL, coeff.edge = NULL, minmaxeff=c(NULL,NULL), vcexminmax= c(NULL,NULL), cex = 1, coords = NULL) {
        mat.simi <- x$mat
    mat.eff <- x$eff
    v.label <- colnames(mat.simi)
        # weighted graph built from the lower triangle of the matrix
        g1<-graph.adjacency(mat.simi,mode="lower",weighted=TRUE)
        g.toplot<-g1
        weori<-get.edge.attribute(g1,'weight')
        if (max.tree) {
                # The minimum spanning tree is computed on the inverted
                # weights, then the weights of the tree are inverted back:
                # for positive weights this keeps the heaviest edges.
                invw<-1/weori
                E(g1)$weight<-invw
                g.max<-minimum.spanning.tree(g1)
                E(g.max)$weight<-1/E(g.max)$weight
                g.toplot<-g.max
        }

    if (!is.null(seuil)) {
        # keep the threshold below the largest value of the matrix
        if (seuil >= max(mat.simi)) seuil <- max(mat.simi)-1
        vec<-vector()
        w<-E(g.toplot)$weight
        # edges to remove: weight not above the threshold
        tovire <- which(w<=seuil)
        g.toplot <- delete.edges(g.toplot,(tovire-1))
        # collect the vertices that have no neighbour left
        for (i in 0:(length(V(g.toplot))-1)) {
            if (length(neighbors(g.toplot,i))==0) {
                vec<-append(vec,i)
            }
        }
        g.toplot <- delete.vertices(g.toplot,vec)
        v.label <- V(g.toplot)$name
        # vec is still the empty logical vector when nothing was collected;
        # otherwise drop the matching entries of mat.eff
        if (!is.logical(vec)) mat.eff <- mat.eff[-(vec+1)]
    }

        # vertex sizes: scaled from mat.eff, or taken from coeff.vertex
        if (!is.null(minmaxeff[1])) {
        eff<-norm.vec(mat.eff,minmaxeff[1],minmaxeff[2])
    } else {
        eff<-coeff.vertex
    }
    # label sizes: scaled from mat.eff, or the single value cex
    if (!is.null(vcexminmax[1])) {
        label.cex = norm.vec(mat.eff, vcexminmax[1], vcexminmax[2])
    } else {
        label.cex = cex
    }
    # edge widths: scaled from the absolute edge weights, or NULL
    if (!is.null(coeff.edge)) {
        we.width <- norm.vec(abs(E(g.toplot)$weight), coeff.edge[1], coeff.edge[2])
            #we.width <- abs((E(g.toplot)$weight/max(abs(E(g.toplot)$weight)))*coeff.edge)
    } else {
        we.width <- NULL
    }
    # edge labels: rounded edge weights
    if (method != 'binom') {
        we.label <- round(E(g.toplot)$weight,1)
    } else {
        we.label <- round(E(g.toplot)$weight,3)
    }
        # number of layout dimensions
        if (p.type=='rgl') {
        nd<-3
    } else {
        nd<-2
    }
    # Layout: computed from layout.type unless coords is supplied.
    # `lo` is left undefined when layout.type matches none of the tests below.
    if (is.null(coords)) {
        if (layout.type == 'frutch')
                lo <- layout.fruchterman.reingold(g.toplot,dim=nd)#, weightsA=E(g.toplot)$weight)
        if (layout.type == 'kawa')
                lo <- layout.kamada.kawai(g.toplot,dim=nd)
        if (layout.type == 'random')
                lo <- layout.random(g.toplot,dim=nd)
        if (layout.type == 'circle' & p.type != 'rgl')
                lo <- layout.circle(g.toplot)
        if (layout.type == 'circle' & p.type == 'rgl')
                lo <- layout.sphere(g.toplot)
        if (layout.type == 'graphopt')
            lo <- layout.graphopt(g.toplot)
    } else {
        lo <- coords
    }
        out <- list(graph = g.toplot, mat.eff = mat.eff, eff = eff, mat = mat.simi, v.label = v.label, we.width = we.width, we.label=we.label, label.cex = label.cex, layout = lo)
}
 192
# Draw a similarity graph from a list shaped like the one built by do.simi().
#
# Arguments:
#   graph.simi         : list read as $mat, $graph, $eff, $we.width,
#                        $v.label, $we.label, $layout and $label.cex.
#   p.type             : 'nplot' (plot into a file opened by
#                        open_file_graph()), 'tkplot' (interactive tkplot
#                        window) or 'rgl' (3D plot).
#   filename           : passed to open_file_graph() for 'nplot'.
#   vertex.col         : vertex colour(s).
#   edge.col           : edge colour(s).
#   edge.label         : when TRUE the edge labels of graph.simi are shown.
#   vertex.label       : when TRUE the vertex labels of graph.simi are shown.
#   vertex.label.color : label colour; one colour or one colour per label
#                        (see the cexalpha branch).
#   vertex.label.cex   : label sizes; graph.simi$label.cex when NULL.
#   vertex.size        : vertex sizes; graph.simi$eff when NULL.
#   leg                : when not NULL, a legend is drawn ('nplot' only) from
#                        leg$unetoile and leg$gcol.
#   width, height      : passed to open_file_graph().
#   alpha              : transparency of the spheres drawn in 'rgl' mode.
#   cexalpha           : when TRUE, label colours get an alpha level derived
#                        from the label sizes.
#   movie              : when not NULL, directory passed to movie3d() ('rgl').
#
# Returns the layout for 'nplot', the coordinates read back from the tkplot
# window for 'tkplot', and the value of rgl.close() for 'rgl'.
#
# NOTE(review): norm.vec(), open_file_graph() and vire.nonascii() are not
# defined in this part of the file.
plot.simi <- function(graph.simi, p.type = 'tkplot',filename=NULL, vertex.col = 'red', edge.col = 'black', edge.label = TRUE, vertex.label=TRUE, vertex.label.color = 'black', vertex.label.cex= NULL, vertex.size=NULL, leg=NULL, width = 800, height = 800, alpha = 0.1, cexalpha = FALSE, movie = NULL) {
        mat.simi <- graph.simi$mat
        g.toplot <- graph.simi$graph
    # an explicit vertex.size takes precedence over the stored sizes
    if (is.null(vertex.size)) {
        vertex.size <- graph.simi$eff
    } else {
        vertex.size <- vertex.size
    }
        we.width <- graph.simi$we.width
    if (vertex.label) {
        #v.label <- vire.nonascii(graph.simi$v.label)
        v.label <- graph.simi$v.label
    } else {
        v.label <- NA
    }
    if (edge.label) {
        we.label <- graph.simi$we.label
    } else {
        we.label <- NA
    }
        lo <- graph.simi$layout
    # an explicit vertex.label.cex takes precedence over the stored sizes
    if (!is.null(vertex.label.cex)) {
        label.cex<-vertex.label.cex
    } else {
        label.cex = graph.simi$label.cex
    }
    if (cexalpha) {
        # one alpha level per label, scaled from the label sizes to 0.5..1
        alphas <- norm.vec(label.cex, 0.5,1)
        nvlc <- NULL
        if (length(vertex.label.color) == 1) {
            # single colour: same colour for every label, alpha varies
            for (i in 1:length(alphas)) {
             nvlc <- append(nvlc, adjustcolor(vertex.label.color, alpha=alphas[i]))
            }
        } else {
            # several colours: colour i goes with alpha i
            for (i in 1:length(alphas)) {
                nvlc <- append(nvlc, adjustcolor(vertex.label.color[i], alpha=alphas[i]))
            }
        }
        vertex.label.color <- nvlc
    }
    if (p.type=='nplot') {
        #print('WARNING - NO OPEN FILE')
        open_file_graph(filename, width = width, height = height)
        par(mar=c(2,2,2,2))
        if (!is.null(leg)) {
            # two panels: the graph, then a fixed-width panel for the legend
            layout(matrix(c(1,2),1,2, byrow=TRUE),widths=c(3,lcm(7)))
            par(mar=c(2,2,1,0))
        }
        par(pch=' ')
        # The graph is drawn with empty vertex labels; the labels are added
        # afterwards with text() on the rescaled layout.
        # NOTE(review): `cex` is neither a parameter nor a local variable of
        # this function; it has to come from an enclosing environment - confirm.
        plot(g.toplot,vertex.label='', edge.width=we.width, vertex.size=vertex.size, vertex.color=vertex.col, vertex.label.color='white', edge.label=we.label, edge.label.cex=cex, edge.color=edge.col, vertex.label.cex = 0, layout=lo)
        txt.layout <- layout.norm(lo, -1, 1, -1, 1, -1, 1)
        #txt.layout <- txt.layout[order(label.cex),]
        #vertex.label.color <- vertex.label.color[order(label.cex)]
        #v.label <- v.label[order(label.cex)]
        #label.cex <- label.cex[order(label.cex)]
        text(txt.layout[,1], txt.layout[,2], v.label, cex=label.cex, col=vertex.label.color)
        if (!is.null(leg)) {
            # empty plot used as a canvas for the legend
            par(mar=c(0,0,0,0))
            plot(0, axes = FALSE, pch = '')
            legend(x = 'center' , leg$unetoile, fill = leg$gcol)
        }
        dev.off()
        return(lo)
    }
        if (p.type=='tkplot') {
                id <- tkplot(g.toplot,vertex.label=v.label, edge.width=we.width, vertex.size=vertex.size, vertex.color=vertex.col, vertex.label.color=vertex.label.color, edge.label=we.label, edge.color=edge.col, layout=lo)
        coords = tkplot.getcoords(id)
        # Poll the coordinates every 0.5 s for as long as the call returns a
        # matrix; the loop ends when try() returns something else.
        # presumably this happens once the tkplot window is closed - confirm
        ok <- try(coords <- tkplot.getcoords(id), TRUE)
                while (is.matrix(ok)) {
            ok <- try(coords <- tkplot.getcoords(id), TRUE)
                        Sys.sleep(0.5)
        }
        tkplot.off()
    return(coords)
        }

        if (p.type == 'rgl') {
                library('rgl')
                rglplot(g.toplot,vertex.label= vire.nonascii(v.label), edge.width=we.width/10, vertex.size=0.01, vertex.color=vertex.col, vertex.label.color="black", edge.color = edge.col, layout=lo)
        # spheres are added on the rescaled layout, sized from vertex.size
        los <- layout.norm(lo, -1, 1, -1, 1, -1, 1)
        rgl.spheres(los, col = vertex.col, radius = vertex.size/100, alpha = alpha)
                rgl.bg(color = c('white','black'))
        if (!is.null(movie)) {
            # message boxes are shown before and after the movie is recorded
            require(tcltk)
            ReturnVal <- tkmessageBox(title="RGL 3 D",message="Cliquez pour commencer le film",icon="info",type="ok")

            movie3d(spin3d(axis=c(0,1,0),rpm=6), movie = 'film_graph', frames = "tmpfilm", duration=10, clean=TRUE, top = TRUE, dir = movie)
            ReturnVal <- tkmessageBox(title="RGL 3 D",message="Film fini !",icon="info",type="ok")
        }
        #play3d(spin3d(axis=c(0,1,0),rpm=6))
        # a message box is shown, then the rgl device is closed
        require(tcltk)
        ReturnVal <- tkmessageBox(title="RGL 3 D",message="Cliquez pour fermer",icon="info",type="ok")
        rgl.close()
        #       while (rgl.cur() != 0)
        #               Sys.sleep(0.5)
        }
}
 290
 291
 292 graph.word <- function(mat.simi, index) {
 293     nm <- matrix(0, ncol = ncol(mat.simi), nrow=nrow(mat.simi), dimnames=list(row.names(mat.simi), colnames(mat.simi)))
 294     nm[,index] <- mat.simi[,index]
 295     nm[index,] <- mat.simi[index,]
 296     nm
 297 }