iramuteq.org Git - iramuteq/blob - Rscripts/simi.R

   1 #from proxy package
   2 #############################################################
   3 #a, b, c, and d are the counts of all (TRUE, TRUE), (TRUE, FALSE), (FALSE, TRUE), and (FALSE, FALSE)
   4 # n <- a + b + c + d = nrow(x)
   5
   6 make.a <- function(x) {
   7     a  <- t(x) %*% (x)
   8     a
   9 }
  10
  11 make.b <- function(x) {
  12     b <- t(x) %*% (1 - x)
  13     b
  14 }
  15
  16 make.c <- function(x) {
  17     c <- (1-t(x)) %*% x
  18     c
  19 }
  20
  21 make.d <- function(x, a, b, c) {
  22 #??????????? ncol ?
  23     d <- ncol(x) - a - b - c
  24     d
  25 }
  26
  27 ###########################################
  28 #x, a
  29 ###########################################
  30 my.jaccard <- function(x) {
  31     a <- make.a(x)
  32     b <- make.b(x)
  33     c <- make.c(x)
  34     #d <- make.d(x, a, b, c)
  35     jac <- a / (a + b + c)
  36     jac
  37 }
  38
  39
  40 prcooc <- function(x, a) {
  41     prc <- (a / nrow(x))
  42     prc
  43 }
  44
  45 make.bin <- function(cs, a, i, j, nb) {
  46     if (a[i, j] >= 1) {
  47         ab <- a[i, j] - 1
  48         res <- binom.test(ab, nb, (cs[i]/nb) * (cs[j]/nb), "less")
  49     } else {
  50         res <- NULL
  51         res$p.value <- 0
  52     }
  53     #res <- binom.test(ab, nb, (cs[i]/nb) * (cs[j]/nb), "less")
  54     res$p.value
  55     }
  56
  57 binom.sim <- function(x) {
  58     a <- make.a(x)
  59     n <- nrow(x)
  60     cs <- colSums(x)
  61     mat <- matrix(0,ncol(x),ncol(x))
  62     colnames(mat)<-colnames(a)
  63     rownames(mat)<-rownames(a)
  64     for (i in 1:(ncol(x)-1)) {
  65         for (j in (i+1):ncol(x)) {
  66             mat[j,i] <- make.bin(cs, a, i, j , n)
  67         }
  68     }
  69 #    print(mat)
  70     mat
  71 }
  72
  73
  74 ############################################
  75 # a, b, c
  76 ############################################
  77 # jaccard a, b, c   a / (a + b + c)
  78 # Kulczynski1 a, b, c   a / (b + c)
  79 # Kulczynski2 a, b, c   [a / (a + b) + a / (a + c)] / 2
  80 # Mountford a, b, c    2a / (ab + ac + 2bc)
  81 # Fager, McGowan a, b, c   a / sqrt((a + b)(a + c)) - 1 / 2 sqrt(a + c)
  82 # Russel, Rao a (a/n)
  83 # Dice, Czekanowski, Sorensen a, b, c   2a / (2a + b + c)
  84 # Mozley, Margalef a, b, c  an / (a + b)(a + c)
  85 # Ochiai a, b, c  a / sqrt[(a + b)(a + c)]
  86 # Simpson a, b, c   a / min{(a + b), (a + c)}
  87 # Braun-Blanquet a, b, c  a / max{(a + b), (a + c)}
  88
  89 #simple matching, Sokal/Michener a, b, c, d, ((a + d) /n)
  90 # Hamman, a, b, c, d, ([a + d] - [b + c]) / n
  91 # Faith , a, b, c, d, (a + d/2) / n
  92 # Tanimoto, Rogers a, b, c, d, (a + d) / (a + 2b + 2c + d)
  93 # Phi  a, b, c, d   (ad - bc) / sqrt[(a + b)(c + d)(a + c)(b + d)]
  94 # Stiles a, b, c, d  log(n(|ad-bc| - 0.5n)^2 / [(a + b)(c + d)(a + c)(b + d)])
  95 # Michael   a, b, c, d   4(ad - bc) / [(a + d)^2 + (b + c)^2]
  96 # Yule a, b, c, d  (ad - bc) / (ad + bc)
  97 # Yule2  a, b, c, d  (sqrt(ad) - sqrt(bc)) / (sqrt(ad) + sqrt(bc))
  98
  99 BuildProf01<-function(x,classes) {
 100         #x : donnees en 0/1
 101         #classes : classes de chaque lignes de x
 102         dm<-cbind(x,cl=classes)
 103         clnb=length(summary(as.data.frame(as.character(classes)),max=100))
 104         print(clnb)
 105         print(summary(as.data.frame(as.character(classes)),max=100))
 106         mat<-matrix(0,ncol(x),clnb)
 107         rownames(mat)<-colnames(x)
 108         for (i in 1:clnb) {
 109                 dtmp<-dm[which(dm$cl==i),]
 110                 for (j in 1:(ncol(dtmp)-1)) {
 111                         mat[j,i]<-sum(dtmp[,j])
 112                 }
 113         }
 114         mat
 115 }
 116
 117 do.simi <- function(x, method = 'cooc',seuil = NULL, p.type = 'tkplot',layout.type = 'frutch', max.tree = TRUE, coeff.vertex=NULL, coeff.edge = NULL, minmaxeff=c(NULL,NULL), vcexminmax= c(NULL,NULL), cex = 1, coords = NULL, communities = NULL, halo = FALSE, fromcoords=NULL, forvertex=NULL) {
 118         mat.simi <- x$mat
 119     mat.eff <- x$eff
 120     v.label <- colnames(mat.simi)
 121         g1<-graph.adjacency(mat.simi,mode="lower",weighted=TRUE)
 122         g.toplot<-g1
 123         weori<-get.edge.attribute(g1,'weight')
 124         if (max.tree) {
 125         if (method == 'cooc') {
 126                     invw <- 1 / weori
 127         } else {
 128             invw <- 1 - weori
 129         }
 130                 E(g1)$weight<-invw
 131                 g.max<-minimum.spanning.tree(g1)
 132         if (method == 'cooc') {
 133                     E(g.max)$weight<-1 / E(g.max)$weight
 134         } else {
 135             E(g.max)$weight<-1 - E(g.max)$weight
 136         }
 137                 g.toplot<-g.max
 138         }
 139
 140     if (!is.null(seuil)) {
 141         if (seuil >= max(mat.simi)) seuil <- -Inf
 142         vec<-vector()
 143         w<-E(g.toplot)$weight
 144         tovire <- which(w<=seuil)
 145         g.toplot <- delete.edges(g.toplot,(tovire))
 146         for (i in 1:(length(V(g.toplot)))) {
 147             if (length(neighbors(g.toplot,i))==0) {
 148                 vec<-append(vec,i)
 149             }
 150         }
 151         g.toplot <- delete.vertices(g.toplot,vec)
 152         v.label <- V(g.toplot)$name
 153         if (!is.logical(vec)) mat.eff <- mat.eff[-(vec)]
 154     } else {
 155                 vec <- NULL
 156         }
 157
 158         if (!is.null(minmaxeff[1])) {
 159         eff<-norm.vec(mat.eff,minmaxeff[1],minmaxeff[2])
 160     } else {
 161         eff<-coeff.vertex
 162     }
 163     if (!is.null(vcexminmax[1])) {
 164         label.cex = norm.vec(mat.eff, vcexminmax[1], vcexminmax[2])
 165     } else {
 166         label.cex = cex
 167     }
 168     if (!is.null(coeff.edge)) {
 169         #FIXME
 170         we.width <- norm.vec(abs(E(g.toplot)$weight), coeff.edge[1], coeff.edge[2])
 171             #we.width <- abs((E(g.toplot)$weight/max(abs(E(g.toplot)$weight)))*coeff.edge)
 172     } else {
 173         we.width <- NULL
 174     }
 175     if (method != 'binom') {
 176         we.label <- round(E(g.toplot)$weight,3)
 177     } else {
 178         we.label <- round(E(g.toplot)$weight,4)
 179     }
 180         if (p.type=='rgl' || p.type=='rglweb') {
 181         nd<-3
 182     } else {
 183         nd<-2
 184     }
 185     if (! is.null(fromcoords)) {
 186         newfrom <- matrix(runif(nd*length(V(g.toplot)$name),min(fromcoords)),max(fromcoords),ncol=nd, nrow=length(V(g.toplot)$name))
 187         for (i in 1:length(V(g.toplot)$name)) {
 188             if(V(g.toplot)$name[i] %in% forvertex) {
 189                 newfrom[i,] <- fromcoords[which(forvertex==V(g.toplot)$name[i]),]
 190             }
 191         }
 192        fromcoords <- newfrom
 193     }
 194     #print(layout.type)
 195     if (is.null(coords)) {
 196         if (layout.type == 'frutch') {
 197             #lo <- layout_with_drl(g.toplot,dim=nd)
 198             lo <- layout_with_fr(g.toplot,dim=nd, grid="grid", niter=10000, weights=1/E(g.toplot)$weight)#, start.temp = 1)#, )
 199         }
 200         if (layout.type == 'kawa') {
 201                 lo <- layout_with_kk(g.toplot,dim=nd, weights=1/E(g.toplot)$weight, start=fromcoords, epsilon=0, maxiter = 10000)
 202             #print(lo)
 203         }
 204         if (layout.type == 'random')
 205                 lo <- layout_on_grid(g.toplot,dim=nd)
 206         if (layout.type == 'circle' & p.type != 'rgl')
 207                 lo <- layout_in_circle(g.toplot)
 208         if (layout.type == 'circle' & p.type == 'rgl')
 209                 lo <- layout_on_sphere(g.toplot)
 210         if (layout.type == 'graphopt')
 211             lo <- layout_as_tree(g.toplot, circular = TRUE)
 212     } else {
 213         lo <- coords
 214     }
 215     if (!is.null(communities)) {
 216         if (communities == 0 ){ #'edge.betweenness.community') {
 217             com <- edge.betweenness.community(g.toplot)
 218         } else if (communities == 1) {
 219             com <- fastgreedy.community(g.toplot)
 220         } else if (communities == 2) {
 221             com <- label.propagation.community(g.toplot)
 222         } else if (communities == 3) {
 223             com <- leading.eigenvector.community(g.toplot)
 224         } else if (communities == 4) {
 225             com <- multilevel.community(g.toplot)
 226         } else if (communities == 5) {
 227             com <- optimal.community(g.toplot)
 228         } else if (communities == 6) {
 229             com <- spinglass.community(g.toplot)
 230         } else if (communities == 7) {
 231             com <- walktrap.community(g.toplot)
 232         }
 233     } else {
 234         com <- NULL
 235     }
 236
 237         out <- list(graph = g.toplot, mat.eff = mat.eff, eff = eff, mat = mat.simi, v.label = v.label, we.width = we.width, we.label=we.label, label.cex = label.cex, layout = lo, communities = com, halo = halo, elim=vec)
 238 }
 239
 240 plot.simi <- function(graph.simi, p.type = 'tkplot',filename=NULL, communities = NULL, vertex.col = 'red', edge.col = 'black', edge.label = TRUE, vertex.label=TRUE, vertex.label.color = 'black', vertex.label.cex= NULL, vertex.size=NULL, leg=NULL, width = 800, height = 800, alpha = 0.1, cexalpha = FALSE, movie = NULL, edge.curved = TRUE, svg = FALSE, bg='white') {
 241         mat.simi <- graph.simi$mat
 242         g.toplot <- graph.simi$graph
 243     if (is.null(vertex.size)) {
 244         vertex.size <- graph.simi$eff
 245     } else {
 246         vertex.size <- vertex.size
 247     }
 248         we.width <- graph.simi$we.width
 249     if (vertex.label) {
 250         #v.label <- vire.nonascii(graph.simi$v.label)
 251         v.label <- graph.simi$v.label
 252     } else {
 253         v.label <- NA
 254     }
 255     if (edge.label) {
 256         we.label <- graph.simi$we.label
 257     } else {
 258         we.label <- NA
 259     }
 260         lo <- graph.simi$layout
 261     #rownames(lo) <- v.label
 262     if (!is.null(vertex.label.cex)) {
 263         label.cex<-vertex.label.cex
 264     } else {
 265         label.cex = graph.simi$label.cex
 266     }
 267
 268     if (cexalpha) {
 269         alphas <- norm.vec(label.cex, 0.5,1)
 270         nvlc <- NULL
 271         if (length(vertex.label.color) == 1) {
 272             for (i in 1:length(alphas)) {
 273              nvlc <- append(nvlc, adjustcolor(vertex.label.color, alpha=alphas[i]))
 274             }
 275         } else {
 276             for (i in 1:length(alphas)) {
 277                 nvlc <- append(nvlc, adjustcolor(vertex.label.color[i], alpha=alphas[i]))
 278             }
 279         }
 280         vertex.label.color <- nvlc
 281     }
 282     if (p.type=='nplot') {
 283         #print('ATTENTION - PAS OPEN FILE')
 284         open_file_graph(filename, width = width, height = height, svg = svg)
 285         par(mar=c(2,2,2,2))
 286         par(bg=bg)
 287         if (!is.null(leg)) {
 288             layout(matrix(c(1,2),1,2, byrow=TRUE),widths=c(3,lcm(7)))
 289             par(mar=c(2,2,1,0))
 290         }
 291         par(pch=' ')
 292         if (is.null(graph.simi$com)) {
 293             plot(g.toplot,vertex.label='', edge.width=we.width, vertex.size=vertex.size, vertex.color=vertex.col, vertex.label.color='white', edge.label=we.label, edge.label.cex=cex, edge.color=edge.col, vertex.label.cex = 0, layout=lo, edge.curved=edge.curved)#, rescale = FALSE)
 294         } else {
 295             if (graph.simi$halo) {
 296                 mark.groups <- communities(graph.simi$com)
 297             } else {
 298                 mark.groups <- NULL
 299             }
 300             plot(com, g.toplot,vertex.label='', edge.width=we.width, vertex.size=vertex.size, vertex.color=vertex.col, vertex.label.color='white', edge.label=we.label, edge.label.cex=cex, edge.color=edge.col, vertex.label.cex = 0, layout=lo, mark.groups = mark.groups, edge.curved=edge.curved)
 301         }
 302         #txt.layout <- lo
 303         txt.layout <- layout.norm(lo, -1, 1, -1, 1, -1, 1)
 304         #txt.layout <- txt.layout[order(label.cex),]
 305         #vertex.label.color <- vertex.label.color[order(label.cex)]
 306         #v.label <- v.label[order(label.cex)]
 307         #label.cex <- label.cex[order(label.cex)]
 308         text(txt.layout[,1], txt.layout[,2], v.label, cex=label.cex, col=vertex.label.color)
 309         if (!is.null(leg)) {
 310             par(mar=c(0,0,0,0))
 311             plot(0, axes = FALSE, pch = '')
 312             legend(x = 'center' , leg$unetoile, fill = leg$gcol)
 313         }
 314         dev.off()
 315         return(lo)
 316     }
 317         if (p.type=='tkplot') {
 318                 id <- tkplot(g.toplot,vertex.label=v.label, edge.width=we.width, vertex.size=vertex.size, vertex.color=vertex.col, vertex.label.color=vertex.label.color, edge.label=we.label, edge.color=edge.col, layout=lo)
 319         coords = tkplot.getcoords(id)
 320         ok <- try(coords <- tkplot.getcoords(id), TRUE)
 321                 while (is.matrix(ok)) {
 322             ok <- try(coords <- tkplot.getcoords(id), TRUE)
 323                         Sys.sleep(0.5)
 324         }
 325         tkplot.off()
 326     return(coords)
 327         }
 328
 329         if (p.type == 'rgl' || p.type == 'rglweb') {
 330                 library('rgl')
 331         #rgl.open()
 332         #par3d(cex=0.8)
 333         lo <- layout.norm(lo, -10, 10, -10, 10, -10, 10)
 334                 bg3d('white')
 335                 rglplot(g.toplot,vertex.label='', edge.width=we.width/10, vertex.size=0.01, vertex.color=vertex.col, vertex.label.color="black", edge.color = edge.col, layout=lo, rescale = FALSE)
 336         #los <- layout.norm(lo, -1, 1, -1, 1, -1, 1)
 337                 text3d(lo[,1], lo[,2], lo[,3], vire.nonascii(v.label), col = vertex.label.color, alpha = 1, cex = vertex.label.cex)
 338         rgl.spheres(lo, col = vertex.col, radius = vertex.size/100, alpha = alpha)
 339         #rgl.bg(color = c('white','black'))
 340         #bg3d('white')
 341         if (!is.null(movie)) {
 342             require(tcltk)
 343             ReturnVal <- tkmessageBox(title="RGL 3 D",message="Cliquez pour commencer le film",icon="info",type="ok")
 344
 345             movie3d(spin3d(axis=c(0,1,0),rpm=6), movie = 'film_graph', frames = "tmpfilm", duration=10, clean=TRUE, top = TRUE, dir = movie)
 346             ReturnVal <- tkmessageBox(title="RGL 3 D",message="Film fini !",icon="info",type="ok")
 347         }
 348         #play3d(spin3d(axis=c(0,1,0),rpm=6))
 349         if (p.type == 'rglweb') {
 350             writeWebGL(dir = filename, width = width, height= height)
 351         } else {
 352             require(tcltk)
 353             ReturnVal <- tkmessageBox(title="RGL 3 D",message="Cliquez pour fermer",icon="info",type="ok")
 354         }
 355         rgl.close()
 356         #       while (rgl.cur() != 0)
 357         #               Sys.sleep(0.5)
 358         } else if (p.type == 'web') {
 359                 library(rgexf)
 360         graph.simi$label.cex <- label.cex
 361         graph.simi$color <- vertex.col
 362         label <- v.label
 363         nodes.attr <- data.frame(label)
 364                 simi.to.gexf(filename, graph.simi, nodes.attr = nodes.attr)
 365         }
 366 }
 367
 368
 369 graph.word <- function(mat.simi, index) {
 370     nm <- matrix(0, ncol = ncol(mat.simi), nrow=nrow(mat.simi), dimnames=list(row.names(mat.simi), colnames(mat.simi)))
 371     nm[,index] <- mat.simi[,index]
 372     nm[index,] <- mat.simi[index,]
 373     nm
 374 }
 375
 376 #from :
 377 #http://gopalakrishna.palem.in/iGraphExport.html#GexfExport
 378 # Converts the given igraph object to GEXF format and saves it at the given filepath location
 379 #     g: input igraph object to be converted to gexf format
 380 #     filepath: file location where the output gexf file should be saved
 381 #
 382 saveAsGEXF = function(g, filepath="converted_graph.gexf")
 383 {
 384   require(igraph)
 385   require(rgexf)
 386
 387   # gexf nodes require two column data frame (id, label)
 388   # check if the input vertices has label already present
 389   # if not, just have the ids themselves as the label
 390   if(is.null(V(g)$label))
 391     V(g)$label <- as.character(V(g))
 392
 393   # similarily if edges does not have weight, add default 1 weight
 394   if(is.null(E(g)$weight))
 395     E(g)$weight <- rep.int(1, ecount(g))
 396
 397   nodes <- data.frame(cbind(1:vcount(g), V(g)$label))
 398   nodes[,1] <- as.character(nodes[,1])
 399   nodes[,2] <- as.character(nodes[,2])
 400   edges <- t(Vectorize(get.edge, vectorize.args='id')(g, 1:ecount(g)))
 401
 402   # combine all node attributes into a matrix (and take care of & for xml)
 403   vAttrNames <- setdiff(list.vertex.attributes(g), "label")
 404   for (val in c("x","y","color")) {
 405         vAttrNames <- setdiff(vAttrNames, val)
 406   }
 407   nodesAtt <- data.frame(sapply(vAttrNames, function(attr) sub("&", "&",get.vertex.attribute(g, attr))))
 408   for (i in 1:ncol(nodesAtt)) {
 409       nodesAtt[,i] <- as.character(nodesAtt[,i])
 410   }
 411
 412   # combine all edge attributes into a matrix (and take care of & for xml)
 413   eAttrNames <- setdiff(list.edge.attributes(g), "weight")
 414   edgesAtt <- data.frame(sapply(eAttrNames, function(attr) sub("&", "&",get.edge.attribute(g, attr))))
 415
 416   # combine all graph attributes into a meta-data
 417   graphAtt <- sapply(list.graph.attributes(g), function(attr) sub("&", "&",get.graph.attribute(g, attr)))
 418   ll <- length(V(g)$x)
 419   cc <- t(sapply(V(g)$color, col2rgb, alpha=TRUE))
 420   cc[,4] <- cc[,4]/255
 421   # generate the gexf object
 422   output <- write.gexf(nodes, edges,
 423                        edgesWeight=E(g)$weight,
 424                        edgesAtt = edgesAtt,
 425                        #edgesVizAtt = list(size=as.matrix(E(g)$weight)),
 426                        nodesAtt = nodesAtt,
 427                        nodesVizAtt=list(color=cc, position=cbind(V(g)$x,V(g)$y, rep(0,ll)), size=V(g)$weight),
 428                        meta=c(list(creator="iramuteq", description="igraph -> gexf converted file", keywords="igraph, gexf, R, rgexf"), graphAtt))
 429
 430   print(output, filepath, replace=T)
 431 }
 432
 433
 434 merge.graph <- function(graphs) {
 435     library(colorspace)
 436     ng <- graph.union(graphs, byname=T)
 437     V.weight <- V(ng)$weight_1
 438     E.weight <- E(ng)$weight_1
 439     cols <- rainbow(length(graphs))
 440     V.color <- rep(cols[1], length(V.weight))
 441     for (i in 2:length(graphs)) {
 442         tw <- paste('weight_', i, sep='')
 443         tocomp <- get.vertex.attribute(ng,tw)
 444         totest <- intersect(which(!is.na(V.weight)), which(!is.na(tocomp)))
 445         maxmat <- cbind(V.weight[totest], tocomp[totest])
 446         resmax <- apply(maxmat, 1, which.max)
 447         ncolor <- c(cols[(i-1)], cols[i])
 448         #rbgcol1 <- col2rgb(cols[(i-1)])
 449         #rbgcol1 <- rbgcol1/255
 450         #rgbcol1 <- RGB(rbgcol1[1],rbgcol1[2],rbgcol1[3])
 451         rbgcol2 <- col2rgb(cols[i])
 452         rbgcol2 <- rbgcol2/255
 453         rgbcol2 <- RGB(rbgcol2[1],rbgcol2[2],rbgcol2[3])
 454         for (j in totest) {
 455             alpha <- tocomp[j] /(V.weight[j] + tocomp[j])
 456             rbgcol1 <- col2rgb(V.color[j])
 457             rbgcol1 <- rbgcol1/255
 458             #mix.col <- mixcolor(alpha,rbgcol1, rbgcol2)
 459             mix.col <- mixcolor(alpha, RGB(rbgcol1[1],rbgcol1[2],rbgcol1[3]), RGB(rbgcol2[1],rbgcol2[2],rbgcol2[3]))
 460             V.color[j] <- adjustcolor(hex(mix.col), 0.6)
 461         }
 462         #to.change <- totest[which(resmax == 2)]
 463         #V.color[to.change] <- cols[i]
 464         V.weight[totest] <- apply(maxmat, 1, max)
 465         nas <- which(is.na(V.weight))
 466         nas2 <- which(is.na(tocomp))
 467         fr2 <- setdiff(nas,nas2)
 468         V.weight[fr2] <- tocomp[fr2]
 469         V.color[fr2] <- cols[i]
 470         tocomp <- get.edge.attribute(ng, tw)
 471         totest <- intersect(which(!is.na(E.weight)), which(!is.na(tocomp)))
 472         maxmat <- cbind(E.weight[totest], tocomp[totest])
 473         resmax <- apply(maxmat, 1, which.max)
 474         E.weight[totest] <- apply(maxmat, 1, max)
 475         nas <- which(is.na(E.weight))
 476         nas2 <- which(is.na(tocomp))
 477         fr2 <- setdiff(nas,nas2)
 478         E.weight[fr2] <- tocomp[fr2]
 479     }
 480     V(ng)$weight <- V.weight
 481     print(V.color)
 482     V(ng)$color <- V.color
 483     E(ng)$weight <- E.weight
 484     ng
 485 }