X-Git-Url: http://iramuteq.org/git?p=iramuteq;a=blobdiff_plain;f=Rscripts%2Fsimi.R;h=2a4c8b7f58562da49a8901aa0d16387162cc613a;hp=0439a3b5dc1de84a9d7529d2d6b76c29b5a99bba;hb=23408178e223dd2080abf593547d7fc718b77f12;hpb=ed43a2036c36b4146a957ae2ba893e80b3c14342 diff --git a/Rscripts/simi.R b/Rscripts/simi.R old mode 100644 new mode 100755 index 0439a3b..2a4c8b7 --- a/Rscripts/simi.R +++ b/Rscripts/simi.R @@ -1,6 +1,6 @@ #from proxy package ############################################################# -#a, b, c, and d are the counts of all (TRUE, TRUE), (TRUE, FALSE), (FALSE, TRUE), and (FALSE, FALSE) +#a, b, c, and d are the counts of all (TRUE, TRUE), (TRUE, FALSE), (FALSE, TRUE), and (FALSE, FALSE) # n <- a + b + c + d = nrow(x) make.a <- function(x) { @@ -36,15 +36,34 @@ my.jaccard <- function(x) { jac } +#Col-wise Jaccard similarity +#http://stats.stackexchange.com/a/89947/2817 +sparse.jaccard <- function(x) { + A = crossprod(x) + ix = which(A > 0, arr.ind=TRUE) + b = colSums(x) + Aix = A[ix] + J = sparseMatrix( + i = ix[,1], + j = ix[,2], + x = Aix / (b[ix[,1]] + b[ix[,2]] - Aix), + dims = dim(A) + ) + colnames(J) <- colnames(x) + rownames(J) <- row.names(x) + return(J) +} + + prcooc <- function(x, a) { - prc <- (a / nrow(x)) + prc <- (a / nrow(x)) prc } make.bin <- function(cs, a, i, j, nb) { if (a[i, j] >= 1) { - ab <- a[i, j] - 1 + ab <- a[i, j] - 1 res <- binom.test(ab, nb, (cs[i]/nb) * (cs[j]/nb), "less") } else { res <- NULL @@ -114,7 +133,7 @@ BuildProf01<-function(x,classes) { mat } -do.simi <- function(x, method = 'cooc',seuil = NULL, p.type = 'tkplot',layout.type = 'frutch', max.tree = TRUE, coeff.vertex=NULL, coeff.edge = NULL, minmaxeff=c(NULL,NULL), vcexminmax= c(NULL,NULL), cex = 1, coords = NULL, communities = NULL, halo = FALSE, fromcoords=NULL, forvertex=NULL) { +do.simi <- function(x, method = 'cooc',seuil = NULL, p.type = 'tkplot',layout.type = 'frutch', max.tree = TRUE, coeff.vertex=NULL, coeff.edge = NULL, minmaxeff=c(NULL,NULL), vcexminmax= c(NULL,NULL), cex = 1, coords = NULL, communities = NULL, halo = FALSE, fromcoords=NULL, forvertex=NULL, index.word=NULL) { mat.simi <- x$mat mat.eff <- x$eff v.label <- colnames(mat.simi) @@ -167,7 +186,7 @@ do.simi <- function(x, method = 'cooc',seuil = NULL, p.type = 'tkplot',layout.ty } if (!is.null(coeff.edge)) { #FIXME - we.width <- norm.vec(abs(E(g.toplot)$weight), coeff.edge[1], coeff.edge[2]) + we.width <- norm.vec(abs(E(g.toplot)$weight), coeff.edge[1], coeff.edge[2]) #we.width <- abs((E(g.toplot)$weight/max(abs(E(g.toplot)$weight)))*coeff.edge) } else { we.width <- NULL @@ -195,7 +214,18 @@ do.simi <- function(x, method = 'cooc',seuil = NULL, p.type = 'tkplot',layout.ty if (is.null(coords)) { if (layout.type == 'frutch') { #lo <- layout_with_drl(g.toplot,dim=nd) - lo <- layout_with_fr(g.toplot,dim=nd, grid="grid", niter=10000, weights=1/E(g.toplot)$weight)#, start.temp = 1)#, ) + #lo <- layout_with_fr(g.toplot,dim=nd, grid="grid", niter=10000, weights=1/E(g.toplot)$weight)#, start.temp = 1)#, ) + if (nd==2) { + library(sna) + library(intergraph) + lo <- gplot.layout.fruchtermanreingold(asNetwork(g.toplot), list()) + detach("package:intergraph", unload=TRUE) + detach("package:sna", unload=TRUE) + detach("package:network", unload=TRUE) + library(igraph) + } else { + lo <- layout_with_fr(g.toplot,dim=nd) + } } if (layout.type == 'kawa') { lo <- layout_with_kk(g.toplot,dim=nd, weights=1/E(g.toplot)$weight, start=fromcoords, epsilon=0, maxiter = 10000) @@ -209,11 +239,15 @@ do.simi <- function(x, method = 'cooc',seuil = NULL, p.type = 'tkplot',layout.ty lo <- layout_on_sphere(g.toplot) if (layout.type == 'graphopt') lo <- layout_as_tree(g.toplot, circular = TRUE) + if (layout.type == 'spirale') + lo <- spirale(g.toplot, E(g.toplot)$weight, index.word) + if (layout.type == 'spirale3D') + lo <- spirale3D(g.toplot, E(g.toplot)$weight, index.word) } else { lo <- coords } if (!is.null(communities)) { - if (communities == 0 ){ #'edge.betweenness.community') { + if (communities == 0 ){ com <- edge.betweenness.community(g.toplot) } else if (communities == 1) { com <- fastgreedy.community(g.toplot) @@ -229,14 +263,14 @@ do.simi <- function(x, method = 'cooc',seuil = NULL, p.type = 'tkplot',layout.ty com <- spinglass.community(g.toplot) } else if (communities == 7) { com <- walktrap.community(g.toplot) - } + } } else { com <- NULL } - + out <- list(graph = g.toplot, mat.eff = mat.eff, eff = eff, mat = mat.simi, v.label = v.label, we.width = we.width, we.label=we.label, label.cex = label.cex, layout = lo, communities = com, halo = halo, elim=vec) } - + plot.simi <- function(graph.simi, p.type = 'tkplot',filename=NULL, communities = NULL, vertex.col = 'red', edge.col = 'black', edge.label = TRUE, vertex.label=TRUE, vertex.label.color = 'black', vertex.label.cex= NULL, vertex.size=NULL, leg=NULL, width = 800, height = 800, alpha = 0.1, cexalpha = FALSE, movie = NULL, edge.curved = TRUE, svg = FALSE, bg='white') { mat.simi <- graph.simi$mat g.toplot <- graph.simi$graph @@ -348,16 +382,20 @@ plot.simi <- function(graph.simi, p.type = 'tkplot',filename=NULL, communities = #play3d(spin3d(axis=c(0,1,0),rpm=6)) if (p.type == 'rglweb') { writeWebGL(dir = filename, width = width, height= height) - } else { + #rglwidget() + }# else { require(tcltk) ReturnVal <- tkmessageBox(title="RGL 3 D",message="Cliquez pour fermer",icon="info",type="ok") - } + #} rgl.close() # while (rgl.cur() != 0) # Sys.sleep(0.5) } else if (p.type == 'web') { library(rgexf) graph.simi$label.cex <- label.cex + if (length(vertex.col)==1) { + vertex.col <- rep(vertex.col, length(v.label)) + } graph.simi$color <- vertex.col label <- v.label nodes.attr <- data.frame(label) @@ -457,7 +495,8 @@ merge.graph <- function(graphs) { rbgcol1 <- rbgcol1/255 #mix.col <- mixcolor(alpha,rbgcol1, rbgcol2) mix.col <- mixcolor(alpha, RGB(rbgcol1[1],rbgcol1[2],rbgcol1[3]), RGB(rbgcol2[1],rbgcol2[2],rbgcol2[3])) - V.color[j] <- adjustcolor(hex(mix.col), 0.6) + V.color[j] <- hex(mix.col) + #V.color[j] <- adjustcolor(hex(mix.col), 0.6) } #to.change <- totest[which(resmax == 2)] #V.color[to.change] <- cols[i] @@ -480,5 +519,145 @@ merge.graph <- function(graphs) { V(ng)$weight <- V.weight V(ng)$color <- V.color E(ng)$weight <- E.weight + colors <- col2rgb(V(ng)$color) + V(ng)$r <- colors["red", ] + V(ng)$g <- colors["green", ] + V(ng)$b <- colors["blue", ] + ng +} + +merge.graph.proto <- function(graphs) { + library(colorspace) + ng <- graph.union(graphs, byname=T) + V.weight <- V(ng)$weight_1 + E.weight <- E(ng)$weight_1 + V.proto.color <- V(ng)$proto.color_1 + cols <- rainbow(length(graphs)) + V.color <- rep(cols[1], length(V.weight)) + for (i in 2:length(graphs)) { + tw <- paste('weight_', i, sep='') + tocomp <- get.vertex.attribute(ng,tw) + totest <- intersect(which(!is.na(V.weight)), which(!is.na(tocomp))) + maxmat <- cbind(V.weight[totest], tocomp[totest]) + resmax <- apply(maxmat, 1, which.max) + V.weight[totest] <- apply(maxmat, 1, max) + nas <- which(is.na(V.weight)) + nas2 <- which(is.na(tocomp)) + fr2 <- setdiff(nas,nas2) + V.weight[fr2] <- tocomp[fr2] + + cw <- paste('proto.color_', i, sep='') + tocomp.col <- get.vertex.attribute(ng,cw) + which.sup <- which(resmax==2) + V.proto.color[totest[which.sup]] <- tocomp.col[totest[which.sup]] + V.proto.color[fr2] <- tocomp.col[fr2] + + V.color[totest[which.sup]] <- cols[i] + V.color[fr2] <- cols[i] + + tocomp <- get.edge.attribute(ng, tw) + totest <- intersect(which(!is.na(E.weight)), which(!is.na(tocomp))) + maxmat <- cbind(E.weight[totest], tocomp[totest]) + resmax <- apply(maxmat, 1, which.max) + E.weight[totest] <- apply(maxmat, 1, max) + nas <- which(is.na(E.weight)) + nas2 <- which(is.na(tocomp)) + fr2 <- setdiff(nas,nas2) + E.weight[fr2] <- tocomp[fr2] + } + V(ng)$weight <- V.weight + V(ng)$proto.color <- V.proto.color + V(ng)$color <- V.proto.color + E(ng)$weight <- E.weight + V(ng)$ocolor <- V.color + colors <- col2rgb(V(ng)$color) + V(ng)$r <- colors["red", ] + V(ng)$g <- colors["green", ] + V(ng)$b <- colors["blue", ] ng } + + +spirale <- function(g, weigth, center, miny=0.1) { + ncoord <- matrix(0, nrow=length(weigth)+1, ncol=2) + v.names <- V(g)$name + center.name <- v.names[center] + first <- which.max(weigth)[1] + if (head_of(g, first)$name == center.name) { + n.name <- tail_of(g, first) + } else { + n.name <- head_of(g, first) + } + n.name <- n.name$name + nb <- length(weigth) + ncoord[which(v.names==n.name),] <- c(0,1) + weigth[first] <- 0 + rs <- norm.vec(weigth,1, miny) + nbt <- nb %/% 50 + if (nbt == 0) nbt <- 1 + angler <- ((360 * nbt) / (nb- 1)) * (pi/180) + ang <- 90 * (pi/180) + rr <- (1-miny) / (nb-1) + r <- 1 + while (max(weigth != 0)) { + first <- which.max(weigth)[1] + if (head_of(g, first)$name == center.name) { + n.name <- tail_of(g, first) + } else { + n.name <- head_of(g, first) + } + n.name <- n.name$name + #r <- rs[first] + r <- r - rr + ang <- ang + angler + x <- r * cos(ang) + y <- r * sin(ang) + weigth[first] <- 0 + ncoord[which(v.names==n.name),] <- c(x,y) + } + ncoord +} + +spirale3D <- function(g, weigth, center, miny=0.1) { + ncoord <- matrix(0, nrow=length(weigth)+1, ncol=3) + v.names <- V(g)$name + center.name <- v.names[center] + first <- which.max(weigth)[1] + if (head_of(g, first)$name == center.name) { + n.name <- tail_of(g, first) + } else { + n.name <- head_of(g, first) + } + n.name <- n.name$name + nb <- length(weigth) + ncoord[which(v.names==n.name),] <- c(0,0,1) + weigth[first] <- 0 + rs <- norm.vec(weigth,1, miny) + nbt <- nb %/% 50 + if (nbt == 0) nbt <- 1 + angler <- ((360 * nbt) / (nb- 1)) * (pi/180) + theta <- 0 + phi <- 90 * (pi/180) + rr <- (1-miny) / (nb-1) + r <- 1 + while (max(weigth != 0)) { + first <- which.max(weigth)[1] + if (head_of(g, first)$name == center.name) { + n.name <- tail_of(g, first) + } else { + n.name <- head_of(g, first) + } + n.name <- n.name$name + #r <- rs[first] + r <- r - rr + theta <- theta + angler + phi <- phi + angler/2 + x <- r * sin(theta) * cos(phi) + y <- r * sin(theta) * sin(phi) + z <- r * cos(theta) + weigth[first] <- 0 + ncoord[which(v.names==n.name),] <- c(x,y,z) + } + ncoord +} +