X-Git-Url: http://iramuteq.org/git?p=iramuteq;a=blobdiff_plain;f=Rscripts%2Fsimi.R;h=2a4c8b7f58562da49a8901aa0d16387162cc613a;hp=48b871d4e42240ad78ad6d2937d6a89360cb3d5b;hb=23408178e223dd2080abf593547d7fc718b77f12;hpb=2301b7f97349d29b07fe4f51d30af2721280687a diff --git a/Rscripts/simi.R b/Rscripts/simi.R old mode 100644 new mode 100755 index 48b871d..2a4c8b7 --- a/Rscripts/simi.R +++ b/Rscripts/simi.R @@ -1,6 +1,6 @@ #from proxy package ############################################################# -#a, b, c, and d are the counts of all (TRUE, TRUE), (TRUE, FALSE), (FALSE, TRUE), and (FALSE, FALSE) +#a, b, c, and d are the counts of all (TRUE, TRUE), (TRUE, FALSE), (FALSE, TRUE), and (FALSE, FALSE) # n <- a + b + c + d = nrow(x) make.a <- function(x) { @@ -31,20 +31,39 @@ my.jaccard <- function(x) { a <- make.a(x) b <- make.b(x) c <- make.c(x) - d <- make.d(x, a, b, c) + #d <- make.d(x, a, b, c) jac <- a / (a + b + c) jac } +#Col-wise Jaccard similarity +#http://stats.stackexchange.com/a/89947/2817 +sparse.jaccard <- function(x) { + A = crossprod(x) + ix = which(A > 0, arr.ind=TRUE) + b = colSums(x) + Aix = A[ix] + J = sparseMatrix( + i = ix[,1], + j = ix[,2], + x = Aix / (b[ix[,1]] + b[ix[,2]] - Aix), + dims = dim(A) + ) + colnames(J) <- colnames(x) + rownames(J) <- row.names(x) + return(J) +} + + prcooc <- function(x, a) { - prc <- (a / nrow(x)) + prc <- (a / nrow(x)) prc } make.bin <- function(cs, a, i, j, nb) { if (a[i, j] >= 1) { - ab <- a[i, j] - 1 + ab <- a[i, j] - 1 res <- binom.test(ab, nb, (cs[i]/nb) * (cs[j]/nb), "less") } else { res <- NULL @@ -114,7 +133,7 @@ BuildProf01<-function(x,classes) { mat } -do.simi <- function(x, method = 'cooc',seuil = NULL, p.type = 'tkplot',layout.type = 'frutch', max.tree = TRUE, coeff.vertex=NULL, coeff.edge = NULL, minmaxeff=c(NULL,NULL), vcexminmax= c(NULL,NULL), cex = 1, coords = NULL, communities = NULL, halo = FALSE) { +do.simi <- function(x, method = 'cooc',seuil = NULL, p.type = 'tkplot',layout.type = 'frutch', max.tree = TRUE, coeff.vertex=NULL, coeff.edge = NULL, minmaxeff=c(NULL,NULL), vcexminmax= c(NULL,NULL), cex = 1, coords = NULL, communities = NULL, halo = FALSE, fromcoords=NULL, forvertex=NULL, index.word=NULL) { mat.simi <- x$mat mat.eff <- x$eff v.label <- colnames(mat.simi) @@ -138,7 +157,7 @@ do.simi <- function(x, method = 'cooc',seuil = NULL, p.type = 'tkplot',layout.ty } if (!is.null(seuil)) { - if (seuil >= max(mat.simi)) seuil <- 0 + if (seuil >= max(mat.simi)) seuil <- -Inf vec<-vector() w<-E(g.toplot)$weight tovire <- which(w<=seuil) @@ -166,39 +185,69 @@ do.simi <- function(x, method = 'cooc',seuil = NULL, p.type = 'tkplot',layout.ty label.cex = cex } if (!is.null(coeff.edge)) { - we.width <- norm.vec(abs(E(g.toplot)$weight), coeff.edge[1], coeff.edge[2]) + #FIXME + we.width <- norm.vec(abs(E(g.toplot)$weight), coeff.edge[1], coeff.edge[2]) #we.width <- abs((E(g.toplot)$weight/max(abs(E(g.toplot)$weight)))*coeff.edge) } else { we.width <- NULL } if (method != 'binom') { - we.label <- round(E(g.toplot)$weight,2) - } else { we.label <- round(E(g.toplot)$weight,3) + } else { + we.label <- round(E(g.toplot)$weight,4) } if (p.type=='rgl' || p.type=='rglweb') { nd<-3 } else { nd<-2 } + if (! is.null(fromcoords)) { + newfrom <- matrix(runif(nd*length(V(g.toplot)$name),min(fromcoords)),max(fromcoords),ncol=nd, nrow=length(V(g.toplot)$name)) + for (i in 1:length(V(g.toplot)$name)) { + if(V(g.toplot)$name[i] %in% forvertex) { + newfrom[i,] <- fromcoords[which(forvertex==V(g.toplot)$name[i]),] + } + } + fromcoords <- newfrom + } + #print(layout.type) if (is.null(coords)) { - if (layout.type == 'frutch') - lo <- layout.fruchterman.reingold(g.toplot,dim=nd)#, weightsA=E(g.toplot)$weight) - if (layout.type == 'kawa') - lo <- layout.kamada.kawai(g.toplot,dim=nd) + if (layout.type == 'frutch') { + #lo <- layout_with_drl(g.toplot,dim=nd) + #lo <- layout_with_fr(g.toplot,dim=nd, grid="grid", niter=10000, weights=1/E(g.toplot)$weight)#, start.temp = 1)#, ) + if (nd==2) { + library(sna) + library(intergraph) + lo <- gplot.layout.fruchtermanreingold(asNetwork(g.toplot), list()) + detach("package:intergraph", unload=TRUE) + detach("package:sna", unload=TRUE) + detach("package:network", unload=TRUE) + library(igraph) + } else { + lo <- layout_with_fr(g.toplot,dim=nd) + } + } + if (layout.type == 'kawa') { + lo <- layout_with_kk(g.toplot,dim=nd, weights=1/E(g.toplot)$weight, start=fromcoords, epsilon=0, maxiter = 10000) + #print(lo) + } if (layout.type == 'random') - lo <- layout.random(g.toplot,dim=nd) + lo <- layout_on_grid(g.toplot,dim=nd) if (layout.type == 'circle' & p.type != 'rgl') - lo <- layout.circle(g.toplot) + lo <- layout_in_circle(g.toplot) if (layout.type == 'circle' & p.type == 'rgl') - lo <- layout.sphere(g.toplot) + lo <- layout_on_sphere(g.toplot) if (layout.type == 'graphopt') - lo <- layout.graphopt(g.toplot) + lo <- layout_as_tree(g.toplot, circular = TRUE) + if (layout.type == 'spirale') + lo <- spirale(g.toplot, E(g.toplot)$weight, index.word) + if (layout.type == 'spirale3D') + lo <- spirale3D(g.toplot, E(g.toplot)$weight, index.word) } else { lo <- coords } if (!is.null(communities)) { - if (communities == 0 ){ #'edge.betweenness.community') { + if (communities == 0 ){ com <- edge.betweenness.community(g.toplot) } else if (communities == 1) { com <- fastgreedy.community(g.toplot) @@ -214,14 +263,14 @@ do.simi <- function(x, method = 'cooc',seuil = NULL, p.type = 'tkplot',layout.ty com <- spinglass.community(g.toplot) } else if (communities == 7) { com <- walktrap.community(g.toplot) - } + } } else { com <- NULL } - + out <- list(graph = g.toplot, mat.eff = mat.eff, eff = eff, mat = mat.simi, v.label = v.label, we.width = we.width, we.label=we.label, label.cex = label.cex, layout = lo, communities = com, halo = halo, elim=vec) } - + plot.simi <- function(graph.simi, p.type = 'tkplot',filename=NULL, communities = NULL, vertex.col = 'red', edge.col = 'black', edge.label = TRUE, vertex.label=TRUE, vertex.label.color = 'black', vertex.label.cex= NULL, vertex.size=NULL, leg=NULL, width = 800, height = 800, alpha = 0.1, cexalpha = FALSE, movie = NULL, edge.curved = TRUE, svg = FALSE, bg='white') { mat.simi <- graph.simi$mat g.toplot <- graph.simi$graph @@ -243,11 +292,13 @@ plot.simi <- function(graph.simi, p.type = 'tkplot',filename=NULL, communities = we.label <- NA } lo <- graph.simi$layout + #rownames(lo) <- v.label if (!is.null(vertex.label.cex)) { label.cex<-vertex.label.cex } else { label.cex = graph.simi$label.cex } + if (cexalpha) { alphas <- norm.vec(label.cex, 0.5,1) nvlc <- NULL @@ -331,16 +382,24 @@ plot.simi <- function(graph.simi, p.type = 'tkplot',filename=NULL, communities = #play3d(spin3d(axis=c(0,1,0),rpm=6)) if (p.type == 'rglweb') { writeWebGL(dir = filename, width = width, height= height) - } else { + #rglwidget() + }# else { require(tcltk) ReturnVal <- tkmessageBox(title="RGL 3 D",message="Cliquez pour fermer",icon="info",type="ok") - } + #} rgl.close() # while (rgl.cur() != 0) # Sys.sleep(0.5) } else if (p.type == 'web') { library(rgexf) - simi.to.gexf(filename, graph.simi, nodes.attr = NULL) + graph.simi$label.cex <- label.cex + if (length(vertex.col)==1) { + vertex.col <- rep(vertex.col, length(v.label)) + } + graph.simi$color <- vertex.col + label <- v.label + nodes.attr <- data.frame(label) + simi.to.gexf(filename, graph.simi, nodes.attr = nodes.attr) } } @@ -408,3 +467,197 @@ saveAsGEXF = function(g, filepath="converted_graph.gexf") print(output, filepath, replace=T) } + + +merge.graph <- function(graphs) { + library(colorspace) + ng <- graph.union(graphs, byname=T) + V.weight <- V(ng)$weight_1 + E.weight <- E(ng)$weight_1 + cols <- rainbow(length(graphs)) + V.color <- rep(cols[1], length(V.weight)) + for (i in 2:length(graphs)) { + tw <- paste('weight_', i, sep='') + tocomp <- get.vertex.attribute(ng,tw) + totest <- intersect(which(!is.na(V.weight)), which(!is.na(tocomp))) + maxmat <- cbind(V.weight[totest], tocomp[totest]) + resmax <- apply(maxmat, 1, which.max) + ncolor <- c(cols[(i-1)], cols[i]) + #rbgcol1 <- col2rgb(cols[(i-1)]) + #rbgcol1 <- rbgcol1/255 + #rgbcol1 <- RGB(rbgcol1[1],rbgcol1[2],rbgcol1[3]) + rbgcol2 <- col2rgb(cols[i]) + rbgcol2 <- rbgcol2/255 + #rgbcol2 <- RGB(rbgcol2[1],rbgcol2[2],rbgcol2[3]) + for (j in totest) { + alpha <- tocomp[j] /(V.weight[j] + tocomp[j]) + rbgcol1 <- col2rgb(V.color[j]) + rbgcol1 <- rbgcol1/255 + #mix.col <- mixcolor(alpha,rbgcol1, rbgcol2) + mix.col <- mixcolor(alpha, RGB(rbgcol1[1],rbgcol1[2],rbgcol1[3]), RGB(rbgcol2[1],rbgcol2[2],rbgcol2[3])) + V.color[j] <- hex(mix.col) + #V.color[j] <- adjustcolor(hex(mix.col), 0.6) + } + #to.change <- totest[which(resmax == 2)] + #V.color[to.change] <- cols[i] + V.weight[totest] <- apply(maxmat, 1, max) + nas <- which(is.na(V.weight)) + nas2 <- which(is.na(tocomp)) + fr2 <- setdiff(nas,nas2) + V.weight[fr2] <- tocomp[fr2] + V.color[fr2] <- cols[i] + tocomp <- get.edge.attribute(ng, tw) + totest <- intersect(which(!is.na(E.weight)), which(!is.na(tocomp))) + maxmat <- cbind(E.weight[totest], tocomp[totest]) + resmax <- apply(maxmat, 1, which.max) + E.weight[totest] <- apply(maxmat, 1, max) + nas <- which(is.na(E.weight)) + nas2 <- which(is.na(tocomp)) + fr2 <- setdiff(nas,nas2) + E.weight[fr2] <- tocomp[fr2] + } + V(ng)$weight <- V.weight + V(ng)$color <- V.color + E(ng)$weight <- E.weight + colors <- col2rgb(V(ng)$color) + V(ng)$r <- colors["red", ] + V(ng)$g <- colors["green", ] + V(ng)$b <- colors["blue", ] + ng +} + +merge.graph.proto <- function(graphs) { + library(colorspace) + ng <- graph.union(graphs, byname=T) + V.weight <- V(ng)$weight_1 + E.weight <- E(ng)$weight_1 + V.proto.color <- V(ng)$proto.color_1 + cols <- rainbow(length(graphs)) + V.color <- rep(cols[1], length(V.weight)) + for (i in 2:length(graphs)) { + tw <- paste('weight_', i, sep='') + tocomp <- get.vertex.attribute(ng,tw) + totest <- intersect(which(!is.na(V.weight)), which(!is.na(tocomp))) + maxmat <- cbind(V.weight[totest], tocomp[totest]) + resmax <- apply(maxmat, 1, which.max) + V.weight[totest] <- apply(maxmat, 1, max) + nas <- which(is.na(V.weight)) + nas2 <- which(is.na(tocomp)) + fr2 <- setdiff(nas,nas2) + V.weight[fr2] <- tocomp[fr2] + + cw <- paste('proto.color_', i, sep='') + tocomp.col <- get.vertex.attribute(ng,cw) + which.sup <- which(resmax==2) + V.proto.color[totest[which.sup]] <- tocomp.col[totest[which.sup]] + V.proto.color[fr2] <- tocomp.col[fr2] + + V.color[totest[which.sup]] <- cols[i] + V.color[fr2] <- cols[i] + + tocomp <- get.edge.attribute(ng, tw) + totest <- intersect(which(!is.na(E.weight)), which(!is.na(tocomp))) + maxmat <- cbind(E.weight[totest], tocomp[totest]) + resmax <- apply(maxmat, 1, which.max) + E.weight[totest] <- apply(maxmat, 1, max) + nas <- which(is.na(E.weight)) + nas2 <- which(is.na(tocomp)) + fr2 <- setdiff(nas,nas2) + E.weight[fr2] <- tocomp[fr2] + } + V(ng)$weight <- V.weight + V(ng)$proto.color <- V.proto.color + V(ng)$color <- V.proto.color + E(ng)$weight <- E.weight + V(ng)$ocolor <- V.color + colors <- col2rgb(V(ng)$color) + V(ng)$r <- colors["red", ] + V(ng)$g <- colors["green", ] + V(ng)$b <- colors["blue", ] + ng +} + + +spirale <- function(g, weigth, center, miny=0.1) { + ncoord <- matrix(0, nrow=length(weigth)+1, ncol=2) + v.names <- V(g)$name + center.name <- v.names[center] + first <- which.max(weigth)[1] + if (head_of(g, first)$name == center.name) { + n.name <- tail_of(g, first) + } else { + n.name <- head_of(g, first) + } + n.name <- n.name$name + nb <- length(weigth) + ncoord[which(v.names==n.name),] <- c(0,1) + weigth[first] <- 0 + rs <- norm.vec(weigth,1, miny) + nbt <- nb %/% 50 + if (nbt == 0) nbt <- 1 + angler <- ((360 * nbt) / (nb- 1)) * (pi/180) + ang <- 90 * (pi/180) + rr <- (1-miny) / (nb-1) + r <- 1 + while (max(weigth != 0)) { + first <- which.max(weigth)[1] + if (head_of(g, first)$name == center.name) { + n.name <- tail_of(g, first) + } else { + n.name <- head_of(g, first) + } + n.name <- n.name$name + #r <- rs[first] + r <- r - rr + ang <- ang + angler + x <- r * cos(ang) + y <- r * sin(ang) + weigth[first] <- 0 + ncoord[which(v.names==n.name),] <- c(x,y) + } + ncoord +} + +spirale3D <- function(g, weigth, center, miny=0.1) { + ncoord <- matrix(0, nrow=length(weigth)+1, ncol=3) + v.names <- V(g)$name + center.name <- v.names[center] + first <- which.max(weigth)[1] + if (head_of(g, first)$name == center.name) { + n.name <- tail_of(g, first) + } else { + n.name <- head_of(g, first) + } + n.name <- n.name$name + nb <- length(weigth) + ncoord[which(v.names==n.name),] <- c(0,0,1) + weigth[first] <- 0 + rs <- norm.vec(weigth,1, miny) + nbt <- nb %/% 50 + if (nbt == 0) nbt <- 1 + angler <- ((360 * nbt) / (nb- 1)) * (pi/180) + theta <- 0 + phi <- 90 * (pi/180) + rr <- (1-miny) / (nb-1) + r <- 1 + while (max(weigth != 0)) { + first <- which.max(weigth)[1] + if (head_of(g, first)$name == center.name) { + n.name <- tail_of(g, first) + } else { + n.name <- head_of(g, first) + } + n.name <- n.name$name + #r <- rs[first] + r <- r - rr + theta <- theta + angler + phi <- phi + angler/2 + x <- r * sin(theta) * cos(phi) + y <- r * sin(theta) * sin(phi) + z <- r * cos(theta) + weigth[first] <- 0 + ncoord[which(v.names==n.name),] <- c(x,y,z) + } + ncoord +} +