X-Git-Url: http://iramuteq.org/git?p=iramuteq;a=blobdiff_plain;f=PrintRScript.py;h=0673aa491013f6a0381bacf5ae846d43e943aa5e;hp=3311a123cfe68357bd1bf6a3cd8d56f299b405e1;hb=bf4bf75287ff2a34d072cf7f7c0dea6ba4281a24;hpb=b4d905ea7bcebb3b1d7be8349ac66fb95a0948c1 diff --git a/PrintRScript.py b/PrintRScript.py old mode 100644 new mode 100755 index 3311a12..0673aa4 --- a/PrintRScript.py +++ b/PrintRScript.py @@ -13,11 +13,14 @@ import logging log = logging.getLogger('iramuteq.printRscript') class PrintRScript : - def __init__ (self, analyse): + def __init__ (self, analyse, parametres = None): log.info('Rscript') self.pathout = analyse.pathout self.analyse = analyse - self.parametres = analyse.parametres + if parametres is None : + self.parametres = analyse.parametres + else : + self.parametres = parametres #self.scriptout = ffr(self.pathout['lastRscript.R']) self.scriptout = self.pathout['temp'] self.script = u"#Script genere par IRaMuTeQ - %s\n" % datetime.now().ctime() @@ -110,7 +113,7 @@ class Alceste2(PrintRScript) : # -def RchdTxt(DicoPath, RscriptPath, mincl, classif_mode, nbt = 9, svdmethod = 'svdR', libsvdc = False, libsvdc_path = None, R_max_mem = False, mode_patate = False): +def RchdTxt(DicoPath, RscriptPath, mincl, classif_mode, nbt = 9, svdmethod = 'svdR', libsvdc = False, libsvdc_path = None, R_max_mem = False, mode_patate = False, nbproc=1): txt = """ source("%s") source("%s") @@ -164,15 +167,18 @@ def RchdTxt(DicoPath, RscriptPath, mincl, classif_mode, nbt = 9, svdmethod = 'sv """ % ffr(DicoPath['TableUc2']) txt += """ log1 <- "%s" - chd1<-CHD(data1, x = nbt, mode.patate = mode.patate, svd.method = - svd.method, libsvdc.path = libsvdc.path, log.file = log1) - """ % ffr(DicoPath['log-chd1.txt']) + print('FIXME : source newCHD') + source('/home/pierre/workspace/iramuteq/Rscripts/newCHD.R') + nbproc <- %s + chd1<-CHD(data1, x = nbt, mode.patate = mode.patate, svd.method = svd.method, libsvdc.path = libsvdc.path, find='matrix', select.next='size', sample=20, amp=500, proc.nb=nbproc) + #chd1<-CHD(data1, x = nbt, mode.patate = mode.patate, svd.method = svd.method, libsvdc.path = libsvdc.path)#, log.file = log1) + """ % (ffr(DicoPath['log-chd1.txt']), nbproc) if classif_mode == 0: txt += """ log2 <- "%s" chd2<-CHD(data2, x = nbt, mode.patate = mode.patate, svd.method = - svd.method, libsvdc.path = libsvdc.path) log.file = log2) + svd.method, libsvdc.path = libsvdc.path)#, log.file = log2) """ % ffr(DicoPath['log-chd2.txt']) txt += """ @@ -196,21 +202,29 @@ def RchdTxt(DicoPath, RscriptPath, mincl, classif_mode, nbt = 9, svdmethod = 'sv txt += """ classif_mode <- %i mincl <- %i + if (mincl == 0) {mincl <- round(nrow(chd1$n1)/(nbt+1))} uceout <- "%s" + write.csv2(chd1$n1, file="%s") if (classif_mode == 0) { chd.result <- Rchdtxt(uceout, chd1, chd2 = chd2, mincl = mincl,classif_mode = classif_mode, nbt = nbt) + classeuce1 <- chd.result$cuce1 + tree.tot1 <- make_tree_tot(chd1) + tree.cut1 <- make_dendro_cut_tuple(tree.tot1$dendro_tuple, chd.result$coord_ok, classeuce1, 1, nbt) + } else { - chd.result <- Rchdtxt(uceout, chd1, chd2 = chd1, mincl = mincl,classif_mode = classif_mode, nbt = nbt) + #chd.result <- Rchdtxt(uceout, chd1, chd2 = chd1, mincl = mincl,classif_mode = classif_mode, nbt = nbt) + tree.tot1 <- make_tree_tot(chd1) + terminales <- find.terminales(chd1$n1, chd1$list_mere, chd1$list_fille, mincl) + tree.cut1 <- make.classes(terminales, chd1$n1, tree.tot1$tree.cl, chd1$list_fille) + write.csv2(tree.cut1$n1, uceout) + chd.result <- tree.cut1 } - n1 <- chd.result$n1 - classeuce1 <- chd.result$cuce1 - classes<-n1[,ncol(n1)] - write.csv2(n1, file="%s") - rm(n1) - """ % (classif_mode, mincl, ffr(DicoPath['uce']), ffr(DicoPath['n1.csv'])) + classes<-chd.result$n1[,ncol(chd.result$n1)] + write.csv2(chd.result$n1, file="%s") + """ % (classif_mode, mincl, ffr(DicoPath['uce']), ffr(DicoPath['n1-1.csv']), ffr(DicoPath['n1.csv'])) txt += """ - tree.tot1 <- make_tree_tot(chd1) +# tree.tot1 <- make_tree_tot(chd1) # open_file_graph("%s", widt = 600, height=400) # plot(tree.tot1$tree.cl) # dev.off() @@ -226,8 +240,7 @@ def RchdTxt(DicoPath, RscriptPath, mincl, classif_mode, nbt = 9, svdmethod = 'sv """ % ffr(DicoPath['arbre2'] ) txt += """ - tree.cut1 <- make_dendro_cut_tuple(tree.tot1$dendro_tuple, chd.result$coord_ok, classeuce1, 1, nbt) - save(tree.cut1, file="%s") + save(tree.cut1, file="%s") open_file_graph("%s", width = 600, height=400) plot.dendropr(tree.cut1$tree.cl,classes, histo=TRUE) @@ -337,9 +350,14 @@ datasup<-read.csv2("%s", header = FALSE, sep = ';',quote = '\"', row.names = 1, dataet<-read.csv2("%s", header = FALSE, sep = ';',quote = '\"', row.names = 1, na.strings = 'NA') """ % (ffr(DictChdTxtOut['Contout']), ffr(DictChdTxtOut['ContSupOut']), ffr(DictChdTxtOut['ContEtOut'])) txt += """ -tablesqrpact<-BuildProf(as.matrix(dataact),n1,clnb) -tablesqrpsup<-BuildProf(as.matrix(datasup),n1,clnb) -tablesqrpet<-BuildProf(as.matrix(dataet),n1,clnb) +print('ATTENTION NEW BUILD PROF') +#tablesqrpact<-BuildProf(as.matrix(dataact),n1,clnb) +#tablesqrpsup<-BuildProf(as.matrix(datasup),n1,clnb) +#tablesqrpet<-BuildProf(as.matrix(dataet),n1,clnb) +tablesqrpact<-new.build.prof(as.matrix(dataact),n1,clnb) +tablesqrpsup<-new.build.prof(as.matrix(datasup),n1,clnb) +tablesqrpet<-new.build.prof(as.matrix(dataet),n1,clnb) + """ txt += """ PrintProfile(n1,tablesqrpact[4],tablesqrpet[4],tablesqrpact[5],tablesqrpet[5],clnb,"%s","%s",tablesqrpsup[4],tablesqrpsup[5]) @@ -450,12 +468,20 @@ def write_afc_graph(self): if self.param['svg'] : svg = 'TRUE' else : svg = 'FALSE' + if self.param['typegraph'] == 4 : + nodesfile = os.path.join(os.path.dirname(self.fileout),'nodes.csv') + edgesfile = os.path.join(os.path.dirname(self.fileout),'edges.csv') + else : + nodesfile = 'NULL' + edgesfile = 'NULL' + with open(self.RscriptsPath['afc_graph'], 'r') as f: txt = f.read() # self.DictPathOut['RData'], \ scripts = txt % (ffr(self.RscriptsPath['Rgraph']),\ self.param['typegraph'], \ + edgesfile, nodesfile, \ self.param['what'], \ self.param['facteur'][0],\ self.param['facteur'][1], \ @@ -592,7 +618,7 @@ def barplot(table, parametres, intxt = False) : height <- %i open_file_graph("%s",width = width, height = height, svg = %s) par(mar=c(0,0,0,0)) - layout(matrix(c(1,2),1,2, byrow=TRUE),widths=c(3,lcm(7))) + layout(matrix(c(1,2),1,2, byrow=TRUE),widths=c(3,lcm(12))) par(mar=c(8,4,1,0)) yp = ifelse(length(toinf), 0.2, 0) ym = ifelse(length(tominf), 0.2, 0) @@ -723,10 +749,11 @@ class PrintSimiScript(PrintRScript) : else : txt += """ word <- FALSE + index <- NULL """ txt += """ dm <-readMM(dm.path) - cn <- read.table(cn.path, sep='\t', quote='"') + cn <- read.table(cn.path, sep="\t", quote='"') colnames(dm) <- cn[,1] if (file.exists(selected.col)) { sel.col <- read.csv2(selected.col, header = FALSE) @@ -783,7 +810,7 @@ class PrintSimiScript(PrintRScript) : txt += """ load("%s") """ % ffr(self.pathout['RData.RData']) - + if self.parametres['coeff'] == 0 : method = 'cooc' if not self.parametres['keep_coord'] : @@ -791,6 +818,13 @@ class PrintSimiScript(PrintRScript) : method <- 'cooc' mat <- make.a(dm) """ + elif self.analyse.indices[self.parametres['coeff']] == 'Jaccard' : + method = 'Jaccard' + if not self.parametres['keep_coord'] : + txt += """ + method <- 'Jaccard' + mat <- sparse.jaccard(dm) + """ else : if not self.parametres['keep_coord'] : txt += """ @@ -809,7 +843,7 @@ class PrintSimiScript(PrintRScript) : method <- 'binomial' mat <- binom.sim(dm) """ - elif self.parametres['coeff'] != 0 : + elif self.parametres['coeff'] != 0 and self.analyse.indices[self.parametres['coeff']] != 'Jaccard': method = self.analyse.indices[self.parametres['coeff']] if not self.parametres['keep_coord'] : txt += """ @@ -847,10 +881,13 @@ class PrintSimiScript(PrintRScript) : txt += """ mat <- graph.word(mat, index) cs <- colSums(mat) - if (length(cs)) mat <- mat[,-which(cs==0)] + if (length(which(cs==0))) mat <- mat[,-which(cs==0)] rs <- rowSums(mat) - if (length(rs)) mat <- mat[-which(rs==0),] - if (length(cs)) dm <- dm[, -which(cs==0)] + if (length(which(rs==0))) mat <- mat[-which(rs==0),] + if (length(which(cs==0))) dm <- dm[,-which(cs==0)] + if (word) { + index <- which(colnames(mat)==forme) + } """ if self.parametres['layout'] == 0 : layout = 'random' @@ -858,6 +895,8 @@ class PrintSimiScript(PrintRScript) : if self.parametres['layout'] == 2 : layout = 'frutch' if self.parametres['layout'] == 3 : layout = 'kawa' if self.parametres['layout'] == 4 : layout = 'graphopt' + if self.parametres['layout'] == 5 : layout = 'spirale' + if self.parametres['layout'] == 6 : layout = 'spirale3D' self.filename='' @@ -877,7 +916,7 @@ class PrintSimiScript(PrintRScript) : while os.path.exists(os.path.join(dirout,'web_'+str(graphnb))): graphnb +=1 self.filename = ffr(os.path.join(dirout,'web_'+str(graphnb))) - os.mkdir(self.filename) + os.mkdir(self.filename) self.filename = os.path.join(self.filename, 'gexf.gexf') if self.parametres['type_graph'] == 4 : graphnb = 1 @@ -1064,7 +1103,7 @@ class PrintSimiScript(PrintRScript) : txt += """ eff <- colSums(dm) x <- list(mat = mat, eff = eff) - graph.simi <- do.simi(x, method='%s', seuil = seuil, p.type = '%s', layout.type = '%s', max.tree = %s, coeff.vertex=%s, coeff.edge = %s, minmaxeff = minmaxeff, vcexminmax = vcexminmax, cex = cex, coords = coords, communities = communities, halo = halo) + graph.simi <- do.simi(x, method='%s', seuil = seuil, p.type = '%s', layout.type = '%s', max.tree = %s, coeff.vertex=%s, coeff.edge = %s, minmaxeff = minmaxeff, vcexminmax = vcexminmax, cex = cex, coords = coords, communities = communities, halo = halo, index.word=index) """ % (method, type, layout, arbremax, coeff_tv, coeff_te) if self.parametres.get('bystar',False) : @@ -1127,6 +1166,17 @@ class PrintSimiScript(PrintRScript) : """ % svg txt += """ vertex.col <- cols + col.from.proto <- F + if (col.from.proto) { + proto.col <- read.table('/tmp/matcol.csv') + v.proto.names <- make.names(proto.col[,1]) + v.proto.col <- as.character(proto.col[,2]) + v.proto.col[which(v.proto.col=='black')] <- 'yellow' + v.names <- V(graph.simi$graph)$name + num.color <- sapply(v.names, function(x) {if (x %%in%% v.proto.names) {v.proto.col[which(v.proto.names==x)]} else {'pink'}}) + vertex.col <- num.color + V(graph.simi$graph)$proto.color <- vertex.col + } if (!is.null(graph.simi$com)) { com <- graph.simi$com colm <- rainbow(length(com)) @@ -1137,6 +1187,12 @@ class PrintSimiScript(PrintRScript) : vertex.label.color <- colm[membership(com)] } } + if (!length(graph.simi$elim)==0) { + vertex.label.color <- vertex.label.color[-graph.simi$elim] + if (length(label.cex > 1)) { + label.cex <- label.cex[-graph.simi$elim] + } + } coords <- plot.simi(graph.simi, p.type='%s',filename="%s", vertex.label = label.v, edge.label = label.e, vertex.col = vertex.col, vertex.label.color = vertex.label.color, vertex.label.cex=label.cex, vertex.size = vertex.size, edge.col = cola, leg=leg, width = width, height = height, alpha = alpha, movie = film, edge.curved = edge.curved, svg = svg) save.image(file="%s") """ % (type, self.filename, ffr(self.pathout['RData'])) @@ -1192,7 +1248,7 @@ class ProtoScript(PrintRScript) : } mat <- read.csv2("%s", header = FALSE, row.names=1, sep='\t', quote='"', dec='.') open_file_graph("%s",height=800, width=1000) - prototypical(mat, mfreq = %s, mrank = %s, cloud = FALSE, cexrange=c(1,2.4), cexalpha= c(0.4, 1), type = '%s') + prototypical(mat, mfreq = %s, mrank = %s, cloud = FALSE, cexrange=c(1,2.4), cexalpha= c(0.4, 1), type = '%s', mat.col.path='/tmp/matcol.csv') dev.off() """ % (ffr(self.analyse.pathout['table.csv']), ffr(self.analyse.pathout['proto.png']), self.parametres['limfreq'], self.parametres['limrang'], self.parametres['typegraph']) self.add(txt) @@ -1236,7 +1292,12 @@ class MergeGraphes(PrintRScript) : self.add(txt) self.sources([self.analyse.parent.RscriptsPath['simi']]) txt = """ - ng <- merge.graph(graphs) + merge.type <- 'proto' + if (merge.type == 'normal') { + ng <- merge.graph(graphs) + } else { + ng <- merge.graph.proto(graphs) + } ngraph <- list(graph=ng, layout=layout.fruchterman.reingold(ng, dim=3), labex.cex=V(ng)$weight) write.graph(ng, "%s", format = 'graphml') """ % ffr(self.parametres['grapheout']) @@ -1331,7 +1392,8 @@ class LabbeScript(PrintRScript) : """ % (ffr(self.pathout['distmat.csv']), ffr(self.pathout['labbe-tree.png'])) txt +=""" open_file_graph("%s", width=1000, height=1000, svg=F) - heatmap(as.matrix(dist.mat), symm = T, distfun=function(x) as.dist(x)) + par(mar=c(10,1,1,10)) + heatmap(as.matrix(dist.mat), symm = T, distfun=function(x) as.dist(x), margins=c(10,10)) dev.off() """ % ffr(self.pathout['labbe-heatmap.png']) txt += """ @@ -1341,12 +1403,27 @@ class LabbeScript(PrintRScript) : rn <- row.names(as.matrix(dist.mat)) open_file_graph("%s", width=1500, height=1000, svg=F) par(mar=c(10,10,3,3)) - image(1:dim, 1:dim, dst, axes = FALSE, xlab="", ylab="") + image(1:dim, 1:dim, dst, axes = FALSE, xlab="", ylab="", col=heat.colors(99), breaks=seq(0.01,1,0.01)) axis(1, 1:dim, rn, cex.axis = 0.9, las=3) axis(2, 1:dim, rn, cex.axis = 0.9, las=1) text(expand.grid(1:dim, 1:dim), sprintf("%%0.2f", dst), cex=0.6) dev.off() """ % ffr(self.pathout['labbe-matrix.png']) + txt += """ + library(igraph) + g <- graph.adjacency(as.matrix(1-dist.mat), mode="lower", weighted=T) + write.graph(g, file="%s", format='graphml') + open_file_graph("%s", width=1000, height=1000, svg=F) + plot(g) + dev.off() + E(g)$weight <- 1 - E(g)$weight + g <- minimum.spanning.tree(g) + E(g)$weight <- 1 - E(g)$weight + write.graph(g, file="%s", format='graphml') + open_file_graph("%s", width=1000, height=1000, svg=F) + plot(g) + dev.off() + """ % (ffr(self.pathout['graph_tot.graphml']), ffr(self.pathout['graph_tot.png']), ffr(self.pathout['graph_min.graphml']), ffr(self.pathout['graph_min.png'])) self.add(txt) self.write() @@ -1416,8 +1493,8 @@ class ChronoChi2Script(PrintRScript) : tree.toplot <- tree.cut1$tree.cl num.label <- as.numeric(tree.cut1$tree.cl$tip.label) col.tree <- rainbow(length(num.label))[num.label] - tree.toplot$tip.label <- paste('classe ', tree.toplot$tip.label) - plot.phylo(tree.toplot,label.offset=0.1, cex=1.1, no.margin=T, x.lim=20, tip.color = col.tree) + #tree.toplot$tip.label <- paste('classe ', tree.toplot$tip.label) + plot.phylo(tree.toplot,label.offset=0.1, cex=1.1, no.margin=T, tip.color = col.tree) for (i in clod) { print(i) par(mar=c(0,0,0,0)) @@ -1490,4 +1567,137 @@ class ChronoPropScript(PrintRScript) : self.add(txt) self.write() +class ChronoggScript(PrintRScript) : + def make_script(self) : + self.sources([self.analyse.parent.RscriptsPath['Rgraph']]) + print self.parametres + txt = """ + library(ggplot2) + inRData <- "%s" + dendrof <- "%s" + load(inRData) + load(dendrof) + """ % (ffr(self.pathout['RData.RData']), ffr(self.pathout['dendrogramme.RData'])) + txt += """ + svg <- %s + """ % self.parametres['svg'] + txt += """ + tc <- which(grepl("%s",rownames(chistabletot))) + rn <- rownames(chistabletot)[tc] + tc <- tc[order(rn)] + dpt <- chistabletot[tc,] + tot <- afctable[tc,] + tcp <- rowSums(tot) + ptc <- tcp/sum(tcp) + dpt <- t(dpt) + dd <- dpt + """ % self.parametres['var'].replace(u'*', u"\\\\*") + txt += """ + classes <- n1[,ncol(n1)] + tcl <- table(classes) + if ('0' %in% names(tcl)) { + to.vire <- which(names(tcl) == '0') + tcl <- tcl[-to.vire] + } + tclp <- tcl/sum(tcl) + ptt <- prop.table(as.matrix(tot), 1) + ptt <- ptt[,as.numeric(tree.cut1$tree.cl$tip.label)] + rownames(ptt) <- cumsum(ptc) + nptt<-as.data.frame(as.table(ptt)) + nptt[,1]<-as.numeric(as.character(nptt[,1])) + col <- rainbow(ncol(ptt))[as.numeric(tree.cut1$tree.cl$tip.label)] + """ + txt += """ + open_file_graph("%s", w=%i, h=%i, svg=svg) + """ % (ffr(self.parametres['tmpgraph']), self.parametres['width'], self.parametres['height']) + txt+= """ + par(mar=c(10,2,2,2)) + gg <- ggplot(data=nptt, aes(x=Var1,y=Freq,fill=Var2)) + geom_area(alpha=1 , size=0.5, colour="black") + gg + scale_fill_manual(values=col) + dev.off() + """ + self.add(txt) + self.write() + +class DendroScript(PrintRScript) : + def make_script(self) : + if self.parametres['svg'] : + typefile = '.svg' + else : + typefile = '.png' + fileout = self.parametres['fileout'] + width = self.parametres['width'] + height = self.parametres['height'] + type_dendro = self.parametres['dendro_type'] + if self.parametres['taille_classe'] : + tclasse = 'TRUE' + else : + tclasse = 'FALSE' + if self.parametres['color_nb'] == 0 : + bw = 'FALSE' + else : + bw = 'TRUE' + if self.parametres['type_tclasse'] == 0 : + histo='FALSE' + else : + histo = 'TRUE' + if self.parametres['svg'] : + svg = 'TRUE' + else : + svg = 'FALSE' + dendro_path = self.pathout['Rdendro'] + classe_path = self.pathout['uce'] + txt = """ + library(ape) + load("%s") + source("%s") + classes <- read.csv2("%s", row.names=1) + classes <- classes[,1] + """ % (ffr(dendro_path), ffr(self.parametres['Rgraph']), ffr(classe_path)) + if self.parametres['dendro'] == 'simple' : + txt += """ + open_file_graph("%s", width=%i, height=%i, svg=%s) + plot.dendropr(tree.cut1$tree.cl, classes, type.dendro="%s", histo=%s, bw=%s, lab=NULL, tclasse=%s) + """ % (ffr(fileout), width, height, svg, type_dendro, histo, bw, tclasse) + elif self.parametres['dendro'] == 'texte' : + txt += """ + load("%s") + source("%s") + if (is.null(debsup)) { + debsup <- debet + } + chistable <- chistabletot[1:(debsup-1),] + """ % (ffr(self.pathout['RData.RData']), ffr(self.parametres['Rgraph'])) + if self.parametres.get('translation', False) : + txt += """ + rn <- read.csv2("%s", header=FALSE, sep='\t') + rnchis <- row.names(chistable) + commun <- intersect(rnchis, unique(rn[,2])) + idrnchis <- sapply(commun, function(x) {which(rnchis==x)}) + idrn <- sapply(commun, function(x) {which(as.vector(rn[,2])==x)[1]}) + rownames(chistable)[idrnchis] <- as.vector(rn[idrn,1]) + """ % ffr(self.parametres['translation']) + txt += """ + open_file_graph("%s", width=%i, height=%i, svg = %s) + plot.dendro.prof(tree.cut1$tree.cl, classes, chistable, nbbycl = 60, type.dendro="%s", bw=%s, lab=NULL) + """ % (ffr(fileout), width, height, svg, type_dendro, bw) + elif self.parametres['dendro'] == 'cloud' : + txt += """ + load("%s") + source("%s") + if (is.null(debsup)) { + debsup <- debet + } + chistable <- chistabletot[1:(debsup-1),] + open_file_graph("%s", width=%i, height=%i, svg=%s) + plot.dendro.cloud(tree.cut1$tree.cl, classes, chistable, nbbycl = 300, type.dendro="%s", bw=%s, lab=NULL) + """ % (ffr(self.pathout['RData.RData']), ffr(self.parametres['Rgraph']), ffr(fileout), width, height, svg, type_dendro, bw) + self.add(txt) + self.write() + + +class ReDoProfScript(PrintRScript) : + def make_script(self) : + self.sources([self.analyse.parent.RscriptsPath['chdfunct.R']]) + print self.parametres