X-Git-Url: http://iramuteq.org/git?p=iramuteq;a=blobdiff_plain;f=PrintRScript.py;h=db2eeac22288e3e7a1c4e0a12cf0c9372293d2fa;hp=4d0fb40fcaad3634a92fd3ad88a1869a9d0234e4;hb=1fb687c23b19ae4cc88146acf393041356c1df3a;hpb=22cd27b2bbe9ab1ffa7ef06fa764b5147ae17dad diff --git a/PrintRScript.py b/PrintRScript.py index 4d0fb40..db2eeac 100644 --- a/PrintRScript.py +++ b/PrintRScript.py @@ -17,6 +17,7 @@ class PrintRScript : log.info('Rscript') self.pathout = analyse.pathout self.analyse = analyse + self.parametres = analyse.parametres self.scriptout = self.pathout['temp'] self.script = u"#Script genere par IRaMuTeQ - %s" % datetime.now().ctime() @@ -34,6 +35,10 @@ class PrintRScript : for source in lsources : self.add('source("%s")' % source) + def packages(self, lpks) : + for pk in lpks : + self.add('library(%s)' % pk) + def load(self, l) : for val in l : self.add('load("%s")' % val) @@ -46,6 +51,8 @@ class PrintRScript : class chdtxt(PrintRScript) : pass +def Rcolor(color) : + return str(color).replace(')', ', max=255)') class Alceste2(PrintRScript) : def doscript(self) : @@ -141,8 +148,9 @@ def RchdTxt(DicoPath, RscriptPath, mincl, classif_mode, nbt = 9, libsvdc = False data2 <- as(data2, "dgCMatrix") row.names(data2) <- 1:nrow(data2) """ % DicoPath['TableUc2'] + #log.info('ATTENTION ############# MODEPATATE ####################') txt += """ - chd1<-CHD(data1, x = nbt, libsvdc = libsvdc, libsvdc.path = libsvdc.path) + chd1<-CHD(data1, x = nbt, mode.patate = FALSE, libsvdc = libsvdc, libsvdc.path = libsvdc.path) """ if classif_mode == 0: @@ -165,12 +173,12 @@ def RchdTxt(DicoPath, RscriptPath, mincl, classif_mode, nbt = 9, libsvdc = False """ % DicoPath['listeuce2'] txt += """ -# rm(data1) + rm(data1) """ if classif_mode == 0: txt += """ -# rm(data2) + rm(data2) """ txt += """ chd.result <- Rchdtxt("%s",mincl=%i,classif_mode=%i, nbt = nbt) @@ -199,7 +207,7 @@ def RchdTxt(DicoPath, RscriptPath, mincl, classif_mode, nbt = 9, libsvdc = False save(tree.cut1, file="%s") classes<-n1[,ncol(n1)] open_file_graph("%s", width = 600, height=400) - plot.dendropr(tree.cut1$tree.cl,classes) + plot.dendropr(tree.cut1$tree.cl,classes, histo=TRUE) open_file_graph("%s", width = 600, height=400) plot(tree.cut1$dendro_tot_cl) dev.off() @@ -358,33 +366,41 @@ write.csv2(gbcluster,file="%s") """ % (DictChdTxtOut['afc_facteur'], DictChdTxtOut['afc_col'], DictChdTxtOut['afc_row']) txt += """ - xlab <- paste('facteur 1 - ', round(afc$facteur[1,2],2), sep = '') - ylab <- paste('facteur 2 - ', round(afc$facteur[2,2],2), sep = '') - xlab <- paste(xlab, ' %', sep = '') - ylab <- paste(ylab, ' %', sep = '') + #xlab <- paste('facteur 1 - ', round(afc$facteur[1,2],2), sep = '') + #ylab <- paste('facteur 2 - ', round(afc$facteur[2,2],2), sep = '') + #xlab <- paste(xlab, ' %', sep = '') + #ylab <- paste(ylab, ' %', sep = '') """ txt += """ PARCEX<-%s + xmin <- min(afc$rowcoord[,1], na.rm = TRUE) + (0.1 * min(afc$rowcoord[,1], na.rm = TRUE)) + xmax <- max(afc$rowcoord[,1], na.rm = TRUE) + (0.1 * max(afc$rowcoord[,1], na.rm = TRUE)) + ymin <- min(afc$rowcoord[,2], na.rm = TRUE) + (0.1 * min(afc$rowcoord[,2], na.rm = TRUE)) + ymax <- max(afc$rowcoord[,2], na.rm = TRUE) + (0.1 * max(afc$rowcoord[,2], na.rm = TRUE)) + print(xmin) + print(xmax) + print(ymin) + print(ymax) """ % taillecar txt += """ - PlotAfc2dCoul(afc, as.data.frame(chistabletot), "%s", what='coord', deb=1, fin=(debsup-1), xlab = xlab, ylab = ylab) + PlotAfc2dCoul(afc, as.data.frame(chistabletot), "%s", what='coord', deb=1, fin=(debsup-1), xlab = xlab, ylab = ylab, xmin=xmin, xmax=xmax, ymin = ymin, ymax=ymax) """ % (DictChdTxtOut['AFC2DL_OUT']) txt += """ - PlotAfc2dCoul(afc, as.data.frame(chistabletot), "%s", what='coord', deb=debsup, fin=(debet-1), xlab = xlab, ylab = ylab) + PlotAfc2dCoul(afc, as.data.frame(chistabletot), "%s", what='coord', deb=debsup, fin=(debet-1), xlab = xlab, ylab = ylab, xmin=xmin, xmax=xmax, ymin = ymin, ymax=ymax) """ % (DictChdTxtOut['AFC2DSL_OUT']) txt += """ - PlotAfc2dCoul(afc, as.data.frame(chistabletot), "%s", what='coord', deb=debet, fin=fin, xlab = xlab, ylab = ylab) + PlotAfc2dCoul(afc, as.data.frame(chistabletot), "%s", what='coord', deb=debet, fin=fin, xlab = xlab, ylab = ylab, xmin=xmin, xmax=xmax, ymin = ymin, ymax=ymax) """ % (DictChdTxtOut['AFC2DEL_OUT']) txt += """ - PlotAfc2dCoul(afc, as.data.frame(chistabletot), "%s", col=TRUE, what='coord', xlab = xlab, ylab = ylab) + PlotAfc2dCoul(afc, as.data.frame(chistabletot), "%s", col=TRUE, what='coord', xlab = xlab, ylab = ylab, xmin=xmin, xmax=xmax, ymin = ymin, ymax=ymax) """ % (DictChdTxtOut['AFC2DCL_OUT']) - txt += """ - PlotAfc2dCoul(afc, as.data.frame(chistabletot), "%s", what='crl', deb=1, fin=(debsup-1), xlab = xlab, ylab = ylab) - PlotAfc2dCoul(afc, as.data.frame(chistabletot), "%s", what='crl', deb=debsup, fin=(debet-1), xlab = xlab, ylab = ylab) - PlotAfc2dCoul(afc, as.data.frame(chistabletot), "%s", what='crl', deb=debet, fin=fin, xlab = xlab, ylab = ylab) - PlotAfc2dCoul(afc, as.data.frame(chistabletot), "%s", col=TRUE, what='crl', xlab = xlab, ylab = ylab) - """ % (DictChdTxtOut['AFC2DCoul'], DictChdTxtOut['AFC2DCoulSup'], DictChdTxtOut['AFC2DCoulEt'], DictChdTxtOut['AFC2DCoulCl']) +# txt += """ + # PlotAfc2dCoul(afc, as.data.frame(chistabletot), "%s", what='crl', deb=1, fin=(debsup-1), xlab = xlab, ylab = ylab) + # PlotAfc2dCoul(afc, as.data.frame(chistabletot), "%s", what='crl', deb=debsup, fin=(debet-1), xlab = xlab, ylab = ylab) + # PlotAfc2dCoul(afc, as.data.frame(chistabletot), "%s", what='crl', deb=debet, fin=fin, xlab = xlab, ylab = ylab) + # PlotAfc2dCoul(afc, as.data.frame(chistabletot), "%s", col=TRUE, what='crl', xlab = xlab, ylab = ylab) + # """ % (DictChdTxtOut['AFC2DCoul'], DictChdTxtOut['AFC2DCoulSup'], DictChdTxtOut['AFC2DCoulEt'], DictChdTxtOut['AFC2DCoulCl']) txt += """ #rm(dataact) @@ -410,12 +426,18 @@ def write_afc_graph(self): if self.param['do_select_chi'] : do_select_chi = 'TRUE' else : do_select_chi = 'FALSE' + if self.param['do_select_chi_classe'] : do_select_chi_classe = 'TRUE' + else : do_select_chi_classe = 'FALSE' + if self.param['cex_txt'] : cex_txt = 'TRUE' else : cex_txt = 'FALSE' if self.param['tchi'] : tchi = 'TRUE' else : tchi = 'FALSE' + if self.param['svg'] : svg = 'TRUE' + else : svg = 'FALSE' + with open(self.RscriptsPath['afc_graph'], 'r') as f: txt = f.read() @@ -431,6 +453,8 @@ def write_afc_graph(self): self.param['select_nb'], \ do_select_chi, \ self.param['select_chi'], \ + do_select_chi_classe, \ + self.param['nbchic'], \ cex_txt, \ self.param['txt_min'], \ self.param['txt_max'], \ @@ -443,7 +467,8 @@ def write_afc_graph(self): tchi,\ self.param['tchi_min'],\ self.param['tchi_max'],\ - ffr(os.path.dirname(self.fileout))) + ffr(os.path.dirname(self.fileout)),\ + svg) return scripts def print_simi3d(self): @@ -453,10 +478,12 @@ def print_simi3d(self): movie = "'" + ffr(os.path.dirname(self.DictPathOut['RData'])) + "'" else : movie = 'NULL' - if self.section == 'chd_dist_quest' : - header = 'TRUE' - else : - header = 'FALSE' + + #if self.corpus.parametres['type'] == 'corpus' : + # header = 'TRUE' + #else : + # header = 'FALSE' + header = 'FALSE' txt += """ dm<-read.csv2("%s",row.names=1,header = %s) load("%s") @@ -516,9 +543,29 @@ def barplot(table, rownames, colnames, rgraph, tmpgraph, intxt = False) : if not intxt : #FIXME txt = """ - inf <- NA di <- matrix(data=%s, nrow=%i, byrow = TRUE) - di[is.na(di)] <- max(di, na.rm=TRUE) + 2 + toinf <- which(di == Inf) + tominf <- which(di == -Inf) + if (length(toinf)) { + di[toinf] <- NA + valmax <- max(di, na.rm = TRUE) + if (valmax <= 0) { + valmax <- 2 + } else { + valmax <- valmax + 2 + } + di[toinf] <- valmax + } + if (length(tominf)) { + di[tominf] <- NA + valmin <- min(di, na.rm = TRUE) + if (valmin >=0) { + valmin <- -2 + } else { + valmin <- valmin - 2 + } + di[tominf] <- valmin + } rownames(di)<- %s colnames(di) <- %s """ % (txttable, rownb, rownames, colnames) @@ -534,7 +581,20 @@ def barplot(table, rownames, colnames, rgraph, tmpgraph, intxt = False) : par(mar=c(0,0,0,0)) layout(matrix(c(1,2),1,2, byrow=TRUE),widths=c(3,lcm(7))) par(mar=c(2,2,1,0)) - coord <- barplot(as.matrix(di), beside = TRUE, col = color, space = c(0.1,0.6)) + yp = ifelse(length(toinf), 0.2, 0) + ym = ifelse(length(tominf), 0.2, 0) + ymin <- ifelse(!length(which(di < 0)), 0, min(di) - ym) + coord <- barplot(as.matrix(di), beside = TRUE, col = color, space = c(0.1,0.6), ylim=c(ymin, max(di) + yp)) + if (length(toinf)) { + coordinf <- coord[toinf] + valinf <- di[toinf] + text(x=coordinf, y=valinf + 0.1, 'i') + } + if (length(tominf)) { + coordinf <- coord[toinf] + valinf <- di[toinf] + text(x=coordinf, y=valinf - 0.1, 'i') + } c <- colMeans(coord) c1 <- c[-1] c2 <- c[-length(c)] @@ -620,3 +680,321 @@ def barplot(table, rownames, colnames, rgraph, tmpgraph, intxt = False) : # f.write(txt) # f.close() +class PrintSimiScript(PrintRScript) : + def make_script(self) : + self.txtgraph = '' + self.packages(['igraph', 'proxy', 'Matrix']) + self.sources([self.analyse.parent.RscriptsPath['simi'], self.analyse.parent.RscriptsPath['Rgraph']]) + txt = '' + if not self.parametres['keep_coord'] : + txt += """ + dm.path <- "%s" + cn.path <- "%s" + selected.col <- "%s" + """ % (self.pathout['mat01.csv'], self.pathout['actives.csv'], self.pathout['selected.csv']) + txt += """ + dm <-readMM(dm.path) + cn <- read.table(cn.path, sep=';', quote='"') + colnames(dm) <- cn[,1] + sel.col <- read.csv2(selected.col) + dm <- dm[, sel.col[,1] + 1] + """ + else : + txt += """ + load("%s") + """ % self.pathout['RData.RData'] + + if self.parametres['coeff'] == 0 : + method = 'cooc' + if not self.parametres['keep_coord'] : + txt += """ + method <- 'cooc' + mat <- make.a(dm) + """ + else : + if not self.parametres['keep_coord'] : + txt += """ + dm <- as.matrix(dm) + """ + if self.parametres['coeff'] == 1 : + method = 'prcooc' + txt += """ + method <- 'Russel' + mat <- simil(dm, method = 'Russel', diag = TRUE, upper = TRUE, by_rows = FALSE) + """ + elif self.analyse.indices[self.parametres['coeff']] == 'binomial' : + method = 'binomial' + if not self.parametres['keep_coord'] : + txt += """ + method <- 'binomial' + mat <- binom.sim(dm) + """ + elif self.parametres['coeff'] != 0 : + method = self.analyse.indices[self.parametres['coeff']] + if not self.parametres['keep_coord'] : + txt += """ + method <-"%s" + mat <- simil(dm, method = method, diag = TRUE, upper = TRUE, by_rows = FALSE) + """ % self.analyse.indices[self.parametres['coeff']] + if not self.parametres['keep_coord'] : + txt += """ + mat <- as.matrix(stats::as.dist(mat,diag=TRUE,upper=TRUE)) + mat[is.na(mat)] <- 0 + mat[is.infinite(mat)] <- 0 + """ + if self.parametres['layout'] == 0 : layout = 'random' + if self.parametres['layout'] == 1 : layout = 'circle' + if self.parametres['layout'] == 2 : layout = 'frutch' + if self.parametres['layout'] == 3 : layout = 'kawa' + if self.parametres['layout'] == 4 : layout = 'graphopt' + + + self.filename='' + if self.parametres['type_graph'] == 0 : type = 'tkplot' + if self.parametres['type_graph'] == 1 : + graphnb = 1 + type = 'nplot' + dirout = os.path.dirname(self.pathout['mat01']) + while os.path.exists(os.path.join(dirout,'graph_simi_'+str(graphnb)+'.png')): + graphnb +=1 + self.filename = ffr(os.path.join(dirout,'graph_simi_'+str(graphnb)+'.png')) + if self.parametres['type_graph'] == 2 : type = 'rgl' + + if self.parametres['arbremax'] : + arbremax = 'TRUE' + self.txtgraph += ' - arbre maximum' + else : arbremax = 'FALSE' + + if self.parametres['coeff_tv'] : + coeff_tv = self.parametres['coeff_tv_nb'] + tvminmax = 'c(NULL,NULL)' + elif not self.parametres['coeff_tv'] or self.parametres.get('sformchi', False) : + coeff_tv = 'NULL' + tvminmax = 'c(%i, %i)' %(self.parametres['tvmin'], self.parametres['tvmax']) + if self.parametres['coeff_te'] : coeff_te = 'c(%i,%i)' % (self.parametres['coeff_temin'], self.parametres['coeff_temax']) + else : coeff_te = 'NULL' + + if self.parametres['vcex'] or self.parametres.get('cexfromchi', False) : + vcexminmax = 'c(%i/10,%i/10)' % (self.parametres['vcexmin'],self.parametres['vcexmax']) + else : + vcexminmax = 'c(NULL,NULL)' + if not self.parametres['label_v'] : label_v = 'FALSE' + else : label_v = 'TRUE' + + if not self.parametres['label_e'] : label_e = 'FALSE' + else : label_e = 'TRUE' + + if self.parametres['seuil_ok'] : seuil = str(self.parametres['seuil']) + else : seuil = 'NULL' + + cols = str(self.parametres['cols']).replace(')',', max=255)') + cola = str(self.parametres['cola']).replace(')',',max=255)') + + txt += """ + minmaxeff <- %s + """ % tvminmax + txt += """ + vcexminmax <- %s + """ % vcexminmax + txt += """ + cex = %i/10 + """ % self.parametres['cex'] + + if self.parametres['film'] : + txt += """ + film <- "%s" + """ % self.pathout['film'] + else : + txt += """ + film <- NULL + """ + txt += """ + seuil <- %s + """ % seuil + + txt += """ + label.v <- %s + label.e <- %s + """ % (label_v, label_e) + txt += """ + cols <- rgb%s + cola <- rgb%s + """ % (cols, cola) + txt += """ + width <- %i + height <- %i + """ % (self.parametres['width'], self.parametres['height']) + if self.parametres['keep_coord'] : + txt += """ + coords <- try(coords, TRUE) + if (!is.matrix(coords)) { + coords<-NULL + } + """ + else : + txt += """ + coords <- NULL + """ + txt += """ + alpha <- %i/100 + """ % self.parametres['alpha'] + txt += """ + alpha <- %i/100 + """ % self.parametres['alpha'] +############################################# + if self.parametres.get('bystar',False) : + txt += """ + et <- list() + """ + for i, line in enumerate(self.parametres['listet']) : + txt+= """ + et[[%i]] <- c(%s) + """ % (i+1, ','.join([`val + 1` for val in line])) + txt+= """ + unetoile <- c('%s') + """ % ("','".join([val for val in self.parametres['selectedstars']])) + txt += """ + fsum <- NULL + rs <- rowSums(dm) + for (i in 1:length(unetoile)) { + print(unetoile[i]) + tosum <- et[[i]] + if (length(tosum) > 1) { + fsum <- cbind(fsum, colSums(dm[tosum,])) + } else { + fsum <- cbind(fsum, dm[tosum,]) + } + } + source("%s") + lex <- AsLexico2(fsum, chip=TRUE) + dcol <- apply(lex[[4]],1,which.max) + toblack <- apply(lex[[4]],1,max) + gcol <- rainbow(length(unetoile)) + #gcol[2] <- 'orange' + vertex.label.color <- gcol[dcol] + vertex.label.color[which(toblack <= 3.84)] <- 'black' + leg <- list(unetoile=unetoile, gcol=gcol) + cols <- vertex.label.color + chivertex.size <- norm.vec(toblack, vcexminmax[1], vcexminmax[2]) + + """ % (self.analyse.parent.RscriptsPath['chdfunct']) + else : + txt += """ + vertex.label.color <- 'black' + chivertex.size <- 1 + leg<-NULL + """ +############################################# + +# txt += """ +# eff <- colSums(dm) +# g.ori <- graph.adjacency(mat, mode='lower', weighted = TRUE) +# w.ori <- E(g.ori)$weight +# if (max.tree) { +# if (method == 'cooc') { +# E(g.ori)$weight <- 1 / w.ori +# } else { +# E(g.ori)$weigth <- 1 - w.ori +# } +# g.max <- minimum.spanning.tree(g.ori) +# if (method == 'cooc') { +# E(g.max)$weight <- 1 / E(g.max)$weight +# } else { +# E(g.max)$weight <- 1 - E(g.max)$weight +# } +# g.toplot <- g.max +# } else { +# g.toplot <- g.ori +# } +# """ + txt += """ + eff <- colSums(dm) + x <- list(mat = mat, eff = eff) + graph.simi <- do.simi(x, method='%s', seuil = seuil, p.type = '%s', layout.type = '%s', max.tree = %s, coeff.vertex=%s, coeff.edge = %s, minmaxeff = minmaxeff, vcexminmax = vcexminmax, cex = cex, coords = coords) + """ % (method, type, layout, arbremax, coeff_tv, coeff_te) + + if self.parametres.get('bystar',False) : + if self.parametres.get('cexfromchi', False) : + txt+=""" + label.cex<-chivertex.size + """ + else : + txt+=""" + label.cex <- NULL + """ + if self.parametres.get('sfromchi', False) : + txt += """ + vertex.size <- norm.vec(toblack, minmaxeff[1], minmaxeff[2]) + """ + else : + txt += """ + vertex.size <- NULL + """ + else : + #FIXME + tmpchi = False + if tmpchi : + txt += """ + lchi <- read.table("%s") + lchi <- lchi[,1] + """ % ffr(tmpchi) + if 'selected_col' in dir(self.tableau) : + txt += """ + lchi <- lchi[c%s+1] + """ % datas + if tmpchi and self.parametres.get('cexfromchi', False) : + txt += """ + label.cex <- norm.vec(lchi, vcexminmax[1], vcexminmax[2]) + """ + else : + txt += """ + if (is.null(vcexminmax[1])) { + label.cex <- NULL + } else { + label.cex <- graph.simi$label.cex + } + """ + if tmpchi and self.parametres.get('sfromchi', False) : + txt += """ + vertex.size <- norm.vec(lchi, minmaxeff[1], minmaxeff[2]) + """ + else : + txt += """ + if (is.null(minmaxeff[1])) { + vertex.size <- NULL + } else { + vertex.size <- graph.simi$eff + } + """ + txt += """ vertex.size <- NULL """ + txt += """ + coords <- plot.simi(graph.simi, p.type='%s',filename="%s", vertex.label = label.v, edge.label = label.e, vertex.col = cols, vertex.label.color = vertex.label.color, vertex.label.cex=label.cex, vertex.size = vertex.size, edge.col = cola, leg=leg, width = width, height = height, alpha = alpha, movie = film) + save.image(file="%s") + """ % (type, self.filename, self.pathout['RData']) + + self.add(txt) + self.write() + +class WordCloudRScript(PrintRScript) : + def make_script(self) : + self.sources([self.analyse.parent.RscriptsPath['Rgraph']]) + self.packages(['wordcloud']) + bg_col = Rcolor(self.parametres['col_bg']) + txt_col = Rcolor(self.parametres['col_text']) + txt = """ + act <- read.csv2("%s", header = FALSE, row.names=1, sep='\t') + selected.col <- read.table("%s") + toprint <- as.matrix(act[selected.col[,1] + 1,]) + rownames(toprint) <- rownames(act)[selected.col[,1] + 1] + maxword <- %i + if (nrow(toprint) > maxword) { + toprint <- as.matrix(toprint[order(toprint[,1], decreasing=TRUE),]) + toprint <- as.matrix(toprint[1:maxword,]) + } + open_file_graph("%s", width = %i, height = %i) + par(bg=rgb%s) + wordcloud(row.names(toprint), toprint[,1], scale=c(%f,%f), random.order=FALSE, colors=rgb%s) + dev.off() + """ % (ffr(self.analyse.pathout['actives_eff.csv']), ffr(self.analyse.pathout['selected.csv']), self.parametres['maxword'], ffr(self.parametres['graphout']), self.parametres['width'], self.parametres['height'], bg_col, self.parametres['maxcex'], self.parametres['mincex'], txt_col) + self.add(txt) + self.write()