log.info('Rscript')
self.pathout = analyse.pathout
self.analyse = analyse
+ self.parametres = analyse.parametres
self.scriptout = self.pathout['temp']
self.script = u"#Script genere par IRaMuTeQ - %s" % datetime.now().ctime()
for source in lsources :
self.add('source("%s")' % source)
+ def packages(self, lpks) :
+ for pk in lpks :
+ self.add('library(%s)' % pk)
+
def load(self, l) :
for val in l :
self.add('load("%s")' % val)
class chdtxt(PrintRScript) :
pass
+def Rcolor(color) :
+ return str(color).replace(')', ', max=255)')
class Alceste2(PrintRScript) :
def doscript(self) :
data2 <- as(data2, "dgCMatrix")
row.names(data2) <- 1:nrow(data2)
""" % DicoPath['TableUc2']
+ #log.info('ATTENTION ############# MODEPATATE ####################')
txt += """
- chd1<-CHD(data1, x = nbt, libsvdc = libsvdc, libsvdc.path = libsvdc.path)
+ chd1<-CHD(data1, x = nbt, mode.patate = FALSE, libsvdc = libsvdc, libsvdc.path = libsvdc.path)
"""
if classif_mode == 0:
""" % DicoPath['listeuce2']
txt += """
-# rm(data1)
+ rm(data1)
"""
if classif_mode == 0:
txt += """
-# rm(data2)
+ rm(data2)
"""
txt += """
chd.result <- Rchdtxt("%s",mincl=%i,classif_mode=%i, nbt = nbt)
save(tree.cut1, file="%s")
classes<-n1[,ncol(n1)]
open_file_graph("%s", width = 600, height=400)
- plot.dendropr(tree.cut1$tree.cl,classes)
+ plot.dendropr(tree.cut1$tree.cl,classes, histo=TRUE)
open_file_graph("%s", width = 600, height=400)
plot(tree.cut1$dendro_tot_cl)
dev.off()
""" % (DictChdTxtOut['afc_facteur'], DictChdTxtOut['afc_col'], DictChdTxtOut['afc_row'])
txt += """
- xlab <- paste('facteur 1 - ', round(afc$facteur[1,2],2), sep = '')
- ylab <- paste('facteur 2 - ', round(afc$facteur[2,2],2), sep = '')
- xlab <- paste(xlab, ' %', sep = '')
- ylab <- paste(ylab, ' %', sep = '')
+ #xlab <- paste('facteur 1 - ', round(afc$facteur[1,2],2), sep = '')
+ #ylab <- paste('facteur 2 - ', round(afc$facteur[2,2],2), sep = '')
+ #xlab <- paste(xlab, ' %', sep = '')
+ #ylab <- paste(ylab, ' %', sep = '')
"""
txt += """
PARCEX<-%s
+ xmin <- min(afc$rowcoord[,1], na.rm = TRUE) + (0.1 * min(afc$rowcoord[,1], na.rm = TRUE))
+ xmax <- max(afc$rowcoord[,1], na.rm = TRUE) + (0.1 * max(afc$rowcoord[,1], na.rm = TRUE))
+ ymin <- min(afc$rowcoord[,2], na.rm = TRUE) + (0.1 * min(afc$rowcoord[,2], na.rm = TRUE))
+ ymax <- max(afc$rowcoord[,2], na.rm = TRUE) + (0.1 * max(afc$rowcoord[,2], na.rm = TRUE))
+ print(xmin)
+ print(xmax)
+ print(ymin)
+ print(ymax)
""" % taillecar
txt += """
- PlotAfc2dCoul(afc, as.data.frame(chistabletot), "%s", what='coord', deb=1, fin=(debsup-1), xlab = xlab, ylab = ylab)
+ PlotAfc2dCoul(afc, as.data.frame(chistabletot), "%s", what='coord', deb=1, fin=(debsup-1), xlab = xlab, ylab = ylab, xmin=xmin, xmax=xmax, ymin = ymin, ymax=ymax)
""" % (DictChdTxtOut['AFC2DL_OUT'])
txt += """
- PlotAfc2dCoul(afc, as.data.frame(chistabletot), "%s", what='coord', deb=debsup, fin=(debet-1), xlab = xlab, ylab = ylab)
+ PlotAfc2dCoul(afc, as.data.frame(chistabletot), "%s", what='coord', deb=debsup, fin=(debet-1), xlab = xlab, ylab = ylab, xmin=xmin, xmax=xmax, ymin = ymin, ymax=ymax)
""" % (DictChdTxtOut['AFC2DSL_OUT'])
txt += """
- PlotAfc2dCoul(afc, as.data.frame(chistabletot), "%s", what='coord', deb=debet, fin=fin, xlab = xlab, ylab = ylab)
+ PlotAfc2dCoul(afc, as.data.frame(chistabletot), "%s", what='coord', deb=debet, fin=fin, xlab = xlab, ylab = ylab, xmin=xmin, xmax=xmax, ymin = ymin, ymax=ymax)
""" % (DictChdTxtOut['AFC2DEL_OUT'])
txt += """
- PlotAfc2dCoul(afc, as.data.frame(chistabletot), "%s", col=TRUE, what='coord', xlab = xlab, ylab = ylab)
+ PlotAfc2dCoul(afc, as.data.frame(chistabletot), "%s", col=TRUE, what='coord', xlab = xlab, ylab = ylab, xmin=xmin, xmax=xmax, ymin = ymin, ymax=ymax)
""" % (DictChdTxtOut['AFC2DCL_OUT'])
- txt += """
- PlotAfc2dCoul(afc, as.data.frame(chistabletot), "%s", what='crl', deb=1, fin=(debsup-1), xlab = xlab, ylab = ylab)
- PlotAfc2dCoul(afc, as.data.frame(chistabletot), "%s", what='crl', deb=debsup, fin=(debet-1), xlab = xlab, ylab = ylab)
- PlotAfc2dCoul(afc, as.data.frame(chistabletot), "%s", what='crl', deb=debet, fin=fin, xlab = xlab, ylab = ylab)
- PlotAfc2dCoul(afc, as.data.frame(chistabletot), "%s", col=TRUE, what='crl', xlab = xlab, ylab = ylab)
- """ % (DictChdTxtOut['AFC2DCoul'], DictChdTxtOut['AFC2DCoulSup'], DictChdTxtOut['AFC2DCoulEt'], DictChdTxtOut['AFC2DCoulCl'])
+# txt += """
+ # PlotAfc2dCoul(afc, as.data.frame(chistabletot), "%s", what='crl', deb=1, fin=(debsup-1), xlab = xlab, ylab = ylab)
+ # PlotAfc2dCoul(afc, as.data.frame(chistabletot), "%s", what='crl', deb=debsup, fin=(debet-1), xlab = xlab, ylab = ylab)
+ # PlotAfc2dCoul(afc, as.data.frame(chistabletot), "%s", what='crl', deb=debet, fin=fin, xlab = xlab, ylab = ylab)
+ # PlotAfc2dCoul(afc, as.data.frame(chistabletot), "%s", col=TRUE, what='crl', xlab = xlab, ylab = ylab)
+ # """ % (DictChdTxtOut['AFC2DCoul'], DictChdTxtOut['AFC2DCoulSup'], DictChdTxtOut['AFC2DCoulEt'], DictChdTxtOut['AFC2DCoulCl'])
txt += """
#rm(dataact)
if self.param['do_select_chi'] : do_select_chi = 'TRUE'
else : do_select_chi = 'FALSE'
+ if self.param['do_select_chi_classe'] : do_select_chi_classe = 'TRUE'
+ else : do_select_chi_classe = 'FALSE'
+
if self.param['cex_txt'] : cex_txt = 'TRUE'
else : cex_txt = 'FALSE'
if self.param['tchi'] : tchi = 'TRUE'
else : tchi = 'FALSE'
+ if self.param['svg'] : svg = 'TRUE'
+ else : svg = 'FALSE'
+
with open(self.RscriptsPath['afc_graph'], 'r') as f:
txt = f.read()
self.param['select_nb'], \
do_select_chi, \
self.param['select_chi'], \
+ do_select_chi_classe, \
+ self.param['nbchic'], \
cex_txt, \
self.param['txt_min'], \
self.param['txt_max'], \
tchi,\
self.param['tchi_min'],\
self.param['tchi_max'],\
- ffr(os.path.dirname(self.fileout)))
+ ffr(os.path.dirname(self.fileout)),\
+ svg)
return scripts
def print_simi3d(self):
movie = "'" + ffr(os.path.dirname(self.DictPathOut['RData'])) + "'"
else :
movie = 'NULL'
- if self.section == 'chd_dist_quest' :
- header = 'TRUE'
- else :
- header = 'FALSE'
+
+ #if self.corpus.parametres['type'] == 'corpus' :
+ # header = 'TRUE'
+ #else :
+ # header = 'FALSE'
+ header = 'FALSE'
txt += """
dm<-read.csv2("%s",row.names=1,header = %s)
load("%s")
if not intxt :
#FIXME
txt = """
- inf <- NA
di <- matrix(data=%s, nrow=%i, byrow = TRUE)
- di[is.na(di)] <- max(di, na.rm=TRUE) + 2
+ toinf <- which(di == Inf)
+ tominf <- which(di == -Inf)
+ if (length(toinf)) {
+ di[toinf] <- NA
+ valmax <- max(di, na.rm = TRUE)
+ if (valmax <= 0) {
+ valmax <- 2
+ } else {
+ valmax <- valmax + 2
+ }
+ di[toinf] <- valmax
+ }
+ if (length(tominf)) {
+ di[tominf] <- NA
+ valmin <- min(di, na.rm = TRUE)
+ if (valmin >=0) {
+ valmin <- -2
+ } else {
+ valmin <- valmin - 2
+ }
+ di[tominf] <- valmin
+ }
rownames(di)<- %s
colnames(di) <- %s
""" % (txttable, rownb, rownames, colnames)
par(mar=c(0,0,0,0))
layout(matrix(c(1,2),1,2, byrow=TRUE),widths=c(3,lcm(7)))
par(mar=c(2,2,1,0))
- coord <- barplot(as.matrix(di), beside = TRUE, col = color, space = c(0.1,0.6))
+ yp = ifelse(length(toinf), 0.2, 0)
+ ym = ifelse(length(tominf), 0.2, 0)
+ ymin <- ifelse(!length(which(di < 0)), 0, min(di) - ym)
+ coord <- barplot(as.matrix(di), beside = TRUE, col = color, space = c(0.1,0.6), ylim=c(ymin, max(di) + yp))
+ if (length(toinf)) {
+ coordinf <- coord[toinf]
+ valinf <- di[toinf]
+ text(x=coordinf, y=valinf + 0.1, 'i')
+ }
+ if (length(tominf)) {
+ coordinf <- coord[toinf]
+ valinf <- di[toinf]
+ text(x=coordinf, y=valinf - 0.1, 'i')
+ }
c <- colMeans(coord)
c1 <- c[-1]
c2 <- c[-length(c)]
# f.write(txt)
# f.close()
+class PrintSimiScript(PrintRScript) :
+ def make_script(self) :
+ self.txtgraph = ''
+ self.packages(['igraph', 'proxy', 'Matrix'])
+ self.sources([self.analyse.parent.RscriptsPath['simi'], self.analyse.parent.RscriptsPath['Rgraph']])
+ txt = ''
+ if not self.parametres['keep_coord'] :
+ txt += """
+ dm.path <- "%s"
+ cn.path <- "%s"
+ selected.col <- "%s"
+ """ % (self.pathout['mat01.csv'], self.pathout['actives.csv'], self.pathout['selected.csv'])
+ txt += """
+ dm <-readMM(dm.path)
+ cn <- read.table(cn.path, sep=';', quote='"')
+ colnames(dm) <- cn[,1]
+ sel.col <- read.csv2(selected.col)
+ dm <- dm[, sel.col[,1] + 1]
+ """
+ else :
+ txt += """
+ load("%s")
+ """ % self.pathout['RData.RData']
+
+ if self.parametres['coeff'] == 0 :
+ method = 'cooc'
+ if not self.parametres['keep_coord'] :
+ txt += """
+ method <- 'cooc'
+ mat <- make.a(dm)
+ """
+ else :
+ if not self.parametres['keep_coord'] :
+ txt += """
+ dm <- as.matrix(dm)
+ """
+ if self.parametres['coeff'] == 1 :
+ method = 'prcooc'
+ txt += """
+ method <- 'Russel'
+ mat <- simil(dm, method = 'Russel', diag = TRUE, upper = TRUE, by_rows = FALSE)
+ """
+ elif self.analyse.indices[self.parametres['coeff']] == 'binomial' :
+ method = 'binomial'
+ if not self.parametres['keep_coord'] :
+ txt += """
+ method <- 'binomial'
+ mat <- binom.sim(dm)
+ """
+ elif self.parametres['coeff'] != 0 :
+ method = self.analyse.indices[self.parametres['coeff']]
+ if not self.parametres['keep_coord'] :
+ txt += """
+ method <-"%s"
+ mat <- simil(dm, method = method, diag = TRUE, upper = TRUE, by_rows = FALSE)
+ """ % self.analyse.indices[self.parametres['coeff']]
+ if not self.parametres['keep_coord'] :
+ txt += """
+ mat <- as.matrix(stats::as.dist(mat,diag=TRUE,upper=TRUE))
+ mat[is.na(mat)] <- 0
+ mat[is.infinite(mat)] <- 0
+ """
+ if self.parametres['layout'] == 0 : layout = 'random'
+ if self.parametres['layout'] == 1 : layout = 'circle'
+ if self.parametres['layout'] == 2 : layout = 'frutch'
+ if self.parametres['layout'] == 3 : layout = 'kawa'
+ if self.parametres['layout'] == 4 : layout = 'graphopt'
+
+
+ self.filename=''
+ if self.parametres['type_graph'] == 0 : type = 'tkplot'
+ if self.parametres['type_graph'] == 1 :
+ graphnb = 1
+ type = 'nplot'
+ dirout = os.path.dirname(self.pathout['mat01'])
+ while os.path.exists(os.path.join(dirout,'graph_simi_'+str(graphnb)+'.png')):
+ graphnb +=1
+ self.filename = ffr(os.path.join(dirout,'graph_simi_'+str(graphnb)+'.png'))
+ if self.parametres['type_graph'] == 2 : type = 'rgl'
+
+ if self.parametres['arbremax'] :
+ arbremax = 'TRUE'
+ self.txtgraph += ' - arbre maximum'
+ else : arbremax = 'FALSE'
+
+ if self.parametres['coeff_tv'] :
+ coeff_tv = self.parametres['coeff_tv_nb']
+ tvminmax = 'c(NULL,NULL)'
+ elif not self.parametres['coeff_tv'] or self.parametres.get('sformchi', False) :
+ coeff_tv = 'NULL'
+ tvminmax = 'c(%i, %i)' %(self.parametres['tvmin'], self.parametres['tvmax'])
+ if self.parametres['coeff_te'] : coeff_te = 'c(%i,%i)' % (self.parametres['coeff_temin'], self.parametres['coeff_temax'])
+ else : coeff_te = 'NULL'
+
+ if self.parametres['vcex'] or self.parametres.get('cexfromchi', False) :
+ vcexminmax = 'c(%i/10,%i/10)' % (self.parametres['vcexmin'],self.parametres['vcexmax'])
+ else :
+ vcexminmax = 'c(NULL,NULL)'
+ if not self.parametres['label_v'] : label_v = 'FALSE'
+ else : label_v = 'TRUE'
+
+ if not self.parametres['label_e'] : label_e = 'FALSE'
+ else : label_e = 'TRUE'
+
+ if self.parametres['seuil_ok'] : seuil = str(self.parametres['seuil'])
+ else : seuil = 'NULL'
+
+ cols = str(self.parametres['cols']).replace(')',', max=255)')
+ cola = str(self.parametres['cola']).replace(')',',max=255)')
+
+ txt += """
+ minmaxeff <- %s
+ """ % tvminmax
+ txt += """
+ vcexminmax <- %s
+ """ % vcexminmax
+ txt += """
+ cex = %i/10
+ """ % self.parametres['cex']
+
+ if self.parametres['film'] :
+ txt += """
+ film <- "%s"
+ """ % self.pathout['film']
+ else :
+ txt += """
+ film <- NULL
+ """
+ txt += """
+ seuil <- %s
+ """ % seuil
+
+ txt += """
+ label.v <- %s
+ label.e <- %s
+ """ % (label_v, label_e)
+ txt += """
+ cols <- rgb%s
+ cola <- rgb%s
+ """ % (cols, cola)
+ txt += """
+ width <- %i
+ height <- %i
+ """ % (self.parametres['width'], self.parametres['height'])
+ if self.parametres['keep_coord'] :
+ txt += """
+ coords <- try(coords, TRUE)
+ if (!is.matrix(coords)) {
+ coords<-NULL
+ }
+ """
+ else :
+ txt += """
+ coords <- NULL
+ """
+ txt += """
+ alpha <- %i/100
+ """ % self.parametres['alpha']
+ txt += """
+ alpha <- %i/100
+ """ % self.parametres['alpha']
+#############################################
+ if self.parametres.get('bystar',False) :
+ txt += """
+ et <- list()
+ """
+ for i, line in enumerate(self.parametres['listet']) :
+ txt+= """
+ et[[%i]] <- c(%s)
+ """ % (i+1, ','.join([`val + 1` for val in line]))
+ txt+= """
+ unetoile <- c('%s')
+ """ % ("','".join([val for val in self.parametres['selectedstars']]))
+ txt += """
+ fsum <- NULL
+ rs <- rowSums(dm)
+ for (i in 1:length(unetoile)) {
+ print(unetoile[i])
+ tosum <- et[[i]]
+ if (length(tosum) > 1) {
+ fsum <- cbind(fsum, colSums(dm[tosum,]))
+ } else {
+ fsum <- cbind(fsum, dm[tosum,])
+ }
+ }
+ source("%s")
+ lex <- AsLexico2(fsum, chip=TRUE)
+ dcol <- apply(lex[[4]],1,which.max)
+ toblack <- apply(lex[[4]],1,max)
+ gcol <- rainbow(length(unetoile))
+ #gcol[2] <- 'orange'
+ vertex.label.color <- gcol[dcol]
+ vertex.label.color[which(toblack <= 3.84)] <- 'black'
+ leg <- list(unetoile=unetoile, gcol=gcol)
+ cols <- vertex.label.color
+ chivertex.size <- norm.vec(toblack, vcexminmax[1], vcexminmax[2])
+
+ """ % (self.analyse.parent.RscriptsPath['chdfunct'])
+ else :
+ txt += """
+ vertex.label.color <- 'black'
+ chivertex.size <- 1
+ leg<-NULL
+ """
+#############################################
+
+# txt += """
+# eff <- colSums(dm)
+# g.ori <- graph.adjacency(mat, mode='lower', weighted = TRUE)
+# w.ori <- E(g.ori)$weight
+# if (max.tree) {
+# if (method == 'cooc') {
+# E(g.ori)$weight <- 1 / w.ori
+# } else {
+# E(g.ori)$weigth <- 1 - w.ori
+# }
+# g.max <- minimum.spanning.tree(g.ori)
+# if (method == 'cooc') {
+# E(g.max)$weight <- 1 / E(g.max)$weight
+# } else {
+# E(g.max)$weight <- 1 - E(g.max)$weight
+# }
+# g.toplot <- g.max
+# } else {
+# g.toplot <- g.ori
+# }
+# """
+ txt += """
+ eff <- colSums(dm)
+ x <- list(mat = mat, eff = eff)
+ graph.simi <- do.simi(x, method='%s', seuil = seuil, p.type = '%s', layout.type = '%s', max.tree = %s, coeff.vertex=%s, coeff.edge = %s, minmaxeff = minmaxeff, vcexminmax = vcexminmax, cex = cex, coords = coords)
+ """ % (method, type, layout, arbremax, coeff_tv, coeff_te)
+
+ if self.parametres.get('bystar',False) :
+ if self.parametres.get('cexfromchi', False) :
+ txt+="""
+ label.cex<-chivertex.size
+ """
+ else :
+ txt+="""
+ label.cex <- NULL
+ """
+ if self.parametres.get('sfromchi', False) :
+ txt += """
+ vertex.size <- norm.vec(toblack, minmaxeff[1], minmaxeff[2])
+ """
+ else :
+ txt += """
+ vertex.size <- NULL
+ """
+ else :
+ #FIXME
+ tmpchi = False
+ if tmpchi :
+ txt += """
+ lchi <- read.table("%s")
+ lchi <- lchi[,1]
+ """ % ffr(tmpchi)
+ if 'selected_col' in dir(self.tableau) :
+ txt += """
+ lchi <- lchi[c%s+1]
+ """ % datas
+ if tmpchi and self.parametres.get('cexfromchi', False) :
+ txt += """
+ label.cex <- norm.vec(lchi, vcexminmax[1], vcexminmax[2])
+ """
+ else :
+ txt += """
+ if (is.null(vcexminmax[1])) {
+ label.cex <- NULL
+ } else {
+ label.cex <- graph.simi$label.cex
+ }
+ """
+ if tmpchi and self.parametres.get('sfromchi', False) :
+ txt += """
+ vertex.size <- norm.vec(lchi, minmaxeff[1], minmaxeff[2])
+ """
+ else :
+ txt += """
+ if (is.null(minmaxeff[1])) {
+ vertex.size <- NULL
+ } else {
+ vertex.size <- graph.simi$eff
+ }
+ """
+ txt += """ vertex.size <- NULL """
+ txt += """
+ coords <- plot.simi(graph.simi, p.type='%s',filename="%s", vertex.label = label.v, edge.label = label.e, vertex.col = cols, vertex.label.color = vertex.label.color, vertex.label.cex=label.cex, vertex.size = vertex.size, edge.col = cola, leg=leg, width = width, height = height, alpha = alpha, movie = film)
+ save.image(file="%s")
+ """ % (type, self.filename, self.pathout['RData'])
+
+ self.add(txt)
+ self.write()
+
+class WordCloudRScript(PrintRScript) :
+ def make_script(self) :
+ self.sources([self.analyse.parent.RscriptsPath['Rgraph']])
+ self.packages(['wordcloud'])
+ bg_col = Rcolor(self.parametres['col_bg'])
+ txt_col = Rcolor(self.parametres['col_text'])
+ txt = """
+ act <- read.csv2("%s", header = FALSE, row.names=1, sep='\t')
+ selected.col <- read.table("%s")
+ toprint <- as.matrix(act[selected.col[,1] + 1,])
+ rownames(toprint) <- rownames(act)[selected.col[,1] + 1]
+ maxword <- %i
+ if (nrow(toprint) > maxword) {
+ toprint <- as.matrix(toprint[order(toprint[,1], decreasing=TRUE),])
+ toprint <- as.matrix(toprint[1:maxword,])
+ }
+ open_file_graph("%s", width = %i, height = %i)
+ par(bg=rgb%s)
+ wordcloud(row.names(toprint), toprint[,1], scale=c(%f,%f), random.order=FALSE, colors=rgb%s)
+ dev.off()
+ """ % (ffr(self.analyse.pathout['actives_eff.csv']), ffr(self.analyse.pathout['selected.csv']), self.parametres['maxword'], ffr(self.parametres['graphout']), self.parametres['width'], self.parametres['height'], bg_col, self.parametres['maxcex'], self.parametres['mincex'], txt_col)
+ self.add(txt)
+ self.write()