From: Pierre Date: Sun, 7 Oct 2012 13:05:08 +0000 (+0200) Subject: simitxt X-Git-Url: http://iramuteq.org/git?a=commitdiff_plain;h=0bb1e9556fdbb07e171b663ffcea149692a8a49f;p=iramuteq simitxt --- diff --git a/PrintRScript.py b/PrintRScript.py index 6089847..7aa30d6 100644 --- a/PrintRScript.py +++ b/PrintRScript.py @@ -35,6 +35,10 @@ class PrintRScript : for source in lsources : self.add('source("%s")' % source) + def packages(self, lpks) : + for pk in lpks : + self.add('library(%s)' % pk) + def load(self, l) : for val in l : self.add('load("%s")' % val) @@ -621,3 +625,286 @@ def barplot(table, rownames, colnames, rgraph, tmpgraph, intxt = False) : # f.write(txt) # f.close() +class PrintSimiScript(PrintRScript) : + def make_script(self) : + self.txtgraph = '' + self.packages(['igraph', 'proxy', 'Matrix']) + self.sources([self.analyse.parent.RscriptsPath['simi'], self.analyse.parent.RscriptsPath['Rgraph']]) + txt = """ + dm.path <- "%s" + cn.path <- "%s" + selected.col <- "%s" + """ % (self.pathout['mat01.csv'], self.pathout['actives.csv'], self.pathout['selected.csv']) + txt += """ + dm <-readMM(dm.path) + cn <- read.table(cn.path, sep=';', quote='"') + colnames(dm) <- cn[,1] + sel.col <- read.csv2(selected.col) + dm <- dm[, sel.col[,1] + 1] + """ + + if self.parametres['coeff'] == 0 : + method = 'cooc' + txt += """ + method <- 'cooc' + mat <- make.a(dm) + """ + else : + txt += """ + dm <- as.matrix(dm) + """ + if self.parametres['coeff'] == 1 : + method = 'prcooc' + txt += """ + method <- 'Russel' + mat <- simil(dm, method = 'Russel', diag = TRUE, upper = TRUE, by_rows = FALSE) + """ + elif self.analyse.indices[self.parametres['coeff']] == 'binomial' : + method = 'binomial' + txt += """ + method <- 'binomial' + mat <- binom.sim(dm) + """ + elif self.parametres['coeff'] != 0 : + method = self.analyse.indices[self.parametres['coeff']] + txt += """ + method <-"%s" + mat <- simil(dm, method = method, diag = TRUE, upper = TRUE, by_rows = FALSE) + """ % self.analyse.indices[self.parametres['coeff']] + txt += """ + mat <- as.matrix(stats::as.dist(mat,diag=TRUE,upper=TRUE)) + mat[is.na(mat)] <- 0 + mat[is.infinite(mat)] <- 0 + """ + if self.parametres['layout'] == 0 : layout = 'random' + if self.parametres['layout'] == 1 : layout = 'circle' + if self.parametres['layout'] == 2 : layout = 'frutch' + if self.parametres['layout'] == 3 : layout = 'kawa' + if self.parametres['layout'] == 4 : layout = 'graphopt' + + self.filename='' + if self.parametres['type_graph'] == 0 : type = 'tkplot' + if self.parametres['type_graph'] == 1 : + graphnb = 1 + type = 'nplot' + dirout = os.path.dirname(self.pathout['mat01']) + while os.path.exists(os.path.join(dirout,'graph_simi_'+str(graphnb)+'.png')): + graphnb +=1 + self.filename = ffr(os.path.join(dirout,'graph_simi_'+str(graphnb)+'.png')) + if self.parametres['type_graph'] == 2 : type = 'rgl' + + if self.parametres['arbremax'] : + arbremax = 'TRUE' + self.txtgraph += ' - arbre maximum' + else : arbremax = 'FALSE' + + if self.parametres['coeff_tv'] : + coeff_tv = self.parametres['coeff_tv_nb'] + tvminmax = 'c(NULL,NULL)' + elif not self.parametres['coeff_tv'] or self.parametres.get('sformchi', False) : + coeff_tv = 'NULL' + tvminmax = 'c(%i, %i)' %(self.parametres['tvmin'], self.parametres['tvmax']) + if self.parametres['coeff_te'] : coeff_te = 'c(%i,%i)' % (self.parametres['coeff_temin'], self.parametres['coeff_temax']) + else : coeff_te = 'NULL' + + if self.parametres['vcex'] or self.parametres.get('cexfromchi', False) : + vcexminmax = 'c(%i/10,%i/10)' % (self.parametres['vcexmin'],self.parametres['vcexmax']) + else : + vcexminmax = 'c(NULL,NULL)' + if not self.parametres['label_v'] : label_v = 'FALSE' + else : label_v = 'TRUE' + + if not self.parametres['label_e'] : label_e = 'FALSE' + else : label_e = 'TRUE' + + if self.parametres['seuil_ok'] : seuil = str(self.parametres['seuil']) + else : seuil = 'NULL' + + cols = str(self.parametres['cols']).replace(')',', max=255)') + cola = str(self.parametres['cola']).replace(')',',max=255)') + + txt += """ + minmaxeff <- %s + """ % tvminmax + txt += """ + vcexminmax <- %s + """ % vcexminmax + txt += """ + cex = %i/10 + """ % self.parametres['cex'] + + if self.parametres['film'] : + txt += """ + film <- "%s" + """ % self.pathout['film'] + else : + txt += """ + film <- NULL + """ + txt += """ + seuil <- %s + """ % seuil + + txt += """ + label.v <- %s + label.e <- %s + """ % (label_v, label_e) + txt += """ + cols <- rgb%s + cola <- rgb%s + """ % (cols, cola) + txt += """ + width <- %i + height <- %i + """ % (self.parametres['width'], self.parametres['height']) + if self.parametres['keep_coord'] : + txt += """ + coords <- try(coords, TRUE) + if (!is.matrix(coords)) { + coords<-NULL + } + """ + else : + txt += """ + coords <- NULL + """ + txt += """ + alpha <- %i/100 + """ % self.parametres['alpha'] + txt += """ + alpha <- %i/100 + """ % self.parametres['alpha'] +############################################# + if self.parametres.get('bystar',False) : + txt += """ + et <- list() + """ + for i,et in enumerate(self.tableau.etline) : + txt+= """ + et[[%i]] <- c(%s) + """ % (i+1, ','.join(et[1:])) + txt+= """ + unetoile <- c('%s') + """ % ("','".join([val[0] for val in self.tableau.etline])) + txt += """ + fsum <- NULL + rs <- rowSums(dm) + for (i in 1:length(unetoile)) { + print(unetoile[i]) + tosum <- et[[i]] + if (length(tosum) > 1) { + fsum <- cbind(fsum, colSums(dm[tosum,])) + } else { + fsum <- cbind(fsum, dm[tosum,]) + } + } + source("%s") + lex <- AsLexico2(fsum, chip=TRUE) + dcol <- apply(lex[[4]],1,which.max) + toblack <- apply(lex[[4]],1,max) + gcol <- rainbow(length(unetoile)) + #gcol[2] <- 'orange' + vertex.label.color <- gcol[dcol] + vertex.label.color[which(toblack <= 3.84)] <- 'black' + leg <- list(unetoile=unetoile, gcol=gcol) + cols <- vertex.label.color + chivertex.size <- norm.vec(toblack, vcexminmax[1], vcexminmax[2]) + + """ % (self.parent.RscriptsPath['chdfunct']) + else : + txt += """ + vertex.label.color <- 'black' + chivertex.size <- 1 + leg<-NULL + """ +############################################# + +# txt += """ +# eff <- colSums(dm) +# g.ori <- graph.adjacency(mat, mode='lower', weighted = TRUE) +# w.ori <- E(g.ori)$weight +# if (max.tree) { +# if (method == 'cooc') { +# E(g.ori)$weight <- 1 / w.ori +# } else { +# E(g.ori)$weigth <- 1 - w.ori +# } +# g.max <- minimum.spanning.tree(g.ori) +# if (method == 'cooc') { +# E(g.max)$weight <- 1 / E(g.max)$weight +# } else { +# E(g.max)$weight <- 1 - E(g.max)$weight +# } +# g.toplot <- g.max +# } else { +# g.toplot <- g.ori +# } +# """ + txt += """ + eff <- colSums(dm) + x <- list(mat = mat, eff = eff) + graph.simi <- do.simi(x, method='%s', seuil = seuil, p.type = '%s', layout.type = '%s', max.tree = %s, coeff.vertex=%s, coeff.edge = %s, minmaxeff = minmaxeff, vcexminmax = vcexminmax, cex = cex, coords = coords) + """ % (method, type, layout, arbremax, coeff_tv, coeff_te) + + if self.parametres.get('bystar',False) : + if self.parametres.get('cexfromchi', False) : + txt+=""" + label.cex<-chivertex.size + """ + else : + txt+=""" + label.cex <- NULL + """ + if self.parametres.get('sfromchi', False) : + txt += """ + vertex.size <- norm.vec(toblack, minmaxeff[1], minmaxeff[2]) + """ + else : + txt += """ + vertex.size <- NULL + """ + else : + #FIXME + tmpchi = False + if tmpchi : + txt += """ + lchi <- read.table("%s") + lchi <- lchi[,1] + """ % ffr(tmpchi) + if 'selected_col' in dir(self.tableau) : + txt += """ + lchi <- lchi[c%s+1] + """ % datas + if tmpchi and self.parametres.get('cexfromchi', False) : + txt += """ + label.cex <- norm.vec(lchi, vcexminmax[1], vcexminmax[2]) + """ + else : + txt += """ + if (is.null(vcexminmax[1])) { + label.cex <- NULL + } else { + label.cex <- graph.simi$label.cex + } + """ + if tmpchi and self.parametres.get('sfromchi', False) : + txt += """ + vertex.size <- norm.vec(lchi, minmaxeff[1], minmaxeff[2]) + """ + else : + txt += """ + if (is.null(minmaxeff[1])) { + vertex.size <- NULL + } else { + vertex.size <- graph.simi$eff + } + """ + txt += """ vertex.size <- NULL """ + txt += """ + coords <- plot.simi(graph.simi, p.type='%s',filename="%s", vertex.label = label.v, edge.label = label.e, vertex.col = cols, vertex.label.color = vertex.label.color, vertex.label.cex=label.cex, vertex.size = vertex.size, edge.col = cola, leg=leg, width = width, height = height, alpha = alpha, movie = film) + save.image(file="%s") + """ % (type, self.filename, self.pathout['RData']) + + self.add(txt) + self.write() + diff --git a/ProfList.py b/ProfList.py index db1dbd1..b89854e 100644 --- a/ProfList.py +++ b/ProfList.py @@ -588,11 +588,24 @@ class ProfListctrlPanel(wx.Panel, listmix.ColumnSorterMixin): del ntab ntab2.sort(reverse = True) ntab2 = ntab2[:limite] + nuces = [val[1] for val in ntab2] dlg.Update(3, u'concordancier...') - ucestxt = [corpus.ucis_paras_uces[val[1][0]][val[1][1]][val[1][2]] for val in ntab2] - ucestxt = [corpus.make_concord(self.la, ' '.join(uce), 'red') for uce in ucestxt] + #ucestxt = [corpus.ucis_paras_uces[val[1][0]][val[1][1]][val[1][2]] for val in ntab2] + ucestxt1 = [row for row in corpus.getconcorde(nuces)] + ucestxt = [] + ucis_txt = [] + for uce in ucestxt1 : + ucetxt = ' '+uce[1]+' ' + ucis_txt.append(' '.join(corpus.ucis[corpus.getucefromid(uce[0]).uci].etoiles) + '
') + for lem in self.la : + listmot = corpus.getlems()[lem].formes + for id in listmot : + forme = corpus.getforme(id).forme + ucetxt = ucetxt.replace(' '+forme+' ', ' ' + forme + ' ') + ucestxt.append(ucetxt) + #ucestxt = [corpus.make_concord(self.la, ' '.join(uce), 'red') for uce in ucestxt] dlg.Update(4, u'texte...') - ucis_txt = [' '.join(corpus.ucis[val[1][0]][0]) for val in ntab2] + #ucis_txt = [' '.join(corpus.ucis[val[1][0]][0]) for val in ntab2] win = message(self, -1, u"UCE caractéristiques - Classe %i" % self.cl, size=(600, 500), style=wx.DEFAULT_FRAME_STYLE) win.html = '\n' + '

'.join(['
'.join([ucis_txt[i], 'score : ' + str(ntab2[i][0]), ucestxt[i]]) for i in range(0,len(ucestxt))]) + '\n' win.HtmlPage.SetPage(win.html) diff --git a/Rscripts/chdfunct.R b/Rscripts/chdfunct.R index 2ca1eac..dd86dc1 100644 --- a/Rscripts/chdfunct.R +++ b/Rscripts/chdfunct.R @@ -294,91 +294,91 @@ AsLexico2<- function(mat, chip = FALSE) { } -#from textometrieR -#http://txm.sourceforge.net/doc/R/textometrieR-package.html -#Sylvain Loiseau -specificites.probabilities <- function (lexicaltable, types = NULL, parts = NULL) -{ - rowMargin <- rowSums(lexicaltable) - colMargin <- colSums(lexicaltable) - F <- sum(lexicaltable) - if (!is.null(types)) { - if (is.character(types)) { - if (is.null(rownames(lexicaltable))) - stop("The lexical table has no row names and the \"types\" argument is a character vector.") - if (!all(types %in% rownames(lexicaltable))) - stop(paste("Some requested types are not known in the lexical table: ", - paste(types[!(types %in% rownames(lexicaltable))], - collapse = " "))) - } - else { - if (any(types < 1)) - stop("The row index must be greater than 0.") - if (max(types) > nrow(lexicaltable)) - stop("Row index must be smaller than the number of rows.") - } - lexicaltable <- lexicaltable[types, , drop = FALSE] - rowMargin <- rowMargin[types] - } - if (!is.null(parts)) { - if (is.character(parts)) { - if (is.null(colnames(lexicaltable))) - stop("The lexical table has no col names and the \"parts\" argument is a character vector.") - if (!all(parts %in% colnames(lexicaltable))) - stop(paste("Some requested parts are not known in the lexical table: ", - paste(parts[!(parts %in% colnames(lexicaltable))], - collapse = " "))) - } - else { - if (max(parts) > ncol(lexicaltable)) - stop("Column index must be smaller than the number of cols.") - if (any(parts < 1)) - stop("The col index must be greater than 0.") - } - lexicaltable <- lexicaltable[, parts, drop = FALSE] - colMargin <- colMargin[parts] - } - if (nrow(lexicaltable) == 0 | ncol(lexicaltable) == 0) { - stop("The lexical table must contains at least one row and one column.") - } - specif <- matrix(0, nrow = nrow(lexicaltable), ncol = ncol(lexicaltable)) - for (i in 1:ncol(lexicaltable)) { - whiteDrawn <- lexicaltable[, i] - white <- rowMargin - black <- F - white - drawn <- colMargin[i] - independance <- (white * drawn)/F - specif_negative <- whiteDrawn < independance - specif_positive <- whiteDrawn >= independance - specif[specif_negative, i] <- phyper(whiteDrawn[specif_negative], - white[specif_negative], black[specif_negative], drawn) - specif[specif_positive, i] <- phyper(whiteDrawn[specif_positive] - - 1, white[specif_positive], black[specif_positive], - drawn) - } - dimnames(specif) <- dimnames(lexicaltable) - return(specif) -} - -#from textometrieR -#http://txm.sourceforge.net/doc/R/textometrieR-package.html -#Sylvain Loiseau -specificites <- function (lexicaltable, types = NULL, parts = NULL) -{ - spe <- specificites.probabilities(lexicaltable, types, parts) - spelog <- matrix(0, nrow = nrow(spe), ncol = ncol(spe)) - spelog[spe < 0.5] <- log10(spe[spe < 0.5]) - spelog[spe > 0.5] <- abs(log10(1 - spe[spe > 0.5])) - spelog[spe == 0.5] <- 0 - spelog[is.infinite(spe)] <- 0 - spelog <- round(spelog, digits = 4) - rownames(spelog) <- rownames(spe) - colnames(spelog) <- colnames(spe) - return(spelog) -} +##from textometrieR +##http://txm.sourceforge.net/doc/R/textometrieR-package.html +##Sylvain Loiseau +#specificites.probabilities <- function (lexicaltable, types = NULL, parts = NULL) +#{ +# rowMargin <- rowSums(lexicaltable) +# colMargin <- colSums(lexicaltable) +# F <- sum(lexicaltable) +# if (!is.null(types)) { +# if (is.character(types)) { +# if (is.null(rownames(lexicaltable))) +# stop("The lexical table has no row names and the \"types\" argument is a character vector.") +# if (!all(types %in% rownames(lexicaltable))) +# stop(paste("Some requested types are not known in the lexical table: ", +# paste(types[!(types %in% rownames(lexicaltable))], +# collapse = " "))) +# } +# else { +# if (any(types < 1)) +# stop("The row index must be greater than 0.") +# if (max(types) > nrow(lexicaltable)) +# stop("Row index must be smaller than the number of rows.") +# } +# lexicaltable <- lexicaltable[types, , drop = FALSE] +# rowMargin <- rowMargin[types] +# } +# if (!is.null(parts)) { +# if (is.character(parts)) { +# if (is.null(colnames(lexicaltable))) +# stop("The lexical table has no col names and the \"parts\" argument is a character vector.") +# if (!all(parts %in% colnames(lexicaltable))) +# stop(paste("Some requested parts are not known in the lexical table: ", +# paste(parts[!(parts %in% colnames(lexicaltable))], +# collapse = " "))) +# } +# else { +# if (max(parts) > ncol(lexicaltable)) +# stop("Column index must be smaller than the number of cols.") +# if (any(parts < 1)) +# stop("The col index must be greater than 0.") +# } +# lexicaltable <- lexicaltable[, parts, drop = FALSE] +# colMargin <- colMargin[parts] +# } +# if (nrow(lexicaltable) == 0 | ncol(lexicaltable) == 0) { +# stop("The lexical table must contains at least one row and one column.") +# } +# specif <- matrix(0, nrow = nrow(lexicaltable), ncol = ncol(lexicaltable)) +# for (i in 1:ncol(lexicaltable)) { +# whiteDrawn <- lexicaltable[, i] +# white <- rowMargin +# black <- F - white +# drawn <- colMargin[i] +# independance <- (white * drawn)/F +# specif_negative <- whiteDrawn < independance +# specif_positive <- whiteDrawn >= independance +# specif[specif_negative, i] <- phyper(whiteDrawn[specif_negative], +# white[specif_negative], black[specif_negative], drawn) +# specif[specif_positive, i] <- phyper(whiteDrawn[specif_positive] - +# 1, white[specif_positive], black[specif_positive], +# drawn) +# } +# dimnames(specif) <- dimnames(lexicaltable) +# return(specif) +#} +# +##from textometrieR +##http://txm.sourceforge.net/doc/R/textometrieR-package.html +##Sylvain Loiseau +#specificites <- function (lexicaltable, types = NULL, parts = NULL) +#{ +# spe <- specificites.probabilities(lexicaltable, types, parts) +# spelog <- matrix(0, nrow = nrow(spe), ncol = ncol(spe)) +# spelog[spe < 0.5] <- log10(spe[spe < 0.5]) +# spelog[spe > 0.5] <- abs(log10(1 - spe[spe > 0.5])) +# spelog[spe == 0.5] <- 0 +# spelog[is.infinite(spe)] <- 0 +# spelog <- round(spelog, digits = 4) +# rownames(spelog) <- rownames(spe) +# colnames(spelog) <- colnames(spe) +# return(spelog) +#} make.spec.hypergeo <- function(mat) { - #library(textometrieR) + library(textometrieR) spec <- specificites(mat) sumcol<-colSums(mat) eff_relatif<-round(t(apply(mat,1,function(x) {(x/t(as.matrix(sumcol))*1000)})),2) diff --git a/analysetxt.py b/analysetxt.py index 666a062..0402cdc 100644 --- a/analysetxt.py +++ b/analysetxt.py @@ -69,6 +69,7 @@ class AnalyseText : self.parametres['uuid'] = str(uuid4()) self.parametres['name'] = os.path.split(self.parametres['pathout'])[1] self.parametres['type'] = parametres['type'] + self.parametres['encoding'] = self.ira.syscoding self.t1 = time() #if self.corpus.lems is None : self.corpus.make_lems(lem = self.parametres['lem']) @@ -96,8 +97,8 @@ class AnalyseText : def make_config(self, config) : if config is not None : - if isinstance(config, basestring) : - return self.readconfig(config) + if not self.dlg : + return config else : return self.preferences() @@ -107,9 +108,6 @@ class AnalyseText : def preferences(self) : return {} - def doR(self): - pass - def printRscript(self) : pass diff --git a/chemins.py b/chemins.py index b7c288d..ec4ab2d 100644 --- a/chemins.py +++ b/chemins.py @@ -118,6 +118,7 @@ def ConstructConfigPath(AppliPath, user=True): 'preferences' : os.path.join(ConfigPath, 'iramuteq.cfg'), 'pam' : os.path.join(ConfigPath, 'pam.cfg'), 'history' : os.path.join(ConfigPath, 'history.db'), + 'corpus' : os.path.join(ConfigPath, 'corpus.cfg'), } return DictConfigPath @@ -139,6 +140,10 @@ def ConstructDicoPath(AppliPath): 'german_exp' : os.path.join(BasePath, 'expression_de.txt'), 'italian' : os.path.join(BasePath, 'lexique_it.txt'), 'italian_exp' : os.path.join(BasePath, 'expression_it.txt'), + 'swedish' : os.path.join(BasePath, 'lexique_sw.txt'), + 'swedish_exp' : os.path.join(BasePath, 'expression_sw.txt'), + 'portuguese' : os.path.join(BasePath, 'lexique_pt.txt'), + 'portuguese_exp': os.path.join(BasePath, 'expression_pt.txt'), } return DictPath @@ -276,3 +281,15 @@ def construct_simipath(pathout): 'corpus' : os.path.join(pathout, 'corpus.db'), } return d + +simipath = {'mat01' : 'mat01.csv', + 'matsimi' : 'matsimi.csv', + 'eff' : 'eff.csv', + 'RData' : 'RData.RData', + 'liste_graph' :'liste_graph.txt', + 'ira' : 'Analyse.ira', + 'film' : '', + 'db' : 'analyse.db', + 'corpus' : 'corpus.db', + } + diff --git a/configuration/corpus.cfg b/configuration/corpus.cfg index 327f4e8..52491e6 100644 --- a/configuration/corpus.cfg +++ b/configuration/corpus.cfg @@ -5,7 +5,7 @@ originalpath = encoding = utf8 lang = french douce = 1 -ucemethod = 0 +ucemethod = 1 ucesize = 35 keep_ponct = 0 tolist = 0 @@ -15,7 +15,7 @@ time = ucinb = ucenb = occurrences = -keep_caract = ^a-zA-Z0-9àÀâÂäÄáÁéÉèÈêÊëËìÌîÎïÏòÒôÔöÖùÙûÛüÜçÇßœŒ’ñ.:,;!?*'_- +keep_caract = ^a-zA-Z0-9àÀâÂäÄáÁåÅãéÉèÈêÊëËìÌîÎïÏíÍòÒôÔöÖõÕøØùÙûÛüÜúÚçÇßœŒ’ñ.:,;!?*'_- lower = 1 ucimark = 0 expressions = 1 diff --git a/corpusNG.py b/corpusNG.py index eb55b08..380b9a2 100644 --- a/corpusNG.py +++ b/corpusNG.py @@ -433,6 +433,16 @@ class Corpus : for line in f : ffin.write(line) os.remove(outfile + '~') + + def make_table_with_classe(self, uces, list_act) : + table_uce = [[0 for val in list_act] for line in range(0,len(uces))] + uces = dict([[uce, i] for i, uce in enumerate(uces)]) + for i, lem in enumerate(list_act) : + lemuces = list(set(self.getlemuces(lem)).intersection(uces)) + for uce in lemuces : + table_uce[uces[uce]][i] = 1 + table_uce.insert(0, list_act) + return table_uce def parse_active(self, gramact, gramsup = None) : log.info('parse actives') @@ -450,7 +460,7 @@ class Corpus : def make_actives_limit(self, limit) : if self.idformes is None : self.make_idformes() - return [lem for lem in self.lems if self.getlemeff(lem) >= limit] + return [lem for lem in self.lems if self.getlemeff(lem) >= limit and self.lems[lem].act == 1] def make_actives_nb(self, nbmax, key) : log.info('make_actives_nb : %i - %i' % (nbmax,key)) @@ -491,6 +501,27 @@ class Corpus : etoiles.update(uci.etoiles[1:] + uci.paras) return list(etoiles) + def make_etoiles_dict(self) : + etoiles = [et for uci in self.ucis for et in uci.etoiles[1:]] + det = {} + for etoile in etoiles : + et = etoile.split('_') + if et[0] in det : + try : + if et[1] in det[et[0]] : + det[et[0]][et[1]] += 1 + else : + det[et[0]][et[1]] = 1 + except IndexError : + det[et[0]] += 1 + else : + try : + det[et[0]] = {et[1] :1} + except IndexError : + det[et[0]] = 1 + print det + + def make_and_write_profile_et(self, ucecl, fileout) : log.info('etoiles/classes') etoiles = self.make_etoiles() @@ -590,6 +621,14 @@ class Corpus : f.write(txt) +class MakeUciStat : + def __init__(self, corpus) : + ucinb = corpus.getucinb() + ucisize = corpus.getucisize() + ucimean = float(sum(ucisize))/float(ucinb) + detoile = corpus.make_etoiles_dict() + + class Uci : def __init__(self, iduci, line, paraset = None) : self.ident = iduci @@ -648,7 +687,7 @@ def decouperlist(chaine, longueur, longueurOptimale) : try : indice = chaineTravail.index(u'$') trouve = True - iDecoupe = indice + iDecoupe = indice - 1 except ValueError : pass if not trouve: @@ -665,7 +704,7 @@ def decouperlist(chaine, longueur, longueurOptimale) : iDecoupe = nbCar else : if (float(dsep[' ']) / distance) > (float(meilleur[1]) / meilleureDistance) : - meilleur[0] = caractere + meilleur[0] = ' ' meilleur[1] = dsep[' '] meilleur[2] = nbCar trouve = True @@ -673,8 +712,12 @@ def decouperlist(chaine, longueur, longueurOptimale) : nbCar = nbCar - 1 # si on a trouvé if trouve: + #if meilleur[0] != ' ' : + # fin = chaine[iDecoupe + 1:] + # retour = chaineTravail[:iDecoupe] + #else : fin = chaine[iDecoupe + 1:] - retour = chaineTravail[:iDecoupe] + retour = chaineTravail[:iDecoupe + 1] return len(retour) > 0, retour, fin # si on a rien trouvé return False, chaine, '' @@ -911,13 +954,15 @@ class BuildFromAlceste(BuildCorpus) : self.backup_uce() def treattxt(self, txt, iduce, idpara, iduci) : - txt = ' '.join(txt) - #log.debug('ATTENTION CHINOIS -> charactères') - #clean_chinois = [self.firstclean, self.dolower, self.make_expression, self.doapos, self.dotiret] - #log.debug('ATTENTION CHINOIS -> list(text)') - #txt = ' '.join(list(txt)) - txt = self.make_cleans(txt)#, clean_chinois) - ucetxt = self.make_uces(txt, self.corpus.parametres['douce']) + if self.corpus.parametres.get('ucemethod', 0) == 2 and self.corpus.parametres['douce']: + txt = 'laphrasepoursplitter'.join(txt) + txt = self.make_cleans(txt) + txt = ' '.join([val for val in txt.split() if val not in self.ponctuation_espace]) + ucetxt = txt.split('laphrasepoursplitter') + else : + txt = ' '.join(txt) + txt = self.make_cleans(txt) + ucetxt = self.make_uces(txt, self.corpus.parametres['douce']) if self.corpus.ucis[-1].paras == [] : idpara += 1 for uce in ucetxt : diff --git a/dialog.py b/dialog.py index 8ff0e0f..d85a241 100755 --- a/dialog.py +++ b/dialog.py @@ -10,6 +10,7 @@ import wx.lib.filebrowsebutton as filebrowse import locale import os import sys +#from listlex import * from KeyFrame import AlcOptFrame #--------------------------------------------------------------------------- provider = wx.SimpleHelpProvider() @@ -191,8 +192,8 @@ class EncodeDialog(wx.Dialog): kwds["title"] = u'Encodage' wx.Dialog.__init__(self, *args, **kwds) self.label_dict = wx.StaticText(self, -1, u"Langue") - langues_n = [u'français', u'english', u'german (expérimentale)', u'italian (expérimentale)'] - self.langues = [u'french', u'english', u'german', 'italian'] + langues_n = [u'français', u'english', u'german (expérimentale)', u'italian (expérimentale)', u'swedish (exp.)', u'portuguese (exp.)'] + self.langues = [u'french', u'english', u'german', 'italian', 'swedish', 'portuguese'] self.choice_dict = wx.Choice(self, -1, choices = langues_n) self.encodages = encodages self.text = wx.StaticText(self, -1, u"Encodage du corpus : ") @@ -1360,7 +1361,7 @@ class PrefSimi ( wx.Dialog ): def __set_properties(self): self.choice1.SetSelection(self.paramsimi['coeff']) self.choice2.SetSelection(self.paramsimi['layout']) - self.choice3.SetSelection(self.paramsimi['type']) + self.choice3.SetSelection(self.paramsimi['type_graph']) if self.paramsimi['type'] != 2 : self.film.Enable(False) self.slider_sphere.Enable(False) @@ -2764,11 +2765,11 @@ class CorpusPref ( wx.Dialog ): def __init__( self, parent, parametres ): wx.Dialog.__init__ ( self, parent, id = wx.ID_ANY, title = u"Préférences", pos = wx.DefaultPosition, size = wx.DefaultSize, style = wx.DEFAULT_DIALOG_STYLE ) self.parent = parent - langues_n = [u'français', u'english', u'german (expérimentale)', u'italian (expérimentale)'] - self.langues = [u'french', u'english', u'german', 'italian'] + langues_n = [u'français', u'english', u'german (expérimentale)', u'italian (expérimentale)', u'swedish (exp.)', u'portuguese (exp.)'] + self.langues = [u'french', u'english', u'german', 'italian', 'swedish', u'portuguese'] self.encodages = encodages ucimark = [u'****', u'0000'] - ucemethod = [u'charactères', u'occurrences'] + ucemethod = [u'charactères', u'occurrences', u'paragraphe'] self.SetSizeHintsSz( wx.DefaultSize, wx.DefaultSize ) @@ -3008,4 +3009,3 @@ class CorpusPref ( wx.Dialog ): else : parametres[val] = 0 return parametres - diff --git a/dictionnaires/lexique_fr.txt b/dictionnaires/lexique_fr.txt index a6a550b..941d436 100644 --- a/dictionnaires/lexique_fr.txt +++ b/dictionnaires/lexique_fr.txt @@ -4956,7 +4956,7 @@ amérindiens amérindien nom m p 0.14 0 0.08 0 amérique amérique nom f s 0.34 1.08 0.34 1.08 améthyste améthyste nom f s 0.22 0.81 0.2 0.27 améthystes améthyste nom f p 0.22 0.81 0.02 0.54 -an an nom m s 866.58 685.81 148.41 76.76 +an an nom_sup m s 866.58 685.81 148.41 76.76 ana ana nom m 4.91 0.14 4.91 0.14 anabaptiste anabaptiste adj s 0.01 0.41 0.01 0.34 anabaptistes anabaptiste nom p 0 0.34 0 0.34 @@ -5504,7 +5504,7 @@ anormaux anormal adj m p 7.4 7.91 0.76 0.61 anosmie anosmie nom f s 0.07 0 0.07 0 anosmique anosmique adj m s 0.01 0.07 0.01 0.07 anoxie anoxie nom f s 0.09 0 0.09 0 -ans an nom m p 866.58 685.81 718.17 609.05 +ans an nom_sup m p 866.58 685.81 718.17 609.05 anse anse nom f s 0.47 6.08 0.33 4.86 anses anse nom f p 0.47 6.08 0.14 1.22 ansée ansé adj f s 0.02 0.07 0.01 0 diff --git a/functions.py b/functions.py index 2165f33..2909a44 100644 --- a/functions.py +++ b/functions.py @@ -10,6 +10,7 @@ from ConfigParser import ConfigParser from subprocess import Popen, call, PIPE import thread import os +import ast import sys import csv import platform @@ -130,6 +131,12 @@ class DoConf : for option in self.conf.options(section) : if self.conf.get(section, option).isdigit() : parametres[option] = int(self.conf.get(section, option)) + elif self.conf.get(section, option) == 'False' : + parametres[option] = False + elif self.conf.get(section, option) == 'True' : + parametres[option] = True + elif self.conf.get(section, option).startswith('(') and self.conf.get(section, option).endswith(')') : + parametres[option] = ast.literal_eval(self.conf.get(section, option)) else : parametres[option] = self.conf.get(section, option) if 'type' not in parametres : @@ -145,6 +152,10 @@ class DoConf : self.conf.set(section, option, `parametres[i][option]`) elif isinstance(parametres[i][option], basestring) : self.conf.set(section, option, parametres[i][option].encode('utf8')) + elif isinstance(parametres[i][option], wx.Colour) : + self.conf.set(section, option, str(parametres[i][option])) + else : + self.conf.set(section, option, `parametres[i][option]`) if outfile is None : outfile = self.configfile print outfile diff --git a/guifunct.py b/guifunct.py index 2a15b22..bf8614b 100644 --- a/guifunct.py +++ b/guifunct.py @@ -8,6 +8,8 @@ import os import sys from copy import copy import dialog +from listlex import * + def OnOpen(self, type): if type == "Data": @@ -111,3 +113,90 @@ def getCorpus(page) : else : return None +class SelectColumn : + def __init__(self, parent, dictcol, actives, pathout, selected = None) : + self.ira = parent + dial = dialog.SelectColDial(self.ira) + listcol = ListForSpec(dial, self, dictcol, ['forme', 'eff']) + dial.bSizer2.Add( listcol, 2, wx.ALL|wx.EXPAND, 5 ) + dial.m_sdbSizer2.AddButton( dial.m_sdbSizer2OK ) + dial.m_sdbSizer2.Realize() + dial.bSizer2.Add( dial.m_sdbSizer2, 0, wx.EXPAND, 5 ) + dial.Layout() + if selected is None : + for row in xrange(listcol.list.GetItemCount()): + listcol.list.Select(row) + else : + orderlex = dict([[listcol.getColumnText(i,0),i] for i in range(0,listcol.list.GetItemCount())]) + for row in selected : + listcol.list.Select(orderlex[actives[row]]) + dial.CenterOnParent() + val = dial.ShowModal() + last = listcol.list.GetFirstSelected() + lastl = [listcol.list.GetFirstSelected()] + indexes = [listcol.getColumnText(listcol.list.GetFirstSelected(),0)] + while listcol.list.GetNextSelected(last) != -1: + last = listcol.list.GetNextSelected(last) + lastl.append(last) + indexes.append(listcol.getColumnText(last,0)) + dial.Destroy() + column = [actives.index(val) for val in indexes] + column.sort() + with open(pathout, 'w') as f : + f.write('\n'.join([`val` for val in column])) + +class PrepSimi : + def __init__(self, parent, parametres, indices_simi) : + self.parametres = parametres + self.dial = dialog.PrefSimi(parent, -1, self.parametres, indices_simi) + self.dial.CenterOnParent() + self.val = self.dial.ShowModal() + if self.val == wx.ID_OK : + self.make_param() + + def make_param(self) : + self.select = self.dial.check_colch.GetValue() + if self.parametres.get('first', True) : + keep_coord = False + else : + keep_coord = self.dial.check_coord.GetValue() + param = {'coeff' : self.dial.choice1.GetSelection(), + 'layout' : self.dial.choice2.GetSelection(), + 'type_graph' : self.dial.choice3.GetSelection(), + 'arbremax' : self.dial.check1.GetValue(), + 'coeff_tv' : self.dial.check_s_size.GetValue(), + 'coeff_tv_nb' : self.dial.spin_tv.GetValue(), + 'tvprop' : self.dial.check2.GetValue(), + 'tvmin' : self.dial.spin_tvmin.GetValue(), + 'tvmax' : self.dial.spin_tvmax.GetValue(), + 'coeff_te' : self.dial.check3.GetValue(), + 'coeff_temin' : self.dial.spin_temin.GetValue(), + 'coeff_temax' : self.dial.spin_temax.GetValue(), + 'label_e' : self.dial.check_elab.GetValue(), + 'label_v' : self.dial.check_vlab.GetValue(), + 'vcex' : self.dial.check_vcex.GetValue(), + 'vcexmin' : self.dial.spin_vcexmin.GetValue(), + 'vcexmax' : self.dial.spin_vcexmax.GetValue(), + 'cex' : self.dial.spin_cex.GetValue(), + 'seuil_ok' : self.dial.check_seuil.GetValue(), + 'seuil' : self.dial.spin_seuil.GetValue(), + 'cols' : self.dial.cols.GetColour(), + 'cola' : self.dial.cola.GetColour(), + 'width' : self.dial.spin_width.GetValue(), + 'height' : self.dial.spin_height.GetValue(), + 'first' : False, + 'keep_coord' : keep_coord, + 'alpha' : self.dial.slider_sphere.GetValue(), + 'film' : self.dial.film.GetValue() + } + if 'cexfromchi' in self.parametres : + param['cexfromchi'] = self.dial.checkit.GetValue() + if 'sfromchi' in self.parametres : + param['sfromchi'] = self.dial.checki.GetValue() + if 'vlabcolor' in self.parametres : + param['vlabcolor'] = self.parametres['vlabcolor'] + if 'check_bystar' in dir(self.dial) : + param['bystar'] = self.dial.check_bystar.GetValue() + param['stars'] = self.parametres['stars'] + self.parametres.update(param) + diff --git a/iracmd.py b/iracmd.py index 19b0880..244f41f 100644 --- a/iracmd.py +++ b/iracmd.py @@ -53,9 +53,9 @@ class CmdLine : parser = OptionParser() parser.add_option("-f", "--file", dest="filename", help="chemin du corpus", metavar="FILE", default=False) - parser.add_option("-t", "--type", dest="type_analyse", help="type d'analyse", metavar="TYPE D'ANALYSE", default='False') + parser.add_option("-t", "--type", dest="type_analyse", help="type d'analyse", metavar="TYPE D'ANALYSE", default=False) - parser.add_option("-c", "--conf", dest="configfile", help="chemin du fichier de configuration", metavar="CONF", default=False) + parser.add_option("-c", "--conf", dest="configfile", help="chemin du fichier de configuration", metavar="CONF", default=None) parser.add_option("-e", "--enc", dest="encodage", help="encodage du corpus", metavar="ENC", default=locale.getpreferredencoding()) parser.add_option("-l", "--lang", dest="language", help="langue du corpus", metavar="LANG", default='french') parser.add_option("-r", "--read", dest="read", help="lire un corpus", metavar="READ", default = False) @@ -63,8 +63,12 @@ class CmdLine : (options, args) = parser.parse_args() print args print options - if options.configfile : - self.ConfigPath[options.type_analyse] = os.path.abspath(options.configfile) + options.type_analyse + if options.configfile is not None: + config = DoConf(os.path.abspath(options.configfile)).getoptions() + elif options.type_analyse : + config = DoConf(self.ConfigPath[options.type_analyse]).getoptions() + #self.ConfigPath[options.type_analyse] = os.path.abspath(options.configfile) self.TEMPDIR = tempfile.mkdtemp('iramuteq') self.RscriptsPath = ConstructRscriptsPath(AppliPath) self.PathPath = ConfigParser() @@ -102,8 +106,16 @@ class CmdLine : corpus.conn_all() corpus.make_lems() corpus.parse_active(gramact, gramsup) - log.warning('ATTENTION gethapaxuces') - corpus.gethapaxuces() +# log.warning('ATTENTION gethapaxuces') +# MakeUciStat(corpus) +# qfqsdf + #corpus.gethapaxuces() + #ucisize = corpus.getucisize() + #ucisize = [`val` for val in ucisize] + #uciet = [uci.etoiles[1] for uci in corpus.ucis] + #res = zip(uciet, ucisize) + #with open('ucisize.csv', 'w') as f : + # f.write('\n'.join(['\t'.join(val) for val in res])) # self.content = f.read() #self.content = self.content.replace('\r','') if options.type_analyse == 'alceste' : @@ -112,7 +124,8 @@ class CmdLine : #zerzre #corpus.read_corpus() #corpus.parse_active(gramact, gramsup) - Alceste(self, corpus) + config['type'] = 'alceste' + Alceste(self, corpus, parametres = config) # self.Text = AnalyseAlceste(self, cmd = True, big = True) #self.Text = AnalyseAlceste(self, cmd = True) elif options.type_analyse == 'pam' : diff --git a/layout.py b/layout.py index 6550ced..669b278 100644 --- a/layout.py +++ b/layout.py @@ -8,18 +8,19 @@ import os import wx #import wx.lib.agw.aui as aui import agw.aui as aui -from chemins import ConstructPathOut, ChdTxtPathOut, FFF, ffr, PathOut, StatTxtPathOut +from chemins import ConstructPathOut, ChdTxtPathOut, FFF, ffr, PathOut, StatTxtPathOut, simipath from ConfigParser import ConfigParser -from functions import ReadProfileAsDico, GetTxtProfile, read_list_file, ReadList, exec_rcode, print_liste, BugReport, DoConf +from functions import ReadProfileAsDico, GetTxtProfile, read_list_file, ReadList, exec_rcode, print_liste, BugReport, DoConf, indices_simi from ProfList import * from guiparam3d import param3d, simi3d -from PrintRScript import write_afc_graph, print_simi3d +from PrintRScript import write_afc_graph, print_simi3d, PrintSimiScript from profile_segment import * from functions import ReadList from listlex import * from Liste import * from search_tools import SearchFrame from dialog import PrefGraph, PrefExport, PrefSimpleFile, PrefDendro +from guifunct import SelectColumn, PrepSimi from corpusNG import Corpus import datetime import sys @@ -27,6 +28,10 @@ import tempfile import shutil import webbrowser import codecs +import logging + +log = logging.getLogger('iramuteq.layout') + class GraphPanelAfc(wx.Panel): def __init__(self, parent, dico, list_graph, clnb, itempath = 'liste_graph_afc', coding = sys.getdefaultencoding()): @@ -849,6 +854,119 @@ class CopusPanel(wx.Panel) : text.Wrap( -1 ) self.fgSizer5.Add( text, 0, wx.ALL, 5 ) +class GraphPanelSimi(wx.Panel): + def __init__(self, parent, dico, list_graph): + wx.Panel.__init__(self,parent) + self.afcnb = 1 + self.Dict = dico + self.dirout = os.path.dirname(self.Dict['ira']) + self.parent = self.GetParent()#parent + self.SetFont(wx.Font(10, wx.DEFAULT, wx.NORMAL, wx.NORMAL, 0, "courier")) + self.labels = [] + self.listimg = [] + self.tabsimi = self.parent.GetParent() + self.ira = self.tabsimi.GetParent() + self.panel_1 = wx.ScrolledWindow(self, -1, style=wx.TAB_TRAVERSAL) + afc_img = wx.Image(os.path.join(self.ira.images_path,'button_simi.jpg'), wx.BITMAP_TYPE_ANY).ConvertToBitmap() + self.butafc = wx.BitmapButton(self, -1, afc_img) + export_img = wx.Image(os.path.join(self.ira.images_path,'button_export.jpg'), wx.BITMAP_TYPE_ANY).ConvertToBitmap() + self.butexport = wx.BitmapButton(self, -1, export_img) + + for i in range(0,len(list_graph)): + if os.path.exists(os.path.join(self.dirout,list_graph[i][0])) and list_graph[i][0] != '' : + self.listimg.append(wx.StaticBitmap(self.panel_1, -1, wx.Bitmap(os.path.join(self.dirout,list_graph[i][0]), wx.BITMAP_TYPE_ANY))) + self.labels.append(wx.StaticText(self.panel_1, -1, list_graph[i][1])) + + self.__set_properties() + self.__do_layout() + + def __set_properties(self): + self.panel_1.EnableScrolling(True,True) + #self.panel_1.SetSize((1000,1000)) + self.panel_1.SetScrollRate(20, 20) + + def __do_layout(self): + self.sizer_1 = wx.BoxSizer(wx.HORIZONTAL) + self.sizer_2 = wx.BoxSizer(wx.VERTICAL) + self.sizer_3 = wx.BoxSizer(wx.VERTICAL) + self.sizer_2.Add(self.butafc, 0, 0, 0) + self.sizer_2.Add(self.butexport, 0, 0, 0) + for i in range(0, len(self.listimg)): + self.sizer_3.Add(self.listimg[i], 0, wx.ALIGN_CENTER_HORIZONTAL, 0) + self.sizer_3.Add(self.labels[i], 0, wx.ALIGN_CENTER_HORIZONTAL, 0) + self.panel_1.SetSizer(self.sizer_3) + self.sizer_1.Add(self.sizer_2, 0, wx.EXPAND, 0) + self.sizer_1.Add(self.panel_1, 1, wx.EXPAND, 0) + self.SetSizer(self.sizer_1) + +class DefaultTextLayout : + def __init__(self, ira, corpus, parametres) : + self.pathout = PathOut(dirout = parametres['pathout']) + self.ira = ira + self.parent = ira + self.parametres = parametres + self.corpus = corpus + self.dolayout() + + def dolayout(self) : + log.info('no layout yet') + +class SimiLayout(DefaultTextLayout) : + def dolayout(self) : + self.pathout.basefiles(simipath) + self.actives = None + self.indices = indices_simi + if os.path.exists(self.pathout['liste_graph']) : + list_graph = read_list_file(self.pathout['liste_graph']) + else : + list_graph = [['','']] + notebook_flags = aui.AUI_NB_DEFAULT_STYLE | aui.AUI_NB_TAB_EXTERNAL_MOVE | aui.AUI_NB_TAB_MOVE | aui.AUI_NB_TAB_FLOAT + self.tabsimi = aui.AuiNotebook(self.ira.nb, -1, wx.DefaultPosition) + self.tabsimi.SetAGWWindowStyleFlag(notebook_flags) + self.tabsimi.SetArtProvider(aui.ChromeTabArt()) + self.tabsimi.corpus = self.corpus + self.tabsimi.parametres = self.parametres + self.graphpan = GraphPanelSimi(self.tabsimi, self.pathout, list_graph) + self.graphpan.Bind(wx.EVT_BUTTON, self.redosimi, self.graphpan.butafc) + self.graphpan.Bind(wx.EVT_BUTTON, self.export, self.graphpan.butexport) + self.tabsimi.AddPage(self.graphpan, 'Graph') + self.ira.nb.AddPage(self.tabsimi, 'Analyse de graph') + self.ira.ShowTab(True) + self.ira.nb.SetSelection(self.ira.nb.GetPageCount() - 1) + + def redosimi(self, evt) : + with open(self.pathout['selected.csv'],'r') as f : + selected = f.read() + selected = [int(val) for val in selected.splitlines()] + if self.actives is None : + with codecs.open(self.pathout['actives.csv'], 'r', self.parametres['encoding']) as f : + self.actives = f.read() + self.actives = [act for act in self.actives.splitlines()] + dictcol = dict([[i, [act, self.corpus.getlemeff(act)]] for i, act in enumerate(self.actives)]) + SelectColumn(self.ira, dictcol, self.actives, self.pathout['selected.csv'], selected = selected) + prep = PrepSimi(self.ira, self.parametres, indices_simi) + self.parametres = prep.parametres + script = PrintSimiScript(self) + script.make_script() + pid = exec_rcode(self.ira.RPath, script.scriptout, wait = True) + check_Rresult(self.ira, pid) + if self.parametres['type_graph'] == 1: + if os.path.exists(self.pathout['liste_graph']): + graph_simi = read_list_file(self.pathout['liste_graph']) + graph_simi.append([os.path.basename(script.filename), script.txtgraph]) + else : + graph_simi = [[os.path.basename(script.filename), script.txtgraph]] + print_liste(self.pathout['liste_graph'], graph_simi) + DoConf().makeoptions([self.parametres['type']], [self.parametres], self.pathout['Analyse.ira']) + if self.parametres['type_graph'] == 1: + self.graphpan.sizer_3.Add(wx.StaticBitmap(self.graphpan.panel_1, -1, wx.Bitmap(script.filename, wx.BITMAP_TYPE_ANY)), 0, wx.ALIGN_CENTER_HORIZONTAL, 0) + self.graphpan.sizer_3.Add(wx.StaticText(self.graphpan.panel_1,-1, script.txtgraph), 0, wx.ALIGN_CENTER_HORIZONTAL, 0) + self.graphpan.sizer_3.Fit(self.graphpan.panel_1) + self.graphpan.Layout() + self.graphpan.panel_1.Scroll(0,self.graphpan.panel_1.GetScrollRange(wx.VERTICAL)) + + def export(self, evt) : + pass # def read_result(self) : # #self.corpus.read_corpus_from_shelves(self.corpus.dictpathout['db']) # #self.corpus.make_et_table() diff --git a/openanalyse.py b/openanalyse.py index 1cf1a03..3290bc4 100644 --- a/openanalyse.py +++ b/openanalyse.py @@ -5,7 +5,7 @@ #Lisense: GNU/GPL from chemins import ChdTxtPathOut, StatTxtPathOut, construct_simipath -from layout import OpenCHDS, dolexlayout, StatLayout, WordCloudLayout, OpenCorpus +from layout import OpenCHDS, dolexlayout, StatLayout, WordCloudLayout, OpenCorpus, SimiLayout #from corpus import Corpus from corpusNG import Corpus, copycorpus from tableau import Tableau @@ -61,18 +61,18 @@ class OpenAnalyse(): self.parent.ShowMenu(_("Text analysis")) OpenCHDS(self.parent, corpus, self.conf, Alceste = True) elif self.conf['type'] == 'simitxt' : - self.tableau = Tableau(self.parent, self.conf['ira']) - self.DictPathOut=construct_simipath(self.conf['pathout']) - self.tableau.dictpathout = self.DictPathOut - self.tableau.read_tableau(self.tableau.dictpathout['db']) - if self.tableau.parametre.get('corpus', False) : - self.corpus=corpus + self.parent.ShowMenu(_("Text analysis")) + SimiLayout(self.parent, corpus, self.conf) + #self.tableau = Tableau(self.parent, self.conf['ira']) + #self.DictPathOut=construct_simipath(self.conf['pathout']) + #self.tableau.dictpathout = self.DictPathOut + #self.tableau.read_tableau(self.tableau.dictpathout['db']) + #if self.tableau.parametre.get('corpus', False) : + # self.corpus=corpus #self.corpus.read_corpus_from_shelves(self.DictPathOut['corpus']) - self.corpus.parametres['openpath'] = self.conf['pathout'] - self.parent.ShowMenu(_("Text analysis")) - DoSimi(self.parent, self.conf, isopen = True, filename = self.conf['ira'], gparent = self, openfromprof=False) + # self.corpus.parametres['openpath'] = self.conf['pathout'] + #DoSimi(self.parent, self.conf, isopen = True, filename = self.conf['ira'], gparent = self, openfromprof=False) - print 'simi' # try : # #if self.conf['type'] in ['analyse','lexico','stat','wordcloud'] : # # self.corpus = Corpus(parent) diff --git a/tabsimi.py b/tabsimi.py index 53974aa..896cc6f 100644 --- a/tabsimi.py +++ b/tabsimi.py @@ -33,7 +33,7 @@ class DoSimi(): else : self.paramsimi = {'coeff' : 0, 'layout' : 2, - 'type' : 1, + 'type_graph' : 1, 'arbremax' : 1, 'coeff_tv' : 0, 'coeff_tv_nb' : 10, @@ -183,7 +183,7 @@ class DoSimi(): paramsimi = {'coeff' : self.dial.choice1.GetSelection(), 'layout' : self.dial.choice2.GetSelection(), - 'type' : self.dial.choice3.GetSelection(), + 'type_graph' : self.dial.choice3.GetSelection(), 'arbremax' : self.dial.check1.GetValue(), 'coeff_tv' : self.dial.check_s_size.GetValue(), 'coeff_tv_nb' : self.dial.spin_tv.GetValue(), @@ -407,15 +407,15 @@ class DoSimi(): if self.paramsimi['layout'] == 4 : layout = 'graphopt' self.filename='' - if self.paramsimi['type'] == 0 : type = 'tkplot' - if self.paramsimi['type'] == 1 : + if self.paramsimi['type_graph'] == 0 : type = 'tkplot' + if self.paramsimi['type_graph'] == 1 : graphnb = 1 type = 'nplot' dirout = os.path.dirname(self.DictPathOut['mat01']) while os.path.exists(os.path.join(dirout,'graph_simi_'+str(graphnb)+'.png')): graphnb +=1 self.filename = ffr(os.path.join(dirout,'graph_simi_'+str(graphnb)+'.png')) - if self.paramsimi['type'] == 2 : type = 'rgl' + if self.paramsimi['type_graph'] == 2 : type = 'rgl' if self.paramsimi['arbremax'] : arbremax = 'TRUE' diff --git a/textsimi.py b/textsimi.py index efd3a67..72222ab 100644 --- a/textsimi.py +++ b/textsimi.py @@ -3,17 +3,18 @@ #Copyright (c) 2008-2011 Pierre Ratinaud #Lisense: GNU/GPL -from chemins import ConstructPathOut, construct_simipath +from chemins import ffr, simipath from corpus import Corpus import os from analysetxt import AnalyseText from ConfigParser import RawConfigParser from guifunct import getPage, getCorpus from dialog import StatDialog -from functions import indices_simi, progressbar, treat_var_mod +from guifunct import SelectColumn, PrepSimi +from functions import indices_simi, progressbar, treat_var_mod, read_list_file, print_liste from tableau import Tableau from tabsimi import DoSimi -from PrintRScript import PrintRScript +from PrintRScript import PrintSimiScript import wx from copy import copy @@ -21,17 +22,29 @@ import logging logger = logging.getLogger('iramuteq.textsimi') - - class SimiTxt(AnalyseText): def doanalyse(self) : + self.parametres['type'] = 'simitxt' + self.pathout.basefiles(simipath) self.indices = indices_simi self.makesimiparam() + #FIXME + self.actives = self.corpus.make_actives_limit(3) + dictcol = dict([[i, [act, self.corpus.getlemeff(act)]] for i, act in enumerate(self.actives)]) + SelectColumn(self.ira, dictcol, self.actives, self.pathout['selected.csv']) self.makefiles() prep = PrepSimi(self.ira, self.parametres, indices_simi) self.parametres = prep.parametres - script = PrintSimScript(self) - + script = PrintSimiScript(self) + script.make_script() + self.doR(script.scriptout) + if self.parametres['type_graph'] == 1: + if os.path.exists(self.pathout['liste_graph']): + graph_simi = read_list_file(self.pathout['liste_graph']) + graph_simi.append([os.path.basename(script.filename), script.txtgraph]) + else : + graph_simi = [[os.path.basename(script.filename), script.txtgraph]] + print_liste(self.pathout['liste_graph'], graph_simi) def preferences(self) : dial = StatDialog(self, self.parent) @@ -52,7 +65,7 @@ class SimiTxt(AnalyseText): def makesimiparam(self) : self.paramsimi = {'coeff' : 0, 'layout' : 2, - 'type' : 1, + 'type_graph' : 1, 'arbremax' : 1, 'coeff_tv' : 1, 'coeff_tv_nb' : 0, @@ -84,8 +97,8 @@ class SimiTxt(AnalyseText): } self.parametres.update(self.paramsimi) - def makefiles(self) : - self.actives, lim = self.corpus.make_actives_nb(self.parametres.get('max_actives',1500), 1) + def makefiles(self, lim=3) : + #self.actives, lim = self.corpus.make_actives_nb(self.parametres.get('max_actives',1500), 1) self.parametres['eff_min_forme'] = lim self.parametres['nbactives'] = len(self.actives) self.parametres['fromprof'] = True @@ -98,144 +111,19 @@ class SimiTxt(AnalyseText): self.parametres['stars'] = copy(self.listet) self.parametres['sfromchi'] = False -class PrepSimi : - def _init_(self, parent, parametres, indices_simi) : - self.parametres = parametres - self.dial = PrefSimi(parent, -1, self.parametres, indices_simi) - self.dial.CenterOnParent() - self.val = self.dial.ShowModal() - if self.val == wx.ID_OK : - self.make_param() - - def make_param(self) : - self.select = self.dial.check_colch.GetValue() - param = {'coeff' : self.dial.choice1.GetSelection(), - 'layout' : self.dial.choice2.GetSelection(), - 'type' : self.dial.choice3.GetSelection(), - 'arbremax' : self.dial.check1.GetValue(), - 'coeff_tv' : self.dial.check_s_size.GetValue(), - 'coeff_tv_nb' : self.dial.spin_tv.GetValue(), - 'tvprop' : self.dial.check2.GetValue(), - 'tvmin' : self.dial.spin_tvmin.GetValue(), - 'tvmax' : self.dial.spin_tvmax.GetValue(), - 'coeff_te' : self.dial.check3.GetValue(), - 'coeff_temin' : self.dial.spin_temin.GetValue(), - 'coeff_temax' : self.dial.spin_temax.GetValue(), - 'label_e' : self.dial.check_elab.GetValue(), - 'label_v' : self.dial.check_vlab.GetValue(), - 'vcex' : self.dial.check_vcex.GetValue(), - 'vcexmin' : self.dial.spin_vcexmin.GetValue(), - 'vcexmax' : self.dial.spin_vcexmax.GetValue(), - 'cex' : self.dial.spin_cex.GetValue(), - 'seuil_ok' : self.dial.check_seuil.GetValue(), - 'seuil' : self.dial.spin_seuil.GetValue(), - 'cols' : self.dial.cols.GetColour(), - 'cola' : self.dial.cola.GetColour(), - 'width' : self.dial.spin_width.GetValue(), - 'height' : self.dial.spin_height.GetValue(), - 'first' : False, - 'keep_coord' : keep_coord, - 'alpha' : self.dial.slider_sphere.GetValue(), - 'film' : self.dial.film.GetValue() - } - if 'cexfromchi' in self.parametres : - param['cexfromchi'] = self.dial.checkit.GetValue() - if 'sfromchi' in self.parametres : - param['sfromchi'] = self.dial.checki.GetValue() - if 'vlabcolor' in self.parametres : - param['vlabcolor'] = self.parametres['vlabcolor'] - if 'check_bystar' in dir(self.dial) : - param['bystar'] = self.dial.check_bystar.GetValue() - param['stars'] = self.parametres['stars'] - self.parametres.update(param) - -class PrintSimiScript(PrintRScript) : - def make_script(self) : - self.load(['igraph', 'proxy', 'Matrix']) - self.source([self.analyse.parent.RscriptsPath['simi'], self.analyse.parent.RscriptsPath['Rgraph']]) - txt = """ - dm.path <- "%s" - cn.path <- "%s" - selected.col <- "%s" - """ % (self.pathout['mat01.csv'], self.pathout['actives.csv'], self.pathout['selected.csv']) - - txt += """ - dm <- dm[, selected.col+1] - """ - if self.parametres['coeff'] == 0 : - method = 'cooc' - txt += """ - method <- 'cooc' - mat <- make.a(dm) - """ - else : - txt += """ - dm <- as.matrix(dm) - """ - if self.parametres['coeff'] == 1 : - method = 'prcooc' - txt += """ - method <- 'Russel' - mat <- simil(dm, method = 'Russel', diag = TRUE, upper = TRUE, by_rows = FALSE) - """ - elif self.analyses.indices[self.parametres['coeff']] == 'binomial' : - method = 'binomial' - txt += """ - method <- 'binomial' - mat <- binom.sim(dm) - """ - else : - method = self.types[self.paramsimi['coeff']] - txt += """ - method <-"%s" - mat <- simil(dm, method = method, diag = TRUE, upper = TRUE, by_rows = FALSE) - """ % self.analyse.indices[self.parametres['coeff']] - txt += """ - mat <- as.matrix(stats::as.dist(mat,diag=TRUE,upper=TRUE)) - mat[is.na(mat)] <- 0 - mat[is.infinite(mat)] <- 0 - """ - if self.parametres['layout'] == 0 : layout = 'random' - if self.parametres['layout'] == 1 : layout = 'circle' - if self.parametres['layout'] == 2 : layout = 'frutch' - if self.parametres['layout'] == 3 : layout = 'kawa' - if self.parametres['layout'] == 4 : layout = 'graphopt' - - txt += """ - eff <- colSums(dm) - g.ori <- graph.adjacency(mat, mode='lower', weighted = TRUE) - w.ori <- E(g.ori)$weight - if (max.tree) { - if (method == 'cooc') { - E(g.ori)$weight <- 1 / w.ori - } else { - E(g.ori)$weigth <- 1 - w.ori - } - g.max <- minimum.spanning.tree(g.ori) - if (method == 'cooc') { - E(g.max)$weight <- 1 / E(g.max)$weight - } else { - E(g.max)$weight <- 1 - E(g.max)$weight - } - g.toplot <- g.max - } else { - g.toplot <- g.ori - } - """ - - self.tableau = Tableau(self.parent, '') - self.tableau.listactives = self.actives - self.tableau.parametre['fromtxt'] = True - self.corpus.lems_eff = dict([[lem,[self.corpus.lems[lem].freq]] for lem in self.actives]) - #print('ATTENTION ETOILES') - #self.paramsimi['bystar'] = True - self.tableau.listet = copy(self.listet) - #self.paramsimi['cexfromchi'] = True - #self.paramsimi['vlabcolor'] = True - self.tableau.actives = copy(self.corpus.lems_eff) - DoSimi(self, fromprof = self.pathout['mat01.csv'], param = self.paramsimi, pathout = self.pathout.dirout) +# self.tableau = Tableau(self.parent, '') +# self.tableau.listactives = self.actives +# self.tableau.parametre['fromtxt'] = True +# self.corpus.lems_eff = dict([[lem,[self.corpus.lems[lem].freq]] for lem in self.actives]) +# #print('ATTENTION ETOILES') +# #self.paramsimi['bystar'] = True +# self.tableau.listet = copy(self.listet) +# #self.paramsimi['cexfromchi'] = True +# #self.paramsimi['vlabcolor'] = True +# self.tableau.actives = copy(self.corpus.lems_eff) +# DoSimi(self, fromprof = self.pathout['mat01.csv'], param = self.paramsimi, pathout = self.pathout.dirout) #class SimiTxt : # def __init__(self, parent, cmd = False, param = None):