for source in lsources :
self.add('source("%s")' % source)
+ def packages(self, lpks) :
+ for pk in lpks :
+ self.add('library(%s)' % pk)
+
def load(self, l) :
for val in l :
self.add('load("%s")' % val)
# f.write(txt)
# f.close()
+class PrintSimiScript(PrintRScript) :
+ def make_script(self) :
+ self.txtgraph = ''
+ self.packages(['igraph', 'proxy', 'Matrix'])
+ self.sources([self.analyse.parent.RscriptsPath['simi'], self.analyse.parent.RscriptsPath['Rgraph']])
+ txt = """
+ dm.path <- "%s"
+ cn.path <- "%s"
+ selected.col <- "%s"
+ """ % (self.pathout['mat01.csv'], self.pathout['actives.csv'], self.pathout['selected.csv'])
+ txt += """
+ dm <-readMM(dm.path)
+ cn <- read.table(cn.path, sep=';', quote='"')
+ colnames(dm) <- cn[,1]
+ sel.col <- read.csv2(selected.col)
+ dm <- dm[, sel.col[,1] + 1]
+ """
+
+ if self.parametres['coeff'] == 0 :
+ method = 'cooc'
+ txt += """
+ method <- 'cooc'
+ mat <- make.a(dm)
+ """
+ else :
+ txt += """
+ dm <- as.matrix(dm)
+ """
+ if self.parametres['coeff'] == 1 :
+ method = 'prcooc'
+ txt += """
+ method <- 'Russel'
+ mat <- simil(dm, method = 'Russel', diag = TRUE, upper = TRUE, by_rows = FALSE)
+ """
+ elif self.analyse.indices[self.parametres['coeff']] == 'binomial' :
+ method = 'binomial'
+ txt += """
+ method <- 'binomial'
+ mat <- binom.sim(dm)
+ """
+ elif self.parametres['coeff'] != 0 :
+ method = self.analyse.indices[self.parametres['coeff']]
+ txt += """
+ method <-"%s"
+ mat <- simil(dm, method = method, diag = TRUE, upper = TRUE, by_rows = FALSE)
+ """ % self.analyse.indices[self.parametres['coeff']]
+ txt += """
+ mat <- as.matrix(stats::as.dist(mat,diag=TRUE,upper=TRUE))
+ mat[is.na(mat)] <- 0
+ mat[is.infinite(mat)] <- 0
+ """
+ if self.parametres['layout'] == 0 : layout = 'random'
+ if self.parametres['layout'] == 1 : layout = 'circle'
+ if self.parametres['layout'] == 2 : layout = 'frutch'
+ if self.parametres['layout'] == 3 : layout = 'kawa'
+ if self.parametres['layout'] == 4 : layout = 'graphopt'
+
+ self.filename=''
+ if self.parametres['type_graph'] == 0 : type = 'tkplot'
+ if self.parametres['type_graph'] == 1 :
+ graphnb = 1
+ type = 'nplot'
+ dirout = os.path.dirname(self.pathout['mat01'])
+ while os.path.exists(os.path.join(dirout,'graph_simi_'+str(graphnb)+'.png')):
+ graphnb +=1
+ self.filename = ffr(os.path.join(dirout,'graph_simi_'+str(graphnb)+'.png'))
+ if self.parametres['type_graph'] == 2 : type = 'rgl'
+
+ if self.parametres['arbremax'] :
+ arbremax = 'TRUE'
+ self.txtgraph += ' - arbre maximum'
+ else : arbremax = 'FALSE'
+
+ if self.parametres['coeff_tv'] :
+ coeff_tv = self.parametres['coeff_tv_nb']
+ tvminmax = 'c(NULL,NULL)'
+ elif not self.parametres['coeff_tv'] or self.parametres.get('sformchi', False) :
+ coeff_tv = 'NULL'
+ tvminmax = 'c(%i, %i)' %(self.parametres['tvmin'], self.parametres['tvmax'])
+ if self.parametres['coeff_te'] : coeff_te = 'c(%i,%i)' % (self.parametres['coeff_temin'], self.parametres['coeff_temax'])
+ else : coeff_te = 'NULL'
+
+ if self.parametres['vcex'] or self.parametres.get('cexfromchi', False) :
+ vcexminmax = 'c(%i/10,%i/10)' % (self.parametres['vcexmin'],self.parametres['vcexmax'])
+ else :
+ vcexminmax = 'c(NULL,NULL)'
+ if not self.parametres['label_v'] : label_v = 'FALSE'
+ else : label_v = 'TRUE'
+
+ if not self.parametres['label_e'] : label_e = 'FALSE'
+ else : label_e = 'TRUE'
+
+ if self.parametres['seuil_ok'] : seuil = str(self.parametres['seuil'])
+ else : seuil = 'NULL'
+
+ cols = str(self.parametres['cols']).replace(')',', max=255)')
+ cola = str(self.parametres['cola']).replace(')',',max=255)')
+
+ txt += """
+ minmaxeff <- %s
+ """ % tvminmax
+ txt += """
+ vcexminmax <- %s
+ """ % vcexminmax
+ txt += """
+ cex = %i/10
+ """ % self.parametres['cex']
+
+ if self.parametres['film'] :
+ txt += """
+ film <- "%s"
+ """ % self.pathout['film']
+ else :
+ txt += """
+ film <- NULL
+ """
+ txt += """
+ seuil <- %s
+ """ % seuil
+
+ txt += """
+ label.v <- %s
+ label.e <- %s
+ """ % (label_v, label_e)
+ txt += """
+ cols <- rgb%s
+ cola <- rgb%s
+ """ % (cols, cola)
+ txt += """
+ width <- %i
+ height <- %i
+ """ % (self.parametres['width'], self.parametres['height'])
+ if self.parametres['keep_coord'] :
+ txt += """
+ coords <- try(coords, TRUE)
+ if (!is.matrix(coords)) {
+ coords<-NULL
+ }
+ """
+ else :
+ txt += """
+ coords <- NULL
+ """
+ txt += """
+ alpha <- %i/100
+ """ % self.parametres['alpha']
+ txt += """
+ alpha <- %i/100
+ """ % self.parametres['alpha']
+#############################################
+ if self.parametres.get('bystar',False) :
+ txt += """
+ et <- list()
+ """
+ for i,et in enumerate(self.tableau.etline) :
+ txt+= """
+ et[[%i]] <- c(%s)
+ """ % (i+1, ','.join(et[1:]))
+ txt+= """
+ unetoile <- c('%s')
+ """ % ("','".join([val[0] for val in self.tableau.etline]))
+ txt += """
+ fsum <- NULL
+ rs <- rowSums(dm)
+ for (i in 1:length(unetoile)) {
+ print(unetoile[i])
+ tosum <- et[[i]]
+ if (length(tosum) > 1) {
+ fsum <- cbind(fsum, colSums(dm[tosum,]))
+ } else {
+ fsum <- cbind(fsum, dm[tosum,])
+ }
+ }
+ source("%s")
+ lex <- AsLexico2(fsum, chip=TRUE)
+ dcol <- apply(lex[[4]],1,which.max)
+ toblack <- apply(lex[[4]],1,max)
+ gcol <- rainbow(length(unetoile))
+ #gcol[2] <- 'orange'
+ vertex.label.color <- gcol[dcol]
+ vertex.label.color[which(toblack <= 3.84)] <- 'black'
+ leg <- list(unetoile=unetoile, gcol=gcol)
+ cols <- vertex.label.color
+ chivertex.size <- norm.vec(toblack, vcexminmax[1], vcexminmax[2])
+
+ """ % (self.parent.RscriptsPath['chdfunct'])
+ else :
+ txt += """
+ vertex.label.color <- 'black'
+ chivertex.size <- 1
+ leg<-NULL
+ """
+#############################################
+
+# txt += """
+# eff <- colSums(dm)
+# g.ori <- graph.adjacency(mat, mode='lower', weighted = TRUE)
+# w.ori <- E(g.ori)$weight
+# if (max.tree) {
+# if (method == 'cooc') {
+# E(g.ori)$weight <- 1 / w.ori
+# } else {
+# E(g.ori)$weigth <- 1 - w.ori
+# }
+# g.max <- minimum.spanning.tree(g.ori)
+# if (method == 'cooc') {
+# E(g.max)$weight <- 1 / E(g.max)$weight
+# } else {
+# E(g.max)$weight <- 1 - E(g.max)$weight
+# }
+# g.toplot <- g.max
+# } else {
+# g.toplot <- g.ori
+# }
+# """
+ txt += """
+ eff <- colSums(dm)
+ x <- list(mat = mat, eff = eff)
+ graph.simi <- do.simi(x, method='%s', seuil = seuil, p.type = '%s', layout.type = '%s', max.tree = %s, coeff.vertex=%s, coeff.edge = %s, minmaxeff = minmaxeff, vcexminmax = vcexminmax, cex = cex, coords = coords)
+ """ % (method, type, layout, arbremax, coeff_tv, coeff_te)
+
+ if self.parametres.get('bystar',False) :
+ if self.parametres.get('cexfromchi', False) :
+ txt+="""
+ label.cex<-chivertex.size
+ """
+ else :
+ txt+="""
+ label.cex <- NULL
+ """
+ if self.parametres.get('sfromchi', False) :
+ txt += """
+ vertex.size <- norm.vec(toblack, minmaxeff[1], minmaxeff[2])
+ """
+ else :
+ txt += """
+ vertex.size <- NULL
+ """
+ else :
+ #FIXME
+ tmpchi = False
+ if tmpchi :
+ txt += """
+ lchi <- read.table("%s")
+ lchi <- lchi[,1]
+ """ % ffr(tmpchi)
+ if 'selected_col' in dir(self.tableau) :
+ txt += """
+ lchi <- lchi[c%s+1]
+ """ % datas
+ if tmpchi and self.parametres.get('cexfromchi', False) :
+ txt += """
+ label.cex <- norm.vec(lchi, vcexminmax[1], vcexminmax[2])
+ """
+ else :
+ txt += """
+ if (is.null(vcexminmax[1])) {
+ label.cex <- NULL
+ } else {
+ label.cex <- graph.simi$label.cex
+ }
+ """
+ if tmpchi and self.parametres.get('sfromchi', False) :
+ txt += """
+ vertex.size <- norm.vec(lchi, minmaxeff[1], minmaxeff[2])
+ """
+ else :
+ txt += """
+ if (is.null(minmaxeff[1])) {
+ vertex.size <- NULL
+ } else {
+ vertex.size <- graph.simi$eff
+ }
+ """
+ txt += """ vertex.size <- NULL """
+ txt += """
+ coords <- plot.simi(graph.simi, p.type='%s',filename="%s", vertex.label = label.v, edge.label = label.e, vertex.col = cols, vertex.label.color = vertex.label.color, vertex.label.cex=label.cex, vertex.size = vertex.size, edge.col = cola, leg=leg, width = width, height = height, alpha = alpha, movie = film)
+ save.image(file="%s")
+ """ % (type, self.filename, self.pathout['RData'])
+
+ self.add(txt)
+ self.write()
+
del ntab
ntab2.sort(reverse = True)
ntab2 = ntab2[:limite]
+ nuces = [val[1] for val in ntab2]
dlg.Update(3, u'concordancier...')
- ucestxt = [corpus.ucis_paras_uces[val[1][0]][val[1][1]][val[1][2]] for val in ntab2]
- ucestxt = [corpus.make_concord(self.la, ' '.join(uce), 'red') for uce in ucestxt]
+ #ucestxt = [corpus.ucis_paras_uces[val[1][0]][val[1][1]][val[1][2]] for val in ntab2]
+ ucestxt1 = [row for row in corpus.getconcorde(nuces)]
+ ucestxt = []
+ ucis_txt = []
+ for uce in ucestxt1 :
+ ucetxt = ' '+uce[1]+' '
+ ucis_txt.append(' '.join(corpus.ucis[corpus.getucefromid(uce[0]).uci].etoiles) + '<br>')
+ for lem in self.la :
+ listmot = corpus.getlems()[lem].formes
+ for id in listmot :
+ forme = corpus.getforme(id).forme
+ ucetxt = ucetxt.replace(' '+forme+' ', '<font color=red> ' + forme + ' </font>')
+ ucestxt.append(ucetxt)
+ #ucestxt = [corpus.make_concord(self.la, ' '.join(uce), 'red') for uce in ucestxt]
dlg.Update(4, u'texte...')
- ucis_txt = [' '.join(corpus.ucis[val[1][0]][0]) for val in ntab2]
+ #ucis_txt = [' '.join(corpus.ucis[val[1][0]][0]) for val in ntab2]
win = message(self, -1, u"UCE caractéristiques - Classe %i" % self.cl, size=(600, 500), style=wx.DEFAULT_FRAME_STYLE)
win.html = '<html>\n' + '<br><br>'.join(['<br>'.join([ucis_txt[i], 'score : ' + str(ntab2[i][0]), ucestxt[i]]) for i in range(0,len(ucestxt))]) + '\n</html>'
win.HtmlPage.SetPage(win.html)
}
-#from textometrieR
-#http://txm.sourceforge.net/doc/R/textometrieR-package.html
-#Sylvain Loiseau
-specificites.probabilities <- function (lexicaltable, types = NULL, parts = NULL)
-{
- rowMargin <- rowSums(lexicaltable)
- colMargin <- colSums(lexicaltable)
- F <- sum(lexicaltable)
- if (!is.null(types)) {
- if (is.character(types)) {
- if (is.null(rownames(lexicaltable)))
- stop("The lexical table has no row names and the \"types\" argument is a character vector.")
- if (!all(types %in% rownames(lexicaltable)))
- stop(paste("Some requested types are not known in the lexical table: ",
- paste(types[!(types %in% rownames(lexicaltable))],
- collapse = " ")))
- }
- else {
- if (any(types < 1))
- stop("The row index must be greater than 0.")
- if (max(types) > nrow(lexicaltable))
- stop("Row index must be smaller than the number of rows.")
- }
- lexicaltable <- lexicaltable[types, , drop = FALSE]
- rowMargin <- rowMargin[types]
- }
- if (!is.null(parts)) {
- if (is.character(parts)) {
- if (is.null(colnames(lexicaltable)))
- stop("The lexical table has no col names and the \"parts\" argument is a character vector.")
- if (!all(parts %in% colnames(lexicaltable)))
- stop(paste("Some requested parts are not known in the lexical table: ",
- paste(parts[!(parts %in% colnames(lexicaltable))],
- collapse = " ")))
- }
- else {
- if (max(parts) > ncol(lexicaltable))
- stop("Column index must be smaller than the number of cols.")
- if (any(parts < 1))
- stop("The col index must be greater than 0.")
- }
- lexicaltable <- lexicaltable[, parts, drop = FALSE]
- colMargin <- colMargin[parts]
- }
- if (nrow(lexicaltable) == 0 | ncol(lexicaltable) == 0) {
- stop("The lexical table must contains at least one row and one column.")
- }
- specif <- matrix(0, nrow = nrow(lexicaltable), ncol = ncol(lexicaltable))
- for (i in 1:ncol(lexicaltable)) {
- whiteDrawn <- lexicaltable[, i]
- white <- rowMargin
- black <- F - white
- drawn <- colMargin[i]
- independance <- (white * drawn)/F
- specif_negative <- whiteDrawn < independance
- specif_positive <- whiteDrawn >= independance
- specif[specif_negative, i] <- phyper(whiteDrawn[specif_negative],
- white[specif_negative], black[specif_negative], drawn)
- specif[specif_positive, i] <- phyper(whiteDrawn[specif_positive] -
- 1, white[specif_positive], black[specif_positive],
- drawn)
- }
- dimnames(specif) <- dimnames(lexicaltable)
- return(specif)
-}
-
-#from textometrieR
-#http://txm.sourceforge.net/doc/R/textometrieR-package.html
-#Sylvain Loiseau
-specificites <- function (lexicaltable, types = NULL, parts = NULL)
-{
- spe <- specificites.probabilities(lexicaltable, types, parts)
- spelog <- matrix(0, nrow = nrow(spe), ncol = ncol(spe))
- spelog[spe < 0.5] <- log10(spe[spe < 0.5])
- spelog[spe > 0.5] <- abs(log10(1 - spe[spe > 0.5]))
- spelog[spe == 0.5] <- 0
- spelog[is.infinite(spe)] <- 0
- spelog <- round(spelog, digits = 4)
- rownames(spelog) <- rownames(spe)
- colnames(spelog) <- colnames(spe)
- return(spelog)
-}
+##from textometrieR
+##http://txm.sourceforge.net/doc/R/textometrieR-package.html
+##Sylvain Loiseau
+#specificites.probabilities <- function (lexicaltable, types = NULL, parts = NULL)
+#{
+# rowMargin <- rowSums(lexicaltable)
+# colMargin <- colSums(lexicaltable)
+# F <- sum(lexicaltable)
+# if (!is.null(types)) {
+# if (is.character(types)) {
+# if (is.null(rownames(lexicaltable)))
+# stop("The lexical table has no row names and the \"types\" argument is a character vector.")
+# if (!all(types %in% rownames(lexicaltable)))
+# stop(paste("Some requested types are not known in the lexical table: ",
+# paste(types[!(types %in% rownames(lexicaltable))],
+# collapse = " ")))
+# }
+# else {
+# if (any(types < 1))
+# stop("The row index must be greater than 0.")
+# if (max(types) > nrow(lexicaltable))
+# stop("Row index must be smaller than the number of rows.")
+# }
+# lexicaltable <- lexicaltable[types, , drop = FALSE]
+# rowMargin <- rowMargin[types]
+# }
+# if (!is.null(parts)) {
+# if (is.character(parts)) {
+# if (is.null(colnames(lexicaltable)))
+# stop("The lexical table has no col names and the \"parts\" argument is a character vector.")
+# if (!all(parts %in% colnames(lexicaltable)))
+# stop(paste("Some requested parts are not known in the lexical table: ",
+# paste(parts[!(parts %in% colnames(lexicaltable))],
+# collapse = " ")))
+# }
+# else {
+# if (max(parts) > ncol(lexicaltable))
+# stop("Column index must be smaller than the number of cols.")
+# if (any(parts < 1))
+# stop("The col index must be greater than 0.")
+# }
+# lexicaltable <- lexicaltable[, parts, drop = FALSE]
+# colMargin <- colMargin[parts]
+# }
+# if (nrow(lexicaltable) == 0 | ncol(lexicaltable) == 0) {
+# stop("The lexical table must contains at least one row and one column.")
+# }
+# specif <- matrix(0, nrow = nrow(lexicaltable), ncol = ncol(lexicaltable))
+# for (i in 1:ncol(lexicaltable)) {
+# whiteDrawn <- lexicaltable[, i]
+# white <- rowMargin
+# black <- F - white
+# drawn <- colMargin[i]
+# independance <- (white * drawn)/F
+# specif_negative <- whiteDrawn < independance
+# specif_positive <- whiteDrawn >= independance
+# specif[specif_negative, i] <- phyper(whiteDrawn[specif_negative],
+# white[specif_negative], black[specif_negative], drawn)
+# specif[specif_positive, i] <- phyper(whiteDrawn[specif_positive] -
+# 1, white[specif_positive], black[specif_positive],
+# drawn)
+# }
+# dimnames(specif) <- dimnames(lexicaltable)
+# return(specif)
+#}
+#
+##from textometrieR
+##http://txm.sourceforge.net/doc/R/textometrieR-package.html
+##Sylvain Loiseau
+#specificites <- function (lexicaltable, types = NULL, parts = NULL)
+#{
+# spe <- specificites.probabilities(lexicaltable, types, parts)
+# spelog <- matrix(0, nrow = nrow(spe), ncol = ncol(spe))
+# spelog[spe < 0.5] <- log10(spe[spe < 0.5])
+# spelog[spe > 0.5] <- abs(log10(1 - spe[spe > 0.5]))
+# spelog[spe == 0.5] <- 0
+# spelog[is.infinite(spe)] <- 0
+# spelog <- round(spelog, digits = 4)
+# rownames(spelog) <- rownames(spe)
+# colnames(spelog) <- colnames(spe)
+# return(spelog)
+#}
make.spec.hypergeo <- function(mat) {
- #library(textometrieR)
+ library(textometrieR)
spec <- specificites(mat)
sumcol<-colSums(mat)
eff_relatif<-round(t(apply(mat,1,function(x) {(x/t(as.matrix(sumcol))*1000)})),2)
self.parametres['uuid'] = str(uuid4())
self.parametres['name'] = os.path.split(self.parametres['pathout'])[1]
self.parametres['type'] = parametres['type']
+ self.parametres['encoding'] = self.ira.syscoding
self.t1 = time()
#if self.corpus.lems is None :
self.corpus.make_lems(lem = self.parametres['lem'])
def make_config(self, config) :
if config is not None :
- if isinstance(config, basestring) :
- return self.readconfig(config)
+ if not self.dlg :
+ return config
else :
return self.preferences()
def preferences(self) :
return {}
- def doR(self):
- pass
-
def printRscript(self) :
pass
'preferences' : os.path.join(ConfigPath, 'iramuteq.cfg'),
'pam' : os.path.join(ConfigPath, 'pam.cfg'),
'history' : os.path.join(ConfigPath, 'history.db'),
+ 'corpus' : os.path.join(ConfigPath, 'corpus.cfg'),
}
return DictConfigPath
'german_exp' : os.path.join(BasePath, 'expression_de.txt'),
'italian' : os.path.join(BasePath, 'lexique_it.txt'),
'italian_exp' : os.path.join(BasePath, 'expression_it.txt'),
+ 'swedish' : os.path.join(BasePath, 'lexique_sw.txt'),
+ 'swedish_exp' : os.path.join(BasePath, 'expression_sw.txt'),
+ 'portuguese' : os.path.join(BasePath, 'lexique_pt.txt'),
+ 'portuguese_exp': os.path.join(BasePath, 'expression_pt.txt'),
}
return DictPath
'corpus' : os.path.join(pathout, 'corpus.db'),
}
return d
+
+simipath = {'mat01' : 'mat01.csv',
+ 'matsimi' : 'matsimi.csv',
+ 'eff' : 'eff.csv',
+ 'RData' : 'RData.RData',
+ 'liste_graph' :'liste_graph.txt',
+ 'ira' : 'Analyse.ira',
+ 'film' : '',
+ 'db' : 'analyse.db',
+ 'corpus' : 'corpus.db',
+ }
+
encoding = utf8
lang = french
douce = 1
-ucemethod = 0
+ucemethod = 1
ucesize = 35
keep_ponct = 0
tolist = 0
ucinb =
ucenb =
occurrences =
-keep_caract = ^a-zA-Z0-9àÃ\80âÃ\82äÃ\84áÃ\81éÃ\89èÃ\88êÃ\8aëÃ\8bìÃ\8cîÃ\8eïÃ\8fòÃ\92ôÃ\94öÃ\96ùÃ\99ûÃ\9büÃ\9cçÇßœŒ’ñ.:,;!?*'_-
+keep_caract = ^a-zA-Z0-9àÃ\80âÃ\82äÃ\84áÃ\81Ã¥Ã\85ãéÃ\89èÃ\88êÃ\8aëÃ\8bìÃ\8cîÃ\8eïÃ\8fÃÂÃ\8dòÃ\92ôÃ\94öÃ\96õÃ\95øÃ\98ùÃ\99ûÃ\9büÃ\9cúÃ\9açÇßœŒ’ñ.:,;!?*'_-
lower = 1
ucimark = 0
expressions = 1
for line in f :
ffin.write(line)
os.remove(outfile + '~')
+
+ def make_table_with_classe(self, uces, list_act) :
+ table_uce = [[0 for val in list_act] for line in range(0,len(uces))]
+ uces = dict([[uce, i] for i, uce in enumerate(uces)])
+ for i, lem in enumerate(list_act) :
+ lemuces = list(set(self.getlemuces(lem)).intersection(uces))
+ for uce in lemuces :
+ table_uce[uces[uce]][i] = 1
+ table_uce.insert(0, list_act)
+ return table_uce
def parse_active(self, gramact, gramsup = None) :
log.info('parse actives')
def make_actives_limit(self, limit) :
if self.idformes is None :
self.make_idformes()
- return [lem for lem in self.lems if self.getlemeff(lem) >= limit]
+ return [lem for lem in self.lems if self.getlemeff(lem) >= limit and self.lems[lem].act == 1]
def make_actives_nb(self, nbmax, key) :
log.info('make_actives_nb : %i - %i' % (nbmax,key))
etoiles.update(uci.etoiles[1:] + uci.paras)
return list(etoiles)
+ def make_etoiles_dict(self) :
+ etoiles = [et for uci in self.ucis for et in uci.etoiles[1:]]
+ det = {}
+ for etoile in etoiles :
+ et = etoile.split('_')
+ if et[0] in det :
+ try :
+ if et[1] in det[et[0]] :
+ det[et[0]][et[1]] += 1
+ else :
+ det[et[0]][et[1]] = 1
+ except IndexError :
+ det[et[0]] += 1
+ else :
+ try :
+ det[et[0]] = {et[1] :1}
+ except IndexError :
+ det[et[0]] = 1
+ print det
+
+
def make_and_write_profile_et(self, ucecl, fileout) :
log.info('etoiles/classes')
etoiles = self.make_etoiles()
f.write(txt)
+class MakeUciStat :
+ def __init__(self, corpus) :
+ ucinb = corpus.getucinb()
+ ucisize = corpus.getucisize()
+ ucimean = float(sum(ucisize))/float(ucinb)
+ detoile = corpus.make_etoiles_dict()
+
+
class Uci :
def __init__(self, iduci, line, paraset = None) :
self.ident = iduci
try :
indice = chaineTravail.index(u'$')
trouve = True
- iDecoupe = indice
+ iDecoupe = indice - 1
except ValueError :
pass
if not trouve:
iDecoupe = nbCar
else :
if (float(dsep[' ']) / distance) > (float(meilleur[1]) / meilleureDistance) :
- meilleur[0] = caractere
+ meilleur[0] = ' '
meilleur[1] = dsep[' ']
meilleur[2] = nbCar
trouve = True
nbCar = nbCar - 1
# si on a trouvé
if trouve:
+ #if meilleur[0] != ' ' :
+ # fin = chaine[iDecoupe + 1:]
+ # retour = chaineTravail[:iDecoupe]
+ #else :
fin = chaine[iDecoupe + 1:]
- retour = chaineTravail[:iDecoupe]
+ retour = chaineTravail[:iDecoupe + 1]
return len(retour) > 0, retour, fin
# si on a rien trouvé
return False, chaine, ''
self.backup_uce()
def treattxt(self, txt, iduce, idpara, iduci) :
- txt = ' '.join(txt)
- #log.debug('ATTENTION CHINOIS -> charactères')
- #clean_chinois = [self.firstclean, self.dolower, self.make_expression, self.doapos, self.dotiret]
- #log.debug('ATTENTION CHINOIS -> list(text)')
- #txt = ' '.join(list(txt))
- txt = self.make_cleans(txt)#, clean_chinois)
- ucetxt = self.make_uces(txt, self.corpus.parametres['douce'])
+ if self.corpus.parametres.get('ucemethod', 0) == 2 and self.corpus.parametres['douce']:
+ txt = 'laphrasepoursplitter'.join(txt)
+ txt = self.make_cleans(txt)
+ txt = ' '.join([val for val in txt.split() if val not in self.ponctuation_espace])
+ ucetxt = txt.split('laphrasepoursplitter')
+ else :
+ txt = ' '.join(txt)
+ txt = self.make_cleans(txt)
+ ucetxt = self.make_uces(txt, self.corpus.parametres['douce'])
if self.corpus.ucis[-1].paras == [] :
idpara += 1
for uce in ucetxt :
import locale
import os
import sys
+#from listlex import *
from KeyFrame import AlcOptFrame
#---------------------------------------------------------------------------
provider = wx.SimpleHelpProvider()
kwds["title"] = u'Encodage'
wx.Dialog.__init__(self, *args, **kwds)
self.label_dict = wx.StaticText(self, -1, u"Langue")
- langues_n = [u'français', u'english', u'german (expérimentale)', u'italian (expérimentale)']
- self.langues = [u'french', u'english', u'german', 'italian']
+ langues_n = [u'français', u'english', u'german (expérimentale)', u'italian (expérimentale)', u'swedish (exp.)', u'portuguese (exp.)']
+ self.langues = [u'french', u'english', u'german', 'italian', 'swedish', 'portuguese']
self.choice_dict = wx.Choice(self, -1, choices = langues_n)
self.encodages = encodages
self.text = wx.StaticText(self, -1, u"Encodage du corpus : ")
def __set_properties(self):
self.choice1.SetSelection(self.paramsimi['coeff'])
self.choice2.SetSelection(self.paramsimi['layout'])
- self.choice3.SetSelection(self.paramsimi['type'])
+ self.choice3.SetSelection(self.paramsimi['type_graph'])
if self.paramsimi['type'] != 2 :
self.film.Enable(False)
self.slider_sphere.Enable(False)
def __init__( self, parent, parametres ):
wx.Dialog.__init__ ( self, parent, id = wx.ID_ANY, title = u"Préférences", pos = wx.DefaultPosition, size = wx.DefaultSize, style = wx.DEFAULT_DIALOG_STYLE )
self.parent = parent
- langues_n = [u'français', u'english', u'german (expérimentale)', u'italian (expérimentale)']
- self.langues = [u'french', u'english', u'german', 'italian']
+ langues_n = [u'français', u'english', u'german (expérimentale)', u'italian (expérimentale)', u'swedish (exp.)', u'portuguese (exp.)']
+ self.langues = [u'french', u'english', u'german', 'italian', 'swedish', u'portuguese']
self.encodages = encodages
ucimark = [u'****', u'0000']
- ucemethod = [u'charactères', u'occurrences']
+ ucemethod = [u'charactères', u'occurrences', u'paragraphe']
self.SetSizeHintsSz( wx.DefaultSize, wx.DefaultSize )
else :
parametres[val] = 0
return parametres
-
amérique amérique nom f s 0.34 1.08 0.34 1.08
améthyste améthyste nom f s 0.22 0.81 0.2 0.27
améthystes améthyste nom f p 0.22 0.81 0.02 0.54
-an an nom m s 866.58 685.81 148.41 76.76
+an an nom_sup m s 866.58 685.81 148.41 76.76
ana ana nom m 4.91 0.14 4.91 0.14
anabaptiste anabaptiste adj s 0.01 0.41 0.01 0.34
anabaptistes anabaptiste nom p 0 0.34 0 0.34
anosmie anosmie nom f s 0.07 0 0.07 0
anosmique anosmique adj m s 0.01 0.07 0.01 0.07
anoxie anoxie nom f s 0.09 0 0.09 0
-ans an nom m p 866.58 685.81 718.17 609.05
+ans an nom_sup m p 866.58 685.81 718.17 609.05
anse anse nom f s 0.47 6.08 0.33 4.86
anses anse nom f p 0.47 6.08 0.14 1.22
ansée ansé adj f s 0.02 0.07 0.01 0
from subprocess import Popen, call, PIPE
import thread
import os
+import ast
import sys
import csv
import platform
for option in self.conf.options(section) :
if self.conf.get(section, option).isdigit() :
parametres[option] = int(self.conf.get(section, option))
+ elif self.conf.get(section, option) == 'False' :
+ parametres[option] = False
+ elif self.conf.get(section, option) == 'True' :
+ parametres[option] = True
+ elif self.conf.get(section, option).startswith('(') and self.conf.get(section, option).endswith(')') :
+ parametres[option] = ast.literal_eval(self.conf.get(section, option))
else :
parametres[option] = self.conf.get(section, option)
if 'type' not in parametres :
self.conf.set(section, option, `parametres[i][option]`)
elif isinstance(parametres[i][option], basestring) :
self.conf.set(section, option, parametres[i][option].encode('utf8'))
+ elif isinstance(parametres[i][option], wx.Colour) :
+ self.conf.set(section, option, str(parametres[i][option]))
+ else :
+ self.conf.set(section, option, `parametres[i][option]`)
if outfile is None :
outfile = self.configfile
print outfile
import sys
from copy import copy
import dialog
+from listlex import *
+
def OnOpen(self, type):
if type == "Data":
else :
return None
+class SelectColumn :
+ def __init__(self, parent, dictcol, actives, pathout, selected = None) :
+ self.ira = parent
+ dial = dialog.SelectColDial(self.ira)
+ listcol = ListForSpec(dial, self, dictcol, ['forme', 'eff'])
+ dial.bSizer2.Add( listcol, 2, wx.ALL|wx.EXPAND, 5 )
+ dial.m_sdbSizer2.AddButton( dial.m_sdbSizer2OK )
+ dial.m_sdbSizer2.Realize()
+ dial.bSizer2.Add( dial.m_sdbSizer2, 0, wx.EXPAND, 5 )
+ dial.Layout()
+ if selected is None :
+ for row in xrange(listcol.list.GetItemCount()):
+ listcol.list.Select(row)
+ else :
+ orderlex = dict([[listcol.getColumnText(i,0),i] for i in range(0,listcol.list.GetItemCount())])
+ for row in selected :
+ listcol.list.Select(orderlex[actives[row]])
+ dial.CenterOnParent()
+ val = dial.ShowModal()
+ last = listcol.list.GetFirstSelected()
+ lastl = [listcol.list.GetFirstSelected()]
+ indexes = [listcol.getColumnText(listcol.list.GetFirstSelected(),0)]
+ while listcol.list.GetNextSelected(last) != -1:
+ last = listcol.list.GetNextSelected(last)
+ lastl.append(last)
+ indexes.append(listcol.getColumnText(last,0))
+ dial.Destroy()
+ column = [actives.index(val) for val in indexes]
+ column.sort()
+ with open(pathout, 'w') as f :
+ f.write('\n'.join([`val` for val in column]))
+
+class PrepSimi :
+ def __init__(self, parent, parametres, indices_simi) :
+ self.parametres = parametres
+ self.dial = dialog.PrefSimi(parent, -1, self.parametres, indices_simi)
+ self.dial.CenterOnParent()
+ self.val = self.dial.ShowModal()
+ if self.val == wx.ID_OK :
+ self.make_param()
+
+ def make_param(self) :
+ self.select = self.dial.check_colch.GetValue()
+ if self.parametres.get('first', True) :
+ keep_coord = False
+ else :
+ keep_coord = self.dial.check_coord.GetValue()
+ param = {'coeff' : self.dial.choice1.GetSelection(),
+ 'layout' : self.dial.choice2.GetSelection(),
+ 'type_graph' : self.dial.choice3.GetSelection(),
+ 'arbremax' : self.dial.check1.GetValue(),
+ 'coeff_tv' : self.dial.check_s_size.GetValue(),
+ 'coeff_tv_nb' : self.dial.spin_tv.GetValue(),
+ 'tvprop' : self.dial.check2.GetValue(),
+ 'tvmin' : self.dial.spin_tvmin.GetValue(),
+ 'tvmax' : self.dial.spin_tvmax.GetValue(),
+ 'coeff_te' : self.dial.check3.GetValue(),
+ 'coeff_temin' : self.dial.spin_temin.GetValue(),
+ 'coeff_temax' : self.dial.spin_temax.GetValue(),
+ 'label_e' : self.dial.check_elab.GetValue(),
+ 'label_v' : self.dial.check_vlab.GetValue(),
+ 'vcex' : self.dial.check_vcex.GetValue(),
+ 'vcexmin' : self.dial.spin_vcexmin.GetValue(),
+ 'vcexmax' : self.dial.spin_vcexmax.GetValue(),
+ 'cex' : self.dial.spin_cex.GetValue(),
+ 'seuil_ok' : self.dial.check_seuil.GetValue(),
+ 'seuil' : self.dial.spin_seuil.GetValue(),
+ 'cols' : self.dial.cols.GetColour(),
+ 'cola' : self.dial.cola.GetColour(),
+ 'width' : self.dial.spin_width.GetValue(),
+ 'height' : self.dial.spin_height.GetValue(),
+ 'first' : False,
+ 'keep_coord' : keep_coord,
+ 'alpha' : self.dial.slider_sphere.GetValue(),
+ 'film' : self.dial.film.GetValue()
+ }
+ if 'cexfromchi' in self.parametres :
+ param['cexfromchi'] = self.dial.checkit.GetValue()
+ if 'sfromchi' in self.parametres :
+ param['sfromchi'] = self.dial.checki.GetValue()
+ if 'vlabcolor' in self.parametres :
+ param['vlabcolor'] = self.parametres['vlabcolor']
+ if 'check_bystar' in dir(self.dial) :
+ param['bystar'] = self.dial.check_bystar.GetValue()
+ param['stars'] = self.parametres['stars']
+ self.parametres.update(param)
+
parser = OptionParser()
parser.add_option("-f", "--file", dest="filename", help="chemin du corpus", metavar="FILE", default=False)
- parser.add_option("-t", "--type", dest="type_analyse", help="type d'analyse", metavar="TYPE D'ANALYSE", default='False')
+ parser.add_option("-t", "--type", dest="type_analyse", help="type d'analyse", metavar="TYPE D'ANALYSE", default=False)
- parser.add_option("-c", "--conf", dest="configfile", help="chemin du fichier de configuration", metavar="CONF", default=False)
+ parser.add_option("-c", "--conf", dest="configfile", help="chemin du fichier de configuration", metavar="CONF", default=None)
parser.add_option("-e", "--enc", dest="encodage", help="encodage du corpus", metavar="ENC", default=locale.getpreferredencoding())
parser.add_option("-l", "--lang", dest="language", help="langue du corpus", metavar="LANG", default='french')
parser.add_option("-r", "--read", dest="read", help="lire un corpus", metavar="READ", default = False)
(options, args) = parser.parse_args()
print args
print options
- if options.configfile :
- self.ConfigPath[options.type_analyse] = os.path.abspath(options.configfile)
+ options.type_analyse
+ if options.configfile is not None:
+ config = DoConf(os.path.abspath(options.configfile)).getoptions()
+ elif options.type_analyse :
+ config = DoConf(self.ConfigPath[options.type_analyse]).getoptions()
+ #self.ConfigPath[options.type_analyse] = os.path.abspath(options.configfile)
self.TEMPDIR = tempfile.mkdtemp('iramuteq')
self.RscriptsPath = ConstructRscriptsPath(AppliPath)
self.PathPath = ConfigParser()
corpus.conn_all()
corpus.make_lems()
corpus.parse_active(gramact, gramsup)
- log.warning('ATTENTION gethapaxuces')
- corpus.gethapaxuces()
+# log.warning('ATTENTION gethapaxuces')
+# MakeUciStat(corpus)
+# qfqsdf
+ #corpus.gethapaxuces()
+ #ucisize = corpus.getucisize()
+ #ucisize = [`val` for val in ucisize]
+ #uciet = [uci.etoiles[1] for uci in corpus.ucis]
+ #res = zip(uciet, ucisize)
+ #with open('ucisize.csv', 'w') as f :
+ # f.write('\n'.join(['\t'.join(val) for val in res]))
# self.content = f.read()
#self.content = self.content.replace('\r','')
if options.type_analyse == 'alceste' :
#zerzre
#corpus.read_corpus()
#corpus.parse_active(gramact, gramsup)
- Alceste(self, corpus)
+ config['type'] = 'alceste'
+ Alceste(self, corpus, parametres = config)
# self.Text = AnalyseAlceste(self, cmd = True, big = True)
#self.Text = AnalyseAlceste(self, cmd = True)
elif options.type_analyse == 'pam' :
import wx
#import wx.lib.agw.aui as aui
import agw.aui as aui
-from chemins import ConstructPathOut, ChdTxtPathOut, FFF, ffr, PathOut, StatTxtPathOut
+from chemins import ConstructPathOut, ChdTxtPathOut, FFF, ffr, PathOut, StatTxtPathOut, simipath
from ConfigParser import ConfigParser
-from functions import ReadProfileAsDico, GetTxtProfile, read_list_file, ReadList, exec_rcode, print_liste, BugReport, DoConf
+from functions import ReadProfileAsDico, GetTxtProfile, read_list_file, ReadList, exec_rcode, print_liste, BugReport, DoConf, indices_simi
from ProfList import *
from guiparam3d import param3d, simi3d
-from PrintRScript import write_afc_graph, print_simi3d
+from PrintRScript import write_afc_graph, print_simi3d, PrintSimiScript
from profile_segment import *
from functions import ReadList
from listlex import *
from Liste import *
from search_tools import SearchFrame
from dialog import PrefGraph, PrefExport, PrefSimpleFile, PrefDendro
+from guifunct import SelectColumn, PrepSimi
from corpusNG import Corpus
import datetime
import sys
import shutil
import webbrowser
import codecs
+import logging
+
+log = logging.getLogger('iramuteq.layout')
+
class GraphPanelAfc(wx.Panel):
def __init__(self, parent, dico, list_graph, clnb, itempath = 'liste_graph_afc', coding = sys.getdefaultencoding()):
text.Wrap( -1 )
self.fgSizer5.Add( text, 0, wx.ALL, 5 )
+class GraphPanelSimi(wx.Panel):
+ def __init__(self, parent, dico, list_graph):
+ wx.Panel.__init__(self,parent)
+ self.afcnb = 1
+ self.Dict = dico
+ self.dirout = os.path.dirname(self.Dict['ira'])
+ self.parent = self.GetParent()#parent
+ self.SetFont(wx.Font(10, wx.DEFAULT, wx.NORMAL, wx.NORMAL, 0, "courier"))
+ self.labels = []
+ self.listimg = []
+ self.tabsimi = self.parent.GetParent()
+ self.ira = self.tabsimi.GetParent()
+ self.panel_1 = wx.ScrolledWindow(self, -1, style=wx.TAB_TRAVERSAL)
+ afc_img = wx.Image(os.path.join(self.ira.images_path,'button_simi.jpg'), wx.BITMAP_TYPE_ANY).ConvertToBitmap()
+ self.butafc = wx.BitmapButton(self, -1, afc_img)
+ export_img = wx.Image(os.path.join(self.ira.images_path,'button_export.jpg'), wx.BITMAP_TYPE_ANY).ConvertToBitmap()
+ self.butexport = wx.BitmapButton(self, -1, export_img)
+
+ for i in range(0,len(list_graph)):
+ if os.path.exists(os.path.join(self.dirout,list_graph[i][0])) and list_graph[i][0] != '' :
+ self.listimg.append(wx.StaticBitmap(self.panel_1, -1, wx.Bitmap(os.path.join(self.dirout,list_graph[i][0]), wx.BITMAP_TYPE_ANY)))
+ self.labels.append(wx.StaticText(self.panel_1, -1, list_graph[i][1]))
+
+ self.__set_properties()
+ self.__do_layout()
+
+ def __set_properties(self):
+ self.panel_1.EnableScrolling(True,True)
+ #self.panel_1.SetSize((1000,1000))
+ self.panel_1.SetScrollRate(20, 20)
+
+ def __do_layout(self):
+ self.sizer_1 = wx.BoxSizer(wx.HORIZONTAL)
+ self.sizer_2 = wx.BoxSizer(wx.VERTICAL)
+ self.sizer_3 = wx.BoxSizer(wx.VERTICAL)
+ self.sizer_2.Add(self.butafc, 0, 0, 0)
+ self.sizer_2.Add(self.butexport, 0, 0, 0)
+ for i in range(0, len(self.listimg)):
+ self.sizer_3.Add(self.listimg[i], 0, wx.ALIGN_CENTER_HORIZONTAL, 0)
+ self.sizer_3.Add(self.labels[i], 0, wx.ALIGN_CENTER_HORIZONTAL, 0)
+ self.panel_1.SetSizer(self.sizer_3)
+ self.sizer_1.Add(self.sizer_2, 0, wx.EXPAND, 0)
+ self.sizer_1.Add(self.panel_1, 1, wx.EXPAND, 0)
+ self.SetSizer(self.sizer_1)
+
+class DefaultTextLayout :
+ def __init__(self, ira, corpus, parametres) :
+ self.pathout = PathOut(dirout = parametres['pathout'])
+ self.ira = ira
+ self.parent = ira
+ self.parametres = parametres
+ self.corpus = corpus
+ self.dolayout()
+
+ def dolayout(self) :
+ log.info('no layout yet')
+
+class SimiLayout(DefaultTextLayout) :
+ def dolayout(self) :
+ self.pathout.basefiles(simipath)
+ self.actives = None
+ self.indices = indices_simi
+ if os.path.exists(self.pathout['liste_graph']) :
+ list_graph = read_list_file(self.pathout['liste_graph'])
+ else :
+ list_graph = [['','']]
+ notebook_flags = aui.AUI_NB_DEFAULT_STYLE | aui.AUI_NB_TAB_EXTERNAL_MOVE | aui.AUI_NB_TAB_MOVE | aui.AUI_NB_TAB_FLOAT
+ self.tabsimi = aui.AuiNotebook(self.ira.nb, -1, wx.DefaultPosition)
+ self.tabsimi.SetAGWWindowStyleFlag(notebook_flags)
+ self.tabsimi.SetArtProvider(aui.ChromeTabArt())
+ self.tabsimi.corpus = self.corpus
+ self.tabsimi.parametres = self.parametres
+ self.graphpan = GraphPanelSimi(self.tabsimi, self.pathout, list_graph)
+ self.graphpan.Bind(wx.EVT_BUTTON, self.redosimi, self.graphpan.butafc)
+ self.graphpan.Bind(wx.EVT_BUTTON, self.export, self.graphpan.butexport)
+ self.tabsimi.AddPage(self.graphpan, 'Graph')
+ self.ira.nb.AddPage(self.tabsimi, 'Analyse de graph')
+ self.ira.ShowTab(True)
+ self.ira.nb.SetSelection(self.ira.nb.GetPageCount() - 1)
+
+ def redosimi(self, evt) :
+ with open(self.pathout['selected.csv'],'r') as f :
+ selected = f.read()
+ selected = [int(val) for val in selected.splitlines()]
+ if self.actives is None :
+ with codecs.open(self.pathout['actives.csv'], 'r', self.parametres['encoding']) as f :
+ self.actives = f.read()
+ self.actives = [act for act in self.actives.splitlines()]
+ dictcol = dict([[i, [act, self.corpus.getlemeff(act)]] for i, act in enumerate(self.actives)])
+ SelectColumn(self.ira, dictcol, self.actives, self.pathout['selected.csv'], selected = selected)
+ prep = PrepSimi(self.ira, self.parametres, indices_simi)
+ self.parametres = prep.parametres
+ script = PrintSimiScript(self)
+ script.make_script()
+ pid = exec_rcode(self.ira.RPath, script.scriptout, wait = True)
+ check_Rresult(self.ira, pid)
+ if self.parametres['type_graph'] == 1:
+ if os.path.exists(self.pathout['liste_graph']):
+ graph_simi = read_list_file(self.pathout['liste_graph'])
+ graph_simi.append([os.path.basename(script.filename), script.txtgraph])
+ else :
+ graph_simi = [[os.path.basename(script.filename), script.txtgraph]]
+ print_liste(self.pathout['liste_graph'], graph_simi)
+ DoConf().makeoptions([self.parametres['type']], [self.parametres], self.pathout['Analyse.ira'])
+ if self.parametres['type_graph'] == 1:
+ self.graphpan.sizer_3.Add(wx.StaticBitmap(self.graphpan.panel_1, -1, wx.Bitmap(script.filename, wx.BITMAP_TYPE_ANY)), 0, wx.ALIGN_CENTER_HORIZONTAL, 0)
+ self.graphpan.sizer_3.Add(wx.StaticText(self.graphpan.panel_1,-1, script.txtgraph), 0, wx.ALIGN_CENTER_HORIZONTAL, 0)
+ self.graphpan.sizer_3.Fit(self.graphpan.panel_1)
+ self.graphpan.Layout()
+ self.graphpan.panel_1.Scroll(0,self.graphpan.panel_1.GetScrollRange(wx.VERTICAL))
+
+ def export(self, evt) :
+ pass
# def read_result(self) :
# #self.corpus.read_corpus_from_shelves(self.corpus.dictpathout['db'])
# #self.corpus.make_et_table()
#Lisense: GNU/GPL
from chemins import ChdTxtPathOut, StatTxtPathOut, construct_simipath
-from layout import OpenCHDS, dolexlayout, StatLayout, WordCloudLayout, OpenCorpus
+from layout import OpenCHDS, dolexlayout, StatLayout, WordCloudLayout, OpenCorpus, SimiLayout
#from corpus import Corpus
from corpusNG import Corpus, copycorpus
from tableau import Tableau
self.parent.ShowMenu(_("Text analysis"))
OpenCHDS(self.parent, corpus, self.conf, Alceste = True)
elif self.conf['type'] == 'simitxt' :
- self.tableau = Tableau(self.parent, self.conf['ira'])
- self.DictPathOut=construct_simipath(self.conf['pathout'])
- self.tableau.dictpathout = self.DictPathOut
- self.tableau.read_tableau(self.tableau.dictpathout['db'])
- if self.tableau.parametre.get('corpus', False) :
- self.corpus=corpus
+ self.parent.ShowMenu(_("Text analysis"))
+ SimiLayout(self.parent, corpus, self.conf)
+ #self.tableau = Tableau(self.parent, self.conf['ira'])
+ #self.DictPathOut=construct_simipath(self.conf['pathout'])
+ #self.tableau.dictpathout = self.DictPathOut
+ #self.tableau.read_tableau(self.tableau.dictpathout['db'])
+ #if self.tableau.parametre.get('corpus', False) :
+ # self.corpus=corpus
#self.corpus.read_corpus_from_shelves(self.DictPathOut['corpus'])
- self.corpus.parametres['openpath'] = self.conf['pathout']
- self.parent.ShowMenu(_("Text analysis"))
- DoSimi(self.parent, self.conf, isopen = True, filename = self.conf['ira'], gparent = self, openfromprof=False)
+ # self.corpus.parametres['openpath'] = self.conf['pathout']
+ #DoSimi(self.parent, self.conf, isopen = True, filename = self.conf['ira'], gparent = self, openfromprof=False)
- print 'simi'
# try :
# #if self.conf['type'] in ['analyse','lexico','stat','wordcloud'] :
# # self.corpus = Corpus(parent)
else :
self.paramsimi = {'coeff' : 0,
'layout' : 2,
- 'type' : 1,
+ 'type_graph' : 1,
'arbremax' : 1,
'coeff_tv' : 0,
'coeff_tv_nb' : 10,
paramsimi = {'coeff' : self.dial.choice1.GetSelection(),
'layout' : self.dial.choice2.GetSelection(),
- 'type' : self.dial.choice3.GetSelection(),
+ 'type_graph' : self.dial.choice3.GetSelection(),
'arbremax' : self.dial.check1.GetValue(),
'coeff_tv' : self.dial.check_s_size.GetValue(),
'coeff_tv_nb' : self.dial.spin_tv.GetValue(),
if self.paramsimi['layout'] == 4 : layout = 'graphopt'
self.filename=''
- if self.paramsimi['type'] == 0 : type = 'tkplot'
- if self.paramsimi['type'] == 1 :
+ if self.paramsimi['type_graph'] == 0 : type = 'tkplot'
+ if self.paramsimi['type_graph'] == 1 :
graphnb = 1
type = 'nplot'
dirout = os.path.dirname(self.DictPathOut['mat01'])
while os.path.exists(os.path.join(dirout,'graph_simi_'+str(graphnb)+'.png')):
graphnb +=1
self.filename = ffr(os.path.join(dirout,'graph_simi_'+str(graphnb)+'.png'))
- if self.paramsimi['type'] == 2 : type = 'rgl'
+ if self.paramsimi['type_graph'] == 2 : type = 'rgl'
if self.paramsimi['arbremax'] :
arbremax = 'TRUE'
#Copyright (c) 2008-2011 Pierre Ratinaud
#Lisense: GNU/GPL
-from chemins import ConstructPathOut, construct_simipath
+from chemins import ffr, simipath
from corpus import Corpus
import os
from analysetxt import AnalyseText
from ConfigParser import RawConfigParser
from guifunct import getPage, getCorpus
from dialog import StatDialog
-from functions import indices_simi, progressbar, treat_var_mod
+from guifunct import SelectColumn, PrepSimi
+from functions import indices_simi, progressbar, treat_var_mod, read_list_file, print_liste
from tableau import Tableau
from tabsimi import DoSimi
-from PrintRScript import PrintRScript
+from PrintRScript import PrintSimiScript
import wx
from copy import copy
logger = logging.getLogger('iramuteq.textsimi')
-
-
class SimiTxt(AnalyseText):
def doanalyse(self) :
+ self.parametres['type'] = 'simitxt'
+ self.pathout.basefiles(simipath)
self.indices = indices_simi
self.makesimiparam()
+ #FIXME
+ self.actives = self.corpus.make_actives_limit(3)
+ dictcol = dict([[i, [act, self.corpus.getlemeff(act)]] for i, act in enumerate(self.actives)])
+ SelectColumn(self.ira, dictcol, self.actives, self.pathout['selected.csv'])
self.makefiles()
prep = PrepSimi(self.ira, self.parametres, indices_simi)
self.parametres = prep.parametres
- script = PrintSimScript(self)
-
+ script = PrintSimiScript(self)
+ script.make_script()
+ self.doR(script.scriptout)
+ if self.parametres['type_graph'] == 1:
+ if os.path.exists(self.pathout['liste_graph']):
+ graph_simi = read_list_file(self.pathout['liste_graph'])
+ graph_simi.append([os.path.basename(script.filename), script.txtgraph])
+ else :
+ graph_simi = [[os.path.basename(script.filename), script.txtgraph]]
+ print_liste(self.pathout['liste_graph'], graph_simi)
def preferences(self) :
dial = StatDialog(self, self.parent)
def makesimiparam(self) :
self.paramsimi = {'coeff' : 0,
'layout' : 2,
- 'type' : 1,
+ 'type_graph' : 1,
'arbremax' : 1,
'coeff_tv' : 1,
'coeff_tv_nb' : 0,
}
self.parametres.update(self.paramsimi)
- def makefiles(self) :
- self.actives, lim = self.corpus.make_actives_nb(self.parametres.get('max_actives',1500), 1)
+ def makefiles(self, lim=3) :
+ #self.actives, lim = self.corpus.make_actives_nb(self.parametres.get('max_actives',1500), 1)
self.parametres['eff_min_forme'] = lim
self.parametres['nbactives'] = len(self.actives)
self.parametres['fromprof'] = True
self.parametres['stars'] = copy(self.listet)
self.parametres['sfromchi'] = False
-class PrepSimi :
- def _init_(self, parent, parametres, indices_simi) :
- self.parametres = parametres
- self.dial = PrefSimi(parent, -1, self.parametres, indices_simi)
- self.dial.CenterOnParent()
- self.val = self.dial.ShowModal()
- if self.val == wx.ID_OK :
- self.make_param()
-
- def make_param(self) :
- self.select = self.dial.check_colch.GetValue()
- param = {'coeff' : self.dial.choice1.GetSelection(),
- 'layout' : self.dial.choice2.GetSelection(),
- 'type' : self.dial.choice3.GetSelection(),
- 'arbremax' : self.dial.check1.GetValue(),
- 'coeff_tv' : self.dial.check_s_size.GetValue(),
- 'coeff_tv_nb' : self.dial.spin_tv.GetValue(),
- 'tvprop' : self.dial.check2.GetValue(),
- 'tvmin' : self.dial.spin_tvmin.GetValue(),
- 'tvmax' : self.dial.spin_tvmax.GetValue(),
- 'coeff_te' : self.dial.check3.GetValue(),
- 'coeff_temin' : self.dial.spin_temin.GetValue(),
- 'coeff_temax' : self.dial.spin_temax.GetValue(),
- 'label_e' : self.dial.check_elab.GetValue(),
- 'label_v' : self.dial.check_vlab.GetValue(),
- 'vcex' : self.dial.check_vcex.GetValue(),
- 'vcexmin' : self.dial.spin_vcexmin.GetValue(),
- 'vcexmax' : self.dial.spin_vcexmax.GetValue(),
- 'cex' : self.dial.spin_cex.GetValue(),
- 'seuil_ok' : self.dial.check_seuil.GetValue(),
- 'seuil' : self.dial.spin_seuil.GetValue(),
- 'cols' : self.dial.cols.GetColour(),
- 'cola' : self.dial.cola.GetColour(),
- 'width' : self.dial.spin_width.GetValue(),
- 'height' : self.dial.spin_height.GetValue(),
- 'first' : False,
- 'keep_coord' : keep_coord,
- 'alpha' : self.dial.slider_sphere.GetValue(),
- 'film' : self.dial.film.GetValue()
- }
- if 'cexfromchi' in self.parametres :
- param['cexfromchi'] = self.dial.checkit.GetValue()
- if 'sfromchi' in self.parametres :
- param['sfromchi'] = self.dial.checki.GetValue()
- if 'vlabcolor' in self.parametres :
- param['vlabcolor'] = self.parametres['vlabcolor']
- if 'check_bystar' in dir(self.dial) :
- param['bystar'] = self.dial.check_bystar.GetValue()
- param['stars'] = self.parametres['stars']
- self.parametres.update(param)
-
-class PrintSimiScript(PrintRScript) :
- def make_script(self) :
- self.load(['igraph', 'proxy', 'Matrix'])
- self.source([self.analyse.parent.RscriptsPath['simi'], self.analyse.parent.RscriptsPath['Rgraph']])
- txt = """
- dm.path <- "%s"
- cn.path <- "%s"
- selected.col <- "%s"
- """ % (self.pathout['mat01.csv'], self.pathout['actives.csv'], self.pathout['selected.csv'])
-
- txt += """
- dm <- dm[, selected.col+1]
- """
- if self.parametres['coeff'] == 0 :
- method = 'cooc'
- txt += """
- method <- 'cooc'
- mat <- make.a(dm)
- """
- else :
- txt += """
- dm <- as.matrix(dm)
- """
- if self.parametres['coeff'] == 1 :
- method = 'prcooc'
- txt += """
- method <- 'Russel'
- mat <- simil(dm, method = 'Russel', diag = TRUE, upper = TRUE, by_rows = FALSE)
- """
- elif self.analyses.indices[self.parametres['coeff']] == 'binomial' :
- method = 'binomial'
- txt += """
- method <- 'binomial'
- mat <- binom.sim(dm)
- """
- else :
- method = self.types[self.paramsimi['coeff']]
- txt += """
- method <-"%s"
- mat <- simil(dm, method = method, diag = TRUE, upper = TRUE, by_rows = FALSE)
- """ % self.analyse.indices[self.parametres['coeff']]
- txt += """
- mat <- as.matrix(stats::as.dist(mat,diag=TRUE,upper=TRUE))
- mat[is.na(mat)] <- 0
- mat[is.infinite(mat)] <- 0
- """
- if self.parametres['layout'] == 0 : layout = 'random'
- if self.parametres['layout'] == 1 : layout = 'circle'
- if self.parametres['layout'] == 2 : layout = 'frutch'
- if self.parametres['layout'] == 3 : layout = 'kawa'
- if self.parametres['layout'] == 4 : layout = 'graphopt'
-
- txt += """
- eff <- colSums(dm)
- g.ori <- graph.adjacency(mat, mode='lower', weighted = TRUE)
- w.ori <- E(g.ori)$weight
- if (max.tree) {
- if (method == 'cooc') {
- E(g.ori)$weight <- 1 / w.ori
- } else {
- E(g.ori)$weigth <- 1 - w.ori
- }
- g.max <- minimum.spanning.tree(g.ori)
- if (method == 'cooc') {
- E(g.max)$weight <- 1 / E(g.max)$weight
- } else {
- E(g.max)$weight <- 1 - E(g.max)$weight
- }
- g.toplot <- g.max
- } else {
- g.toplot <- g.ori
- }
- """
-
- self.tableau = Tableau(self.parent, '')
- self.tableau.listactives = self.actives
- self.tableau.parametre['fromtxt'] = True
- self.corpus.lems_eff = dict([[lem,[self.corpus.lems[lem].freq]] for lem in self.actives])
- #print('ATTENTION ETOILES')
- #self.paramsimi['bystar'] = True
- self.tableau.listet = copy(self.listet)
- #self.paramsimi['cexfromchi'] = True
- #self.paramsimi['vlabcolor'] = True
- self.tableau.actives = copy(self.corpus.lems_eff)
- DoSimi(self, fromprof = self.pathout['mat01.csv'], param = self.paramsimi, pathout = self.pathout.dirout)
+# self.tableau = Tableau(self.parent, '')
+# self.tableau.listactives = self.actives
+# self.tableau.parametre['fromtxt'] = True
+# self.corpus.lems_eff = dict([[lem,[self.corpus.lems[lem].freq]] for lem in self.actives])
+# #print('ATTENTION ETOILES')
+# #self.paramsimi['bystar'] = True
+# self.tableau.listet = copy(self.listet)
+# #self.paramsimi['cexfromchi'] = True
+# #self.paramsimi['vlabcolor'] = True
+# self.tableau.actives = copy(self.corpus.lems_eff)
+# DoSimi(self, fromprof = self.pathout['mat01.csv'], param = self.paramsimi, pathout = self.pathout.dirout)
#class SimiTxt :
# def __init__(self, parent, cmd = False, param = None):