From 44b2ba75167b1ed5508e2f110130805bfe43a3bd Mon Sep 17 00:00:00 2001 From: Pierre Date: Sun, 7 Oct 2012 16:58:08 +0200 Subject: [PATCH] wdc --- PrintRScript.py | 19 +++ iramuteq.py | 4 +- textsimi.py | 2 +- textwordcloud.py | 358 +++++++++++++++++++++++++++++++------------------------- tree.py | 5 + 5 files changed, 226 insertions(+), 162 deletions(-) diff --git a/PrintRScript.py b/PrintRScript.py index 7aa30d6..3817335 100644 --- a/PrintRScript.py +++ b/PrintRScript.py @@ -51,6 +51,8 @@ class PrintRScript : class chdtxt(PrintRScript) : pass +def Rcolor(color) : + return str(color).replace(')', ', max=255)') class Alceste2(PrintRScript) : def doscript(self) : @@ -908,3 +910,20 @@ class PrintSimiScript(PrintRScript) : self.add(txt) self.write() +class WordCloudRScript(PrintRScript) : + def make_script(self) : + self.Source([self.analyse.parent.RscriptsPath['Rgraph']]) + self.packages(['wordcloud']) + bg_col = Rcolor(self.parametres['col_bg']) + txt_col = Rcolor(self.parametres['col_text']) + txt = """ + act <- read.csv2("%s", header = FALSE, row.names=1, sep='\t') + selected.col <- read.table("%s") + toprint <- act[selected.col + 1,] + open_file_graph("%s", width = %i, height = %i) + par(bg=rgb%s) + wordcloud(row.names(toprint), toprint[,1], scale=c(%f,%f), random.order=FALSE, colors=rgb%s) + dev.off() + """ % (self.parametres['actives_eff.csv'], self.parametres['selected.csv'], self.parametres['graphout'], self.parametres['width'], self.parametres['height'], bg_col, self.parametres['maxcex'], self.parametres['mincex'], txt_col) + self.add(txt) + self.write() diff --git a/iramuteq.py b/iramuteq.py index db25305..01cbcb6 100644 --- a/iramuteq.py +++ b/iramuteq.py @@ -854,10 +854,10 @@ Voulez-vous fermer quand même ?""" #except : # BugReport(self) - def OnWordCloud(self, evt) : + def OnWordCloud(self, evt, corpus = None) : # print 'PLUS DE BUG SUR WORDCLOUD' try : - self.Text = WordCloud(self) + self.Text = WordCloud(self, corpus, parametres = {'type' : 'wordcloud'}, dlg = 
progressbar(self, 3)) if self.Text.val == wx.ID_OK : PlaySound(self) except : diff --git a/textsimi.py b/textsimi.py index 72222ab..18da495 100644 --- a/textsimi.py +++ b/textsimi.py @@ -4,7 +4,7 @@ #Lisense: GNU/GPL from chemins import ffr, simipath -from corpus import Corpus +#from corpus import Corpus import os from analysetxt import AnalyseText from ConfigParser import RawConfigParser diff --git a/textwordcloud.py b/textwordcloud.py index fce0e17..b56d16d 100644 --- a/textwordcloud.py +++ b/textwordcloud.py @@ -5,178 +5,218 @@ #Lisense: GNU/GPL from chemins import ConstructPathOut, StatTxtPathOut, ffr, FFF -from corpus import Corpus -from guifunct import getPage, getCorpus +#from corpus import Corpus +from analysetxt import AnalyseText +from guifunct import getPage, getCorpus, SelectColumn from ConfigParser import RawConfigParser from functions import sortedby, progressbar, CreateIraFile, exec_rcode, check_Rresult, MessageImage from dialog import StatDialog, PrefWordCloud -from openanalyse import OpenAnalyse +#from openanalyse import OpenAnalyse #from ttparser import * import tempfile from time import sleep import wx import os +#from corpusNG import Corpus +import logging -class WordCloud(): - def __init__(self, parent, cmd = False, lem = True, exp = True): -#################################################################### - self.conf = None - self.parent = parent - self.type = 'alceste' - self.cmd = cmd - self.ConfigPath = parent.ConfigPath - self.DictPath = parent.DictPath - self.KeyConf = RawConfigParser() - self.KeyConf.read(self.ConfigPath['key']) - page = getPage(self.parent) - if page is not None : - self.corpus = getCorpus(page) - if self.corpus is not None : - self.pathout = ConstructPathOut(self.corpus.parametre['openpath'], 'WordCloud') - self.dictpathout = StatTxtPathOut(self.pathout) - self.val = wx.ID_OK - else : - self.corpus = Corpus(parent) - self.corpus.parametre['encodage'] = parent.corpus_encodage - self.corpus.parametre['lang'] = 
parent.corpus_lang - self.corpus.parametre['filename'] = parent.filename - self.pathout = ConstructPathOut(self.corpus.parametre['filename'], 'WordCloud') - self.dictpathout = StatTxtPathOut(self.pathout) - self.corpus.dictpathout = self.dictpathout - if not self.cmd : - dial = StatDialog(self,parent) - dial.CenterOnParent() - self.val = dial.ShowModal() - else : - self.val = wx.ID_OK - if self.val == wx.ID_OK : - if not self.cmd : - if dial.radio_lem.GetSelection() == 0 : lem = True - else : lem = False - if dial.exp.GetSelection() == 0 : exp = True - else : exp = False - self.make_uce = dial.check_uce.GetValue() - self.corpus.parametre['nbforme_uce'] = dial.spin_ctrl_4.GetValue() - self.corpus.parametre['max_actives'] = dial.spin_max_actives.GetValue() - self.corpus.parametre['eff_min_uce'] = self.corpus.parametre['nbforme_uce'] - else : - lem = True - exp = True - self.make_uce = False - self.corpus.parametre['nbforme_uce'] = None - self.corpus.parametre['eff_min_uce'] = None - self.corpus.parametre['lem'] = lem - self.corpus.parametre['expressions'] = exp - self.corpus.supplementaires = [option for option in self.KeyConf.options('KEYS') if self.KeyConf.get('KEYS', option) == "2"] - self.corpus.typeactive = [option for option in self.KeyConf.options('KEYS') if self.KeyConf.get('KEYS', option) == "1"] - self.make_corpus() +logger = logging.getLogger('iramuteq.textwordcloud') - if self.val == wx.ID_OK : - if 'supplementaires' not in dir(self.corpus) : - self.corpus.supplementaires = [option for option in self.KeyConf.options('KEYS') if self.KeyConf.get('KEYS', option) == "2"] - if 'typeactive' not in dir(self.corpus) : - self.corpus.typeactive = [option for option in self.KeyConf.options('KEYS') if self.KeyConf.get('KEYS', option) == "1"] - self.make_stats() - def make_corpus(self) : - if not self.cmd : - self.dlg = progressbar(self, 7) - else : - self.dlg = None - self.corpus.content = self.parent.content - ucis_txt, ucis_paras_txt = 
self.corpus.start_analyse(self.parent, dlg = self.dlg, cmd = self.cmd, fromtt = False) - del ucis_txt - - if not self.cmd : - self.dlg.Update(5, '%i UCI...' % len(ucis_paras_txt)) - self.corpus.make_ucis_paras_uces(ucis_paras_txt, make_uce = self.make_uce) - del ucis_paras_txt - - if self.corpus.para_coords != [[] for val in self.corpus.para_coords] : - self.corpus.parametre['para'] = True - else : - self.corpus.parametre['para'] = False - self.corpus.make_etoiles(self.corpus.para_coords) - - if not self.cmd : - self.dlg.Update(6, u'Dictionnaires') - uces, orderuces = self.corpus.make_forms_and_uces() - self.corpus.make_lems(self.parent.lexique) - - def make_stats(self): - if not self.cmd : - if not 'dlg' in dir(self) : - self.dlg = progressbar(self, 7) - if not self.corpus.parametre['lem'] : - formes = self.corpus.formes - else : - formes = self.corpus.make_lem_eff() - act = [[forme, formes[forme][0], formes[forme][2]] for forme in formes if formes[forme][2] in - self.corpus.typeactive] - act = sortedby(act, 2, 1) - act = [[val[0], `val[1]`, val[2]] for val in act] - with open(os.path.join(self.pathout, 'actives.csv'), 'w') as f: - f.write('\n'.join([';'.join(line) for line in act])) - #act = [[i, val] for i, val in enumerate(act)] - self.corpus.dictpathout = self.dictpathout - #self.corpus.make_type_tot() - - if not self.cmd : - self.dlg.Destroy() - pref = PrefWordCloud(self.parent) - pref.CenterOnParent() - res = pref.ShowModal() - else : - res = wx.ID_OK +class WordCloud(AnalyseText): + def doanalyse(self) : + self.parametres['type'] = 'wordcloud' + #FIXME + self.actives = self.corpus.make_actives_limit(3) + dictcol = dict([[i, [act, self.corpus.getlemeff(act)]] for i, act in enumerate(self.actives)]) + SelectColumn(self.ira, dictcol, self.actives, self.pathout['selected.csv']) + self.make_option() + self.make_wordcloud() + script = WordCloudRScript(self) + script.make_script() + self.doR(script.scriptout) + + def make_option(self) : + dial = 
PrefWordCloud(self.ira) + dial.CenterOnParent() + res = dial.ShowModal() if res == wx.ID_OK : - self.dlg = progressbar(self, 3) - width = pref.spin_L.GetValue() - height = pref.spin_H.GetValue() - maxword = pref.spin_maxword.GetValue() - mincex = float(pref.spin_mincex.GetValue())/float(10) - maxcex = float(pref.spin_maxcex.GetValue())/float(10) - col_text = pref.color_text.GetColour() - col_bg = pref.color_bg.GetColour() - col_text = str(col_text).replace(')', ', max=255)') - col_bg = str(col_bg).replace(')', ', max=255)') - outgraph = os.path.join(os.path.dirname(self.dictpathout['zipf']), 'nuage_') + self.parametres['width'] = dial.spin_L.GetValue() + self.parametres['height'] = dial.spin_H.GetValue() + #maxword = dial.spin_maxword.GetValue() + self.parametres['mincex'] = float(dial.spin_mincex.GetValue())/float(10) + self.parametres['maxcex'] = float(dial.spin_maxcex.GetValue())/float(10) + self.parametres['col_text'] = dial.color_text.GetColour() + self.parametres['col_bg'] = dial.color_bg.GetColour() + outgraph = os.path.join(os.path.dirname(self.pathout['zipf.png']), 'nuage_') nb = 1 while os.path.exists(outgraph + str(nb) + '.png') : nb += 1 - outgraph = outgraph + str(nb) + '.png' + self.parametres['graphout'] = outgraph + str(nb) + '.png' + - txt = """ - source("%s") - library(wordcloud) - act <- read.csv2("%s", header = FALSE, row.names = 1) - maxword <- %i - maxtoprint <- ifelse(nrow(act) > maxword, maxword, nrow(act)) - toprint <- act[1:maxtoprint,] - open_file_graph("%s", width = %i, height = %i) - par(bg=rgb%s) - wordcloud(row.names(toprint), toprint[,1], scale=c(%f,%f), random.order=FALSE, colors=rgb%s) - dev.off() - """ % (self.parent.RscriptsPath['Rgraph'], ffr(os.path.join(self.pathout, 'actives.csv')), maxword, ffr(outgraph), width, height, col_bg, maxcex, mincex, col_text) - tmpscript = tempfile.mktemp(dir=self.parent.TEMPDIR) - with open(tmpscript, 'w') as f : - f.write(txt) - pid = exec_rcode(self.parent.RPath, tmpscript, wait = False) - 
while pid.poll() == None : - self.dlg.Pulse('R...') - sleep(0.2) - check_Rresult(self.parent, pid) - self.corpus.save_corpus(self.dictpathout['db']) - CreateIraFile(self.dictpathout, 0, corpname = os.path.basename(self.corpus.parametre['filename']), section = - 'wordcloud') - #win = MessageImage(self.parent, -1, u"Graphique", size=(700, 500),style = wx.DEFAULT_FRAME_STYLE) - #win.imagename = "nuage.png" - #win.addsaveimage(outgraph) - #txt = "" % FFF(outgraph).replace('//','/') - #win.HtmlPage.SetPage(txt) - #win.Show(True) - if not self.cmd : - #OpenAnalyse(self.parent, self.dictpathout['ira']) - #self.DoLayout(self.parent) - OpenAnalyse(self.parent, self.dictpathout['ira']) - self.dlg.Update(7, 'fini') - self.dlg.Destroy() + def make_wordcloud(self) : + act = [[act, self.corpus.getlemeff(act)] for act in self.actives] + with open(self.pathout['actives_eff.csv'], 'w') as f : + f.write('\n'.join(['\t'.join([val[0], `val[1]`]) for val in act])) + #################################################################### +# self.conf = None +# self.parent = parent +# self.type = 'alceste' +# self.cmd = cmd +# self.ConfigPath = parent.ConfigPath +# self.DictPath = parent.DictPath +# self.KeyConf = RawConfigParser() +# self.KeyConf.read(self.ConfigPath['key']) +# page = getPage(self.parent) +# if page is not None : +# self.corpus = getCorpus(page) +# if self.corpus is not None : +# self.pathout = ConstructPathOut(self.corpus.parametre['openpath'], 'WordCloud') +# self.dictpathout = StatTxtPathOut(self.pathout) +# self.val = wx.ID_OK +# else : +# self.corpus = Corpus(parent) +# self.corpus.parametre['encodage'] = parent.corpus_encodage +# self.corpus.parametre['lang'] = parent.corpus_lang +# self.corpus.parametre['filename'] = parent.filename +# self.pathout = ConstructPathOut(self.corpus.parametre['filename'], 'WordCloud') +# self.dictpathout = StatTxtPathOut(self.pathout) +# self.corpus.dictpathout = self.dictpathout +# if not self.cmd : +# dial = StatDialog(self,parent) +# 
dial.CenterOnParent() +# self.val = dial.ShowModal() +# else : +# self.val = wx.ID_OK +# if self.val == wx.ID_OK : +# if not self.cmd : +# if dial.radio_lem.GetSelection() == 0 : lem = True +# else : lem = False +# if dial.exp.GetSelection() == 0 : exp = True +# else : exp = False +# self.make_uce = dial.check_uce.GetValue() +# self.corpus.parametre['nbforme_uce'] = dial.spin_ctrl_4.GetValue() +# self.corpus.parametre['max_actives'] = dial.spin_max_actives.GetValue() +# self.corpus.parametre['eff_min_uce'] = self.corpus.parametre['nbforme_uce'] +# else : +# lem = True +# exp = True +# self.make_uce = False +# self.corpus.parametre['nbforme_uce'] = None +# self.corpus.parametre['eff_min_uce'] = None +# self.corpus.parametre['lem'] = lem +# self.corpus.parametre['expressions'] = exp +# self.corpus.supplementaires = [option for option in self.KeyConf.options('KEYS') if self.KeyConf.get('KEYS', option) == "2"] +# self.corpus.typeactive = [option for option in self.KeyConf.options('KEYS') if self.KeyConf.get('KEYS', option) == "1"] +# self.make_corpus() +# +# if self.val == wx.ID_OK : +# if 'supplementaires' not in dir(self.corpus) : +# self.corpus.supplementaires = [option for option in self.KeyConf.options('KEYS') if self.KeyConf.get('KEYS', option) == "2"] +# if 'typeactive' not in dir(self.corpus) : +# self.corpus.typeactive = [option for option in self.KeyConf.options('KEYS') if self.KeyConf.get('KEYS', option) == "1"] +# self.make_stats() +# +# def make_corpus(self) : +# if not self.cmd : +# self.dlg = progressbar(self, 7) +# else : +# self.dlg = None +# self.corpus.content = self.parent.content +# ucis_txt, ucis_paras_txt = self.corpus.start_analyse(self.parent, dlg = self.dlg, cmd = self.cmd, fromtt = False) +# del ucis_txt +# +# if not self.cmd : +# self.dlg.Update(5, '%i UCI...' 
% len(ucis_paras_txt)) +# self.corpus.make_ucis_paras_uces(ucis_paras_txt, make_uce = self.make_uce) +# del ucis_paras_txt +# +# if self.corpus.para_coords != [[] for val in self.corpus.para_coords] : +# self.corpus.parametre['para'] = True +# else : +# self.corpus.parametre['para'] = False +# self.corpus.make_etoiles(self.corpus.para_coords) +# +# if not self.cmd : +# self.dlg.Update(6, u'Dictionnaires') +# uces, orderuces = self.corpus.make_forms_and_uces() +# self.corpus.make_lems(self.parent.lexique) +# +# def make_stats(self): +# if not self.cmd : +# if not 'dlg' in dir(self) : +# self.dlg = progressbar(self, 7) +# if not self.corpus.parametre['lem'] : +# formes = self.corpus.formes +# else : +# formes = self.corpus.make_lem_eff() +# act = [[forme, formes[forme][0], formes[forme][2]] for forme in formes if formes[forme][2] in +# self.corpus.typeactive] +# act = sortedby(act, 2, 1) +# act = [[val[0], `val[1]`, val[2]] for val in act] +# with open(os.path.join(self.pathout, 'actives.csv'), 'w') as f: +# f.write('\n'.join([';'.join(line) for line in act])) +# #act = [[i, val] for i, val in enumerate(act)] +# self.corpus.dictpathout = self.dictpathout +# #self.corpus.make_type_tot() +# +# if not self.cmd : +# self.dlg.Destroy() +# pref = PrefWordCloud(self.parent) +# pref.CenterOnParent() +# res = pref.ShowModal() +# else : +# res = wx.ID_OK +# if res == wx.ID_OK : +# self.dlg = progressbar(self, 3) +# width = pref.spin_L.GetValue() +# height = pref.spin_H.GetValue() +# maxword = pref.spin_maxword.GetValue() +# mincex = float(pref.spin_mincex.GetValue())/float(10) +# maxcex = float(pref.spin_maxcex.GetValue())/float(10) +# col_text = pref.color_text.GetColour() +# col_bg = pref.color_bg.GetColour() +# col_text = str(col_text).replace(')', ', max=255)') +# col_bg = str(col_bg).replace(')', ', max=255)') +# outgraph = os.path.join(os.path.dirname(self.dictpathout['zipf']), 'nuage_') +# nb = 1 +# while os.path.exists(outgraph + str(nb) + '.png') : +# nb += 1 +# 
outgraph = outgraph + str(nb) + '.png' +# +# txt = """ +# source("%s") +# library(wordcloud) +# act <- read.csv2("%s", header = FALSE, row.names = 1) +# maxword <- %i +# maxtoprint <- ifelse(nrow(act) > maxword, maxword, nrow(act)) +# toprint <- act[1:maxtoprint,] +# open_file_graph("%s", width = %i, height = %i) +# par(bg=rgb%s) +# wordcloud(row.names(toprint), toprint[,1], scale=c(%f,%f), random.order=FALSE, colors=rgb%s) +# dev.off() +# """ % (self.parent.RscriptsPath['Rgraph'], ffr(os.path.join(self.pathout, 'actives.csv')), maxword, ffr(outgraph), width, height, col_bg, maxcex, mincex, col_text) +# tmpscript = tempfile.mktemp(dir=self.parent.TEMPDIR) +# with open(tmpscript, 'w') as f : +# f.write(txt) +# pid = exec_rcode(self.parent.RPath, tmpscript, wait = False) +# while pid.poll() == None : +# self.dlg.Pulse('R...') +# sleep(0.2) +# check_Rresult(self.parent, pid) +# self.corpus.save_corpus(self.dictpathout['db']) +# CreateIraFile(self.dictpathout, 0, corpname = os.path.basename(self.corpus.parametre['filename']), section = +# 'wordcloud') +# #win = MessageImage(self.parent, -1, u"Graphique", size=(700, 500),style = wx.DEFAULT_FRAME_STYLE) +# #win.imagename = "nuage.png" +# #win.addsaveimage(outgraph) +# #txt = "" % FFF(outgraph).replace('//','/') +# #win.HtmlPage.SetPage(txt) +# #win.Show(True) +# if not self.cmd : +# #OpenAnalyse(self.parent, self.dictpathout['ira']) +# #self.DoLayout(self.parent) +# OpenAnalyse(self.parent, self.dictpathout['ira']) +# self.dlg.Update(7, 'fini') +# self.dlg.Destroy() diff --git a/tree.py b/tree.py index aac691a..bde076a 100644 --- a/tree.py +++ b/tree.py @@ -316,12 +316,14 @@ class LeftTree(CT.CustomTreeCtrl): pam = classification.Append(wx.ID_ANY, u"Par matrice des distances") menu.AppendMenu(-1, u"Classification", classification) simi = menu.Append(wx.ID_ANY, u"Analyse de similitude") + wdc = menu.Append(wx.ID_ANY, u"Nuage de mots") menu.AppendSeparator() self.Bind(wx.EVT_MENU, self.OnAlceste, alceste) 
self.Bind(wx.EVT_MENU, self.OnPam, pam) self.Bind(wx.EVT_MENU, self.OnStat, stat) self.Bind(wx.EVT_MENU, self.OnSpec, spec) self.Bind(wx.EVT_MENU, self.OnSimiTxt, simi) + self.Bind(wx.EVT_MENU, self.OnWordCloud, wdc) itemdelete = menu.Append(wx.ID_ANY, "Supprimer de l'historique") if item == self.GetRootItem(): @@ -366,6 +368,9 @@ class LeftTree(CT.CustomTreeCtrl): def OnSimiTxt(self, evt) : self.parent.OnSimiTxt(evt, self.getcorpus(self.itemdict)) + def OnWordCloud(self, evt) : + self.parent.OnWordCloud(evt, self.getcorpus(self.itemdict)) + def OnItemBackground(self, event): colourdata = wx.ColourData() -- 2.7.4