X-Git-Url: http://iramuteq.org/git?p=iramuteq;a=blobdiff_plain;f=textwordcloud.py;h=6f75498c546f6210f95a7d88f4aeced14616b180;hp=fce0e17f221c8311c8eba1ef102e8dbc5513e343;hb=287f9e72c3e3d666b016dff0fa3dc39419adfcc2;hpb=22cd27b2bbe9ab1ffa7ef06fa764b5147ae17dad diff --git a/textwordcloud.py b/textwordcloud.py index fce0e17..6f75498 100644 --- a/textwordcloud.py +++ b/textwordcloud.py @@ -2,181 +2,111 @@ # -*- coding: utf-8 -*- #Author: Pierre Ratinaud #Copyright (c) 2008-2009 Pierre Ratinaud -#Lisense: GNU/GPL +#License: GNU/GPL -from chemins import ConstructPathOut, StatTxtPathOut, ffr, FFF -from corpus import Corpus -from guifunct import getPage, getCorpus -from ConfigParser import RawConfigParser -from functions import sortedby, progressbar, CreateIraFile, exec_rcode, check_Rresult, MessageImage +from analysetxt import AnalyseText +from guifunct import getPage, getCorpus, SelectColumn +#from ConfigParser import RawConfigParser +from functions import sortedby, progressbar from dialog import StatDialog, PrefWordCloud -from openanalyse import OpenAnalyse +from PrintRScript import WordCloudRScript #from ttparser import * import tempfile -from time import sleep +#from time import sleep import wx import os +import logging -class WordCloud(): - def __init__(self, parent, cmd = False, lem = True, exp = True): -#################################################################### - self.conf = None - self.parent = parent - self.type = 'alceste' - self.cmd = cmd - self.ConfigPath = parent.ConfigPath - self.DictPath = parent.DictPath - self.KeyConf = RawConfigParser() - self.KeyConf.read(self.ConfigPath['key']) - page = getPage(self.parent) - if page is not None : - self.corpus = getCorpus(page) - if self.corpus is not None : - self.pathout = ConstructPathOut(self.corpus.parametre['openpath'], 'WordCloud') - self.dictpathout = StatTxtPathOut(self.pathout) - self.val = wx.ID_OK - else : - self.corpus = Corpus(parent) - self.corpus.parametre['encodage'] = parent.corpus_encodage - self.corpus.parametre['lang'] = parent.corpus_lang - self.corpus.parametre['filename'] = parent.filename - self.pathout = ConstructPathOut(self.corpus.parametre['filename'], 'WordCloud') - self.dictpathout = StatTxtPathOut(self.pathout) - self.corpus.dictpathout = self.dictpathout - if not self.cmd : - dial = StatDialog(self,parent) - dial.CenterOnParent() - self.val = dial.ShowModal() - else : - self.val = wx.ID_OK - if self.val == wx.ID_OK : - if not self.cmd : - if dial.radio_lem.GetSelection() == 0 : lem = True - else : lem = False - if dial.exp.GetSelection() == 0 : exp = True - else : exp = False - self.make_uce = dial.check_uce.GetValue() - self.corpus.parametre['nbforme_uce'] = dial.spin_ctrl_4.GetValue() - self.corpus.parametre['max_actives'] = dial.spin_max_actives.GetValue() - self.corpus.parametre['eff_min_uce'] = self.corpus.parametre['nbforme_uce'] - else : - lem = True - exp = True - self.make_uce = False - self.corpus.parametre['nbforme_uce'] = None - self.corpus.parametre['eff_min_uce'] = None - self.corpus.parametre['lem'] = lem - self.corpus.parametre['expressions'] = exp - self.corpus.supplementaires = [option for option in self.KeyConf.options('KEYS') if self.KeyConf.get('KEYS', option) == "2"] - self.corpus.typeactive = [option for option in self.KeyConf.options('KEYS') if self.KeyConf.get('KEYS', option) == "1"] - self.make_corpus() - - if self.val == wx.ID_OK : - if 'supplementaires' not in dir(self.corpus) : - self.corpus.supplementaires = [option for option in self.KeyConf.options('KEYS') if self.KeyConf.get('KEYS', option) == "2"] - if 'typeactive' not in dir(self.corpus) : - self.corpus.typeactive = [option for option in self.KeyConf.options('KEYS') if self.KeyConf.get('KEYS', option) == "1"] - self.make_stats() - - def make_corpus(self) : - if not self.cmd : - self.dlg = progressbar(self, 7) - else : - self.dlg = None - self.corpus.content = self.parent.content - ucis_txt, ucis_paras_txt = self.corpus.start_analyse(self.parent, dlg = self.dlg, cmd = self.cmd, fromtt = False) - del ucis_txt - - if not self.cmd : - self.dlg.Update(5, '%i UCI...' % len(ucis_paras_txt)) - self.corpus.make_ucis_paras_uces(ucis_paras_txt, make_uce = self.make_uce) - del ucis_paras_txt +logger = logging.getLogger('iramuteq.textwordcloud') - if self.corpus.para_coords != [[] for val in self.corpus.para_coords] : - self.corpus.parametre['para'] = True - else : - self.corpus.parametre['para'] = False - self.corpus.make_etoiles(self.corpus.para_coords) - - if not self.cmd : - self.dlg.Update(6, u'Dictionnaires') - uces, orderuces = self.corpus.make_forms_and_uces() - self.corpus.make_lems(self.parent.lexique) - - def make_stats(self): - if not self.cmd : - if not 'dlg' in dir(self) : - self.dlg = progressbar(self, 7) - if not self.corpus.parametre['lem'] : - formes = self.corpus.formes - else : - formes = self.corpus.make_lem_eff() - act = [[forme, formes[forme][0], formes[forme][2]] for forme in formes if formes[forme][2] in - self.corpus.typeactive] - act = sortedby(act, 2, 1) - act = [[val[0], `val[1]`, val[2]] for val in act] - with open(os.path.join(self.pathout, 'actives.csv'), 'w') as f: - f.write('\n'.join([';'.join(line) for line in act])) - #act = [[i, val] for i, val in enumerate(act)] - self.corpus.dictpathout = self.dictpathout - #self.corpus.make_type_tot() - if not self.cmd : - self.dlg.Destroy() - pref = PrefWordCloud(self.parent) - pref.CenterOnParent() - res = pref.ShowModal() +class WordCloud(AnalyseText): + def doanalyse(self) : + self.parametres['type'] = 'wordcloud' + #FIXME + limit = 3 + self.dlg.Destroy() + res = self.make_option() + if res == wx.ID_OK : + if self.parametres['mode'] == 2 : + self.actives = self.corpus.make_actives_limit(limit, 1) + self.actives += self.corpus.make_actives_limit(limit, 2) + elif self.parametres['mode'] == 0 : + self.actives = self.corpus.make_actives_limit(limit, 1) + elif self.parametres['mode'] == 1 : + self.actives = self.corpus.make_actives_limit(limit, 2) + dictcol = dict([[i, [act, self.corpus.getlemeff(act)]] for i, act in enumerate(self.actives)]) + selectcol = SelectColumn(self.ira, dictcol, self.actives, self.pathout['selected.csv'], dlg = True) + if selectcol.ok : + self.dlg = progressbar(self.ira, 2) + self.make_wordcloud() + script = WordCloudRScript(self) + script.make_script() + self.doR(script.scriptout, dlg = self.dlg, message = 'R...') + else : + return 'NOK' else : - res = wx.ID_OK + return 'NOK' + + def make_option(self, fromcluster = False) : + dial = PrefWordCloud(self.ira, fromcluster) + dial.CenterOnParent() + res = dial.ShowModal() if res == wx.ID_OK : - self.dlg = progressbar(self, 3) - width = pref.spin_L.GetValue() - height = pref.spin_H.GetValue() - maxword = pref.spin_maxword.GetValue() - mincex = float(pref.spin_mincex.GetValue())/float(10) - maxcex = float(pref.spin_maxcex.GetValue())/float(10) - col_text = pref.color_text.GetColour() - col_bg = pref.color_bg.GetColour() - col_text = str(col_text).replace(')', ', max=255)') - col_bg = str(col_bg).replace(')', ', max=255)') - outgraph = os.path.join(os.path.dirname(self.dictpathout['zipf']), 'nuage_') + if dial.format.GetSelection() == 0 : + svg = 0 + else : + svg = 1 + self.parametres['width'] = dial.spin_L.GetValue() + self.parametres['height'] = dial.spin_H.GetValue() + self.parametres['maxword'] = dial.spin_maxword.GetValue() + self.parametres['mincex'] = float(dial.spin_mincex.GetValue())/float(10) + self.parametres['maxcex'] = float(dial.spin_maxcex.GetValue())/float(10) + self.parametres['col_text'] = dial.color_text.GetColour() + self.parametres['col_bg'] = dial.color_bg.GetColour() + self.parametres['mode'] = dial.typeformeschoice.GetSelection() + self.parametres['svg'] = svg + if fromcluster : + self.parametres['indice'] = dial.indice.GetSelection() + outgraph = os.path.join(os.path.dirname(self.pathout['zipf.png']), 'nuage_') nb = 1 - while os.path.exists(outgraph + str(nb) + '.png') : + if svg : + end = '.svg' + else : + end = '.png' + while os.path.exists(outgraph + str(nb) + end) : nb += 1 - outgraph = outgraph + str(nb) + '.png' + self.parametres['graphout'] = outgraph + str(nb) + end + dial.Destroy() + return res - txt = """ - source("%s") - library(wordcloud) - act <- read.csv2("%s", header = FALSE, row.names = 1) - maxword <- %i - maxtoprint <- ifelse(nrow(act) > maxword, maxword, nrow(act)) - toprint <- act[1:maxtoprint,] - open_file_graph("%s", width = %i, height = %i) - par(bg=rgb%s) - wordcloud(row.names(toprint), toprint[,1], scale=c(%f,%f), random.order=FALSE, colors=rgb%s) - dev.off() - """ % (self.parent.RscriptsPath['Rgraph'], ffr(os.path.join(self.pathout, 'actives.csv')), maxword, ffr(outgraph), width, height, col_bg, maxcex, mincex, col_text) - tmpscript = tempfile.mktemp(dir=self.parent.TEMPDIR) - with open(tmpscript, 'w') as f : - f.write(txt) - pid = exec_rcode(self.parent.RPath, tmpscript, wait = False) - while pid.poll() == None : - self.dlg.Pulse('R...') - sleep(0.2) - check_Rresult(self.parent, pid) - self.corpus.save_corpus(self.dictpathout['db']) - CreateIraFile(self.dictpathout, 0, corpname = os.path.basename(self.corpus.parametre['filename']), section = - 'wordcloud') - #win = MessageImage(self.parent, -1, u"Graphique", size=(700, 500),style = wx.DEFAULT_FRAME_STYLE) - #win.imagename = "nuage.png" - #win.addsaveimage(outgraph) - #txt = "" % FFF(outgraph).replace('//','/') - #win.HtmlPage.SetPage(txt) - #win.Show(True) - if not self.cmd : - #OpenAnalyse(self.parent, self.dictpathout['ira']) - #self.DoLayout(self.parent) - OpenAnalyse(self.parent, self.dictpathout['ira']) - self.dlg.Update(7, 'fini') - self.dlg.Destroy() + def make_wordcloud(self) : + act = ['\t'.join([act, `self.corpus.getlemeff(act)`]) for act in self.actives] + with open(self.pathout['actives_eff.csv'], 'w') as f : + f.write('\n'.join(act).encode(self.ira.syscoding)) + + +class ClusterCloud(WordCloud) : + def doanalyse(self) : + self.parametres['type'] = 'clustercloud' + #FIXME + limit = 2 + res = self.make_option(True) + if res == wx.ID_OK : + prof = self.parametres['clusterprof'] + del self.parametres['clusterprof'] + if self.parametres['indice'] == 0 : + tokeep = 1 + else : + tokeep = 2 + prof = [[val[0], int(round(val[tokeep]))] for val in prof] + with open(self.pathout['actives_eff.csv'], 'w') as f : + f.write('\n'.join(['\t'.join([val[0], `val[1]`]) for val in prof]).encode(self.ira.syscoding)) + dictcol = dict([[i, val] for i, val in enumerate(prof)]) + self.actives = [val[0] for val in prof] + SelectColumn(self.ira, dictcol, self.actives, self.pathout['selected.csv'], dlg = True) + script = WordCloudRScript(self) + script.make_script() + self.doR(script.scriptout, dlg = self.dlg, message = 'R...') + else : + return 'NOK'