...
[iramuteq] / textsimi.py
index 18da495..55e2f2b 100644 (file)
@@ -1,66 +1,68 @@
 # -*- coding: utf-8 -*-
 #Author: Pierre Ratinaud
-#Copyright (c) 2008-2011 Pierre Ratinaud
-#Lisense: GNU/GPL
+#Copyright (c) 2008-2013 Pierre Ratinaud
+#License: GNU/GPL
 
 from chemins import ffr, simipath
-#from corpus import Corpus
 import os
 from analysetxt import AnalyseText
-from ConfigParser import RawConfigParser
-from guifunct import getPage, getCorpus
-from dialog import StatDialog
-from guifunct import SelectColumn, PrepSimi
+from guifunct import PrepSimi
 from functions import indices_simi, progressbar, treat_var_mod, read_list_file, print_liste
-from tableau import Tableau
-from tabsimi import DoSimi
 from PrintRScript import PrintSimiScript
 import wx
 from copy import copy
-
 import logging
 
-logger = logging.getLogger('iramuteq.textsimi')
+log = logging.getLogger('iramuteq.textsimi')
 
 class SimiTxt(AnalyseText): 
     def doanalyse(self) :
         self.parametres['type'] = 'simitxt'
         self.pathout.basefiles(simipath)
         self.indices = indices_simi
-        self.makesimiparam()
+        if self.dlg :  # default similarity parameters are only loaded when self.dlg is truthy
+            self.makesimiparam()
         #FIXME
         self.actives = self.corpus.make_actives_limit(3)
         dictcol = dict([[i, [act, self.corpus.getlemeff(act)]] for i, act in enumerate(self.actives)]) 
-        SelectColumn(self.ira, dictcol, self.actives, self.pathout['selected.csv'])
-        self.makefiles()
-        prep = PrepSimi(self.ira, self.parametres, indices_simi)
-        self.parametres = prep.parametres
-        script = PrintSimiScript(self)
-        script.make_script()
-        self.doR(script.scriptout)
-        if self.parametres['type_graph'] == 1:
-            if os.path.exists(self.pathout['liste_graph']):
-                graph_simi = read_list_file(self.pathout['liste_graph'])
-                graph_simi.append([os.path.basename(script.filename), script.txtgraph])
-            else :
-                graph_simi = [[os.path.basename(script.filename), script.txtgraph]]
-            print_liste(self.pathout['liste_graph'], graph_simi)
-
-    def preferences(self) :
-        dial = StatDialog(self, self.parent)
-        dial.CenterOnParent()
-        val = dial.ShowModal()
-        if val == 5100 :
-            if dial.radio_lem.GetSelection() == 0 :
-                lem = 1
+        continu = False
+        if self.dlg :  # dlg path: parameters must be confirmed through PrepSimi (prep.val == wx.ID_OK)
+            self.listet = self.corpus.make_etoiles()
+            self.listet.sort()
+            self.stars = copy(self.listet)
+            self.parametres['stars'] = copy(self.listet)
+            self.parametres['sfromchi'] = False
+            self.dlg.Destroy()  # the incoming dlg is destroyed before PrepSimi is built; a new progressbar replaces it on OK
+            prep = PrepSimi(self.ira, self, self.parametres, self.pathout['selected.csv'], self.actives, indices_simi, wordlist=dictcol)
+            if prep.val == wx.ID_OK :
+                continu = True
+                self.parametres = prep.parametres  # from here on the parameters are the ones held by PrepSimi
+                self.dlg = progressbar(self.ira, 4)
             else :
-                lem = 0            
-            self.parametres['lem'] = lem
-            dial.Destroy()
-            return self.parametres
+                return False  # PrepSimi was not confirmed with wx.ID_OK
         else :
-            dial.Destroy()
-            return None
+            continu = True  # no dlg: run with self.parametres as they already are
+        if continu :  # NOTE(review): always True here (the only False path returned above), so the final else is unreachable
+            self.makefiles()
+            script = PrintSimiScript(self)
+            script.make_script()
+            if not self.doR(script.scriptout, dlg = self.dlg, message = 'R...') :  # a falsy doR result aborts the analysis
+                log.info('Problem')
+                return False
+            if self.parametres['type_graph'] == 1:  # only this graph type is recorded in the 'liste_graph' file
+                if self.parametres['svg'] :  # svg on: list script.filename with its extension replaced by .svg
+                    filename, ext = os.path.splitext(script.filename)
+                    fileout = filename + '.svg'                    
+                else :
+                    fileout = script.filename
+                if os.path.exists(self.pathout['liste_graph']):  # append to the existing list, otherwise start a new one
+                    graph_simi = read_list_file(self.pathout['liste_graph'])
+                    graph_simi.append([os.path.basename(fileout), script.txtgraph])
+                else :
+                    graph_simi = [[os.path.basename(fileout), script.txtgraph]]
+                print_liste(self.pathout['liste_graph'], graph_simi)  # success path falls off the end and returns None
+        else : 
+            return False
 
     def makesimiparam(self) :
         self.paramsimi = {'coeff' : 0,
@@ -90,9 +92,13 @@ class SimiTxt(AnalyseText):
                           'height' : 1000,
                           'bystar' : False,
                           'first' : True,
-                          'keep_coord' : True,
+                          'keep_coord' : False,
                           'alpha' : 20,
                           'film': False,
+                          'svg' : 0,
+                          'com' : 0,
+                          'communities' : 0,
+                          'halo' : 0,
                           #'ira' : self.pathout['Analyse.ira']
                           }
         self.parametres.update(self.paramsimi)
@@ -101,180 +107,78 @@ class SimiTxt(AnalyseText):
         #self.actives, lim = self.corpus.make_actives_nb(self.parametres.get('max_actives',1500), 1)
         self.parametres['eff_min_forme'] = lim
         self.parametres['nbactives'] = len(self.actives)
-        self.parametres['fromprof'] = True
+        self.parametres['fromprof'] = False
         self.corpus.make_and_write_sparse_matrix_from_uces(self.actives, self.pathout['mat01.csv'], self.pathout['listeuce1.csv'])
         with open(self.pathout['actives.csv'], 'w') as f :
             f.write('\n'.join(self.actives).encode(self.ira.syscoding))
 
-        self.listet = self.corpus.make_etoiles()
-        self.listet.sort()
-        self.parametres['stars'] = copy(self.listet)
-        self.parametres['sfromchi'] = False
-
+class SimiFromCluster(SimiTxt) :
+    def __init__(self, ira, corpus, actives, lfreq, lchi, numcluster, parametres = None, dlg = False) :  # NOTE(review): the None/False defaults cannot actually be used, see the two notes below
+        self.actives = actives
+        self.numcluster = numcluster  # used by makefiles() as an index into self.corpus.lc
+        self.lfreq = lfreq  # written to the 'actives_nb.csv' path by makefiles()
+        self.lchi = lchi  # written to the 'actives_chi.csv' path by makefiles()
+        parametres['name'] = 'simi_classe_%i' % (numcluster + 1)  # NOTE(review): raises TypeError when parametres is left at its None default
+        dlg.Destroy()  # NOTE(review): raises AttributeError when dlg is left at its False default
+        SimiTxt.__init__(self, ira, corpus, parametres, dlg=True, lemdial = False)  # always passes dlg=True to the base class, whatever dlg was given
+    
+    def preferences(self) :  # returns the current parameters unchanged, without opening any dialog
+        return self.parametres
 
+    def doanalyse(self) :  # returns False if PrepSimi is not confirmed or doR fails; otherwise falls through and returns None
+        self.parametres['type'] = 'clustersimitxt'
+        self.pathout.basefiles(simipath)
+        self.indices = indices_simi
+        if self.dlg  :
+            self.makesimiparam()
+        if 'bystar' in self.parametres :  # 'bystar' is dropped from the parameters for the per-cluster graph
+            del self.parametres['bystar']
+        dictcol = dict([[i, [act, self.corpus.getlemclustereff(act, self.numcluster)]] for i, act in enumerate(self.actives)]) 
+        continu = True  # stays True when there is no dlg, so the analysis then runs without PrepSimi
+        #print self.dlg
+        if self.dlg :
+            self.dlg.Destroy()
+            self.stars = []#copy(self.listet)
+            self.parametres['stars'] = 0#copy(self.listet)
+            self.parametres['sfromchi'] = 1
+            prep = PrepSimi(self.ira, self, self.parametres, self.pathout['selected.csv'], self.actives, indices_simi, wordlist=dictcol)
+            if prep.val == wx.ID_OK :
+                continu = True
+                self.parametres = prep.parametres  # from here on the parameters are the ones held by PrepSimi
+            else :
+                continu = False
+        if continu :
+            self.dlg = progressbar(self.parent, 3)  # NOTE(review): SimiTxt.doanalyse passes self.ira here; confirm self.parent is set by the base class
+            self.makefiles()
+            self.parametres['type'] = 'clustersimitxt'  # set again after self.parametres may have been replaced by prep.parametres
+            script = PrintSimiScript(self)
+            script.make_script()
+            if not self.doR(script.scriptout, dlg = self.dlg, message = 'R ...') :  # a falsy doR result aborts the analysis
+                return False
+            if self.parametres['type_graph'] == 1:  # only this graph type is recorded in the 'liste_graph' file
+                if self.parametres['svg'] :  # svg on: list script.filename with its extension replaced by .svg
+                    filename, ext = os.path.splitext(script.filename)
+                    fileout = filename + '.svg'                    
+                else :
+                    fileout = script.filename
+                if os.path.exists(self.pathout['liste_graph']):  # append to the existing list, otherwise start a new one
+                    graph_simi = read_list_file(self.pathout['liste_graph'])
+                    graph_simi.append([os.path.basename(fileout), script.txtgraph])
+                else :
+                    graph_simi = [[os.path.basename(fileout), script.txtgraph]]
+                print_liste(self.pathout['liste_graph'], graph_simi)
+        else : 
+            return False
 
-#        self.tableau = Tableau(self.parent, '')
-#        self.tableau.listactives = self.actives
-#        self.tableau.parametre['fromtxt'] = True
-#        self.corpus.lems_eff = dict([[lem,[self.corpus.lems[lem].freq]] for lem in self.actives])
-#        #print('ATTENTION  ETOILES')
-#        #self.paramsimi['bystar'] = True
-#        self.tableau.listet = copy(self.listet)
-#        #self.paramsimi['cexfromchi'] = True
-#        #self.paramsimi['vlabcolor'] = True
-#        self.tableau.actives = copy(self.corpus.lems_eff)
-#        DoSimi(self, fromprof = self.pathout['mat01.csv'], param = self.paramsimi, pathout = self.pathout.dirout)
+    def makefiles(self) :  # writes the pathout entries 'mat01.csv', 'actives.csv', 'actives_nb.csv' and 'actives_chi.csv' for this cluster
+        self.parametres['eff_min_forme'] = 3
+        self.parametres['nbactives'] = len(self.actives)
+        self.parametres['fromprof'] = True
+        self.corpus.make_and_write_sparse_matrix_from_classe(self.actives, self.corpus.lc[self.numcluster], self.pathout['mat01.csv'])  # matrix is built from self.corpus.lc[self.numcluster] only
+        with open(self.pathout['actives.csv'], 'w') as f :
+            f.write('\n'.join(self.actives).encode(self.ira.syscoding))        
+        with open(self.pathout['actives_nb.csv'], 'w') as f :
+            f.write('\n'.join([`val` for val in self.lfreq]))  # backticks are Python 2 repr() syntax (removed in Python 3); one value per line
+        with open(self.pathout['actives_chi.csv'], 'w') as f :
+            f.write('\n'.join([`val` for val in self.lchi]))  # same one-repr-per-line layout as 'actives_nb.csv'
 
-#class SimiTxt :
-#    def __init__(self, parent, cmd = False, param = None):
-#        self.parent = parent
-#        self.cmd = cmd
-#        self.ConfigPath = parent.ConfigPath
-#        self.DictPath = parent.DictPath
-#        self.KeyConf = RawConfigParser()
-#        self.KeyConf.read(self.ConfigPath['key'])
-#        self.indices = indices_simi
-#        self.paramsimi = {'coeff' : 0,
-#                          'layout' : 2,
-#                          'type' : 1,
-#                          'arbremax' : 1,
-#                          'coeff_tv' : 1,
-#                          'coeff_tv_nb' : 0,
-#                          'tvprop' : 0,
-#                          'tvmin' : 5,
-#                          'tvmax' : 30,
-#                          'coeff_te' : 1,
-#                          'coeff_temin' : 1,
-#                          'coeff_temax' : 10,
-#                          'label_v': 1,
-#                          'label_e': 0,
-#                          'vcex' : 1,
-#                          'cexfromchi' : False,
-#                          'vcexmin' : 10,
-#                          'vcexmax' : 25,
-#                          'cex' : 10,
-#                          'seuil_ok' : 0,
-#                          'seuil' : 1,
-#                          'cols' : (255,0,0),
-#                          'cola' : (200,200,200),
-#                          'width' : 1000,
-#                          'height' : 1000,
-#                          'bystar' : False,
-#                          'first' : True,
-#                          'keep_coord' : True,
-#                          'alpha' : 20,
-#                          'film': False,
-#                          }
-#        page = getPage(self.parent)
-#        if page is not None :
-#            self.corpus = getCorpus(page)
-#            if self.corpus is not None :
-#                self.pathout = ConstructPathOut(self.corpus.parametre['openpath'], 'simitxt')
-#                self.dictpathout = construct_simipath(self.pathout)
-#                self.val = wx.ID_OK
-#                self.make_table()
-#                self.make_simi()
-#        else :
-#            self.corpus = Corpus(parent)
-#            self.corpus.content = self.parent.content
-#            self.corpus.parametre['encodage'] = parent.corpus_encodage
-#            self.corpus.parametre['lang'] = parent.corpus_lang
-#            self.corpus.parametre['filename'] = parent.filename
-#            self.corpus.parametre['eff_min_uce'] = None
-#            self.pathout = ConstructPathOut(self.corpus.parametre['filename'], 'simitxt')
-#            self.dictpathout = construct_simipath(self.pathout)
-#            dial = StatDialog(self, self.parent)
-#            dial.check_uce.SetValue(True)
-#            dial.check_uce.Enable(False)
-#            dial.OnCheckUce(wx.EVT_MENU)
-#            self.val = dial.ShowModal()
-#            if self.val == wx.ID_OK :
-#                with open(self.parent.ConfigPath['key'], 'w') as f:
-#                    self.KeyConf.write(f)
-#                if dial.radio_lem.GetSelection() == 0 : lem = True
-#                else : lem = False
-#                if dial.exp.GetSelection() == 0 : exp = True
-#                else : exp = False
-#                dial.Destroy()
-#                self.corpus.parametre['lem'] = lem
-#                self.corpus.parametre['expressions'] = exp
-#                self.corpus.parametre['nbforme_uce'] = dial.spin_ctrl_4.GetValue()
-#                self.corpus.parametre['eff_min_forme'] = dial.spin_ctrl_5.GetValue()
-#                self.corpus.parametre['max_actives'] = dial.spin_max_actives.GetValue()
-#                self.make_corpus()
-#                self.make_table()
-#                self.make_simi()
-#            else :
-#                dial.Destroy()
-#
-#    def make_corpus(self) :
-#        print 'make corpus'
-#        if not self.cmd :
-#            dlg = progressbar(self, maxi = 6)
-#        self.corpus.supplementaires = [option for option in self.KeyConf.options('KEYS') if self.KeyConf.get('KEYS', option) == "2"]
-#        self.corpus.typeactive = [option for option in self.KeyConf.options('KEYS') if self.KeyConf.get('KEYS', option) == "1"]
-#        ucis_txt, ucis_paras_txt = self.corpus.start_analyse(self.parent, dlg = dlg, cmd = self.cmd)
-#        del ucis_txt
-#    
-#        if not self.cmd :
-#            dlg.Update(5, '%i ucis - Construction des uces' % len(ucis_paras_txt))
-#        self.corpus.make_ucis_paras_uces(ucis_paras_txt, make_uce = True)
-#        del ucis_paras_txt
-#
-#        if self.corpus.para_coords != [[] for val in self.corpus.para_coords] :
-#            self.corpus.parametre['para'] = True
-#        else :
-#            self.corpus.parametre['para'] = False
-#        self.corpus.make_etoiles(self.corpus.para_coords)
-#        print 'len(ucis_paras_uces)', len(self.corpus.ucis_paras_uces)
-#
-#        if not self.cmd :
-#            dlg.Update(6, u'Dictionnaires')
-#        uces, self.orderuces = self.corpus.make_forms_and_uces()
-#        self.corpus.ucenb = len(uces)
-#        self.corpus.make_lems(self.parent.lexique)
-#
-#        self.corpus.make_var_actives() 
-#        self.corpus.make_var_supp()
-#        self.corpus.lems_eff = self.corpus.make_lem_eff()
-#
-#        #variables = treat_var_mod(listet)
-#        #print(variables)
-#        #self.corpus.write_etoiles(self.dictpathout['etoiles'])
-#        if not self.cmd :
-#            dlg.Destroy()
-#                
-#    def make_table(self) :
-#        if 'orderuces' not in dir(self) :
-#            self.orderuces = [(i,j,k) for i, uci in enumerate(self.corpus.ucis_paras_uces) for j, para in enumerate(uci) for k, uce in enumerate(para)]
-#            self.orderuces = dict([[val, i] for i, val in enumerate(self.orderuces)])
-#            self.corpus.ucenb = len(self.orderuces)
-#        #tabuc1 = self.corpus.make_table_with_uce(self.orderuces)
-#        #tabuc1.insert(0,self.corpus.actives)
-#        #tabuc1 = self.corpus.make_sparse_matrix_with_uce(self.orderuces)
-#        #self.corpus.write_sparse_matrix(self.dictpathout['mat01'], tabuc1, self.corpus.ucenb, len(self.corpus.actives))
-#        if self.corpus.actives is None :
-#            self.corpus.typeactive = [option for option in self.KeyConf.options('KEYS') if self.KeyConf.get('KEYS', option) == "1"]
-#            self.corpus.min_eff_formes()
-#            self.corpus.make_var_actives()
-#        self.corpus.make_and_write_sparse_matrix_from_uce(self.orderuces, self.dictpathout['mat01'])
-#        #self.corpus.write_tab(tabuc1,self.dictpathout['mat01'])
-#
-#    def make_simi(self) :
-#        self.tableau = Tableau(self.parent, '')
-#        self.tableau.listactives = self.corpus.actives
-#        self.tableau.parametre['fromtxt'] = True
-#        if 'lems_eff' not in dir(self.corpus) :
-#            self.corpus.lems_eff = self.corpus.make_lem_eff()
-#        #print('ATTENTION  ETOILES')
-#        #self.paramsimi['bystar'] = True
-#        self.listet = self.corpus.get_unique_etoiles()
-#        self.listet.sort()
-#        self.tableau.listet = copy(self.listet)
-#        self.paramsimi['stars'] = copy(self.listet)
-#        #self.paramsimi['cexfromchi'] = True
-#        self.paramsimi['sfromchi'] = False
-#        #self.paramsimi['vlabcolor'] = True
-#        self.tableau.actives = dict([[lem, self.corpus.lems_eff[lem]] for lem in self.corpus.actives])
-#        self.corpus.save_corpus(self.dictpathout['corpus']) 
-#        DoSimi(self, fromprof = self.dictpathout['mat01'], param = self.paramsimi, pathout = self.pathout)