# -*- coding: utf-8 -*-
#Author: Pierre Ratinaud
-#from corpusNG import Corpus
+#license : GNU GPL
+#copyright : 2012-2013 (c) Pierre Ratinaud
+
import logging
from chemins import PathOut, ChdTxtPathOut
from functions import exec_rcode, check_Rresult, DoConf, print_liste
from time import time, sleep
from uuid import uuid4
import os
-#ALCESTE
from PrintRScript import RchdTxt, AlcesteTxtProf
from OptionAlceste import OptionAlc
from layout import PrintRapport
from openanalyse import OpenAnalyse
+from dialog import StatDialog
from time import time
-######################################
-print '#######LOGGING TEST###########'
-log = logging.getLogger('iramuteq.analyse')
-#formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
-#ch = logging.StreamHandler()
-#ch.setFormatter(formatter)
-#log.addHandler(ch)
-#log.setLevel(logging.INFO)
-#######################################
-
-#def make_ucecl_from_R(filein) :
-# with open(filein, 'rU') as f :
-# c = f.readlines()
-# c.pop(0)
-# ucecl = []
-# for line in c :
-# line = line.replace('\n', '').replace('"', '').split(';')
-# ucecl.append([int(line[0]) - 1, int(line[1])])
-# classesl = [val[1] for val in ucecl]
-# clnb = max(classesl)
-# ucecl = sorted(ucecl, key=itemgetter(1))
-# ucecl = [[uce[0] for uce in ucecl if uce[1] == i] for i in range(clnb+1)]
-# return ucecl
-#
-#def make_lc(self, uces, classes, clnb) :
-# self.lc = [[] for classe in range(0,clnb)]
-# for i in range(0,clnb):
-# self.lc[i] = [uce for j, uce in enumerate(uces) if i+1 == classes[j]]
-# self.lc0 = [uce for j, uce in enumerate(uces) if 0 == classes[j]]
+log = logging.getLogger('iramuteq.analyse')
class AnalyseText :
- def __init__(self, ira, corpus, parametres = None, dlg = False) :
+ def __init__(self, ira, corpus, parametres = None, dlg = False, lemdial = True) :
self.corpus = corpus
self.ira = ira
self.parent = ira
self.dlg = dlg
self.dialok = True
self.parametres = parametres
- self.pathout = PathOut(corpus.parametres['originalpath'], analyse_type = parametres['type'], dirout = corpus.parametres['pathout'])
- self.parametres = self.make_config(parametres)
+ self.lemdial = lemdial
+ self.val = False
+ self.keys = DoConf(self.ira.ConfigPath['key']).getoptions()
+ if not 'pathout' in self.parametres :
+ self.pathout = PathOut(corpus.parametres['originalpath'], analyse_type = parametres['type'], dirout = corpus.parametres['pathout'])
+ else :
+ self.pathout = PathOut(filename = corpus.parametres['originalpath'], dirout = self.parametres['pathout'], analyse_type = self.parametres['type'])
+ self.parametres = self.lemparam()
+ if self.parametres is not None :
+ self.parametres = self.make_config(parametres)
log.info(self.pathout.dirout)
if self.parametres is not None :
self.keys = DoConf(self.ira.ConfigPath['key']).getoptions()
- gramact = [k for k in keys if keys[k] == 1]
- gramsup = [k for k in keys if keys[k] == 2]
- #FIXME
- if not 'lem' in self.parametres :
- self.parametres['lem'] = 1
+ gramact = [k for k in self.keys if self.keys[k] == 1]
+ gramsup = [k for k in self.keys if self.keys[k] == 2]
self.parametres['pathout'] = self.pathout.mkdirout()
self.pathout = PathOut(dirout = self.parametres['pathout'])
self.pathout.createdir(self.parametres['pathout'])
self.parametres['type'] = parametres['type']
self.parametres['encoding'] = self.ira.syscoding
self.t1 = time()
- #if self.corpus.lems is None :
self.corpus.make_lems(lem = self.parametres['lem'])
corpus.parse_active(gramact, gramsup)
result_analyse = self.doanalyse()
def doanalyse(self) :
pass
+ def lemparam(self) :
+ if self.dlg and self.lemdial:
+ dial = StatDialog(self, self.parent)
+ dial.CenterOnParent()
+ val = dial.ShowModal()
+ if val == 5100 :
+ if dial.radio_lem.GetSelection() == 0 :
+ lem = 1
+ else :
+ lem = 0
+ self.parametres['lem'] = lem
+ dial.Destroy()
+ return self.parametres
+ else :
+ dial.Destroy()
+ return None
+ else :
+ return self.parametres
+
def make_config(self, config) :
if config is not None :
if not self.dlg :
return config
else :
return self.preferences()
+ else :
+ return None
def readconfig(self, config) :
return config
def preferences(self) :
- return {}
+ return self.parametres
def printRscript(self) :
pass
log.info('R code...')
pid = exec_rcode(self.ira.RPath, Rscript, wait = wait)
while pid.poll() is None :
- if dlg is not None :
+ if dlg :
self.dlg.Pulse(message)
sleep(0.2)
else :
sleep(0.2)
- check_Rresult(self.ira, pid)
+ return check_Rresult(self.ira, pid)
class Alceste(AnalyseText) :
def doanalyse(self) :
- #self.pathout = PathOut(self.corpus.parametres['filename'], 'alceste')
self.parametres['type'] = 'alceste'
self.pathout.basefiles(ChdTxtPathOut)
self.actives, lim = self.corpus.make_actives_nb(self.parametres['max_actives'], 1)
elif self.parametres['classif_mode'] == 2 :
self.corpus.make_and_write_sparse_matrix_from_uci(self.actives, self.pathout['TableUc1'], self.pathout['listeuce1'])
Rscript = self.printRscript()
- self.doR(Rscript)
- #self.lc = make_ucecl_from_R(self.pathout['uce'])
- #self.lc0 = self.lc.pop(0)
+ self.doR(Rscript, dlg = self.dlg, message = 'CHD...')
+
self.corpus.make_ucecl_from_R(self.pathout['uce'])
self.corpus.make_and_write_profile(self.actives, self.corpus.lc, self.pathout['Contout'])
self.sup, lim = self.corpus.make_actives_nb(self.parametres['max_actives'], 2)
self.clnb = len(self.corpus.lc)
self.parametres['clnb'] = self.clnb
Rscript = self.printRscript2()
- self.doR(Rscript)
+ self.doR(Rscript, dlg = self.dlg, message = 'profils et A.F.C. ...')
self.time = time() - self.t1
minutes, seconds = divmod(self.time, 60)
hours, minutes = divmod(minutes, 60)
self.dial.CenterOnParent()
self.dialok = self.dial.ShowModal()
if self.dialok == 5100 :
- if self.dial.radio_1.GetSelection() == 0 :
- lem = 1
- else :
- lem = 0
- parametres['lem'] = lem
parametres['classif_mode'] = self.dial.radio_box_2.GetSelection()
parametres['tailleuc1'] = self.dial.spin_ctrl_1.GetValue()
parametres['tailleuc2'] = self.dial.spin_ctrl_2.GetValue()
parametres['nbcl_p1'] = self.dial.spin_nbcl.GetValue()
parametres['max_actives'] = self.dial.spin_max_actives.GetValue()
parametres['corpus'] = ''
+ parametres['svdmethod'] = self.dial.svdmethod[self.dial.choicesvd.GetSelection()]
parametres['pathout'] = self.pathout.dirout
- for val in parametres :
- print val, parametres[val]
+ parametres['mode.patate'] = self.dial.check_patate.GetValue()
DoConf(self.parent.ConfigPath['alceste']).makeoptions(['ALCESTE'], [parametres])
self.dial.Destroy()
+ print parametres
return parametres
else :
self.dial.Destroy()
return None
def printRscript(self) :
- RchdTxt(self.pathout, self.parent.RscriptsPath, self.parametres['mincl'], self.parametres['classif_mode'], nbt = self.parametres['nbcl_p1'] - 1, libsvdc = self.parent.pref.getboolean('iramuteq','libsvdc'), libsvdc_path = self.parent.pref.get('iramuteq','libsvdc_path'), R_max_mem = False)
+ RchdTxt(self.pathout, self.parent.RscriptsPath, self.parametres['mincl'], self.parametres['classif_mode'], nbt = self.parametres['nbcl_p1'] - 1, svdmethod = self.parametres['svdmethod'], libsvdc = self.parent.pref.getboolean('iramuteq','libsvdc'), libsvdc_path = self.parent.pref.get('iramuteq','libsvdc_path'), R_max_mem = False, mode_patate = self.parametres['mode.patate'])
return self.pathout['Rchdtxt']
def printRscript2(self) :
def print_graph_files(self) :
mess_afc = u"La position des points n'est peut être pas exacte"
- afc_graph_list = [[os.path.basename(self.pathout['AFC2DL_OUT']), u'Variables actives - coordonnées - 30 points par classes - facteurs 1 / 2\n%s' % mess_afc],
- [os.path.basename(self.pathout['AFC2DSL_OUT']), u'variables supplémentaires - coordonnées - 30 points par classes - facteurs 1 / 2\n%s' % mess_afc],
- [os.path.basename(self.pathout['AFC2DEL_OUT']), u'Variables illustratives - Coordonnées - 30 points par classes - facteur 1 / 2\n%s' % mess_afc],
+ afc_graph_list = [[os.path.basename(self.pathout['AFC2DL_OUT']), u'Variables actives - coordonnées - 30 points par classes - facteurs 1 / 2 - %s' % mess_afc],
+ [os.path.basename(self.pathout['AFC2DSL_OUT']), u'variables supplémentaires - coordonnées - 30 points par classes - facteurs 1 / 2 - %s' % mess_afc],
+ [os.path.basename(self.pathout['AFC2DEL_OUT']), u'Variables illustratives - Coordonnées - 30 points par classes - facteur 1 / 2 - %s' % mess_afc],
[os.path.basename(self.pathout['AFC2DCL_OUT']), u'Classes - Coordonnées - facteur 1 / 2']]
- #[os.path.basename(self.pathout['AFC2DCoul']), u'Variables actives - Corrélation - facteur 1 / 2'],
- #[os.path.basename(self.pathout['AFC2DCoulSup']), u'Variables supplémentaires - Corrélation - facteur 1 / 2'],
- #[os.path.basename(self.pathout['AFC2DCoulEt']), u'Variables illustratives - Corrélations - facteur 1 / 2'],
- #[os.path.basename(self.pathout['AFC2DCoulCl']), u'Classes - Corrélations - facteurs 1 / 2'],]
chd_graph_list = [[os.path.basename(self.pathout['dendro1']), u'dendrogramme à partir de chd1']]
if self.parametres['classif_mode'] == 0 :
chd_graph_list.append([os.path.basename(self.pathout['dendro2']), u'dendrogramme à partir de chd2'])
PrintRapport(self, self.corpus, self.parametres)
-keys = {'art_def' : 2,
- 'pre' : 2,
- 'adj_dem' : 2,
- 'ono' : 2,
- 'pro_per' : 2,
- 'ver_sup' : 2,
- 'adv' : 1,
- 'ver' : 1,
- 'adj_ind' : 2,
- 'adj_pos' : 2,
- 'aux' : 2,
- 'adj_int' : 2,
- 'pro_ind' : 2,
- 'adj' : 1,
- 'pro_dem' : 2,
- 'nom' : 1,
- 'art_ind' : 2,
- 'pro_pos' : 2,
- 'nom_sup' : 2,
- 'adv_sup' : 2,
- 'adj_sup' : 2,
- 'adj_num' : 2,
- 'pro_rel' : 2,
- 'con' : 2,
- 'num' : 2,
- 'nr' : 1,
- 'sw' : 2,
-}
-
-gramact = [k for k in keys if keys[k] == 1]
-gramsup = [k for k in keys if keys[k] == 2]
-
-#corpus = Corpus('', {'filename': '/home/pierre/workspace/iramuteq/dev/testcorpus.txt','formesdb':'formes.db', 'ucesdb': 'uces.db', 'corpusdb' : 'corpus.db', 'syscoding' : 'utf-8'})
-#corpus.read_corpus()
-#corpus.parse_active(gramact, gramsup)
-#Alceste(corpus).doanalyse()
+#keys = {'art_def' : 2,
+# 'pre' : 2,
+# 'adj_dem' : 2,
+# 'ono' : 2,
+# 'pro_per' : 2,
+# 'ver_sup' : 2,
+# 'adv' : 1,
+# 'ver' : 1,
+# 'adj_ind' : 2,
+# 'adj_pos' : 2,
+# 'aux' : 2,
+# 'adj_int' : 2,
+# 'pro_ind' : 2,
+# 'adj' : 1,
+# 'pro_dem' : 2,
+# 'nom' : 1,
+# 'art_ind' : 2,
+# 'pro_pos' : 2,
+# 'nom_sup' : 2,
+# 'adv_sup' : 2,
+# 'adj_sup' : 2,
+# 'adj_num' : 2,
+# 'pro_rel' : 2,
+# 'con' : 2,
+# 'num' : 2,
+# 'nr' : 1,
+# 'sw' : 2,
+#}
+#
+#gramact = [k for k in keys if keys[k] == 1]
+#gramsup = [k for k in keys if keys[k] == 2]