From 81594f689f1e191599d96a2a503fbb5529df69d7 Mon Sep 17 00:00:00 2001 From: Pierre Date: Mon, 22 Oct 2012 22:19:01 +0200 Subject: [PATCH] ... --- corpusNG.py | 17 ++++++++++++++++- functions.py | 4 +++- iracmd.py | 27 ++++++++++++++++++--------- openanalyse.py | 1 + 4 files changed, 38 insertions(+), 11 deletions(-) diff --git a/corpusNG.py b/corpusNG.py index 380b9a2..a950d0f 100644 --- a/corpusNG.py +++ b/corpusNG.py @@ -547,6 +547,21 @@ class Corpus : if len(l) > taille_limite : l = l[-taille_limite:] return l + + def find_segments_in_classe(self, list_uce, taille_segment, taille_limite): + d={} + for uce in self.getconcorde(list_uce) : + uce = uce[1].split() + d =self.count_from_list([' '.join(uce[i:i+taille_segment]) for i in range(len(uce)-(taille_segment - 1))], d) + l = [[d[val], val, taille_segment] for val in d if d[val] >= 3] + del(d) + l.sort() + if len(l) > taille_limite : + l = l[-taille_limite:] + return l + + + def make_ucecl_from_R(self, filein) : with open(filein, 'rU') as f : @@ -983,7 +998,7 @@ class BuildFromAlceste(BuildCorpus) : self.limitshow = 0 else : self.limitshow = self.last / 100000 - log.debug(`iduci`, `idpara`, `iduce`) + log.debug(' '.join([`iduci`,`idpara`,`iduce`])) if self.last > self.lim : self.backup_uce() self.last = 0 diff --git a/functions.py b/functions.py index bb7e906..0e5edfd 100644 --- a/functions.py +++ b/functions.py @@ -69,7 +69,7 @@ class History : def write(self) : sections = self.corpora.keys() + self.analyses.keys() - parametres = [self.corpora[key] for key in self.corpora.keys() if key != 'analyses'] + [self.analyses[key] for key in self.analyses.keys()] + parametres = [self.corpora[key] for key in self.corpora.keys()] + [self.analyses[key] for key in self.analyses.keys()] self.conf.makeoptions(sections, parametres) log.info('write history') @@ -161,6 +161,8 @@ class DoConf : self.conf.set(section, option, parametres[i][option].encode('utf8')) elif isinstance(parametres[i][option], wx.Colour) : self.conf.set(section, option, str(parametres[i][option])) + elif option == 'analyses' : + pass else : self.conf.set(section, option, `parametres[i][option]`) if outfile is None : diff --git a/iracmd.py b/iracmd.py index 244f41f..ad31bed 100644 --- a/iracmd.py +++ b/iracmd.py @@ -22,7 +22,6 @@ from textaslexico import Lexico from textstat import Stat import tempfile ###################################### -print '#######LOGGING TEST###########' import logging log = logging.getLogger('iramuteq') formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') @@ -54,8 +53,8 @@ class CmdLine : parser.add_option("-f", "--file", dest="filename", help="chemin du corpus", metavar="FILE", default=False) parser.add_option("-t", "--type", dest="type_analyse", help="type d'analyse", metavar="TYPE D'ANALYSE", default=False) - - parser.add_option("-c", "--conf", dest="configfile", help="chemin du fichier de configuration", metavar="CONF", default=None) + parser.add_option("-c", "--conf", dest="configfile", help="chemin du fichier de configuration pour l'analyse", metavar="CONF", default=None) + parser.add_option("-d", "--confcorp", dest="corpusconfigfile", help="chemin du fichier de configuration pour le corpus", metavar="CONF", default=None) parser.add_option("-e", "--enc", dest="encodage", help="encodage du corpus", metavar="ENC", default=locale.getpreferredencoding()) parser.add_option("-l", "--lang", dest="language", help="langue du corpus", metavar="LANG", default='french') parser.add_option("-r", "--read", dest="read", help="lire un corpus", metavar="READ", default = False) @@ -66,9 +65,14 @@ class CmdLine : options.type_analyse if options.configfile is not None: config = DoConf(os.path.abspath(options.configfile)).getoptions() - elif options.type_analyse : + elif options.filename and options.type_analyse : config = DoConf(self.ConfigPath[options.type_analyse]).getoptions() #self.ConfigPath[options.type_analyse] = os.path.abspath(options.configfile) + elif options.read : + pass + else : + print 'rien a faire' + return self.TEMPDIR = tempfile.mkdtemp('iramuteq') self.RscriptsPath = ConstructRscriptsPath(AppliPath) self.PathPath = ConfigParser() @@ -78,7 +82,7 @@ class CmdLine : self.pref.read(self.ConfigPath['preferences']) self.history = History(self.ConfigPath['history']) #self.history.write() - if options.filename or options.read or options.build: + if options.filename or options.read :#or options.build: self.corpus_encodage = options.encodage self.corpus_lang = options.language @@ -88,13 +92,18 @@ class CmdLine : self.expressions = ReadDicoAsDico(self.DictPath.get(options.language + '_exp', 'french_exp')) if options.filename : self.filename = os.path.abspath(options.filename) - corpus_parametres = DoConf('/home/pierre/.iramuteq/corpus.cfg').getoptions('corpus') - corpus_parametres['filename'] = self.filename + if options.corpusconfigfile is not None : + corpus_parametres = DoConf(options.corpusconfigfile).getoptions('corpus') + else : + corpus_parametres = DoConf(self.ConfigPath['corpus']).getoptions() + dire, corpus_parametres['filename'] = os.path.split(self.filename) + corpus_parametres['originalpath'] = self.filename corpus_parametres['encoding'] = self.corpus_encodage - corpus_parametres['syscoding'] = 'utf8' - corpus_parametres['pathout'] = PathOut(options.filename, 'corpus').dirout + corpus_parametres['syscoding'] = locale.getpreferredencoding() + corpus_parametres['pathout'] = PathOut(self.filename, 'corpus').mkdirout() corpus = BuildFromAlceste(self.filename, corpus_parametres, self.lexique, self.expressions).corpus self.history.add(corpus.parametres) + corpus = copycorpus(corpus) #with codecs.open(self.filename, 'r', self.corpus_encodage) as f: elif options.read : diff --git a/openanalyse.py b/openanalyse.py index 3787fe6..cc50ebe 100644 --- a/openanalyse.py +++ b/openanalyse.py @@ -59,6 +59,7 @@ class OpenAnalyse(): self.parent.tree.OnItemAppend(self.conf) if self.conf['uuid'] in self.parent.history.openedcorpus : log.info('corpus is already opened') + self.doopen(self.parent.history.openedcorpus[self.conf['uuid']]) else : corpus = Corpus(self, parametres = self.conf, read = self.parent.history.history[self.conf['uuid']]['ira']) self.parent.history.openedcorpus[self.conf['uuid']] = corpus -- 2.7.4