X-Git-Url: http://iramuteq.org/git?p=iramuteq;a=blobdiff_plain;f=iracmd.py;h=aefb09daa3a983cbdda233ee393069d5a6e4e1a5;hp=e0fd3dab5907ddd429c9acbb45ca58d1fdcf5f89;hb=3d1a621b481e251f8a086af586c7eb0bb87b0004;hpb=a503f041dc4947ee21c1d353ddd05ddb13a5e322 diff --git a/iracmd.py b/iracmd.py index e0fd3da..aefb09d 100644 --- a/iracmd.py +++ b/iracmd.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- #Author: Pierre Ratinaud #Copyright (c) 2010 Pierre Ratinaud -#Lisense: GNU/GPL +#License: GNU/GPL import os from optparse import OptionParser @@ -11,15 +11,19 @@ reload(sys) import locale import codecs sys.setdefaultencoding(locale.getpreferredencoding()) -from chemins import ConstructConfigPath, ConstructDicoPath, ConstructRscriptsPath -from functions import ReadLexique, DoConf, History +from chemins import ConstructConfigPath, ConstructDicoPath, ConstructRscriptsPath, PathOut +from functions import ReadLexique, DoConf, History, ReadDicoAsDico from ConfigParser import * ####################################### #from textchdalc import AnalyseAlceste #from textdist import PamTxt #from textafcuci import AfcUci +from textreinert import Reinert +from corpus import Corpus, copycorpus, BuildFromAlceste, BuildSubCorpus from textaslexico import Lexico from textstat import Stat +from tools import SubCorpus +from textsimi import SimiTxt import tempfile ###################################### import logging @@ -31,9 +35,7 @@ log.addHandler(ch) log.setLevel(logging.DEBUG) ####################################### -log.debug('----------TEST corpusNG-----------------') -from analysetxt import Alceste, gramact, gramsup -from corpusNG import * + #cmd = iracmd.CmdLine(args=['-f','/home/pierre/workspace/iramuteq/corpus/lru2.txt','-t','alceste']) @@ -45,7 +47,8 @@ else: UserConfigPath = os.path.abspath(os.path.join(user_home, '.iramuteq')) class CmdLine : - def __init__(self, args = None) : + def __init__(self, args = None, AppliPath = None, parametres = None) : + AppliPath = AppliPath self.DictPath = ConstructDicoPath(AppliPath) self.ConfigPath = ConstructConfigPath(UserConfigPath) self.syscoding = sys.getdefaultencoding() @@ -56,7 +59,9 @@ class CmdLine : self.RPath = self.PathPath.get('PATHS', 'rpath') self.pref = RawConfigParser() self.pref.read(self.ConfigPath['preferences']) - self.history = History(self.ConfigPath['history']) + self.history = History(os.path.join(UserConfigPath, 'history.db')) + print 'CLEAN HISTORY' +# self.history.clean() parser = OptionParser() @@ -80,7 +85,6 @@ class CmdLine : config = DoConf(os.path.abspath(options.configfile)).getoptions() elif options.filename and options.type_analyse : config = DoConf(self.ConfigPath[options.type_analyse]).getoptions() - #self.ConfigPath[options.type_analyse] = os.path.abspath(options.configfile) elif options.read and options.type_analyse : config = DoConf(self.ConfigPath[options.type_analyse]).getoptions() elif options.read : @@ -91,15 +95,17 @@ class CmdLine : print 'rien a faire' return - #self.history.write() - if options.filename or options.read :#or options.build: + if options.filename or options.read : self.corpus_encodage = options.encodage self.corpus_lang = options.language - + self.keys = DoConf(self.ConfigPath['key']).getoptions() + - #print 'PAS DE CODECS POUR CABLE' ReadLexique(self, lang = options.language) self.expressions = ReadDicoAsDico(self.DictPath.get(options.language + '_exp', 'french_exp')) + gramact = [k for k in self.keys if self.keys[k] == 1] + gramsup = [k for k in self.keys if self.keys[k] == 2] + if options.filename : self.filename = os.path.abspath(options.filename) if options.corpusconfigfile is not None : @@ -120,28 +126,39 @@ class CmdLine : else : self.history.add(corpus.parametres) corpus = copycorpus(corpus) - - #with codecs.open(self.filename, 'r', self.corpus_encodage) as f: elif options.read : corpus = Corpus(self, parametres = DoConf(options.read).getoptions('corpus'), read = options.read) - corpus.parametres['path'] = os.path.abspath(options.read) + corpus.parametres['pathout'] = os.path.dirname(os.path.abspath(options.read)) pathout = os.path.dirname(os.path.dirname(os.path.abspath(options.read))) self.corpus = corpus + print self.corpus + corpus.parametres['pathout'] = '/home/pierre/fac/etudiant/verdier/corpus20_corpus_2/test2' + BuildSubCorpus(corpus, parametres = {'fromthem' : True, 'theme' : [u'-*thématique_idéal']}) if corpus is not None : corpus.conn_all() + #corpus = SubCorpus(self, corpus, [0,1,2,3,4,5,6,7]) + #corpus.conn_all() corpus.make_lems() corpus.parse_active(gramact, gramsup) + #print corpus.getlemconcorde('de').fetchall() # log.warning('ATTENTION gethapaxuces') # MakeUciStat(corpus) -# qfqsdf #corpus.gethapaxuces() - #ucisize = corpus.getucisize() - #ucisize = [`val` for val in ucisize] - #uciet = [uci.etoiles[1] for uci in corpus.ucis] - #res = zip(uciet, ucisize) - #with open('ucisize.csv', 'w') as f : - # f.write('\n'.join(['\t'.join(val) for val in res])) + # ucisize = corpus.getucisize() + # ucisize = [`val` for val in ucisize] + #uciet = [uci.etoiles[1:] for uci in corpus.ucis] + #uceet = [corpus.ucis[uce.uci].etoiles[1:] for uci in corpus.ucis for uce in uci.uces] + #print uceet[0:10] + #for line in uceet : + # print '\t'.join(line) + #res = zip(uciet, ucisize) + # res = [uciet[i] + [ucisize[i]] for i, val in enumerate(uciet)] + # print res[0:10] + #ucesize = corpus.getucesize() + #print ucesize[0:40] + #with open('sentences_size.csv', 'w') as f : + # f.write('\n'.join([`val` for val in ucesize])) # self.content = f.read() #self.content = self.content.replace('\r','') if options.type_analyse == 'alceste' : @@ -151,7 +168,7 @@ class CmdLine : #corpus.read_corpus() #corpus.parse_active(gramact, gramsup) config['type'] = 'alceste' - self.Text = Alceste(self, corpus, parametres = config) + self.Text = Reinert(self, corpus, parametres = config) # self.Text = AnalyseAlceste(self, cmd = True, big = True) #self.Text = AnalyseAlceste(self, cmd = True) elif options.type_analyse == 'pam' : @@ -159,13 +176,15 @@ class CmdLine : elif options.type_analyse == 'afcuci' : self.Text = AfcUci(self, cmd = True) elif options.type_analyse == 'stat' : - self.Text = Stat(self, corpus, parametres = config) + self.Text = Stat(self, corpus, parametres = {'type':'stat'}) elif options.type_analyse == 'spec' : self.Text = Lexico(self, corpus, config = {'type' : 'spec'}) + elif options.type_analyse == 'simitxt' : + self.Text = SimiTxt(self, corpus, parametres = parametres) #print self.Text.corpus.hours, 'h', self.Text.corpus.minutes,'min', self.Text.corpus.seconds, 's' # self.Text.corpus.make_colored_corpus('colored.html') if __name__ == '__main__': __name__ = 'Main' - CmdLine() + CmdLine(AppliPath = AppliPath)