X-Git-Url: http://iramuteq.org/git?p=iramuteq;a=blobdiff_plain;f=iracmd.py;h=e0fd3dab5907ddd429c9acbb45ca58d1fdcf5f89;hp=deba7cf4319c8504428c62dcd6647087b2df3d23;hb=655d1e1ab00740c37712f476dc89dff7965c2161;hpb=22cd27b2bbe9ab1ffa7ef06fa764b5147ae17dad diff --git a/iracmd.py b/iracmd.py index deba7cf..e0fd3da 100644 --- a/iracmd.py +++ b/iracmd.py @@ -22,7 +22,6 @@ from textaslexico import Lexico from textstat import Stat import tempfile ###################################### -print '#######LOGGING TEST###########' import logging log = logging.getLogger('iramuteq') formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') @@ -36,7 +35,7 @@ log.debug('----------TEST corpusNG-----------------') from analysetxt import Alceste, gramact, gramsup from corpusNG import * - +#cmd = iracmd.CmdLine(args=['-f','/home/pierre/workspace/iramuteq/corpus/lru2.txt','-t','alceste']) AppliPath = os.path.abspath(os.path.dirname(os.path.realpath(sys.argv[0]))) if os.getenv('HOME') != None: @@ -46,35 +45,54 @@ else: UserConfigPath = os.path.abspath(os.path.join(user_home, '.iramuteq')) class CmdLine : - def __init__(self) : + def __init__(self, args = None) : self.DictPath = ConstructDicoPath(AppliPath) self.ConfigPath = ConstructConfigPath(UserConfigPath) self.syscoding = sys.getdefaultencoding() + self.TEMPDIR = tempfile.mkdtemp('iramuteq') + self.RscriptsPath = ConstructRscriptsPath(AppliPath) + self.PathPath = ConfigParser() + self.PathPath.read(self.ConfigPath['path']) + self.RPath = self.PathPath.get('PATHS', 'rpath') + self.pref = RawConfigParser() + self.pref.read(self.ConfigPath['preferences']) + self.history = History(self.ConfigPath['history']) + parser = OptionParser() parser.add_option("-f", "--file", dest="filename", help="chemin du corpus", metavar="FILE", default=False) - parser.add_option("-t", "--type", dest="type_analyse", help="type d'analyse", metavar="TYPE D'ANALYSE", default='False') - - parser.add_option("-c", "--conf", dest="configfile", help="chemin du fichier de configuration", metavar="CONF", default=False) + parser.add_option("-t", "--type", dest="type_analyse", help="type d'analyse", metavar="TYPE D'ANALYSE", default=False) + parser.add_option("-c", "--conf", dest="configfile", help="chemin du fichier de configuration pour l'analyse", metavar="CONF", default=None) + parser.add_option("-d", "--confcorp", dest="corpusconfigfile", help="chemin du fichier de configuration pour le corpus", metavar="CONF", default=None) parser.add_option("-e", "--enc", dest="encodage", help="encodage du corpus", metavar="ENC", default=locale.getpreferredencoding()) parser.add_option("-l", "--lang", dest="language", help="langue du corpus", metavar="LANG", default='french') parser.add_option("-r", "--read", dest="read", help="lire un corpus", metavar="READ", default = False) + parser.add_option("-b", "--build", action="store_true", dest="build", help = "construire un corpus", default = False) - (options, args) = parser.parse_args() + if args is None : + (options, args) = parser.parse_args() + else : + (options, args) = parser.parse_args(args) print args print options - if options.configfile : - self.ConfigPath[options.type_analyse] = os.path.abspath(options.configfile) - self.TEMPDIR = tempfile.mkdtemp('iramuteq') - self.RscriptsPath = ConstructRscriptsPath(AppliPath) - self.PathPath = ConfigParser() - self.PathPath.read(self.ConfigPath['path']) - self.RPath = self.PathPath.get('PATHS', 'rpath') - self.pref = RawConfigParser() - self.pref.read(self.ConfigPath['preferences']) - self.history = History(self.ConfigPath['history']) + options.type_analyse + if options.configfile is not None: + config = DoConf(os.path.abspath(options.configfile)).getoptions() + elif options.filename and options.type_analyse : + config = DoConf(self.ConfigPath[options.type_analyse]).getoptions() + #self.ConfigPath[options.type_analyse] = os.path.abspath(options.configfile) + elif options.read and options.type_analyse : + config = DoConf(self.ConfigPath[options.type_analyse]).getoptions() + elif options.read : + pass + elif options.filename and options.build : + pass + else : + print 'rien a faire' + return + #self.history.write() - if options.filename or options.read or options.build: + if options.filename or options.read :#or options.build: self.corpus_encodage = options.encodage self.corpus_lang = options.language @@ -84,42 +102,66 @@ class CmdLine : self.expressions = ReadDicoAsDico(self.DictPath.get(options.language + '_exp', 'french_exp')) if options.filename : self.filename = os.path.abspath(options.filename) - corpus_parametres = DoConf('/home/pierre/.iramuteq/corpus.cfg').getoptions('corpus') - corpus_parametres['filename'] = self.filename + if options.corpusconfigfile is not None : + corpus_parametres = DoConf(options.corpusconfigfile).getoptions('corpus') + else : + corpus_parametres = DoConf(self.ConfigPath['corpus']).getoptions() + dire, corpus_parametres['filename'] = os.path.split(self.filename) + corpus_parametres['originalpath'] = self.filename corpus_parametres['encoding'] = self.corpus_encodage - corpus_parametres['syscoding'] = 'utf8' - corpus_parametres['pathout'] = PathOut(options.filename, 'corpus').dirout - corpus = BuildFromAlceste(self.filename, corpus_parametres, self.lexique, self.expressions).corpus - self.history.add(corpus.parametres) + corpus_parametres['syscoding'] = locale.getpreferredencoding() + corpus_parametres['pathout'] = PathOut(self.filename, 'corpus').mkdirout() + try : + corpus = BuildFromAlceste(self.filename, corpus_parametres, self.lexique, self.expressions).corpus + except Exception, txt: + log.info('probleme lors de la construction: %s' %txt) + corpus = None + raise + else : + self.history.add(corpus.parametres) + corpus = copycorpus(corpus) #with codecs.open(self.filename, 'r', self.corpus_encodage) as f: elif options.read : corpus = Corpus(self, parametres = DoConf(options.read).getoptions('corpus'), read = options.read) corpus.parametres['path'] = os.path.abspath(options.read) pathout = os.path.dirname(os.path.dirname(os.path.abspath(options.read))) + self.corpus = corpus - - corpus.conn_all() - corpus.parse_active(gramact, gramsup) + if corpus is not None : + corpus.conn_all() + corpus.make_lems() + corpus.parse_active(gramact, gramsup) +# log.warning('ATTENTION gethapaxuces') +# MakeUciStat(corpus) +# qfqsdf + #corpus.gethapaxuces() + #ucisize = corpus.getucisize() + #ucisize = [`val` for val in ucisize] + #uciet = [uci.etoiles[1] for uci in corpus.ucis] + #res = zip(uciet, ucisize) + #with open('ucisize.csv', 'w') as f : + # f.write('\n'.join(['\t'.join(val) for val in res])) # self.content = f.read() #self.content = self.content.replace('\r','') - if options.type_analyse == 'alceste' : - log.debug('ATTENTION : ANALYSE NG') - #print corpus.make_etoiles() - #zerzre - #corpus.read_corpus() - #corpus.parse_active(gramact, gramsup) - Alceste(self, corpus) - # self.Text = AnalyseAlceste(self, cmd = True, big = True) - #self.Text = AnalyseAlceste(self, cmd = True) - elif options.type_analyse == 'pam' : - self.Text = PamTxt(self, cmd = True) - elif options.type_analyse == 'afcuci' : - self.Text = AfcUci(self, cmd = True) - elif options.type_analyse == 'stat' : - self.Text = Stat(self, corpus, config = {'type' : 'stat'}) - elif options.type_analyse == 'spec' : - self.Text = Lexico(self, corpus, config = {'type' : 'spec'}) + if options.type_analyse == 'alceste' : + log.debug('ATTENTION : ANALYSE NG') + #print corpus.make_etoiles() + #zerzre + #corpus.read_corpus() + #corpus.parse_active(gramact, gramsup) + config['type'] = 'alceste' + self.Text = Alceste(self, corpus, parametres = config) + # self.Text = AnalyseAlceste(self, cmd = True, big = True) + #self.Text = AnalyseAlceste(self, cmd = True) + elif options.type_analyse == 'pam' : + self.Text = PamTxt(self, cmd = True) + elif options.type_analyse == 'afcuci' : + self.Text = AfcUci(self, cmd = True) + elif options.type_analyse == 'stat' : + self.Text = Stat(self, corpus, parametres = config) + elif options.type_analyse == 'spec' : + self.Text = Lexico(self, corpus, config = {'type' : 'spec'}) #print self.Text.corpus.hours, 'h', self.Text.corpus.minutes,'min', self.Text.corpus.seconds, 's' # self.Text.corpus.make_colored_corpus('colored.html')