2 # -*- coding: utf-8 -*-
3 #Author: Pierre Ratinaud
4 #Copyright (c) 2010 Pierre Ratinaud
8 from optparse import OptionParser
# Use the locale's preferred encoding as the interpreter-wide default string encoding.
# NOTE(review): sys.setdefaultencoding is a Python 2 call that is normally only reachable
# after reload(sys); presumably that happens in lines not shown here - confirm.
13 sys.setdefaultencoding(locale.getpreferredencoding())
14 from chemins import ConstructConfigPath, ConstructDicoPath, ConstructRscriptsPath
15 from functions import ReadLexique, DoConf, History, ReadDicoAsDico
# Wildcard import: ConfigParser() and RawConfigParser(), both instantiated in __init__, come from here.
16 from ConfigParser import *
17 #######################################
18 #from textchdalc import AnalyseAlceste
19 #from textdist import PamTxt
20 #from textafcuci import AfcUci
21 from analysetxt import Alceste, gramact, gramsup
22 from corpus import Corpus, copycorpus
23 from textaslexico import Lexico
24 from textstat import Stat
25 from tools import SubCorpus
27 ######################################
# Logging: a logger named 'iramuteq' set to DEBUG, plus a stream handler whose records
# are formatted as "time - logger name - level - message".
# NOTE(review): no line visible here attaches `ch` to `log`; presumably an addHandler
# call sits in a line that is not shown - confirm.
29 log = logging.getLogger('iramuteq')
30 formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
31 ch = logging.StreamHandler()
32 ch.setFormatter(formatter)
34 log.setLevel(logging.DEBUG)
35 #######################################
39 #cmd = iracmd.CmdLine(args=['-f','/home/pierre/workspace/iramuteq/corpus/lru2.txt','-t','alceste'])
# AppliPath: absolute directory of the script named in sys.argv[0], with symlinks
# resolved by os.path.realpath.
41 AppliPath = os.path.abspath(os.path.dirname(os.path.realpath(sys.argv[0])))
# user_home: taken from the HOME environment variable when it is set, otherwise from
# HOMEPATH (the else line separating the two assignments is not visible here).
42 if os.getenv('HOME') != None:
43 user_home = os.getenv('HOME')
45 user_home = os.getenv('HOMEPATH')
# UserConfigPath: absolute path of the '.iramuteq' directory inside user_home.
46 UserConfigPath = os.path.abspath(os.path.join(user_home, '.iramuteq'))
# Set up the command-line application state, parse the options, obtain a corpus
# (built from a text file or read back from a corpus configuration) and start the
# analysis selected with -t.
#
# args: optional argument list handed to the option parser. Both parse_args() and
#       parse_args(args) appear below; presumably the first is used when args is None
#       (the branch lines are not visible here) - confirm.
#
# NOTE(review): several control-flow lines of this method (try:, else:, the body of
# the --build branch) are not visible in this extract; the comments below describe
# only the statements that are.
49 def __init__(self, args = None) :
# Paths produced by the chemins helpers. ConfigPath is indexed by name further down
# ('path', 'preferences', 'corpus' and the analysis type).
50 self.DictPath = ConstructDicoPath(AppliPath)
51 self.ConfigPath = ConstructConfigPath(UserConfigPath)
52 self.syscoding = sys.getdefaultencoding()
# Fresh temporary directory; the string is passed as mkdtemp's first argument (suffix).
53 self.TEMPDIR = tempfile.mkdtemp('iramuteq')
54 self.RscriptsPath = ConstructRscriptsPath(AppliPath)
# Read the 'path' configuration file and take option 'rpath' from section 'PATHS'.
55 self.PathPath = ConfigParser()
56 self.PathPath.read(self.ConfigPath['path'])
57 self.RPath = self.PathPath.get('PATHS', 'rpath')
# Preferences are loaded with RawConfigParser.
58 self.pref = RawConfigParser()
59 self.pref.read(self.ConfigPath['preferences'])
# History is given the path of history.db inside UserConfigPath.
60 self.history = History(os.path.join(UserConfigPath, 'history.db'))
# Command-line options. The help texts are user-facing strings and are left in French.
62 parser = OptionParser()
# -f / -t: corpus file and analysis type; both default to False when absent.
64 parser.add_option("-f", "--file", dest="filename", help="chemin du corpus", metavar="FILE", default=False)
65 parser.add_option("-t", "--type", dest="type_analyse", help="type d'analyse", metavar="TYPE D'ANALYSE", default=False)
# -c / -d: optional configuration files for the analysis and for the corpus (default None).
66 parser.add_option("-c", "--conf", dest="configfile", help="chemin du fichier de configuration pour l'analyse", metavar="CONF", default=None)
67 parser.add_option("-d", "--confcorp", dest="corpusconfigfile", help="chemin du fichier de configuration pour le corpus", metavar="CONF", default=None)
# -e defaults to the locale's preferred encoding, -l to 'french'.
68 parser.add_option("-e", "--enc", dest="encodage", help="encodage du corpus", metavar="ENC", default=locale.getpreferredencoding())
69 parser.add_option("-l", "--lang", dest="language", help="langue du corpus", metavar="LANG", default='french')
# -r takes a value (it is used below as a path handed to DoConf and Corpus); -b is a boolean flag.
70 parser.add_option("-r", "--read", dest="read", help="lire un corpus", metavar="READ", default = False)
71 parser.add_option("-b", "--build", action="store_true", dest="build", help = "construire un corpus", default = False)
# Parse either the process arguments or the explicit `args` list.
74 (options, args) = parser.parse_args()
76 (options, args) = parser.parse_args(args)
# Choose the analysis configuration: an explicit -c file is tested first; otherwise
# the configuration registered in ConfigPath under the analysis type is loaded.
# NOTE(review): `config` is only bound by the branches visible here; it is used later
# in the 'alceste' case.
80 if options.configfile is not None:
81 config = DoConf(os.path.abspath(options.configfile)).getoptions()
82 elif options.filename and options.type_analyse :
83 config = DoConf(self.ConfigPath[options.type_analyse]).getoptions()
84 elif options.read and options.type_analyse :
85 config = DoConf(self.ConfigPath[options.type_analyse]).getoptions()
# The body of this branch is not visible in this extract.
88 elif options.filename and options.build :
# A corpus is needed: remember its encoding and language, then load the lexicon and
# the expressions dictionary for that language.
94 if options.filename or options.read :
95 self.corpus_encodage = options.encodage
96 self.corpus_lang = options.language
99 ReadLexique(self, lang = options.language)
# NOTE(review): the fallback given to .get() is the literal string 'french_exp', not
# the value stored under that key - confirm this is intended.
100 self.expressions = ReadDicoAsDico(self.DictPath.get(options.language + '_exp', 'french_exp'))
# Case 1: build the corpus from a text file.
101 if options.filename :
102 self.filename = os.path.abspath(options.filename)
# Corpus parameters: from the -d file (read with getoptions('corpus')) when given,
# otherwise from the default 'corpus' configuration (read with getoptions()).
103 if options.corpusconfigfile is not None :
104 corpus_parametres = DoConf(options.corpusconfigfile).getoptions('corpus')
106 corpus_parametres = DoConf(self.ConfigPath['corpus']).getoptions()
# os.path.split gives (directory, file name); `dire` is not used in the visible lines.
107 dire, corpus_parametres['filename'] = os.path.split(self.filename)
108 corpus_parametres['originalpath'] = self.filename
109 corpus_parametres['encoding'] = self.corpus_encodage
110 corpus_parametres['syscoding'] = locale.getpreferredencoding()
# 'pathout' receives whatever PathOut(...).mkdirout() returns.
111 corpus_parametres['pathout'] = PathOut(self.filename, 'corpus').mkdirout()
# NOTE(review): BuildFromAlceste and PathOut are not imported in the visible lines -
# presumably imported in lines not shown; confirm.
113 corpus = BuildFromAlceste(self.filename, corpus_parametres, self.lexique, self.expressions).corpus
# Python 2 except syntax; a failure is only logged, at info level.
114 except Exception, txt:
115 log.info('probleme lors de la construction: %s' %txt)
# Record the corpus parameters in the history, then replace corpus with copycorpus(corpus).
119 self.history.add(corpus.parametres)
120 corpus = copycorpus(corpus)
# Case 2 (presumably the else of the test on options.filename - the line is not
# visible): load a corpus from the configuration file given with -r.
122 corpus = Corpus(self, parametres = DoConf(options.read).getoptions('corpus'), read = options.read)
# The corpus output path is the directory that contains the -r file.
123 corpus.parametres['pathout'] = os.path.dirname(os.path.abspath(options.read))
# Parent of that directory; this local is not used in the visible lines.
124 pathout = os.path.dirname(os.path.dirname(os.path.abspath(options.read)))
127 if corpus is not None :
129 #corpus = SubCorpus(self, corpus, [0,1,2,3,4,5,6,7])
# Call parse_active on the corpus with the gramact / gramsup values imported from analysetxt.
132 corpus.parse_active(gramact, gramsup)
133 #print corpus.getlemconcorde('de').fetchall()
134 # log.warning('ATTENTION gethapaxuces')
135 # MakeUciStat(corpus)
137 #corpus.gethapaxuces()
138 #ucisize = corpus.getucisize()
139 #ucisize = [`val` for val in ucisize]
140 #uciet = [uci.etoiles[1] for uci in corpus.ucis]
141 #res = zip(uciet, ucisize)
142 #with open('ucisize.csv', 'w') as f :
143 # f.write('\n'.join(['\t'.join(val) for val in res]))
144 # self.content = f.read()
145 #self.content = self.content.replace('\r','')
# Dispatch on the analysis type; the object created is kept in self.Text.
146 if options.type_analyse == 'alceste' :
147 log.debug('ATTENTION : ANALYSE NG')
148 #print corpus.make_etoiles()
150 #corpus.read_corpus()
151 #corpus.parse_active(gramact, gramsup)
# Force the 'type' entry of the loaded configuration before handing it to Alceste.
152 config['type'] = 'alceste'
153 self.Text = Alceste(self, corpus, parametres = config)
154 # self.Text = AnalyseAlceste(self, cmd = True, big = True)
155 #self.Text = AnalyseAlceste(self, cmd = True)
# NOTE(review): the imports of PamTxt and AfcUci are commented out at the top of the
# file, so these two branches presumably raise NameError unless the names are defined
# in lines not shown - confirm.
156 elif options.type_analyse == 'pam' :
157 self.Text = PamTxt(self, cmd = True)
158 elif options.type_analyse == 'afcuci' :
159 self.Text = AfcUci(self, cmd = True)
# Stat receives its settings through `parametres`, Lexico through `config`.
160 elif options.type_analyse == 'stat' :
161 self.Text = Stat(self, corpus, parametres = {'type':'stat'})
162 elif options.type_analyse == 'spec' :
163 self.Text = Lexico(self, corpus, config = {'type' : 'spec'})
164 #print self.Text.corpus.hours, 'h', self.Text.corpus.minutes,'min', self.Text.corpus.seconds, 's'
165 # self.Text.corpus.make_colored_corpus('colored.html')
167 if __name__ == '__main__':