2 # -*- coding: utf-8 -*-
3 #Author: Pierre Ratinaud
4 #Copyright (c) 2010 Pierre Ratinaud
8 from optparse import OptionParser
13 sys.setdefaultencoding(locale.getpreferredencoding())
14 from chemins import ConstructConfigPath, ConstructDicoPath, ConstructRscriptsPath
15 from functions import ReadLexique, DoConf, History, ReadDicoAsDico
16 from ConfigParser import *
17 #######################################
18 #from textchdalc import AnalyseAlceste
19 #from textdist import PamTxt
20 #from textafcuci import AfcUci
21 from analysetxt import Alceste
22 from corpus import Corpus, copycorpus
23 from textaslexico import Lexico
24 from textstat import Stat
25 from tools import SubCorpus
27 ######################################
# Module-wide logger for the command-line front end.  Records sent to the
# 'iramuteq' logger are written by a StreamHandler (which defaults to
# sys.stderr) with a timestamp, the logger name and the level.
log = logging.getLogger('iramuteq')
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
ch = logging.StreamHandler()
ch.setFormatter(formatter)
# Attach the handler: without this call the handler and its formatter are
# configured but never used.  Logger.addHandler() ignores a handler that is
# already attached, so executing this more than once is harmless.
log.addHandler(ch)
log.setLevel(logging.DEBUG)
35 #######################################
39 #cmd = iracmd.CmdLine(args=['-f','/home/pierre/workspace/iramuteq/corpus/lru2.txt','-t','alceste'])
# Directory containing the script being run (symlinks resolved).
AppliPath = os.path.abspath(os.path.dirname(os.path.realpath(sys.argv[0])))
# Home directory of the current user: HOME first, then HOMEPATH, and finally
# whatever os.path.expanduser() can work out, so that os.path.join() below
# never receives None.
user_home = os.getenv('HOME')
if user_home is None:
    user_home = os.getenv('HOMEPATH')
if user_home is None:
    user_home = os.path.expanduser('~')
# Per-user configuration directory of the application.
UserConfigPath = os.path.abspath(os.path.join(user_home, '.iramuteq'))
# Initialise the command-line application object: resolve resource paths, read
# the configuration files, parse the command-line options, build or read a
# corpus and run the requested analysis on it.
#   args      -- argument list given to parser.parse_args(args); parse_args()
#                is also called with no argument below (presumably when args
#                is None -- the selecting test is not visible here).
#   AppliPath -- application directory handed to ConstructDicoPath() and
#                ConstructRscriptsPath().
# NOTE(review): the line numbers embedded in this listing skip values
# (e.g. 49 -> 51, 75 -> 77, 116 -> 118), so statements such as `else:` and
# `try:` and the enclosing class header are presumably missing from this
# view -- confirm against the complete file before changing any logic.
49 def __init__(self, args = None, AppliPath = None) :
# Resource locations, system encoding and a fresh temporary directory
# ('iramuteq' is mkdtemp's first positional argument, i.e. the suffix).
51 self.DictPath = ConstructDicoPath(AppliPath)
52 self.ConfigPath = ConstructConfigPath(UserConfigPath)
53 self.syscoding = sys.getdefaultencoding()
54 self.TEMPDIR = tempfile.mkdtemp('iramuteq')
55 self.RscriptsPath = ConstructRscriptsPath(AppliPath)
# Read the 'rpath' option from the [PATHS] section of the 'path' config file.
56 self.PathPath = ConfigParser()
57 self.PathPath.read(self.ConfigPath['path'])
58 self.RPath = self.PathPath.get('PATHS', 'rpath')
# Preferences are loaded with RawConfigParser from the 'preferences' file.
59 self.pref = RawConfigParser()
60 self.pref.read(self.ConfigPath['preferences'])
# History object backed by history.db inside the user configuration directory.
61 self.history = History(os.path.join(UserConfigPath, 'history.db'))
# Command-line options.  All of them store a value except -b/--build, which
# is a boolean flag; -e defaults to the locale's preferred encoding.
63 parser = OptionParser()
65 parser.add_option("-f", "--file", dest="filename", help="chemin du corpus", metavar="FILE", default=False)
66 parser.add_option("-t", "--type", dest="type_analyse", help="type d'analyse", metavar="TYPE D'ANALYSE", default=False)
67 parser.add_option("-c", "--conf", dest="configfile", help="chemin du fichier de configuration pour l'analyse", metavar="CONF", default=None)
68 parser.add_option("-d", "--confcorp", dest="corpusconfigfile", help="chemin du fichier de configuration pour le corpus", metavar="CONF", default=None)
69 parser.add_option("-e", "--enc", dest="encodage", help="encodage du corpus", metavar="ENC", default=locale.getpreferredencoding())
70 parser.add_option("-l", "--lang", dest="language", help="langue du corpus", metavar="LANG", default='french')
71 parser.add_option("-r", "--read", dest="read", help="lire un corpus", metavar="READ", default = False)
72 parser.add_option("-b", "--build", action="store_true", dest="build", help = "construire un corpus", default = False)
# Parse either sys.argv (no argument) or the explicit `args` list.
75 (options, args) = parser.parse_args()
77 (options, args) = parser.parse_args(args)
# Analysis configuration: an explicit -c file takes priority; otherwise the
# default configuration registered in ConfigPath for the analysis type is used.
# NOTE(review): the body of the last `elif` (and any final `else`) is not
# visible here; `config` is only assigned in the first three branches.
81 if options.configfile is not None:
82 config = DoConf(os.path.abspath(options.configfile)).getoptions()
83 elif options.filename and options.type_analyse :
84 config = DoConf(self.ConfigPath[options.type_analyse]).getoptions()
85 elif options.read and options.type_analyse :
86 config = DoConf(self.ConfigPath[options.type_analyse]).getoptions()
89 elif options.filename and options.build :
# A corpus is to be built (-f) or read (-r): record its encoding and language,
# then load the 'key' options, the lexicon and the expressions dictionary.
95 if options.filename or options.read :
96 self.corpus_encodage = options.encodage
97 self.corpus_lang = options.language
98 self.keys = DoConf(self.ConfigPath['key']).getoptions()
101 ReadLexique(self, lang = options.language)
# NOTE(review): if DictPath is a plain dict, the fallback of .get() here is the
# literal string 'french_exp', not self.DictPath['french_exp'] -- confirm intent.
102 self.expressions = ReadDicoAsDico(self.DictPath.get(options.language + '_exp', 'french_exp'))
# Keys whose value is 1 go to gramact and keys whose value is 2 go to gramsup;
# both lists are passed to corpus.parse_active() below.
103 gramact = [k for k in self.keys if self.keys[k] == 1]
104 gramsup = [k for k in self.keys if self.keys[k] == 2]
# Build branch: assemble the corpus parameters (from -d if given, otherwise
# from the default 'corpus' configuration) and build the corpus from the file.
106 if options.filename :
107 self.filename = os.path.abspath(options.filename)
108 if options.corpusconfigfile is not None :
109 corpus_parametres = DoConf(options.corpusconfigfile).getoptions('corpus')
111 corpus_parametres = DoConf(self.ConfigPath['corpus']).getoptions()
# `dire` (the directory part) is not used again in the visible lines.
112 dire, corpus_parametres['filename'] = os.path.split(self.filename)
113 corpus_parametres['originalpath'] = self.filename
114 corpus_parametres['encoding'] = self.corpus_encodage
115 corpus_parametres['syscoding'] = locale.getpreferredencoding()
# NOTE(review): PathOut and BuildFromAlceste are not among the imports visible
# in this listing -- verify that they are imported in the complete file.
116 corpus_parametres['pathout'] = PathOut(self.filename, 'corpus').mkdirout()
118 corpus = BuildFromAlceste(self.filename, corpus_parametres, self.lexique, self.expressions).corpus
# Any build error is only logged at INFO level (Python 2 `except X, name` syntax).
# NOTE(review): what `corpus` is bound to after a failure is not visible here.
119 except Exception, txt:
120 log.info('probleme lors de la construction: %s' %txt)
# Record the corpus parameters in the history, then continue with the object
# returned by copycorpus().
124 self.history.add(corpus.parametres)
125 corpus = copycorpus(corpus)
# Read branch: load an existing corpus from the file given with -r; its
# 'pathout' is the directory containing that file.  The local `pathout`
# (parent of that directory) is not used in the visible lines.
127 corpus = Corpus(self, parametres = DoConf(options.read).getoptions('corpus'), read = options.read)
128 corpus.parametres['pathout'] = os.path.dirname(os.path.abspath(options.read))
129 pathout = os.path.dirname(os.path.dirname(os.path.abspath(options.read)))
# With a corpus available: select active/supplementary forms, then print every
# attribute of the forms whose `gram` is empty or a single space (looks like
# debugging output -- TODO confirm; intermediate lines are missing here).
132 if corpus is not None :
134 #corpus = SubCorpus(self, corpus, [0,1,2,3,4,5,6,7])
137 corpus.parse_active(gramact, gramsup)
138 for forme in corpus.formes :
139 if corpus.formes[forme].gram == '' or corpus.formes[forme].gram == ' ' :
141 for val in dir(corpus.formes[forme]) :
142 print getattr(corpus.formes[forme], val)
143 #print corpus.getlemconcorde('de').fetchall()
144 # log.warning('ATTENTION gethapaxuces')
145 # MakeUciStat(corpus)
147 #corpus.gethapaxuces()
148 #ucisize = corpus.getucisize()
149 #ucisize = [`val` for val in ucisize]
150 #uciet = [uci.etoiles[1] for uci in corpus.ucis]
151 #res = zip(uciet, ucisize)
152 #with open('ucisize.csv', 'w') as f :
153 # f.write('\n'.join(['\t'.join(val) for val in res]))
154 # self.content = f.read()
155 #self.content = self.content.replace('\r','')
# Dispatch on the analysis type given with -t; the result is kept in self.Text.
156 if options.type_analyse == 'alceste' :
157 log.debug('ATTENTION : ANALYSE NG')
158 #print corpus.make_etoiles()
160 #corpus.read_corpus()
161 #corpus.parse_active(gramact, gramsup)
162 config['type'] = 'alceste'
163 self.Text = Alceste(self, corpus, parametres = config)
164 # self.Text = AnalyseAlceste(self, cmd = True, big = True)
165 #self.Text = AnalyseAlceste(self, cmd = True)
# NOTE(review): the imports of PamTxt and AfcUci are commented out at the top
# of this file, so these two branches presumably raise NameError -- confirm.
166 elif options.type_analyse == 'pam' :
167 self.Text = PamTxt(self, cmd = True)
168 elif options.type_analyse == 'afcuci' :
169 self.Text = AfcUci(self, cmd = True)
# Stat receives its settings as `parametres`, Lexico as `config`.
170 elif options.type_analyse == 'stat' :
171 self.Text = Stat(self, corpus, parametres = {'type':'stat'})
172 elif options.type_analyse == 'spec' :
173 self.Text = Lexico(self, corpus, config = {'type' : 'spec'})
174 #print self.Text.corpus.hours, 'h', self.Text.corpus.minutes,'min', self.Text.corpus.seconds, 's'
175 # self.Text.corpus.make_colored_corpus('colored.html')
# Script entry point: when the file is executed directly, instantiate CmdLine
# with the module-level AppliPath; no `args` are passed.
177 if __name__ == '__main__':
179 CmdLine(AppliPath = AppliPath)