2 # -*- coding: utf-8 -*-
3 #Author: Pierre Ratinaud
4 #Copyright (c) 2010 Pierre Ratinaud
8 from optparse import OptionParser
# Python 2 only: site.py deletes sys.setdefaultencoding at start-up, so this
# call can only succeed after a reload(sys) (not visible in this view --
# TODO confirm it is still present in the file).
# Sets the process-wide default string encoding to the locale's preferred one.
13 sys.setdefaultencoding(locale.getpreferredencoding())
14 from chemins import ConstructConfigPath, ConstructDicoPath, ConstructRscriptsPath, PathOut
15 from functions import ReadLexique, DoConf, History, ReadDicoAsDico
16 from ConfigParser import *
17 #######################################
18 #from textchdalc import AnalyseAlceste
19 #from textdist import PamTxt
20 #from textafcuci import AfcUci
21 from analysetxt import Alceste
22 from corpus import Corpus, copycorpus, BuildFromAlceste
23 from textaslexico import Lexico
24 from textstat import Stat
25 from tools import SubCorpus
26 from textsimi import SimiTxt
28 ######################################
# Console logging for the 'iramuteq' logger: every record from DEBUG upwards
# is emitted on the default StreamHandler stream (stderr) using the layout
# "time - logger name - level - message".
log = logging.getLogger('iramuteq')
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
ch = logging.StreamHandler()
ch.setFormatter(formatter)
# The handler must be attached to the logger, otherwise the formatter
# configured above is never used for any record.
log.addHandler(ch)
log.setLevel(logging.DEBUG)
36 #######################################
40 #cmd = iracmd.CmdLine(args=['-f','/home/pierre/workspace/iramuteq/corpus/lru2.txt','-t','alceste'])
# Directory that contains the launched script (symbolic links resolved).
AppliPath = os.path.abspath(os.path.dirname(os.path.realpath(sys.argv[0])))

# Locate the user's home directory: HOME takes precedence, HOMEPATH is used
# when HOME is not set.  When neither variable is defined, fall back to
# os.path.expanduser('~') instead of handing None to os.path.join (which
# would raise).
user_home = os.getenv('HOME')
if user_home is None:
    user_home = os.getenv('HOMEPATH')
if user_home is None:
    user_home = os.path.expanduser('~')

# Per-user configuration directory of the application.
UserConfigPath = os.path.abspath(os.path.join(user_home, '.iramuteq'))
# Constructor of the command-line front end: resolves resource/configuration
# paths, parses the command-line options, builds or reads a corpus and then
# instantiates the requested analysis in self.Text.
#
# NOTE(review): this view of the file appears to be missing several short
# lines (the enclosing class header, and keywords such as `else:` / `try:`),
# and all indentation.  The comments below describe only visible statements.
#
# Parameters:
#   args       -- argument list forwarded to parser.parse_args(args)
#   AppliPath  -- base path given to ConstructDicoPath / ConstructRscriptsPath
#   parametres -- forwarded unchanged to SimiTxt for the 'simitxt' analysis
50 def __init__(self, args = None, AppliPath = None, parametres = None) :
# --- resource locations and user configuration ---------------------------
52 self.DictPath = ConstructDicoPath(AppliPath)
# Note: uses the module-level UserConfigPath, not a constructor argument.
53 self.ConfigPath = ConstructConfigPath(UserConfigPath)
54 self.syscoding = sys.getdefaultencoding()
# 'iramuteq' is mkdtemp's first positional argument, i.e. the *suffix* of the
# temporary directory name.
55 self.TEMPDIR = tempfile.mkdtemp('iramuteq')
56 self.RscriptsPath = ConstructRscriptsPath(AppliPath)
# Read the 'rpath' entry of the [PATHS] section from the file registered
# under ConfigPath['path'].
57 self.PathPath = ConfigParser()
58 self.PathPath.read(self.ConfigPath['path'])
59 self.RPath = self.PathPath.get('PATHS', 'rpath')
# User preferences are loaded with RawConfigParser (no value interpolation).
60 self.pref = RawConfigParser()
61 self.pref.read(self.ConfigPath['preferences'])
# History store kept in 'history.db' inside the user configuration directory.
62 self.history = History(os.path.join(UserConfigPath, 'history.db'))
64 # self.history.clean()
# --- command-line options -------------------------------------------------
# The help strings are user-facing French text and are part of the program
# output.  Every option takes a value except -b/--build, which is a flag.
66 parser = OptionParser()
68 parser.add_option("-f", "--file", dest="filename", help="chemin du corpus", metavar="FILE", default=False)
69 parser.add_option("-t", "--type", dest="type_analyse", help="type d'analyse", metavar="TYPE D'ANALYSE", default=False)
70 parser.add_option("-c", "--conf", dest="configfile", help="chemin du fichier de configuration pour l'analyse", metavar="CONF", default=None)
71 parser.add_option("-d", "--confcorp", dest="corpusconfigfile", help="chemin du fichier de configuration pour le corpus", metavar="CONF", default=None)
72 parser.add_option("-e", "--enc", dest="encodage", help="encodage du corpus", metavar="ENC", default=locale.getpreferredencoding())
73 parser.add_option("-l", "--lang", dest="language", help="langue du corpus", metavar="LANG", default='french')
74 parser.add_option("-r", "--read", dest="read", help="lire un corpus", metavar="READ", default = False)
75 parser.add_option("-b", "--build", action="store_true", dest="build", help = "construire un corpus", default = False)
# NOTE(review): two parse calls are visible -- one without arguments (optparse
# then reads sys.argv[1:]) and one on the supplied `args`.  The condition that
# selects between them is not visible in this view; presumably `args is None`.
78 (options, args) = parser.parse_args()
80 (options, args) = parser.parse_args(args)
# --- analysis configuration -----------------------------------------------
# An explicit -c file takes priority; otherwise, when a corpus (-f or -r) and
# an analysis type (-t) are both given, the configuration registered in
# self.ConfigPath under that analysis type is loaded.
84 if options.configfile is not None:
85 config = DoConf(os.path.abspath(options.configfile)).getoptions()
86 elif options.filename and options.type_analyse :
87 config = DoConf(self.ConfigPath[options.type_analyse]).getoptions()
88 elif options.read and options.type_analyse :
89 config = DoConf(self.ConfigPath[options.type_analyse]).getoptions()
# NOTE(review): the body of this branch is not visible in this view.
92 elif options.filename and options.build :
# --- corpus set-up (only when -f or -r was given) ---------------------------
98 if options.filename or options.read :
99 self.corpus_encodage = options.encodage
100 self.corpus_lang = options.language
101 self.keys = DoConf(self.ConfigPath['key']).getoptions()
# presumably populates self.lexique, which is read below but never assigned
# in the visible code -- TODO confirm.
104 ReadLexique(self, lang = options.language)
# NOTE(review): if DictPath is a plain dict, the fallback of .get() here is the
# literal string 'french_exp', not the value stored under that key -- confirm
# this is intended.
105 self.expressions = ReadDicoAsDico(self.DictPath.get(options.language + '_exp', 'french_exp'))
# Keys whose configured value is 1 go to gramact, those with value 2 to
# gramsup; both lists are handed to corpus.parse_active() further down.
106 gramact = [k for k in self.keys if self.keys[k] == 1]
107 gramsup = [k for k in self.keys if self.keys[k] == 2]
# Case 1: build a corpus from the text file given with -f.
109 if options.filename :
110 self.filename = os.path.abspath(options.filename)
# Corpus parameters come from the -d file (its 'corpus' options) when given,
# otherwise from the default file registered under ConfigPath['corpus'].
111 if options.corpusconfigfile is not None :
112 corpus_parametres = DoConf(options.corpusconfigfile).getoptions('corpus')
114 corpus_parametres = DoConf(self.ConfigPath['corpus']).getoptions()
# 'filename' receives the base name only; `dire` (the directory part) is not
# used afterwards in the visible code.
115 dire, corpus_parametres['filename'] = os.path.split(self.filename)
116 corpus_parametres['originalpath'] = self.filename
117 corpus_parametres['encoding'] = self.corpus_encodage
118 corpus_parametres['syscoding'] = locale.getpreferredencoding()
119 corpus_parametres['pathout'] = PathOut(self.filename, 'corpus').mkdirout()
# NOTE(review): `except Exception, txt` is Python 2-only syntax.  The matching
# `try:` line (and any `else:` introducing the two statements after the log
# call) is not visible in this view.
121 corpus = BuildFromAlceste(self.filename, corpus_parametres, self.lexique, self.expressions).corpus
122 except Exception, txt:
123 log.info('probleme lors de la construction: %s' %txt)
# Record the corpus parameters in the history, then continue with the result
# of copycorpus().
127 self.history.add(corpus.parametres)
128 corpus = copycorpus(corpus)
# Case 2 (presumably the -r branch; its header line is not visible): read an
# existing corpus whose parameters are the 'corpus' options of the -r file.
130 corpus = Corpus(self, parametres = DoConf(options.read).getoptions('corpus'), read = options.read)
131 corpus.parametres['pathout'] = os.path.dirname(os.path.abspath(options.read))
# `pathout` (two directory levels above the -r file) is not used afterwards in
# the visible code.
132 pathout = os.path.dirname(os.path.dirname(os.path.abspath(options.read)))
# --- post-processing of the corpus ------------------------------------------
136 if corpus is not None :
138 #corpus = SubCorpus(self, corpus, [0,1,2,3,4,5,6,7])
141 corpus.parse_active(gramact, gramsup)
142 #print corpus.getlemconcorde('de').fetchall()
143 # log.warning('ATTENTION gethapaxuces')
144 # MakeUciStat(corpus)
145 #corpus.gethapaxuces()
# Export one tab-separated row per entry of corpus.ucis to 'ucisize.csv' in
# the current working directory: for each item of uci.etoiles[1:] the second
# '_'-separated field, followed by the corresponding value of getucisize().
146 ucisize = corpus.getucisize()
# Backticks are the Python 2 spelling of repr(): values become strings so
# they can be joined below.
147 ucisize = [`val` for val in ucisize]
148 uciet = [[val.split('_')[1] for val in uci.etoiles[1:]] for uci in corpus.ucis]
150 # print '\t'.join(line)
151 #res = zip(uciet, ucisize)
152 res = [uciet[i] + [ucisize[i]] for i, val in enumerate(uciet)]
154 with open('ucisize.csv', 'w') as f :
155 f.write('\n'.join(['\t'.join(val) for val in res]))
156 # self.content = f.read()
157 #self.content = self.content.replace('\r','')
# --- dispatch on the analysis type given with -t ----------------------------
# The analysis object is stored in self.Text.
158 if options.type_analyse == 'alceste' :
159 log.debug('ATTENTION : ANALYSE NG')
160 #print corpus.make_etoiles()
162 #corpus.read_corpus()
163 #corpus.parse_active(gramact, gramsup)
164 config['type'] = 'alceste'
165 self.Text = Alceste(self, corpus, parametres = config)
166 # self.Text = AnalyseAlceste(self, cmd = True, big = True)
167 #self.Text = AnalyseAlceste(self, cmd = True)
# NOTE(review): the imports of PamTxt and AfcUci are commented out in the
# visible import section; unless these names are provided some other way
# (e.g. by a star import), the two branches below raise NameError.
168 elif options.type_analyse == 'pam' :
169 self.Text = PamTxt(self, cmd = True)
170 elif options.type_analyse == 'afcuci' :
171 self.Text = AfcUci(self, cmd = True)
172 elif options.type_analyse == 'stat' :
173 self.Text = Stat(self, corpus, parametres = {'type':'stat'})
# Lexico takes its settings through the `config` keyword, unlike the other
# analyses which use `parametres`.
174 elif options.type_analyse == 'spec' :
175 self.Text = Lexico(self, corpus, config = {'type' : 'spec'})
# Uses the constructor's `parametres` argument, not the `config` loaded above.
176 elif options.type_analyse == 'simitxt' :
177 self.Text = SimiTxt(self, corpus, parametres = parametres)
178 #print self.Text.corpus.hours, 'h', self.Text.corpus.minutes,'min', self.Text.corpus.seconds, 's'
179 # self.Text.corpus.make_colored_corpus('colored.html')
# Script entry point: when the file is executed directly, run the
# command-line front end with the application directory computed above.
if __name__ == '__main__':
    CmdLine(AppliPath=AppliPath)