2 # -*- coding: utf-8 -*-
3 #Author: Pierre Ratinaud
4 #Copyright (c) 2010 Pierre Ratinaud
8 from optparse import OptionParser
13 sys.setdefaultencoding(locale.getpreferredencoding())
14 from chemins import ConstructConfigPath, ConstructDicoPath, ConstructRscriptsPath
15 from functions import ReadLexique, DoConf, History, ReadDicoAsDico
16 from ConfigParser import *
17 #######################################
18 #from textchdalc import AnalyseAlceste
19 #from textdist import PamTxt
20 #from textafcuci import AfcUci
21 from analysetxt import Alceste
22 from corpus import Corpus, copycorpus
23 from textaslexico import Lexico
24 from textstat import Stat
25 from tools import SubCorpus
27 ######################################
# Module-level logging setup: fetch the logger named 'iramuteq', build a
# stream handler whose records are formatted as
# "time - logger name - level - message", and set the logger level to DEBUG.
# NOTE(review): this listing skips a line between setFormatter and setLevel;
# the call attaching `ch` to `log` is not visible here -- confirm it exists
# in the full file, otherwise the handler is never used.
29 log = logging.getLogger('iramuteq')
30 formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
31 ch = logging.StreamHandler()
32 ch.setFormatter(formatter)
34 log.setLevel(logging.DEBUG)
35 #######################################
39 #cmd = iracmd.CmdLine(args=['-f','/home/pierre/workspace/iramuteq/corpus/lru2.txt','-t','alceste'])
# Absolute directory of the launched script (sys.argv[0]) with symlinks
# resolved by os.path.realpath.
41 AppliPath = os.path.abspath(os.path.dirname(os.path.realpath(sys.argv[0])))
# Choose the user's home directory from the environment: HOME when it is
# set, HOMEPATH otherwise.
# NOTE(review): the branch keyword between the two assignments below is not
# visible in this listing (presumably an `else:`) -- confirm in the full file.
# NOTE(review): `!= None` is conventionally written `is not None`.
# NOTE(review): if neither variable is set, user_home is None and the
# os.path.join below will fail -- confirm whether that case must be handled.
42 if os.getenv('HOME') != None:
43 user_home = os.getenv('HOME')
45 user_home = os.getenv('HOMEPATH')
# Per-user configuration directory: "<home>/.iramuteq" as an absolute path.
46 UserConfigPath = os.path.abspath(os.path.join(user_home, '.iramuteq'))
# Constructor of the command-line driver (the enclosing class header is not
# visible in this listing).
#
# Parameters:
#   args -- optional argument list handed to OptionParser.parse_args
#           (defaults to None).
#
# Work done here, as far as the visible lines show: resolve dictionary,
# configuration and R-script paths; create a temporary directory; read the
# 'path' and 'preferences' configuration files; create the History object;
# parse the command line; then build or read a corpus and start the analysis
# selected with -t. The result of the analysis is stored in self.Text.
#
# NOTE(review): this listing has no indentation and several lines are
# missing (branch keywords such as else:/try: are not shown), so the nesting
# described in the comments below is inferred -- confirm against the full
# file.
49 def __init__(self, args = None) :
# Path tables built by the project helpers from the application directory
# and the per-user configuration directory.
50 self.DictPath = ConstructDicoPath(AppliPath)
51 self.ConfigPath = ConstructConfigPath(UserConfigPath)
52 self.syscoding = sys.getdefaultencoding()
# NOTE(review): the first positional argument of tempfile.mkdtemp is the
# suffix, so 'iramuteq' ends the directory name rather than starting it.
53 self.TEMPDIR = tempfile.mkdtemp('iramuteq')
54 self.RscriptsPath = ConstructRscriptsPath(AppliPath)
# Read the R path from option 'rpath' of section 'PATHS' in the file
# registered under ConfigPath['path'].
55 self.PathPath = ConfigParser()
56 self.PathPath.read(self.ConfigPath['path'])
57 self.RPath = self.PathPath.get('PATHS', 'rpath')
# User preferences are read with RawConfigParser (no value interpolation).
58 self.pref = RawConfigParser()
59 self.pref.read(self.ConfigPath['preferences'])
# History object backed by 'history.db' in the user configuration directory.
60 self.history = History(os.path.join(UserConfigPath, 'history.db'))
# Command-line options: -f corpus file, -t analysis type, -c analysis
# configuration file, -d corpus configuration file, -e corpus encoding
# (defaults to the locale's preferred encoding), -l corpus language
# (defaults to 'french'), -r path of an existing corpus to read,
# -b flag requesting that a corpus be built.
62 parser = OptionParser()
64 parser.add_option("-f", "--file", dest="filename", help="chemin du corpus", metavar="FILE", default=False)
65 parser.add_option("-t", "--type", dest="type_analyse", help="type d'analyse", metavar="TYPE D'ANALYSE", default=False)
66 parser.add_option("-c", "--conf", dest="configfile", help="chemin du fichier de configuration pour l'analyse", metavar="CONF", default=None)
67 parser.add_option("-d", "--confcorp", dest="corpusconfigfile", help="chemin du fichier de configuration pour le corpus", metavar="CONF", default=None)
68 parser.add_option("-e", "--enc", dest="encodage", help="encodage du corpus", metavar="ENC", default=locale.getpreferredencoding())
69 parser.add_option("-l", "--lang", dest="language", help="langue du corpus", metavar="LANG", default='french')
70 parser.add_option("-r", "--read", dest="read", help="lire un corpus", metavar="READ", default = False)
71 parser.add_option("-b", "--build", action="store_true", dest="build", help = "construire un corpus", default = False)
# Two parse_args calls are visible: the first takes no argument (optparse
# then uses sys.argv[1:]), the second parses the `args` parameter.
# NOTE(review): the condition selecting between them is not shown in this
# listing -- presumably a test on whether `args` is None; confirm.
74 (options, args) = parser.parse_args()
76 (options, args) = parser.parse_args(args)
# Select the analysis configuration: an explicit -c file wins; otherwise the
# default configuration registered in ConfigPath under the analysis type is
# used when a corpus file (-f) or a corpus to read (-r) is given with -t.
# NOTE(review): the body of the final `-f` + `-b` branch is not visible.
80 if options.configfile is not None:
81 config = DoConf(os.path.abspath(options.configfile)).getoptions()
82 elif options.filename and options.type_analyse :
83 config = DoConf(self.ConfigPath[options.type_analyse]).getoptions()
84 elif options.read and options.type_analyse :
85 config = DoConf(self.ConfigPath[options.type_analyse]).getoptions()
88 elif options.filename and options.build :
# Common setup when a corpus is built (-f) or read (-r): remember encoding
# and language, then load the 'key' configuration into self.keys.
94 if options.filename or options.read :
95 self.corpus_encodage = options.encodage
96 self.corpus_lang = options.language
97 self.keys = DoConf(self.ConfigPath['key']).getoptions()
# ReadLexique receives this object and the language; the later use of
# self.lexique suggests it stores the lexicon on self -- TODO confirm.
100 ReadLexique(self, lang = options.language)
# Look up the expressions dictionary under '<language>_exp'.
# NOTE(review): the fallback passed to .get is the literal string
# 'french_exp', not the value stored under that key -- if DictPath is a
# plain dict this is probably not the intended default; confirm.
101 self.expressions = ReadDicoAsDico(self.DictPath.get(options.language + '_exp', 'french_exp'))
# Keys whose configured value is 1 go to gramact, those with value 2 to
# gramsup; both lists are passed to corpus.parse_active below.
102 gramact = [k for k in self.keys if self.keys[k] == 1]
103 gramsup = [k for k in self.keys if self.keys[k] == 2]
# Build a corpus from the file given with -f.
105 if options.filename :
106 self.filename = os.path.abspath(options.filename)
# Corpus parameters come from the 'corpus' section of the -d file when one
# is given, otherwise from the default file under ConfigPath['corpus'].
# NOTE(review): the keyword separating the two assignments is not visible
# (presumably `else:`).
107 if options.corpusconfigfile is not None :
108 corpus_parametres = DoConf(options.corpusconfigfile).getoptions('corpus')
110 corpus_parametres = DoConf(self.ConfigPath['corpus']).getoptions()
# Record the file name (without directory), the full original path, the
# corpus encoding, the locale's preferred encoding and the output path.
111 dire, corpus_parametres['filename'] = os.path.split(self.filename)
112 corpus_parametres['originalpath'] = self.filename
113 corpus_parametres['encoding'] = self.corpus_encodage
114 corpus_parametres['syscoding'] = locale.getpreferredencoding()
# NOTE(review): PathOut and BuildFromAlceste are not among the imports
# visible in this listing -- confirm they are imported in the full file.
115 corpus_parametres['pathout'] = PathOut(self.filename, 'corpus').mkdirout()
# Build the corpus; a failure is only logged at INFO level.
# NOTE(review): the opening `try:` is not visible in this listing, and
# `except Exception, txt` is Python 2-only syntax.
# NOTE(review): how `corpus` is bound after a failed build is not visible;
# the later `corpus is not None` test suggests it is reset to None -- confirm.
117 corpus = BuildFromAlceste(self.filename, corpus_parametres, self.lexique, self.expressions).corpus
118 except Exception, txt:
119 log.info('probleme lors de la construction: %s' %txt)
# Register the corpus parameters in the history, then replace the corpus
# with the result of copycorpus.
123 self.history.add(corpus.parametres)
124 corpus = copycorpus(corpus)
# Read path (-r): construct a Corpus from the 'corpus' section of the file
# given with -r; its 'pathout' is the directory containing that file, and
# the local `pathout` is the parent of that directory.
# NOTE(review): the keyword introducing this branch is not visible
# (presumably `else:` or a test on options.read).
126 corpus = Corpus(self, parametres = DoConf(options.read).getoptions('corpus'), read = options.read)
127 corpus.parametres['pathout'] = os.path.dirname(os.path.abspath(options.read))
128 pathout = os.path.dirname(os.path.dirname(os.path.abspath(options.read)))
# With a usable corpus: call parse_active with the two key lists, then
# print every attribute (as listed by dir()) of each form whose `gram` is
# empty or a single space.
# NOTE(review): one line between the `if` on gram and the inner `for` is
# missing from this listing.
131 if corpus is not None :
133 #corpus = SubCorpus(self, corpus, [0,1,2,3,4,5,6,7])
136 corpus.parse_active(gramact, gramsup)
137 for forme in corpus.formes :
138 if corpus.formes[forme].gram == '' or corpus.formes[forme].gram == ' ' :
140 for val in dir(corpus.formes[forme]) :
141 print getattr(corpus.formes[forme], val)
142 #print corpus.getlemconcorde('de').fetchall()
143 # log.warning('ATTENTION gethapaxuces')
144 # MakeUciStat(corpus)
146 #corpus.gethapaxuces()
147 #ucisize = corpus.getucisize()
148 #ucisize = [`val` for val in ucisize]
149 #uciet = [uci.etoiles[1] for uci in corpus.ucis]
150 #res = zip(uciet, ucisize)
151 #with open('ucisize.csv', 'w') as f :
152 # f.write('\n'.join(['\t'.join(val) for val in res]))
153 # self.content = f.read()
154 #self.content = self.content.replace('\r','')
# Dispatch on the analysis type given with -t; the created analysis object
# is stored in self.Text.
155 if options.type_analyse == 'alceste' :
156 log.debug('ATTENTION : ANALYSE NG')
157 #print corpus.make_etoiles()
159 #corpus.read_corpus()
160 #corpus.parse_active(gramact, gramsup)
161 config['type'] = 'alceste'
162 self.Text = Alceste(self, corpus, parametres = config)
163 # self.Text = AnalyseAlceste(self, cmd = True, big = True)
164 #self.Text = AnalyseAlceste(self, cmd = True)
# NOTE(review): the imports of PamTxt and AfcUci are commented out at the
# top of this file, so the 'pam' and 'afcuci' branches would raise
# NameError unless those names are provided elsewhere -- confirm.
165 elif options.type_analyse == 'pam' :
166 self.Text = PamTxt(self, cmd = True)
167 elif options.type_analyse == 'afcuci' :
168 self.Text = AfcUci(self, cmd = True)
169 elif options.type_analyse == 'stat' :
170 self.Text = Stat(self, corpus, parametres = {'type':'stat'})
# NOTE(review): Lexico is given `config=` whereas Alceste and Stat are
# given `parametres=` -- confirm this matches Lexico's signature.
171 elif options.type_analyse == 'spec' :
172 self.Text = Lexico(self, corpus, config = {'type' : 'spec'})
173 #print self.Text.corpus.hours, 'h', self.Text.corpus.minutes,'min', self.Text.corpus.seconds, 's'
174 # self.Text.corpus.make_colored_corpus('colored.html')
176 if __name__ == '__main__':