# -*- coding: utf-8 -*-
#Author: Pierre Ratinaud
#Copyright (c) 2010 Pierre Ratinaud
-#Lisense: GNU/GPL
+#License: GNU/GPL
import os
from optparse import OptionParser
#from textchdalc import AnalyseAlceste
#from textdist import PamTxt
#from textafcuci import AfcUci
-from analysetxt import Alceste
-from corpus import Corpus, copycorpus, BuildFromAlceste
+from textreinert import Reinert
+from corpus import Corpus, copycorpus, BuildFromAlceste, BuildSubCorpus
from textaslexico import Lexico
from textstat import Stat
from tools import SubCorpus
pathout = os.path.dirname(os.path.dirname(os.path.abspath(options.read)))
self.corpus = corpus
print self.corpus
+ corpus.parametres['pathout'] = '/home/pierre/fac/etudiant/verdier/corpus20_corpus_2/test2'
+ BuildSubCorpus(corpus, parametres = {'fromthem' : True, 'theme' : [u'-*thématique_idéal']})
if corpus is not None :
corpus.conn_all()
# log.warning('ATTENTION gethapaxuces')
# MakeUciStat(corpus)
#corpus.gethapaxuces()
- ucisize = corpus.getucisize()
- ucisize = [`val` for val in ucisize]
- uciet = [[val.split('_')[1] for val in uci.etoiles[1:]] for uci in corpus.ucis]
- #for line in uciet :
+ # ucisize = corpus.getucisize()
+ # ucisize = [`val` for val in ucisize]
+ #uciet = [uci.etoiles[1:] for uci in corpus.ucis]
+ #uceet = [corpus.ucis[uce.uci].etoiles[1:] for uci in corpus.ucis for uce in uci.uces]
+ #print uceet[0:10]
+ #for line in uceet :
# print '\t'.join(line)
#res = zip(uciet, ucisize)
- res = [uciet[i] + [ucisize[i]] for i, val in enumerate(uciet)]
- print res[0:10]
- with open('ucisize.csv', 'w') as f :
- f.write('\n'.join(['\t'.join(val) for val in res]))
+ # res = [uciet[i] + [ucisize[i]] for i, val in enumerate(uciet)]
+ # print res[0:10]
+ #ucesize = corpus.getucesize()
+ #print ucesize[0:40]
+ #with open('sentences_size.csv', 'w') as f :
+ # f.write('\n'.join([`val` for val in ucesize]))
# self.content = f.read()
#self.content = self.content.replace('\r','')
if options.type_analyse == 'alceste' :
#corpus.read_corpus()
#corpus.parse_active(gramact, gramsup)
config['type'] = 'alceste'
- self.Text = Alceste(self, corpus, parametres = config)
+ self.Text = Reinert(self, corpus, parametres = config)
# self.Text = AnalyseAlceste(self, cmd = True, big = True)
#self.Text = AnalyseAlceste(self, cmd = True)
elif options.type_analyse == 'pam' :