import locale
import sys
from time import time
-from functions import decoupercharact, ReadDicoAsDico, DoConf, ReadLexique
+from functions import decoupercharact, ReadDicoAsDico, DoConf, ReadLexique, progressbar
import re
import sqlite3
import itertools
query = 'SELECT uces FROM uces where id IN (%s) ORDER BY id' % formesid
res = self.cformes.execute(query)
return list(set(list(itertools.chain(*[[int(val) for val in row[0].split()] if not isinstance(row[0], int) else [row[0]] for row in res]))))
-
+
def gettgenst(self, tgen):
    """Return the distinct unit ids stored for every form of the lemmas in *tgen*.

    Parameters:
        tgen: iterable of lemma keys. Keys missing from ``self.lems`` are
            reported on stdout and skipped.

    Returns:
        A list (unordered, without duplicates) of the integers read from the
        ``uces`` column of the ``uces`` table for the collected form ids.
    """
    formesid = []
    for lem in tgen:
        if lem in self.lems:
            formesid += self.lems[lem].formes
        else:
            print('abscent : %s' % lem)
    # Nothing to look up: avoid sending an empty IN () list to the database.
    if not formesid:
        return []
    # Build the id list by hand: str(tuple(ids)) renders a single id as
    # "(5,)", which SQLite rejects as a syntax error.  int() guarantees that
    # only digits are interpolated into the statement.
    # NOTE(review): assumes form ids are integers -- confirm against the
    # code that fills ``self.lems[...].formes``.
    id_list = ', '.join(str(int(val)) for val in formesid)
    query = 'SELECT uces FROM uces where id IN (%s) ORDER BY id' % id_list
    res = self.cformes.execute(query)
    uces = set()
    for row in res:
        if isinstance(row[0], int):
            # A lone id may come back as an integer instead of text.
            uces.add(row[0])
        else:
            # Otherwise the cell is a whitespace-separated list of ids.
            uces.update(int(val) for val in row[0].split())
    return list(uces)
+
def gettgenstprof(self, tgen, classe, i, clnb):
    """Collect the units of the lemmas in *tgen* and record per-lemma class counts.

    For every lemma of *tgen* known to ``self.lems``, the units returned by
    ``self.getlemuces(lem)`` are gathered, and ``self.tgenlem[lem][i]`` is set
    to the number of those units that also belong to *classe*.

    Parameters:
        tgen: iterable of lemma keys. Keys missing from ``self.lems`` are
            reported on stdout and skipped.
        classe: iterable of unit ids making up the class at index *i*.
        i: index of *classe*; selects the slot updated in ``self.tgenlem``.
        clnb: number of classes; length of a newly created ``self.tgenlem`` row.

    Returns:
        A list (unordered, without duplicates) of all units gathered.
    """
    tgenst = set()
    for lem in tgen:
        if lem not in self.lems:
            # Same output as the Python 2 statement ``print 'abscent: ', lem``
            # (the comma inserted a second space), but valid on Python 3 too.
            print('abscent:  %s' % lem)
            continue
        lemst = self.getlemuces(lem)
        tgenst.update(lemst)
        if lem not in self.tgenlem:
            # First time this lemma is seen: one zeroed slot per class.
            self.tgenlem[lem] = [0] * clnb
        self.tgenlem[lem][i] = len(set(lemst).intersection(classe))
    return list(tgenst)
def gettgentxt(self, tgen):
sts = self.gettgenst(tgen)
for lem in tokeep :
deff = self.getlemuceseff(lem)
ucesk = deff.keys()
- tab.append([lem] + [sum([deff[uce] for uce in et.intersection(ucesk)]) for et in etuces])
+ line = [lem] + [sum([deff[uce] for uce in et.intersection(ucesk)]) for et in etuces]
+ if sum(line[1:]) >= mineff :
+ tab.append(line)
tab.insert(0, [''] + etoiles)
return tab
def make_tgen_profile(self, tgen, ucecl, uci=False):
    """Build the table counting, for each tgen, its units in every class.

    Parameters:
        tgen: mapping from a tgen name to the collection of lemmas it groups.
        ucecl: sequence of classes, each an iterable of unit ids.
        uci: when true, units are obtained with ``self.gettgentxt`` instead
            of ``self.gettgenstprof``.

    Returns:
        A list of rows ``[name, count_class_0, count_class_1, ...]``. Rows
        whose counts sum to less than 3 are dropped.

    Side effects:
        Resets ``self.tgenlem`` to an empty dict before computing; it is then
        filled by ``gettgenstprof`` (only in the ``uci=False`` branch).
    """
    log.info('tgen/classes')
    self.tgenlem = {}
    clnb = len(ucecl)
    tab = []
    for lem in tgen:
        line = [lem]
        for i, classe in enumerate(ucecl):
            if uci:
                # FIXME: this branch no longer works and needs to be changed
                # (note carried over from the original author).
                units = self.gettgentxt(tgen[lem])
            else:
                units = self.gettgenstprof(tgen[lem], classe, i, clnb)
            line.append(len(set(units).intersection(classe)))
        # Keep only tgens with at least 3 occurrences across all classes.
        if sum(line[1:]) >= 3:
            tab.append(line)
    return tab
#i = 0
ident += 1
f.write('\n'.join([self.ucis[self.iduces[uce[0]].uci].paras[ident].encode(self.parametres['syscoding']), uce[1].encode(self.parametres['syscoding'])]) + '\n')
def export_meta_table(self, outf):
    """Write the metadata of every text to *outf* as a tab-separated table.

    The first line is a header ``column_0 ... column_N`` as wide as the
    longest row. Each following line holds the index of an entry of
    ``self.ucis`` followed by that entry's ``etoiles[1:]``. Shorter rows are
    not padded.

    Parameters:
        outf: path of the file to create; it is overwritten if it exists.

    The file is encoded with ``self.parametres['syscoding']``.
    """
    # Local import: the visible import block of this file does not provide io.
    import io

    metas = [[str(i)] + text.etoiles[1:] for i, text in enumerate(self.ucis)]
    # ``or [0]`` keeps max() from raising ValueError when there are no texts.
    longueur_max = max([len(val) for val in metas] or [0])
    first = ['column_%i' % i for i in range(longueur_max)]
    metas.insert(0, first)
    # Let the file object do the encoding: writing pre-encoded bytes to a
    # text-mode file raises TypeError on Python 3.
    with io.open(outf, 'w', encoding=self.parametres['syscoding']) as f:
        f.write(u'\n'.join([u'\t'.join(line) for line in metas]))
+
def export_corpus_classes(self, outf, alc = True, lem = False, uci = False) :
ucecl = {}
for i, lc in enumerate(self.lc) :
f.write(''.join([' '.join([`duces[uce]+1`,`i+1`,`1`]),'\n']))
f.seek(0)
with open(outfile, 'w') as ffin :
- ffin.write("%%%%MatrixMarket matrix coordinate integer general\n%i %i %i\n" % (self.getucenb(), len(actives), nbl))
+ ffin.write("%%%%MatrixMarket matrix coordinate integer general\n%i %i %i\n" % (len(uces), len(actives), nbl))
for line in f :
ffin.write(line)
os.remove(outfile + '~')
self.cleans.append(self.dotiret)
def make_expression(self, txt):
    """Replace every known expression found in *txt* by its substitute.

    Parameters:
        txt: the text to process.

    Returns:
        *txt* where each key of ``self.expressions`` has been replaced by the
        first element of the value stored under that key.
    """
    # Reverse lexicographic order puts a string before any of its own
    # prefixes, so "a b c" is substituted before "a b".  sorted() is used
    # because dict.keys() has no .sort() method on Python 3.
    for expression in sorted(self.expressions, reverse=True):
        if expression in txt:
            txt = txt.replace(expression, self.expressions[expression][0])
    return txt
def __init__(self, parent, dlg = None) :
self.parent = parent
self.dlg = dlg
+
parametres = DoConf(os.path.join(self.parent.UserConfigPath,'corpus.cfg')).getoptions('corpus')
parametres['pathout'] = PathOut(parent.filename, 'corpus').mkdirout()
parametres['corpus_name'] = os.path.split(parametres['pathout'])[1]
dial.txtpath.SetLabel(parent.filename)
#dial.repout_choices.SetValue(parametres['pathout'])
self.res = dial.ShowModal()
+ if self.dlg is not None :
+ self.dlg = progressbar(self.parent, self.dlg)
if self.res == 5100 :
parametres = dial.doparametres()
parametres['originalpath'] = parent.filename