X-Git-Url: http://iramuteq.org/git?p=iramuteq;a=blobdiff_plain;f=corpusNG.py;h=5e7ba26ae751ae92b4e42970dbbdc4b80b58648f;hp=11961e109bf5c7662aba1fcbce2ba34208358415;hb=25061ca972d5be7a8ef3aa42276accc3761a3364;hpb=f4cba135ed6aed06ee15e9d41a9091101a4c1bef diff --git a/corpusNG.py b/corpusNG.py index 11961e1..5e7ba26 100644 --- a/corpusNG.py +++ b/corpusNG.py @@ -527,6 +527,17 @@ class Corpus : det[et[0]] = 1 return det + def make_etline(self, listet) : + etuces = [[] for et in listet] + for uci in self.ucis : + get = list(set(uci.etoiles).intersection(listet)) + if len(get) > 1 : + return '2 variables sur la meme ligne' + elif get != [] : + etuces[listet.index(get[0])] += [uce.ident for uce in uci.uces] + return etuces + + def make_and_write_profile_et(self, ucecl, fileout) : log.info('etoiles/classes') etoiles = self.make_etoiles() @@ -985,11 +996,9 @@ class BuildCorpus : class BuildFromAlceste(BuildCorpus) : - #def __init___(self, infile, parametres_corpus) : - # BuildCorpus.__init__(self, infile, parametres_corpus) - - def read_corpus(self, infile) : + if self.dlg is not None : + self.dlg.Pulse('textes : 0 - segments : 0') self.limitshow = 0 self.count = 1 if self.corpus.parametres['ucimark'] == 0 : @@ -1000,11 +1009,9 @@ class BuildFromAlceste(BuildCorpus) : iduci = -1 idpara = -1 iduce = -1 - linenb = 0 try : with codecs.open(infile, 'r', self.encoding) as f : - for line in f : - linenb += 1 + for linenb, line in enumerate(f) : line = line.rstrip('\n\r') if self.testuci(line) : iduci += 1 @@ -1015,9 +1022,14 @@ class BuildFromAlceste(BuildCorpus) : else : if iduci > 0 : if self.corpus.ucis[-1].uces == [] : - log.info('linenb : %i' % linenb) - raise Exception("EmptyText %i" % linenb) + log.info(u'Empty text : %i' % linenb) + iduci -= 1 + self.corpus.ucis.pop() + #raise Exception("EmptyText %i" % linenb) self.corpus.ucis.append(Uci(iduci, line)) + if self.dlg is not None : + if not (iduci + 1) % 10 : + self.dlg.Pulse('textes : %i - segments : %i' % (iduci + 1, iduce +1)) elif line.startswith(u'-*') : if iduci != -1 : if txt != [] : @@ -1037,7 +1049,7 @@ class BuildFromAlceste(BuildCorpus) : if iduci != -1 and iduce != -1: self.backup_uce() else : - log.info(_(u"No Texte in corpora. Are you sure of the formatting ?")) + log.info(_(u"No Text in corpora. Are you sure of the formatting ?")) raise Exception('TextBeforeTextMark') except UnicodeDecodeError : raise Exception("CorpusEncoding") @@ -1065,13 +1077,13 @@ class BuildFromAlceste(BuildCorpus) : for word in uce : self.last += 1 self.corpus.add_word(word) - if self.dlg is not None : - if self.limitshow > self.count : - self.dlg.Pulse('uci: %i - uce : %i' % (iduci + 1, iduce +1)) - self.count += 1 - self.limitshow = 0 - else : - self.limitshow = self.last / 100000 + #if self.dlg is not None : + # if self.limitshow > self.count : + # self.dlg.Pulse('textes : %i - segments : %i' % (iduci + 1, iduce +1)) + # self.count += 1 + # self.limitshow = 0 + # else : + # self.limitshow = self.last / 100000 log.debug(' '.join([`iduci`,`idpara`,`iduce`])) if self.last > self.lim : self.backup_uce()