X-Git-Url: http://iramuteq.org/git?p=iramuteq;a=blobdiff_plain;f=corpus.py;h=5a18f628b0fcd0a5118ffbdbedd2d52362322719;hp=76ebb660911223bf31496b8d0a23aea56c4d2290;hb=278fceaa7db7b84d7c6f3bbd3f86e5ddb0ebda09;hpb=432118f2ac3d2f8234c388e77d0fb9e14234750f diff --git a/corpus.py b/corpus.py index 76ebb66..5a18f62 100644 --- a/corpus.py +++ b/corpus.py @@ -77,10 +77,10 @@ class Corpus : gramtype = self.parent.lexique[word][1] lem = self.parent.lexique[word][0] elif word.isdigit() : - gramtype = 'num' + gramtype = u'num' lem = word else : - gramtype = 'nr' + gramtype = u'nr' lem = word self.formes[word] = Word(word, gramtype, len(self.formes), lem) self.idformesuces[self.formes[word].ident] = {self.ucis[-1].uces[-1].ident : 1} @@ -514,7 +514,7 @@ class Corpus : self.lems[lem].act = 2 elif self.lems[lem].gram in gramact : self.lems[lem].act = 1 - elif gramsup is not None : + elif gramsup is not None and self.lems[lem].gram not in gramact: if self.lems[lem].gram in gramsup : self.lems[lem].act = 2 else : @@ -1081,7 +1081,7 @@ class BuildCorpus : def firstclean(self, txt) : txt = txt.replace(u'’',"'") txt = txt.replace(u'œ', u'oe') - return txt.replace('...',u' £$£ ').replace('?',' ? ').replace('.',' . ').replace('!', ' ! ').replace(',',' , ').replace(';', ' ; ').replace(':',' : ').replace(u'…', ' £$£ ') + return txt.replace('...',u' £$£ ').replace('?',' ? ').replace('.',' . ').replace('!', ' ! ').replace(',',' , ').replace(';', ' ; ').replace(':',' : ').replace(u'…', u' £$£ ') def make_cleans(self, txt) : for clean in self.cleans :