X-Git-Url: http://iramuteq.org/git?p=iramuteq;a=blobdiff_plain;f=corpus.py;h=0635f2a3f09675b513094ab2c3078f811587c1b9;hp=cd6c36467b587157ab7bbc3b36a3c2efd5edf608;hb=95be18f0436f49152f463280dbe016a3ecd3dbd0;hpb=e531d59ce2d8f72dfc3138446db913af1f20b134 diff --git a/corpus.py b/corpus.py index cd6c364..0635f2a 100644 --- a/corpus.py +++ b/corpus.py @@ -275,8 +275,12 @@ class Corpus : if self.iduces is None : self.iduces = dict([[uce.ident, uce] for uci in self.ucis for uce in uci.uces]) - def make_lexitable(self, mineff, etoiles) : - tokeep = [lem for lem in self.lems if self.lems[lem].freq >= mineff] + def make_lexitable(self, mineff, etoiles, gram = 0) : + if gram == 0 : + grams = {1:'', 2:''} + else : + grams = {gram :''} + tokeep = [lem for lem in self.lems if self.lems[lem].freq >= mineff and self.lems[lem].act in grams] etuces = [[] for et in etoiles] for uci in self.ucis : get = list(set(uci.etoiles).intersection(etoiles)) @@ -434,7 +438,7 @@ class Corpus : f.write(guce.encode(self.parametres['syscoding']) + '\n\n') def export_classe(self, outf, classe, lem = False) : - sts = self.lc[classe] + sts = self.lc[classe - 1] res = self.getconcorde(sts) self.make_iduces() with open(outf, 'w') as f : @@ -1081,7 +1085,7 @@ class BuildCorpus : def firstclean(self, txt) : txt = txt.replace(u'’',"'") txt = txt.replace(u'œ', u'oe') - return txt.replace('...',u' £$£ ').replace('?',' ? ').replace('.',' . ').replace('!', ' ! ').replace(',',' , ').replace(';', ' ; ').replace(':',' : ').replace(u'…', ' £$£ ') + return txt.replace('...',u' £$£ ').replace('?',' ? ').replace('.',' . ').replace('!', ' ! ').replace(',',' , ').replace(';', ' ; ').replace(':',' : ').replace(u'…', u' £$£ ') def make_cleans(self, txt) : for clean in self.cleans :