X-Git-Url: http://iramuteq.org/git?p=iramuteq;a=blobdiff_plain;f=corpusNG.py;h=11961e109bf5c7662aba1fcbce2ba34208358415;hp=6a027dc1509bbbf6f532538e2d2330d7d182d53b;hb=2569fdce19932457c65854f858191e45018622ea;hpb=a503f041dc4947ee21c1d353ddd05ddb13a5e322 diff --git a/corpusNG.py b/corpusNG.py index 6a027dc..11961e1 100644 --- a/corpusNG.py +++ b/corpusNG.py @@ -512,18 +512,20 @@ class Corpus : et = etoile.split('_') if et[0] in det : try : - if et[1] in det[et[0]] : - det[et[0]][et[1]] += 1 + endet = '_'.join(et[1:]) + if endet in det[et[0]] : + det[et[0]][endet] += 1 else : - det[et[0]][et[1]] = 1 + det[et[0]][endet] = 1 except IndexError : det[et[0]] += 1 else : try : - det[et[0]] = {et[1] :1} + endet = '_'.join(et[1:]) + det[et[0]] = {endet :1} except IndexError : det[et[0]] = 1 - print det + return det def make_and_write_profile_et(self, ucecl, fileout) : log.info('etoiles/classes') @@ -906,8 +908,9 @@ class BuildCorpus : self.cleans.append(self.dolower) if self.corpus.parametres.get('firstclean', 1) : self.cleans.append(self.firstclean) - self.rule = self.corpus.parametres.get('keep_caract', u"^a-zA-Z0-9àÀâÂäÄáÁéÉèÈêÊëËìÌîÎïÏòÒôÔöÖùÙûÛüÜçÇßœŒ’ñ.:,;!?*'_") - self.cleans.append(self.docharact) + if self.corpus.parametres['charact'] : + self.rule = self.corpus.parametres.get('keep_caract', u"^a-zA-Z0-9àÀâÂäÄáÁéÉèÈêÊëËìÌîÎïÏòÒôÔöÖùÙûÛüÜçÇßœŒ’ñ.:,;!?*'_") + self.cleans.append(self.docharact) if self.corpus.parametres.get('expressions', 1) : self.cleans.append(self.make_expression) if self.corpus.parametres.get('apos', 1) :