X-Git-Url: http://iramuteq.org/git?p=iramuteq;a=blobdiff_plain;f=corpusNG.py;h=19cfd29e4d1e65a3e4bc5b0a0faafa4b77c98979;hp=6a027dc1509bbbf6f532538e2d2330d7d182d53b;hb=1f82fb8e9ed83b8524b00039f1a8c51c2b12a8be;hpb=a503f041dc4947ee21c1d353ddd05ddb13a5e322 diff --git a/corpusNG.py b/corpusNG.py index 6a027dc..19cfd29 100644 --- a/corpusNG.py +++ b/corpusNG.py @@ -512,18 +512,31 @@ class Corpus : et = etoile.split('_') if et[0] in det : try : - if et[1] in det[et[0]] : - det[et[0]][et[1]] += 1 + endet = '_'.join(et[1:]) + if endet in det[et[0]] : + det[et[0]][endet] += 1 else : - det[et[0]][et[1]] = 1 + det[et[0]][endet] = 1 except IndexError : det[et[0]] += 1 else : try : - det[et[0]] = {et[1] :1} + endet = '_'.join(et[1:]) + det[et[0]] = {endet :1} except IndexError : det[et[0]] = 1 - print det + return det + + def make_etline(self, listet) : + etuces = [[] for et in listet] + for uci in self.ucis : + get = list(set(uci.etoiles).intersection(listet)) + if len(get) > 1 : + return '2 variables sur la meme ligne' + elif get != [] : + etuces[listet.index(get[0])] += [uce.ident for uce in uci.uces] + return etuces + def make_and_write_profile_et(self, ucecl, fileout) : log.info('etoiles/classes') @@ -906,8 +919,9 @@ class BuildCorpus : self.cleans.append(self.dolower) if self.corpus.parametres.get('firstclean', 1) : self.cleans.append(self.firstclean) - self.rule = self.corpus.parametres.get('keep_caract', u"^a-zA-Z0-9àÀâÂäÄáÁéÉèÈêÊëËìÌîÎïÏòÒôÔöÖùÙûÛüÜçÇßœŒ’ñ.:,;!?*'_") - self.cleans.append(self.docharact) + if self.corpus.parametres['charact'] : + self.rule = self.corpus.parametres.get('keep_caract', u"^a-zA-Z0-9àÀâÂäÄáÁéÉèÈêÊëËìÌîÎïÏòÒôÔöÖùÙûÛüÜçÇßœŒ’ñ.:,;!?*'_") + self.cleans.append(self.docharact) if self.corpus.parametres.get('expressions', 1) : self.cleans.append(self.make_expression) if self.corpus.parametres.get('apos', 1) :