X-Git-Url: http://iramuteq.org/git?p=iramuteq;a=blobdiff_plain;f=corpus.py;h=d4357cf97559ce4c4b4cdcd8886494e469d06603;hp=e0437077ff0fc5642a68f89a82ca2b5777da7ea3;hb=8cebf6f5e4531b73d8ba32f834c10cefd1086d80;hpb=d1d24d86422c9e9805516190ea17a379201f9300 diff --git a/corpus.py b/corpus.py index e043707..d4357cf 100644 --- a/corpus.py +++ b/corpus.py @@ -11,7 +11,6 @@ from time import time from functions import decoupercharact, ReadDicoAsDico, DoConf import re import sqlite3 -import numpy import itertools import logging from operator import itemgetter @@ -211,6 +210,18 @@ class Corpus : def getucesfrometoile(self, etoile) : return [uce.ident for uci in self.ucis for uce in uci.uces if etoile in uci.etoiles] + def getetoileuces(self) : + log.info('get uces etoiles') + etoileuces = {} + for uci in self.ucis : + etoiles = uci.etoiles[1:] + uci.paras + for et in etoiles : + if et in etoileuces : + etoileuces[et] += [uce.ident for uce in uci.uces] + else : + etoileuces[et] = [uce.ident for uce in uci.uces] + return etoileuces + def getucefromid(self, uceid) : if self.iduces is None : self.make_iduces() return self.iduces[uceid] @@ -565,13 +576,16 @@ class Corpus : elif get != [] : etuces[listet.index(get[0])] += [uce.ident for uce in uci.uces] return etuces - def make_and_write_profile_et(self, ucecl, fileout) : log.info('etoiles/classes') - etoiles = self.make_etoiles() + etoileuces = self.getetoileuces() + etoileuces = dict([[et, etoileuces[et]] for et in etoileuces if len(etoileuces[et]) > 1]) with open(fileout, 'w') as f : - f.write('\n'.join([';'.join([etoile] + [`len(set(self.getucesfrometoile(etoile)).intersection(classe))` for classe in ucecl]) for etoile in etoiles]).encode(self.parametres['syscoding'])) + f.write('\n'.join([';'.join([et] + [`len(set(etoileuces[et]).intersection(classe))` for classe in ucecl]) for et in etoileuces]).encode(self.parametres['syscoding'])) + #etoiles = self.make_etoiles() + #with open(fileout, 'w') as f : + # f.write('\n'.join([';'.join([etoile] + [`len(set(self.getucesfrometoile(etoile)).intersection(classe))` for classe in ucecl]) for etoile in etoiles]).encode(self.parametres['syscoding'])) def make_colored_corpus(self) : ucecl = {}