X-Git-Url: http://iramuteq.org/git?p=iramuteq;a=blobdiff_plain;f=tableau.py;h=71e73a78ee87880abf67ede7ee366bbe33e8de76;hp=06f58e83cf15918d795403a7bf9a38a9bd6d6cc5;hb=b5c29c4ff9eaa0979a0bb524d9395301c447783d;hpb=7fb5b2b86f6c9a0617208ee85211177c23d12f47 diff --git a/tableau.py b/tableau.py index 06f58e8..71e73a7 100644 --- a/tableau.py +++ b/tableau.py @@ -1,18 +1,20 @@ # -*- coding: utf-8 -*- #Author: Pierre Ratinaud #Copyright (c) 2010 Pierre Ratinaud -#Lisense: GNU/GPL +#License: GNU/GPL import codecs import sys import xlrd import ooolib import os -import tempfile +from copy import copy import re import htmlentitydefs import shelve +from functions import DoConf from uuid import uuid4 +from chemins import PathOut import logging log = logging.getLogger('iramuteq.tableau') @@ -51,18 +53,41 @@ def UpdateDico(Dico, word, line): Dico[word][1].append(line) else: Dico[word] = [1, [line]] + +def copymatrix(tableau): + log.info('copy matrix') + copymat = Tableau(tableau.parent, parametres = tableau.parametres) + copymat.linecontent = copy(tableau.linecontent) + copymat.csvtable = copy(tableau.csvtable) + copymat.pathout = copy(tableau.pathout) + copymat.colnames = copy(tableau.colnames) + copymat.rownb = copy(tableau.rownb) + copymat.colnb = copy(tableau.colnb) + if copymat.csvtable is None : + copymat.open() + return copymat class Tableau() : - def __init__(self, parent, filename = '', filetype = 'csv', encodage = 'utf-8') : + def __init__(self, parent, filename = '', filetype = 'csv', encodage = 'utf-8', parametres = None) : self.parent = parent - self.parametre = {'filename' : filename} - self.parametre['filetype'] = filetype - self.parametre['encodage'] = encodage - self.parametre['pathout'] = os.path.dirname(os.path.abspath(filename)) - self.parametre['mineff'] = 3 - self.parametre['syscoding'] = sys.getdefaultencoding() - self.parametre['type'] = 'matrix' - self.parametre['name'] = 'unNOm' + if parametres is None : + self.parametres = DoConf(self.parent.ConfigPath['matrix']).getoptions('matrix') + self.parametres['pathout'] = PathOut(filename, 'matrix').mkdirout() + self.parametres['originalpath'] = filename + self.parametres['filetype'] = filetype + self.parametres['encodage'] = encodage + #self.parametre['pathout'] = os.path.dirname(os.path.abspath(filename)) + self.parametres['mineff'] = 3 + self.parametres['syscoding'] = sys.getdefaultencoding() + self.parametres['type'] = 'matrix' + self.parametres['matrix_name'] = os.path.basename(filename) + self.parametres['uuid'] = str(uuid4()) + self.parametres['shelves'] = os.path.join(self.parametres['pathout'], 'shelve.db') + self.parametres['ira'] = os.path.join(self.parametres['pathout'], 'Matrix.ira') + else : + self.parametres = parametres + self.pathout = PathOut(filename = filename, dirout = self.parametres['pathout']) + self.csvtable = None self.sups = {} self.actives = {} self.listactives = None @@ -77,13 +102,13 @@ class Tableau() : self.colnb = 0 self.rownb = 0 self.classes = [] - self.parametres = self.parametre + #self.parametres = self.parametre def read_tableau(self, fileout) : d=shelve.open(fileout) - self.parametre = d['parametre'] - if 'syscoding' not in self.parametre : - self.parametre['syscoding'] = sys.getdefaultencoding() + #self.parametres = d['parametres'] + #if 'syscoding' not in self.parametres : + # self.parametres['syscoding'] = sys.getdefaultencoding() self.actives = d['actives'] self.sups = d['sups'] self.classes = d['classes'] @@ -96,11 +121,20 @@ class Tableau() : self.datas = d['datas'] if 'lchi' in d : self.lchi = d['lchi'] + if 'content' in d : + self.content = d['content'] d.close() + + def open(self): + print 'open matrix' + self.read_csvfile() + self.colnames = self.csvtable[0][1:] + self.rownb = len(self.linecontent) + self.colnb = len(self.linecontent[0]) def save_tableau(self, fileout) : d=shelve.open(fileout) - d['parametre'] = self.parametre + d['parametres'] = self.parametres d['actives'] = self.actives d['sups'] = self.sups d['classes'] = self.classes @@ -113,49 +147,60 @@ class Tableau() : d['datas'] = self.datas if 'lchi' in dir(self) : d['lchi'] = self.lchi + d['content'] = self.content d.close() def make_content(self) : - if self.parametre['filetype'] == 'csv' : + self.pathout.createdir(self.parametres['pathout']) + if self.parametres['filetype'] == 'csv' : self.read_csv() - elif self.parametre['filetype'] == 'xls' : + elif self.parametres['filetype'] == 'xls' : self.read_xls() - elif self.parametre['filetype'] == 'ods' : + elif self.parametres['filetype'] == 'ods' : self.read_ods() - self.parametre['csvfile'] = tempfile.mktemp(dir=self.parent.TEMPDIR) + self.parametres['csvfile'] = os.path.join(self.parametres['pathout'], 'csvfile.csv') + self.make_tmpfile() + print self.parametres + DoConf().makeoptions(['matrix'],[self.parametres], self.parametres['ira']) + self.parent.history.addMatrix(self.parametres) + + def make_content_simple(self): + self.parametres['csvfile'] = os.path.join(self.parametres['pathout'], 'csvfile.csv') self.make_tmpfile() + DoConf().makeoptions(['matrix'],[self.parametres], self.parametres['ira']) + self.parent.history.addMatrix(self.parametres) def read_xls(self) : #FIXME : encodage #print '############## ENCODING IN EXCEL #######################' #datafile = xlrd.open_workbook(self.parametre['filename'], encoding_override="azerazerazer") - datafile = xlrd.open_workbook(self.parametre['filename']) - datatable = datafile.sheet_by_index(self.parametre['sheetnb']-1) - self.linecontent = [[str(datatable.cell_value(rowx = i, colx = j)) for j in range(datatable.ncols)] for i in range(datatable.nrows)] + datafile = xlrd.open_workbook(self.parametres['originalpath']) + datatable = datafile.sheet_by_index(self.parametres['sheetnb']-1) + self.linecontent = [[str(datatable.cell_value(rowx = i, colx = j)).replace(u'"','').replace(u';',' ').replace(u'\n',' ').replace('\r', ' ').replace('\t', ' ').strip() for j in range(datatable.ncols)] for i in range(datatable.nrows)] def read_ods(self) : - doc = ooolib.Calc(opendoc=self.parametre['filename']) + doc = ooolib.Calc(opendoc=self.parametres['originalpath']) doc.set_sheet_index(0) (cols, rows) = doc.get_sheet_dimensions() - print cols, rows for row in range(1, rows + 1): ligne = [] for col in range(1, cols + 1): data = doc.get_cell_value(col, row) if data is not None : - ligne.append(unescape(data[1])) + ligne.append(unescape(data[1].replace(u'"','').replace(u';',' ').replace(u'\n', ' ').replace('\t', ' ').strip())) else : ligne.append('') self.linecontent.append(ligne) def read_csv(self) : - with codecs.open(self.parametre['filename'], 'r', self.parametre['encodage']) as f : + with codecs.open(self.parametres['originalpath'], 'r', self.parametres['encodage']) as f : content = f.read() - self.linecontent = [line.replace('"','').split(self.parametre['colsep']) for line in content.splitlines()] + self.linecontent = [line.split(self.parametres['colsep']) for line in content.splitlines()] + self.linecontent = [[val.replace(u'"','').replace(u';',' ').replace('\t', ' ').strip() for val in line] for line in self.linecontent] def write_csvfile(self) : - with open(self.parametre['csvfile'], 'w') as f : - f.write('\n'.join([';'.join(line) for line in self.csvtable])) + with open(self.parametres['csvfile'], 'w') as f : + f.write('\n'.join(['\t'.join(line) for line in self.csvtable])) def make_tmpfile(self) : self.rownb = len(self.linecontent) @@ -179,14 +224,26 @@ class Tableau() : self.csvtable = [[self.idname] + self.colnames] + [[self.rownames[i]] + self.linecontent[i] for i in range(len(self.rownames))] self.write_csvfile() - def show_tab(self) : - self.parent.content = self.csvtable - self.parent.ShowMenu(_("View")) - self.parent.ShowMenu(_("Spreadsheet analysis")) - self.parent.ShowMenu(_("Text analysis"), False) - self.parent.type = "Data" - self.parent.DataPop = False - self.parent.OnViewData('') + def read_csvfile(self): + with codecs.open(self.parametres['csvfile'], 'r', self.parametres['syscoding']) as f: + self.csvtable = [line.split('\t') for line in f.read().splitlines()] + self.linecontent = [line[1:] for line in self.csvtable] + self.linecontent.pop(0) + + def extractfrommod(self, col, val): + return ([''] + self.colnames) + [line for line in self.csvtable[1:] if line[col + 1] == val] + + def splitfromvar(self, col): + newtabs = {} + for line in self.csvtable[1:] : + mod = line[col+1] + if mod in newtabs : + newtabs[mod].append(line) + else : + newtabs[mod] = [line] + for mod in newtabs : + newtabs[mod].insert(0, [''] + self.colnames) + return newtabs def check_rownames(self) : if len(self.rownames) == len(list(set(self.rownames))) : @@ -198,9 +255,9 @@ class Tableau() : def make_unique_list(self) : return list(set([val for line in self.linecontent for val in line if val.strip() != ''])) - def make_dico(self, linecontent) : + def make_dico(self, selcol) : dico = {} - for i, line in enumerate(linecontent) : + for i, line in enumerate(selcol) : for forme in line: if forme.strip() != '' : UpdateDico(dico, forme, i) @@ -211,8 +268,20 @@ class Tableau() : selcol = [[val for i, val in enumerate(row) if i in dc] for row in self.linecontent] return selcol + def countmultiple(self, liscol): + return self.make_dico(self.select_col(liscol)) + + def getactlistfromselection(self, listact) : + selcol = self.select_col(listact) + self.actives = self.make_dico(selcol) + return [[val, self.actives[val][0]] for val in self.actives] + + def make_listactives(self) : + self.listactives = [val for val in self.actives if val != 'NA' and self.actives[val] >= self.parametres['mineff']] + def write01(self, fileout, dico, linecontent) : - self.listactives = [val for val in dico if val != 'NA' and dico[val] >= self.parametre['mineff']] + if self.listactives is None : + self.listactives = [val for val in dico if val != 'NA' and dico[val] >= self.parametres['mineff']] out = [['0' for forme in self.listactives] for line in linecontent] for i, forme in enumerate(self.listactives) : for line in dico[forme][1] : @@ -226,7 +295,7 @@ class Tableau() : def make_01_from_selection(self, listact, listsup = None, dowrite = True) : selcol = self.select_col(listact) self.actives = self.make_dico(selcol) - self.write01(self.dictpathout['mat01'], self.actives, selcol) + self.write01(self.pathout['mat01.csv'], self.actives, selcol) if listsup is not None : selcol = self.select_col(listsup) self.sups = self.make_dico(selcol) @@ -239,7 +308,7 @@ class Tableau() : UpdateDico(self.sups, forme, i) else: UpdateDico(self.actives, forme, i) - self.listactives = [val for val in self.actives if self.actives[val][0] >= self.parametre['mineff']] + self.listactives = [val for val in self.actives if self.actives[val][0] >= self.parametres['mineff']] table = [['0' for i in range(len(self.listactives))] for j in range(self.rownb)] for i, val in enumerate(self.listactives) : for j, line in enumerate(self.linecontent) : @@ -251,12 +320,12 @@ class Tableau() : with open(fileout, 'w') as f: f.write('\n'.join([';'.join(line) for line in table])) - def printtable(self, filename, Table): + def printtable(self, filename, Table, sep = ';'): with open(filename, 'w') as f : - f.write('\n'.join([';'.join(line) for line in Table])) + f.write('\n'.join([sep.join(line) for line in Table])) def buildprofil(self) : - with open(self.dictpathout['uce'], 'rU') as filein : + with open(self.pathout['uce'], 'rU') as filein : content = filein.readlines() content.pop(0) lsucecl = [] @@ -283,7 +352,7 @@ class Tableau() : if cl == i + 1 : if active in self.linecontent[uce]: line[i + 1] += 1 - if sum(line[1:]) > self.parametre['mineff']: + if sum(line[1:]) > self.parametres['mineff']: tablecont.append([line[0]] + [`don` for don in line if type(don) == type(1)]) tablecontet = [] @@ -298,8 +367,8 @@ class Tableau() : line[i + 1] += 1 tablecontet.append([line[0]] + [`don` for don in line if type(don) == type(1)]) - self.printtable(self.dictpathout['ContEtOut'], tablecontet) - self.printtable(self.dictpathout['Contout'], tablecont) + self.printtable(self.pathout['ContEtOut'], tablecontet) + self.printtable(self.pathout['Contout'], tablecont) def get_colnames(self) : return self.colnames[:] @@ -307,7 +376,6 @@ class Tableau() : def make_table_from_classe(self, cl, la) : ln = [line[0] for line in self.classes if line[1] == cl] out = [['0' for col in la] for line in ln] - print self.actives for i, act in enumerate(la) : for j, line in enumerate(ln) : if line in self.actives[act][1] :