iramuteq.org Git - iramuteq/blob - functions.py

   1 # -*- coding: utf-8 -*-
   2 #Author: Pierre Ratinaud
   3 #Copyright (c) 2008-2020 Pierre Ratinaud
   4 #modification pour python 3 : Laurent Mérat, 6x7 - mai 2020
   5 #License: GNU/GPL
   6
   7 #------------------------------------
   8 # import des modules python
   9 #------------------------------------
  10 import re
  11 from subprocess import Popen, call, PIPE
  12 import _thread
  13 import os
  14 import ast
  15 import sys
  16 import csv
  17 import platform
  18 import traceback
  19 import codecs
  20 import locale
  21 import datetime
  22 from copy import copy
  23 from shutil import copyfile
  24 import shelve
  25 import json
  26 #from dialog import BugDialog
  27 import logging
  28 from operator import itemgetter
  29
  30 #------------------------------------
  31 # import des modules wx
  32 #------------------------------------
  33 import wx
  34 import wx.adv
  35
  36 #------------------------------------
  37 # import des fichiers du projet
  38 #------------------------------------
  39 from configparser import ConfigParser
  40
  41
  42 log = logging.getLogger('iramuteq')
  43
  44
  45 indices_simi = ['cooccurrence' ,'pourcentage de cooccurrence','Russel','Jaccard', 'Kulczynski1', 'Kulczynski2', 'Mountford', 'Fager', 'simple matching', 'Hamman', 'Faith', 'Tanimoto', 'Dice', 'Phi', 'Stiles', 'Michael', 'Mozley', 'Yule', 'Yule2', 'Ochiai', 'Simpson', 'Braun-Blanquet','Chi-squared', 'Phi-squared', 'Tschuprow', 'Cramer', 'Pearson', 'binomial']
  46
  47 def open_folder(folder):
  48     if sys.platform == "win32":
  49         os.startfile(folder)
  50     else:
  51         opener ="open" if sys.platform == "darwin" else "xdg-open"
  52         #call([opener, folder])
  53         call(["%s %s &" % (opener, folder)], shell=True)
  54
  55 def normpath_win32(path) :
  56     if not sys.platform == 'win32' :
  57         return path
  58     while '\\\\' in path :
  59         path = path.replace('\\\\', '\\')
  60     if path.startswith('\\') and not path.startswith('\\\\') :
  61         path = '\\' + path
  62     return path
  63
  64 class TGen :
  65     def __init__(self, path = None, encoding = 'utf8'):
  66         self.path = path
  67         self.tgen = {}
  68         self.encoding = encoding
  69
  70     def __getitem__(self, key):
  71         return self.tgen[key]
  72
  73     def read(self, path = None):
  74         if path is None :
  75             path = self.path
  76         with codecs.open(path, 'r', self.encoding) as f :
  77             tgen = f.read()
  78         tgen = [line.split('\t') for line in tgen.splitlines()]
  79         tgen = dict([[line[0], line[1:]] for line in tgen])
  80         self.tgen = tgen
  81         self.path = path
  82
  83     def write(self, path = None):
  84         if path is None :
  85             path = self.path
  86         with open(path, 'w', encoding='utf8') as f :
  87             f.write('\n'.join(['\t'.join([val] + self.tgen[val]) for val in self.tgen]))
  88
  89     def writetable(self, pathout, tgens, totocc):
  90         etoiles = list(totocc.keys())
  91         etoiles.sort()
  92         with open(pathout, 'w', encoding='utf8') as f :
  93             line = '\t'.join(['tgens'] + etoiles) + '\n'
  94             f.write(line)
  95             for t in tgens :
  96                 line = '\t'.join([t] + [repr(tgens[t][et]) for et in etoiles]) + '\n'
  97                 f.write(line)
  98             i = 0
  99             totname = 'total'
 100             while totname + repr(i) in tgens :
 101                 i += 1
 102             totname = totname + repr(i)
 103             line = '\t'.join([totname] + [repr(totocc[et]) for et in etoiles]) + '\n'
 104             f.write(line)
 105
 106 class History :
 107     def __init__(self, filein, syscoding = 'utf8') :
 108         self.filein = filein
 109         self.syscoding = syscoding
 110         self.corpus = {}
 111         self.openedcorpus = {}
 112         self.openedmatrix = {}
 113         self.orph = []
 114         self.analyses = {}
 115         self.history = []
 116         self.opened = {}
 117         self.read()
 118
 119     def read(self) :
 120         d = shelve.open(self.filein)
 121         self.history = d.get('history', [])
 122         self.matrix = d.get('matrix', [])
 123         self.ordercorpus = dict([[corpus['uuid'], i] for i, corpus in enumerate(self.history)])
 124         self.corpus = dict([[corpus['uuid'], corpus] for corpus in self.history])
 125         self.analyses = dict([[analyse['uuid'], analyse] for corpus in self.history for analyse in corpus.get('analyses', [])])
 126         self.matrixanalyse = dict([[mat['uuid'], mat] for mat in self.matrix])
 127         self.ordermatrix = dict([[matrix['uuid'], i] for i, matrix in enumerate(self.matrix)])
 128         d.close()
 129
 130     def write(self) :
 131         d = shelve.open(self.filein)
 132         d['history'] = self.history
 133         d['matrix'] = self.matrix
 134         d.close()
 135
 136     def add(self, analyse) :
 137         log.info('add to history %s' % analyse.get('corpus_name', 'pas un corpus'))
 138         tosave = {'uuid' : analyse['uuid'], 'ira': analyse['ira'], 'type' : analyse['type']}
 139         if tosave['uuid'] in self.corpus :
 140             log.info('problem : this uuid is already in history : %s' % tosave['uuid'])
 141             return
 142         if analyse.get('corpus', False) :
 143             if analyse['uuid'] in self.analyses :
 144                 return
 145             tosave['corpus'] = analyse['corpus']
 146             tosave['name'] = analyse['name']
 147             acorpus_uuid =  analyse['corpus']
 148             if acorpus_uuid in self.corpus :
 149                 if 'analyses' in self.history[self.ordercorpus[acorpus_uuid]] :
 150                     self.history[self.ordercorpus[acorpus_uuid]]['analyses'].append(tosave)
 151                 else :
 152                     self.history[self.ordercorpus[acorpus_uuid]]['analyses'] = [tosave]
 153             else :
 154                 self.orph.append(tosave)
 155         else :
 156             tosave['corpus_name'] = analyse['corpus_name']
 157             #self.ordercorpus[tosave['uuid']] = len(history)
 158             #self.corpus[tosave['uuid']] = analyse
 159             self.history.append(tosave)
 160         self.write()
 161         self.read()
 162
 163     def addMatrix(self, analyse) :
 164         tosave = analyse
 165         #tosave['matrix_name'] = analyse['matrix_name']
 166         tosave['analyses'] = []
 167         self.matrix.append(tosave)
 168         self.write()
 169         self.read()
 170
 171     def addMatrixAnalyse(self, analyse) :
 172         tosave = {'uuid' : analyse['uuid'], 'ira': analyse['ira'], 'type' : analyse['type'], 'matrix' : analyse['matrix']}
 173         tosave['name'] = analyse['name']
 174         if tosave['matrix'] in self.ordermatrix :
 175             self.matrix[self.ordermatrix[tosave['matrix']]]['analyses'].append(tosave)
 176         self.write()
 177         self.read()
 178
 179     def addmultiple(self, analyses) :
 180         log.info('add multiple')
 181         for analyse in analyses :
 182             tosave = {'uuid' : analyse['uuid'], 'ira': analyse['ira'], 'type' : analyse['type']}
 183             corpus = analyse['corpus']
 184             tosave['corpus'] = corpus
 185             tosave['name'] = analyse['name']
 186             if corpus in self.corpus :
 187                 if 'analyses' in self.history[self.ordercorpus[corpus]] :
 188                     self.history[self.ordercorpus[corpus]]['analyses'].append(tosave)
 189                 else :
 190                     self.history[self.ordercorpus[corpus]]['analyses'] = [tosave]
 191         self.write()
 192         self.read()
 193
 194     def delete(self, analyse, corpus = False) :
 195         log.info('delete %s' % analyse.get('name', 'noname'))
 196         if corpus :
 197             self.history.pop(self.ordercorpus[analyse['uuid']])
 198             if analyse['uuid'] in self.openedcorpus :
 199                 del self.openedcorpus[analyse['uuid']]
 200             log.info('delete corpus : %s' % analyse['uuid'])
 201         elif analyse['uuid'] in self.analyses :
 202             todel = [i for i, ana in enumerate(self.corpus[analyse['corpus']]['analyses']) if ana['uuid'] == analyse['uuid']][0]
 203             self.history[self.ordercorpus[analyse['corpus']]]['analyses'].pop(todel)
 204         elif analyse['uuid'] in self.matrixanalyse :
 205             self.matrix = [mat for mat in self.matrix if mat['uuid'] != analyse['uuid']]
 206         elif analyse.get('matrix', False) in self.matrixanalyse :
 207             analyses = self.matrix[self.ordermatrix[analyse['matrix']]]['analyses']
 208             topop = [i for i, val in enumerate(analyses) if analyse['uuid'] == val['uuid']][0]
 209             analyses.pop(topop)
 210             self.matrix[self.ordermatrix[analyse['matrix']]]['analyses'] = analyses
 211         self.write()
 212         self.read()
 213
 214     def addtab(self, analyse) :
 215         self.opened[analyse['uuid']] = analyse
 216
 217     def rmtab(self, analyse) :
 218         del self.opened[analyse['uuid']]
 219
 220     def update(self, analyse) :
 221         if 'matrix_name' in analyse :
 222             self.matrixanalyse[analyse['uuid']].update(analyse)
 223         elif 'corpus_name' in analyse :
 224             self.corpus[analyse['uuid']].update(analyse)
 225         elif 'corpus' in analyse :
 226             self.analyses[analyse['uuid']].update(analyse)
 227         else :
 228             toupdate = [an for an in self.matrixanalyse[analyse['matrix']]['analyses'] if an['uuid'] == analyse['uuid']]
 229             toupdate[0].update(analyse)
 230         self.write()
 231         self.read()
 232
 233     def clean(self) :
 234         corpustodel = [corpus for corpus in self.history if not os.path.exists(corpus['ira'])]
 235         print(corpustodel)
 236         for corpus in corpustodel :
 237             print('cleaning :', corpus['corpus_name'])
 238             self.delete(corpus, corpus = True)
 239         anatodel = [analyse for corpus in self.history for analyse in corpus.get('analyses', []) if not os.path.exists(analyse.get('ira', '/'))]
 240         for analyse in anatodel :
 241             print('cleaning :', analyse['name'])
 242             self.delete(analyse)
 243
 244     def dostat(self):
 245         todel = {}
 246         tokens = 0
 247         corpusnb = {}
 248         subnb = 0
 249         analysenb = 0
 250         hours = 0
 251         minutes = 0
 252         secondes = 0
 253         ha = 0
 254         ma = 0
 255         sa = 0
 256         for corpus in self.history :
 257             analysenb += len(corpus.get('analyses', []))
 258             analyses = corpus.get('analyses', [])
 259             for analyse in analyses :
 260                 if os.path.exists(analyse['ira']) :
 261                     ana = DoConf(analyse['ira']).getoptions()
 262                     if 'time' in ana :
 263                         time = ana['time'].split()
 264                         ha += int(time[0].replace('h','')) * 3600
 265                         ma += int(time[1].replace('m','')) * 60
 266                         sa += int(time[2].replace('s',''))
 267             if os.path.exists(corpus['ira']) :
 268                 param = DoConf(corpus['ira']).getoptions()
 269                 time = param.get('time','0h 0m 0s')
 270                 time = time.split()
 271                 hours += int(time[0].replace('h','')) * 3600
 272                 minutes += int(time[1].replace('m','')) * 60
 273                 secondes += int(time[2].replace('s',''))
 274                 if param.get('originalpath', False) :
 275                     if param['originalpath'] in corpusnb :
 276                         corpusnb[param['originalpath']] += 1
 277                         tokens += int(param['occurrences'])
 278                     else :
 279                         corpusnb[param['originalpath']] = 1
 280                     #print param
 281                 else :
 282                     subnb += 1
 283             else :
 284                 if corpus['ira'] in todel :
 285                     todel['ira'] += 1
 286                 else :
 287                     todel['ira'] = 1
 288         print('Nbr total de corpus : %s' % len(self.history))
 289         corpus_nb = len(corpusnb) + len(todel)
 290         print('Nbr de corpus différents : %s' % corpus_nb)
 291         lentodel = len(todel)
 292         print('Nbr de corpus à supprimer : %s' % lentodel)
 293         print('Nbr de sous corpus : %s' % subnb)
 294         print("Nbr total d'occurrences : %s" % tokens)
 295         print('Moyenne occurrences par corpus : %f' % (tokens/corpus_nb))
 296         print('---------------------')
 297         print("Nbr total d'analyses : %s" % analysenb)
 298         print('Temps total indexation : %f h' % ((hours+minutes+secondes) / 3600))
 299         print('Temps total analyses :  %f h' % ((ha+ma+sa) / 3600))
 300
 301     def __str__(self) :
 302         return str(self.history)
 303
 304 class DoConf :
 305     def __init__(self, configfile=None, diff = None, parametres = None) :
 306         self.configfile = configfile
 307         self.conf = ConfigParser(interpolation=None) # pourquoi ce paramètre ???
 308
 309         if configfile is not None :
 310             configfile = normpath_win32(configfile)
 311             self.conf.read_file(codecs.open(configfile, 'r', 'utf8'))
 312         self.parametres = {}
 313         if parametres is not None :
 314             self.doparametres(parametres)
 315
 316     def doparametres(self, parametres) :
 317         return parametres
 318
 319     def getsections(self) :
 320         return self.conf.sections()
 321
 322     def getoptions(self, section = None, diff = None):
 323         parametres = {}
 324         if section is None :
 325             section = self.conf.sections()[0]
 326         for option in self.conf.options(section) :
 327             if self.conf.get(section, option).isdigit() :
 328                 parametres[option] = int(self.conf.get(section, option))
 329             elif self.conf.get(section, option) == 'False' :
 330                 parametres[option] = False
 331             elif self.conf.get(section, option) == 'True' :
 332                 parametres[option] = True
 333             elif self.conf.get(section, option).startswith('(') and self.conf.get(section, option).endswith(')') :
 334                 parametres[option] = ast.literal_eval(self.conf.get(section, option))
 335             elif self.conf.get(section, option).startswith('[') and self.conf.get(section, option).endswith(']') :
 336                 parametres[option] = ast.literal_eval(self.conf.get(section, option))
 337             else :
 338                 parametres[option] = self.conf.get(section, option)
 339         if 'type' not in parametres :
 340             parametres['type'] = section
 341         return parametres
 342
 343     def makeoptions(self, sections, parametres, outfile = None) :
 344         txt = ''
 345         for i, section in enumerate(sections) :
 346             txt += '[%s]\n' % section
 347             if not self.conf.has_section(section) :
 348                 self.conf.add_section(section)
 349             for option in parametres[i] :
 350                 if isinstance(parametres[i][option], int) :
 351                     self.conf.set(section, option, repr(parametres[i][option]))
 352                     txt += '%s = %i\n' % (option, parametres[i][option])
 353                 elif isinstance(parametres[i][option], str) :
 354                     self.conf.set(section, option, parametres[i][option])
 355                     txt += '%s = %s\n' % (option, parametres[i][option])
 356                 elif isinstance(parametres[i][option], wx.Colour) :
 357                     self.conf.set(section, option, str(parametres[i][option]))
 358                     txt += '%s = %s\n' % (option, str(parametres[i][option]))
 359                 elif option == 'analyses' :
 360                     pass
 361                 else :
 362                     self.conf.set(section, option, repr(parametres[i][option]))
 363                     txt += '%s = %s\n' % (option, repr(parametres[i][option]))
 364         if outfile is None :
 365             outfile = self.configfile
 366         outfile = normpath_win32(outfile)
 367         with open(outfile, 'w', encoding="utf-8") as f :
 368             f.write(txt)
 369             #self.conf.write(f)
 370
 371     def totext(self, parametres) :
 372         #txt = ['Corpus']
 373         txt = []
 374         for val in parametres :
 375             if isinstance(parametres[val], int) :
 376                 txt.append(' \t\t: '.join([val, repr(parametres[val])]))
 377             elif isinstance(parametres[val], str) :
 378                 txt.append(' \t\t: '.join([val, parametres[val]]))
 379             elif val in ['listet', 'stars'] :
 380                 pass
 381             else :
 382                 txt.append(' \t\t: '.join([val, repr(parametres[val])]))
 383         return '\n'.join(txt)
 384
 385
 386 def write_tab(tab, fileout) :
 387         csvWriter = csv.writer(open(fileout, 'w', newline='', encoding='utf8'), delimiter=';', quoting = csv.QUOTE_NONNUMERIC)
 388         csvWriter.writerows(tab)
 389
 390 class BugDialog(wx.Dialog):
 391     def __init__(self, *args, **kwds):
 392         # begin wxGlade: MyDialog.__init__
 393         kwds["style"] = wx.DEFAULT_DIALOG_STYLE | wx.STAY_ON_TOP
 394         kwds["size"] = wx.Size(500, 200)
 395         wx.Dialog.__init__(self, *args, **kwds)
 396         self.SetTitle(kwds['title'])
 397         self.text_ctrl_1 = wx.TextCtrl(self, -1, "", style=wx.TE_MULTILINE)
 398         self.text_ctrl_1.SetBackgroundColour('#DDE8EB')
 399         self.button_1 = wx.Button(self, wx.ID_OK, "")
 400
 401         self.__set_properties()
 402         self.__do_layout()
 403         # end wxGlade
 404
 405     def __set_properties(self):
 406         # begin wxGlade: MyDialog.__set_properties
 407         self.SetMinSize(wx.Size(500, 200))
 408         self.text_ctrl_1.SetMinSize(wx.Size(500, 200))
 409
 410         # end wxGlade
 411
 412     def __do_layout(self):
 413         # begin wxGlade: MyDialog.__do_layout
 414         sizer_1 = wx.BoxSizer(wx.VERTICAL)
 415         sizer_1.Add(self.text_ctrl_1, 1, wx.EXPAND, 0)
 416         sizer_1.Add(self.button_1, 0, wx.ALIGN_CENTER_HORIZONTAL, 0)
 417         self.SetSizer(sizer_1)
 418         sizer_1.Fit(self)
 419         self.Layout()
 420
 421
 422 def CreateIraFile(DictPathOut, clusternb, corpname='corpus_name', section = 'analyse'):
 423     AnalyseConf = ConfigParser()
 424     AnalyseConf.read(DictPathOut['ira'])
 425     AnalyseConf.add_section(section)
 426     date = datetime.datetime.now().ctime()
 427     AnalyseConf.set(section, 'date', str(date))
 428     AnalyseConf.set(section, 'clusternb', clusternb)
 429     AnalyseConf.set(section, 'corpus_name', corpname)
 430
 431     fileout = open(DictPathOut['ira'], 'w', encoding='utf8')
 432     AnalyseConf.write(fileout)
 433     fileout.close()
 434
 435 def multisort(liste2d, ordre, indices_tri):
 436
 437     """
 438     methode destinée à remplacer 'comp' qui a disparu en Python 3
 439         tri de tuples sur l'un des éléments du tuple
 440         en principe, elle doit renvoyer les éléments triés selon le principe d'avant
 441         tel que décrit dans la docstring de 'sortedby'
 442
 443         probablement à améliorer pour la rendre d'usage plus général
 444         en acceptant un nombre variable de parametres ???
 445     """
 446
 447     indices_triTuple = indices_tri.Tuple(int, ...)
 448     for key in reversed(indices_tri):
 449         liste2d.sort(key=attrgetter(key), reverse=ordre)
 450     return liste2d
 451
 452 def sortedby(liste2d, direct, *indices):
 453
 454     """
 455         sortedby: sort a list of lists (e.g. a table) by one or more indices
 456                   (columns of the table) and return the sorted list
 457
 458         e.g.
 459          for list = [[2,3],[1,2],[3,1]]:
 460          sortedby(list,1) will return [[3, 1], [1, 2], [2, 3]],
 461          sortedby(list,0) will return [[1, 2], [2, 3], [3, 1]]
 462
 463          elle n'est pas remplacée par la méthode 'multisort' ???
 464
 465     """
 466
 467 # iramuteq original
 468 #    nlist = map(lambda x, indices=indices:
 469 #                 map(lambda i, x=x: x[i], indices) + [x],
 470 #                 list)
 471
 472 # iramuteq passé à 2to3
 473 #    nlist = list(map(lambda x, indices=indices:
 474 #                 list(map(lambda i, x=x: x[i], indices)) + [x],
 475 #                 liste2d))
 476
 477     for key in reversed(indices):
 478         liste2d.sort(key=itemgetter(key), reverse=(direct==2))
 479     return liste2d
 480
 481
 482 #    if direct == 1:
 483 #        nlist.sort()
 484 #         sorted_list = multisort(liste2d, direct, *indices)
 485
 486 #    elif direct == 2:
 487 #        nlist.sort(reverse=True)
 488 #         sorted_list = multisort(liste2d, direct, *indices)
 489
 490 #    return [l[-1] for l in nlist]
 491 #    return sorted_list
 492
 493 def add_type(line, dictlem):
 494     if line[4] in dictlem:
 495         line.append(dictlem[line[4]])
 496     else :
 497         line.append('')
 498     return line
 499
 500 def treat_line_alceste(i, line) :
 501     if line[0] == '*' or line[0] == '*****' :
 502         return line + ['']
 503     if line[5] == 'NA':
 504         print('NA', line[5])
 505         pass
 506     elif float(line[5].replace(',', '.')) < 0.0001:
 507         line[5] = '< 0,0001'
 508     elif float(line[5].replace(',', '.')) > 0.05:
 509         line[5] = 'NS (%s)' % str(float(line[5].replace(',', '.')))[0:7]
 510     else:
 511         line[5] = str(float(line[5].replace(',', '.')))[0:7]
 512     return [i, int(line[0]), int(line[1]), float(line[2]), float(line[3]), line[6], line[4], line[5]]
 513
 514 def ReadProfileAsDico(File, Alceste=False, encoding = 'utf8'):
 515     dictlem = {}
 516     print('lecture des profiles')
 517     FileReader = open(File, 'r', encoding='utf8')
 518     Filecontent = FileReader.readlines()
 519     FileReader.close()
 520     DictProfile = {}
 521     count = 0
 522     #rows = [row.replace('\n', '').replace("'", '').replace('\"', '').replace(',', '.').replace('\r','').split(';') for row in Filecontent]
 523     rows = [row.replace('\n', '').replace("'", '').replace('\"', '').replace('\r','').split(';') for row in Filecontent]
 524     rows.pop(0)
 525     ClusterNb = rows[0][2]
 526     rows.pop(0)
 527     clusters = [row[2] for row in rows if row[0] == '**']
 528     valclusters = [row[1:4] for row in rows if row[0] == '****']
 529     lp = [i for i, line in enumerate(rows) if line[0] == '****']
 530     prof = [rows[lp[i] + 1:lp[i+1] - 1] for i in range(0, len(lp)-1)] + [rows[lp[-1] + 1:len(rows)]]
 531     if Alceste :
 532         prof = [[add_type(row, dictlem) for row in pr] for pr in prof]
 533         prof = [[treat_line_alceste(i,line) for i, line in enumerate(pr)] for pr in prof]
 534     else :
 535         prof = [[line + [''] for line in pr] for pr in prof]
 536         prof = [[treat_line_alceste(i,line) for i, line in enumerate(pr)] for pr in prof]
 537     for i, cluster in enumerate(clusters):
 538         DictProfile[cluster] = [valclusters[i]] + prof[i]
 539     return DictProfile
 540
 541 def GetTxtProfile(dictprofile, cluster_size) :
 542     proflist = []
 543     for classe in range(0, len(dictprofile)) :
 544         prof = dictprofile[str(classe + 1)]
 545         clinfo = cluster_size[classe]
 546         proflist.append('\n'.join([' '.join(['classe %i' % (classe + 1), '-', '%s uce sur %s - %s%%' % (clinfo[0], clinfo[1], clinfo[2])]), '\n'.join(['%5s|%5s|%6s|%6s|%8s|%8s|%20s\t%10s' % tuple([str(val) for val in line]) for line in prof if len(line)==8])]))
 547     return '\n\n'.join(proflist)
 548
 549 def formatExceptionInfo(maxTBlevel=5):
 550     cla, exc, trbk = sys.exc_info()
 551     try :
 552         excName = cla.__name__
 553     except :
 554         excName = 'None'
 555     try:
 556         excArgs = exc.args[0]
 557     except :
 558         excArgs = "<no args>"
 559     excTb = traceback.format_tb(trbk, maxTBlevel)
 560     return (excName, excArgs, excTb)
 561
 562
 563 #fonction des etudiants de l'iut
 564 def decoupercharact(chaine, longueur, longueurOptimale, separateurs = None) :
 565     """
 566         on part du dernier caractère, et on recule jusqu'au début de la chaîne.
 567         Si on trouve un '$', c'est fini.
 568         Sinon, on cherche le meilleur candidat. C'est-à-dire le rapport poids/distance le plus important.
 569     """
 570     separateurs = [['.', 60.0], ['?', 60.0], ['!', 60.0], ['£$£', 60], [':', 50.0], [';', 40.0], [',', 10.0], [' ', 0.1]]
 571     trouve = False                 # si on a trouvé un bon séparateur
 572     iDecoupe = 0                # indice du caractere ou il faut decouper
 573
 574     # on découpe la chaine pour avoir au maximum 240 caractères
 575     longueur = min(longueur, len(chaine) - 1)
 576     chaineTravail = chaine[:longueur + 1]
 577     nbCar = longueur
 578     meilleur = ['', 0, 0]        # type, poids et position du meilleur separateur
 579
 580     # on vérifie si on ne trouve pas un '$'
 581     indice = chaineTravail.find('$')
 582     if indice > -1:
 583         trouve = True
 584         iDecoupe = indice
 585
 586     # si on ne trouve rien, on cherche le meilleur séparateur
 587     if not trouve:
 588         while nbCar >= 0:
 589             caractere = chaineTravail[nbCar]
 590             distance = abs(longueurOptimale - nbCar) + 1
 591             meilleureDistance = abs(longueurOptimale - meilleur[2]) + 1
 592
 593             # on vérifie si le caractére courant est une marque de ponctuation
 594             for s in separateurs:
 595                 if caractere == s[0]:
 596                     # si c'est une ponctuation
 597
 598                     if s[1] / distance > float(meilleur[1]) / meilleureDistance:
 599                         # print nbCar, s[0]
 600                         meilleur[0] = s[0]
 601                         meilleur[1] = s[1]
 602                         meilleur[2] = nbCar
 603                         trouve = True
 604                         iDecoupe = nbCar
 605
 606                     # et on termine la recherche
 607                     break
 608
 609             # on passe au caractère précédant
 610             nbCar = nbCar - 1
 611
 612     # si on a trouvé
 613     if trouve:
 614         fin = chaine[iDecoupe + 1:]
 615         retour = chaineTravail[:iDecoupe]
 616         return len(retour) > 0, retour.split(), fin
 617     # si on a rien trouvé
 618     return False, chaine.split(), ''
 619
 620
 621 exceptions = {'paragrapheOT' : "Un problème de formatage (présence d'un marqueur de paragraphe (-*) en dehors d'un texte) est survenu à la ligne ",
 622               'EmptyText' : "Texte vide (probablement un problème de formatage du corpus). Le problème est apparu à la ligne ",
 623               'CorpusEncoding' : "Problème d'encodage.",
 624               'TextBeforeTextMark' : "Problème de formatage : du texte avant le premier marqueur de texte (****). Le problème est survenu à la ligne ",
 625               'MissingAnalyse' : 'Aucun fichier à cet emplacement :\n',
 626 }
 627
 628 def BugReport(parent, error = None):
 629     for ch in parent.GetChildren():
 630         if "<class 'wx._windows.ProgressDialog'>" == str(type(ch)):
 631             ch.Destroy()
 632     excName, exc, excTb = formatExceptionInfo()
 633     if excName == 'Exception' :
 634         print(exc)
 635         if len(exc.split()) == 2 :
 636             mss, linenb = exc.split()
 637             if mss in exceptions :
 638                 txt = exceptions[mss] + linenb
 639             else :
 640                 txt = exc
 641         else :
 642             if exc in exceptions :
 643                 txt = exceptions[exc]
 644             else :
 645                 txt = exc
 646         title = "Information"
 647     else :
 648         txt = '\n            !== BUG ==!       \n'
 649         txt += '*************************************\n'
 650         txt += '\n'.join(excTb).replace('    ', ' ')
 651         txt += excName + '\n'
 652         txt += repr(exc)
 653         title = "Bug"
 654
 655     dial = BugDialog(parent, **{'title' : title})
 656     if 'Rerror' in dir(parent) :
 657         txt += parent.Rerror
 658         parent.Rerror = ''
 659     log.info(txt)
 660     dial.text_ctrl_1.write(txt)
 661     dial.CenterOnParent()
 662     dial.ShowModal()
 663     dial.Destroy()
 664
 665 def PlaySound(parent):
 666     if parent.pref.getboolean('iramuteq', 'sound') :
 667         try:
 668             if "gtk2" in wx.PlatformInfo:
 669                 error = Popen(['aplay','-q',os.path.join(parent.AppliPath,'son_fin.wav')])
 670             else :
 671                 sound = wx.adv.Sound(os.path.join(parent.AppliPath, 'son_fin.wav'))
 672                 sound.Play(wx.adv.SOUND_SYNC)
 673         except :
 674             print('pas de son')
 675
 676 def ReadDicoAsDico(dicopath):
 677     with open(dicopath, 'r', encoding='UTF8') as f:
 678         content = f.readlines()
 679     lines = [line.rstrip('\n\r').replace('\n', '').replace('"', '').split('\t') for line in content if line != '']
 680     return dict([[line[0], line[1:]] for line in lines])
 681
 682 def ReadLexique(parent, lang = 'french', filein = None):
 683     if lang != 'other' :
 684         if filein is None :
 685             parent.lexique = ReadDicoAsDico(parent.DictPath.get(lang, 'french'))
 686         else :
 687             parent.lexique = ReadDicoAsDico(filein)
 688     else :
 689         if filein is None :
 690             parent.lexique = {}
 691         else :
 692             parent.lexique = ReadDicoAsDico(filein)
 693
 694 def ReadList(filein, encoding = 'utf8', sep = ';'):
 695     #file = open(filein)
 696     with open(filein, 'r', encoding='utf8') as f :
 697         content = f.read()
 698     content = [line.replace('\n', '').replace('\r','').replace('\"', '').replace(',', '.').split(sep) for line in content.splitlines()]
 699     #file = codecs.open(filein, 'r', encoding)
 700     #content = file.readlines()
 701     #file.close()
 702     first = content.pop(0)
 703     #first = first.replace('\n', '').replace('\r','').replace('\"', '').split(sep)
 704     dict = {}
 705     i = 0
 706     for line in content:
 707         #line = line.replace('\n', '').replace('\r','').replace('\"', '').replace(',', '.')
 708         #line = line.split(';')
 709         nline = [line[0]]
 710         for val in line[1:]:
 711             if val == 'NA' :
 712                 don = ''
 713             else:
 714                 try:
 715                     don = int(val)
 716                 except:
 717                     don = float('%.5f' % float(val))
 718             nline.append(don)
 719         dict[i] = nline
 720         i += 1
 721     return dict, first
 722
 723 def exec_RCMD(rpath, command) :
 724     log.info('R CMD INSTALL %s' % command)
 725     rpath = rpath.replace('\\','\\\\')
 726     error = call(["%s" % rpath, 'CMD', 'INSTALL', "%s" % command])
 727     return error
 728
 729 def exec_rcode(rpath, rcode, wait = True, graph = False):
 730     log.info("R Script : %s" % rcode)
 731     needX11 = False
 732     if sys.platform == 'darwin' :
 733         try :
 734             macversion = platform.mac_ver()[0].split('.')
 735             if int(macversion[1]) < 5 :
 736                 needX11 = True
 737             else :
 738                 needX11 = False
 739         except :
 740             needX11 = False
 741     rpath = rpath.replace('\\','\\\\')
 742     env = os.environ.copy()
 743     if sys.platform == 'darwin' and 'LC_ALL' not in env:
 744         env['LC_ALL'] = 'en_US.UTF-8'
 745     if not graph :
 746         if wait :
 747             if sys.platform == 'win32':
 748                 error = call(["%s" % rpath, "--vanilla","--slave","-f", "%s" % rcode])
 749             else :
 750                 error = call([rpath, '--slave', "--vanilla", "--encoding=UTF-8", "-f %s" % rcode], env = env)
 751             return error
 752         else :
 753             if sys.platform == 'win32':
 754                 pid = Popen(["%s" % rpath, '--vanilla','--slave','-f', "%s" % rcode])
 755             else :
 756                 pid = Popen([rpath, '--slave', "--vanilla", "--encoding=UTF-8", "-f %s" % rcode], stderr = PIPE, env = env, encoding='UTF-8') #PIPE ou STDOUT ?
 757             return pid
 758     else :
 759         if wait :
 760             if sys.platform == 'win32':
 761                 error = call(["%s" % rpath, '--vanilla','--slave','-f', "%s" % rcode])
 762             elif sys.platform == 'darwin' and needX11:
 763                 os.environ['DISPLAY'] = ':0.0'
 764                 error = call([rpath, '--vanilla','--slave', "--encoding=UTF-8","-f %s" % rcode], env = env, encoding='UTF-8')
 765             else :
 766                 error = call([rpath, '--vanilla','--slave', "--encoding=UTF-8","-f %s" % rcode], env = env, encoding='UTF-8')
 767             return error
 768         else :
 769             if sys.platform == 'win32':
 770                 pid = Popen(["%s" % rpath, '--vanilla','--slave','-f', "%s" % rcode])
 771             elif sys.platform == 'darwin' and needX11:
 772                 os.environ['DISPLAY'] = ':0.0'
 773                 pid = Popen([rpath, '--vanilla','--slave', "--encoding=UTF-8","-f %s" % rcode], stderr = PIPE, env = env, encoding='UTF-8')
 774             else :
 775                 pid = Popen([rpath, '--vanilla','--slave', "--encoding=UTF-8","-f %s" % rcode], stderr = PIPE, env = env, encoding='UTF-8')
 776             return pid
 777
 778 def check_Rresult(parent, pid) :
 779     if isinstance(pid, Popen) :
 780         if pid.returncode != 0 :
 781             error = pid.communicate()
 782             error = [str(error[0]), error[1]]
 783             if error[1] is None :
 784                 error[1] = 'None'
 785             parent.Rerror = '\n'.join([str(pid.returncode), '\n'.join(error)])
 786             try :
 787                 raise Exception('\n'.join(['Erreur R', '\n'.join(error[1:])]))
 788             except :
 789                 BugReport(parent)
 790             return False
 791         else :
 792             return True
 793     else :
 794         if pid != 0 :
 795             try :
 796                 raise Exception('Erreur R')
 797             except :
 798                 BugReport(parent)
 799             return False
 800         else :
 801             return True
 802
 803
 804 def launchcommand(mycommand):
 805     Popen(mycommand)
 806
 807 def print_liste(filename,liste):
 808     with open(filename,'w', encoding='utf8') as f :
 809         for graph in liste :
 810             f.write(';'.join(graph) +'\n')
 811
 812 def read_list_file(filename, encoding = 'utf8'):
 813     with open(filename,'r', encoding='utf8') as f:
 814         content=f.readlines()
 815         ncontent=[line.replace('\n','').split(';') for line in content if line.strip() != '']
 816     return ncontent
 817
 818 def progressbar(self, maxi):
 819     ira = wx.GetApp().GetTopWindow()
 820     parent = ira
 821     try:
 822         maxi = int(maxi)
 823     except:
 824         maxi = 1
 825     prog = wx.ProgressDialog("Traitements",
 826                              "Veuillez patienter...",
 827                              maximum=maxi,
 828                              parent=parent,
 829                              style=wx.PD_APP_MODAL | wx.PD_AUTO_HIDE | wx.PD_ELAPSED_TIME | wx.PD_CAN_ABORT
 830                              )
 831                              # parent ???
 832     # le ABORT n'est pas géré à tous les coups ???
 833     prog.SetSize((400,150))
 834     #prog.SetIcon(ira._icon)
 835     return prog
 836
 837 def treat_var_mod(variables) :
 838     var_mod = {}
 839     variables = list(set(variables))
 840     varmod = [variable.split('_') for variable in variables]
 841     vars = list(set([var[0] for var in varmod if len(var) >=2]))
 842     for var in vars :
 843         mods = ['_'.join(v) for v in varmod if v[0] == var]
 844         var_mod[var] = mods
 845
 846 #     for variable in variables :
 847 #         if '_' in variable :
 848 #             forme = variable.split('_')
 849 #             var = forme[0]
 850 #             mod = forme[1]
 851 #             if not var in var_mod :
 852 #                 var_mod[var] = [variable]
 853 #             else :
 854 #                 if not mod in var_mod[var] :
 855 #                     var_mod[var].append(variable)
 856     return var_mod
 857
 858 def doconcorde(corpus, uces, mots, uci = False) :
 859     if not uci :
 860         ucestxt1 = [row for row in corpus.getconcorde(uces)]
 861     else :
 862         ucestxt1 = [row for row in corpus.getuciconcorde(uces)]
 863     ucestxt1 = dict(ucestxt1)
 864     ucestxt = []
 865     ucis_txt = []
 866     listmot = [corpus.getlems()[lem].formes for lem in mots]
 867     listmot = [corpus.getforme(fid).forme for lem in listmot for fid in lem]
 868     mothtml = ['<font color=red><b>%s</b></font>' % mot for mot in listmot]
 869     dmots = dict(list(zip(listmot, mothtml)))
 870     for uce in uces :
 871         ucetxt = ucestxt1[uce].split()
 872         ucetxt = ' '.join([dmots.get(mot, mot) for mot in ucetxt])
 873         if not uci :
 874             uciid = corpus.getucefromid(uce).uci
 875             ucis_txt.append('<p><b>' + ' '.join(corpus.ucis[corpus.getucefromid(uce).uci].etoiles) + '<a href="%i_%i"> *%i_%i</a></b></p>' % (uciid, uce, uciid, uce))
 876         else :
 877             ucis_txt.append('<p><b>' + ' '.join(corpus.ucis[uce].etoiles) + '</b></p>')
 878         ucestxt.append(ucetxt)
 879     return ucis_txt, ucestxt
 880
 881
 882 def getallstcarac(corpus, analyse) :
 883    pathout = PathOut(analyse['ira'])
 884    profils =  ReadProfileAsDico(pathout['PROFILE_OUT'], Alceste, 'utf8')
 885    print(profils)
 886
 887 def read_chd(filein, fileout):
 888     with open(filein, 'r') as f :
 889         content = f.read()
 890     #content = [line[3:].replace('"',"").replace(' ','') for line in content.splitlines()]
 891     content = [line.split('\t') for line in content.splitlines()]
 892     chd = {'name':1, 'children':[]}
 893     mere={}
 894     for i, line in enumerate(content) :
 895         if i == 0 :
 896             chd['children'] = [{'name': line[1],'size' : content[i+1][0]}, {'name':line[2], 'size': content[i+1][1]}]
 897             mere[line[1]] = chd['children'][0]
 898             mere[line[2]] = chd['children'][1]
 899         elif not i % 2 :
 900             if 'children' in mere[line[0]]:
 901                 mere[line[0]]['children'].append({'name': line[1],'size' : content[i+1][0]})
 902                 mere[line[1]] = mere[line[0]]['children'][-1]
 903                 mere[line[0]]['children'].append({'name': line[2],'size' : content[i+1][1]})
 904                 mere[line[2]] = mere[line[0]]['children'][-1]
 905             else :
 906                 mere[line[0]]['children'] = [{'name': line[1],'size' : content[i+1][0]}, {'name':line[2], 'size': content[i+1][1]}]
 907                 mere[line[1]] = mere[line[0]]['children'][-2]
 908                 mere[line[2]] = mere[line[0]]['children'][-1]
 909     with open(fileout, 'w') as f :
 910         f.write(json.dumps(chd))
 911
 912
 913 translation_languages = {"Afrikaans":"af", "Albanian":"sq", "Amharic":"am", "Arabic":"ar", "Armenian":"hy", "Azeerbaijani":"az", "Basque":"eu", "Belarusian":"be", "Bengali":"bn", "Bosnian":"bs", "Bulgarian":"bg", "Catalan":"ca", "Cebuano":"ceb", "Chichewa":"ny", "Chinese (Simplified)":"zh-CN", "Chinese (Traditional)":"zh-TW", "Corsican":"co", "Croatian":"hr", "Czech":"cs", "Danish":"da", "Dutch":"nl", "English":"en", "Esperanto":"eo", "Estonian":"et", "Filipino":"tl", "Finnish":"fi", "French":"fr", "Frisian":"fy", "Galician":"gl", "Georgian":"ka", "German":"de", "Greek":"el", "Gujarati":"gu", "Haitian Creole":"ht", "Hausa":"ha", "Hawaiian":"haw", "Hebrew":"iw", "Hindi":"hi", "Hmong":"hmn ", "Hungarian":"hu", "Icelandic":"is", "Igbo":"ig", "Indonesian":"id", "Irish":"ga", "Italian":"it", "Japanese":"ja", "Javanese":"jw", "Kannada":"kn", "Kazakh":"kk", "Khmer":"km", "Korean":"ko", "Kurdish":"ku", "Kyrgyz":"ky", "Lao":"lo", "Latin":"la", "Latvian":"lv", "Lithuanian":"lt", "Luxembourgish":"lb", "Macedonian":"mk", "Malagasy":"mg", "Malay":"ms", "Malayalam":"ml", "Maltese":"mt", "Maori":"mi", "Marathi":"mr", "Mongolian":"mn", "Burmese":"my", "Nepali":"ne", "Norwegian":"no", "Pashto":"ps", "Persian":"fa", "Polish":"pl", "Portuguese":"pt", "Punjabi":"ma", "Romanian":"ro", "Russian":"ru", "Samoan":"sm", "Scots Gaelic":"gd", "Serbian":"sr", "Sesotho":"st", "Shona":"sn", "Sindhi":"sd", "Sinhala":"si", "Slovak":"sk", "Slovenian":"sl", "Somali":"so", "Spanish":"es", "Sundanese":"su", "Swahili":"sw", "Swedish":"sv", "Tajik":"tg", "Tamil":"ta", "Telugu":"te", "Thai":"th", "Turkish":"tr", "Ukrainian":"uk", "Urdu":"ur", "Uzbek":"uz", "Vietnamese":"vi", "Welsh":"cy", "Xhosa":"xh", "Yiddish":"yi", "Yoruba":"yo", "Zulu":"zu", }
 914
 915
 916 def gettranslation(words, lf, lt) :
 917     import urllib.request, urllib.error, urllib.parse
 918     import json
 919     agent = {'User-Agent':
 920     "Mozilla/4.0 (\
 921     compatible;\
 922     MSIE 6.0;\
 923     Windows NT 5.1;\
 924     SV1;\
 925     .NET CLR 1.1.4322;\
 926     .NET CLR 2.0.50727;\
 927     .NET CLR 3.0.04506.30\
 928     )"}
 929     base_link = "https://translate.googleapis.com/translate_a/single?client=gtx&sl=%s&tl=%s&dt=t&q=%s"
 930     print(len(words))
 931     totrans = urllib.parse.quote('\n'.join(words))
 932     link = base_link % (lf, lt, totrans)
 933     request = urllib.request.Request(link, headers=agent)
 934     raw_data = urllib.request.urlopen(request).read()
 935     data = json.loads(raw_data)
 936     return [line[0].replace("'", '_').replace(' | ', '|').replace(' ', '_').replace('-','_').replace('\n','') for line in data[0]]
 937
 938 def makenprof(prof, trans, deb=0) :
 939     nprof=[]
 940     if deb == 0 :
 941         nprof.append(prof[0])
 942     for i, val in enumerate(trans) :
 943         line = prof[deb+i+1][:]
 944         line[6] = val
 945         nprof.append(line)
 946     return nprof
 947
 948 def treatempty(val) :
 949     if val.strip() == '' :
 950         return '_'
 951     else :
 952         return val
 953
def translateprofile(corpus, dictprofile, lf='it', lt='fr', maxword = 50) :
    """Translate the word column (index 6) of every class profile.

    dictprofile -- profiles keyed by '1', '2', ... (``repr`` of the
                   1-based class number); each profile is a list of rows.
    lf, lt      -- source and target language codes given to gettranslation.
    maxword     -- upper bound used when slicing the rows sent for
                   translation.

    *corpus* is not used in this function.

    Returns ``(nprof, lems)``: the rebuilt profiles and a dict mapping
    each translated word to the original word it came from.
    """
    nprof = {}
    lems = {}
    for i in range(len(dictprofile)) :
        prof = dictprofile[repr(i+1)]
        # Locate the marker rows that split the profile into sections.
        # Presumably '*****' separates active from supplementary forms
        # and '*' introduces the trailing section -- TODO confirm.
        try :
            lenact = prof.index(['*****', '*', '*', '*', '*', '*', '', ''])
            lensup = -1
        except ValueError:
            try :
                lenact = prof.index(['*', '*', '*', '*', '*', '*', '', ''])
                lensup = 0
            except ValueError:
                lenact = len(prof)
                lensup = 0
        # lensup ends up as the distance between the first section's end
        # and the '*' marker (or the end of the profile when it is absent).
        try :
            lensup += prof.index(['*', '*', '*', '*', '*', '*', '', ''])
            lensup = lensup - lenact
        except ValueError:
            lensup += len(prof) - lenact
        if lenact != 0 :
            if lenact > maxword :
                nlenact = maxword
            else :
                nlenact = lenact
            actori = [line[6] for line in prof[1:nlenact]]
            # Underscores are turned into spaces before translation.
            act = [val.replace('_', ' ') for val in actori]
            act = gettranslation(act, lf, lt)
            for j, val in enumerate(actori) :
                if act[j] not in lems :
                    lems[act[j]] = val
                else :
                    # Translation already used: append '+' until it is unique.
                    while act[j] in lems :
                        act[j] = act[j] + "+"
                    lems[act[j]] = val
            nprof[repr(i+1)] = makenprof(prof, act)

        if lensup != 0 :
            if lensup > maxword :
                nlensup = maxword
            else :
                nlensup = lensup
            supori = [line[6] for line in prof[(1+lenact):(lenact+nlensup)]]
            sup = [val.replace('_', ' ') for val in supori]
            # Blank entries are sent as '_' instead of an empty string.
            sup = [treatempty(val) for val in sup]
            sup = gettranslation(sup, lf, lt)
            for j, val in enumerate(supori) :
                if sup[j] not in lems :
                    lems[sup[j]] = val
                else :
                    while sup[j] in lems :
                        sup[j] = sup[j] + "+"
                    lems[sup[j]] = val
            # NOTE(review): nprof has no entry for this class when lenact
            # was 0, in which case the next line raises KeyError -- confirm
            # that case cannot occur.
            nprof[repr(i+1)].append(['*****', '*', '*', '*', '*', '*', '', ''])
            nprof[repr(i+1)] += makenprof(prof, sup, deb=lenact)

        # Copy everything after the '*' marker row untranslated. The bare
        # except silently skips this step on any error (for example when
        # the marker row is absent).
        try :
            lenet = prof.index(['*', '*', '*', '*', '*', '*', '', ''])
            nprof[repr(i+1)].append(['*', '*', '*', '*', '*', '*', '', ''])
            nprof[repr(i+1)] += prof[(lenet+1):]
        except :
            pass
    return nprof, lems
1017
def write_translation_profile(prof, lems, language, dictpathout) :
    """Write a translated profile, its word table and update the index file.

    prof        -- profiles keyed by '1', '2', ...; the first row of each
                   is a header list, the others are data or marker rows.
    lems        -- mapping written as ``key<TAB>value`` lines.
    language    -- suffix used in the two output file names.
    dictpathout -- mapping from a file name to the path to write.

    Files written (UTF-8): ``translation_profile_<language>.csv``,
    ``translation_words_<language>.csv`` and, when the profile is not
    listed yet, ``translations.txt``.
    """
    profile_name = 'translation_profile_%s.csv' % language
    words_name = 'translation_words_%s.csv' % language
    if os.path.exists(dictpathout['translations.txt']) :
        with open(dictpathout['translations.txt'], 'r', encoding='utf8') as f :
            known = [entry.split('\t') for entry in f.read().splitlines()]
    else :
        known = []
    out_rows = [
        ['', '', '', '', '', ''],
        ['***', 'nb classes', repr(len(prof)), '***', '', ''],
    ]
    for number in range(1, len(prof) + 1) :
        key = repr(number)
        out_rows.append(['**', 'classe', key, '**', '', ''])
        out_rows.append(['****'] + prof[key][0] + ['****'])
        data = prof[key][1:]
        formatted = [[repr(line[1]), repr(line[2]), repr(line[3]), repr(line[4]), line[6], line[7].replace('< 0,0001', '0.00009').replace('NS (','').replace(')','')] for line in data]
        # Marker rows are written as plain rows of stars.
        for position, line in enumerate(data) :
            if line[0] == '*' :
                formatted[position] = ['*', '*', '*', '*', '*', '*']
            elif line[0] == '*****' :
                formatted[position] = ['*****','*','*', '*', '*', '*']
        out_rows.extend(formatted)
    with open(dictpathout[profile_name], 'w', encoding='utf8') as f :
        f.write('\n'.join(';'.join(row) for row in out_rows))
    with open(dictpathout[words_name], 'w', encoding='utf8') as f :
        f.write('\n'.join('\t'.join(pair) for pair in lems.items()))
    if profile_name not in [entry[0] for entry in known] :
        known.append([profile_name, words_name])
        with open(dictpathout['translations.txt'], 'w', encoding='utf8') as f :
            f.write('\n'.join('\t'.join(entry) for entry in known))
1046
def makesentidict(infile, language) :
    """Turn a tab-separated sentiment lexicon into one word list per sentiment.

    The header of *infile* (UTF-8) must contain the ten sentiment column
    names and a column whose title contains ``'(fr)'``. For each
    sentiment, the lower-cased words of the ``(fr)`` column whose flag is
    ``'1'`` are written on one tab-separated line of
    ``/tmp/tgenemo.csv``, preceded by the lower-cased sentiment name.

    *language* is not used.
    """
    with codecs.open(infile,'r', 'utf8') as src :
        rows = [line.split('\t') for line in src.read().splitlines()]
    titles = rows.pop(0)
    senti = ['Positive', 'Negative', 'Anger', 'Anticipation', 'Disgust', 'Fear', 'Joy', 'Sadness', 'Surprise', 'Trust']
    columns = [titles.index(name) for name in senti]
    word_column = titles.index([title for title in titles if '(fr)' in title][0])
    # Everything is computed before the output file is opened, so a
    # malformed input never leaves a truncated file behind.
    entries = [(row[word_column].lower(), [row[col] for col in columns]) for row in rows]
    out_lines = []
    for rank, name in enumerate(senti) :
        words = [word for word, flags in entries if flags[rank] == '1']
        out_lines.append([name.lower()] + words)
    with open('/tmp/tgenemo.csv', 'w') as out :
        for fields in out_lines :
            out.write('\t'.join(fields) + '\n')
1072
def countsentfromprof(prof, encoding, sentidict) :
    """Parse a ';'-separated count file and print it at three stages (debug helper).

    Prints the split lines, then ``[label, [int counts]]`` pairs, then
    the same pairs as a dict. Returns nothing; *sentidict* is not used.
    """
    with codecs.open(prof, 'r', encoding) as src :
        raw = src.read()
    fields = [line.split(';') for line in raw.splitlines()]
    print(fields)
    counts = [[row[0], [int(val) for val in row[1:]]] for row in fields]
    print(counts)
    print(dict(counts))
1082
1083 def iratolexico(infile, outfile, encoding) :
1084     with codecs.open(infile, 'r', encoding) as f :
1085         for line in f :
1086             if line.startswith('**** ') :
1087                 line = line.split()
1088