iramuteq.org Git - iramuteq/blob - functions.py

   1 # -*- coding: utf-8 -*-
   2 #Author: Pierre Ratinaud
   3 #Copyright (c) 2008-2020 Pierre Ratinaud
   4 #modification pour python 3 : Laurent Mérat, 6x7 - mai 2020
   5 #License: GNU/GPL
   6
   7 #------------------------------------
   8 # import des modules python
   9 #------------------------------------
  10 import re
  11 from subprocess import Popen, call, PIPE
  12 import _thread
  13 import os
  14 import ast
  15 import sys
  16 import csv
  17 import platform
  18 import traceback
  19 import codecs
  20 import locale
  21 import datetime
  22 from copy import copy
  23 from shutil import copyfile
  24 import shelve
  25 import json
  26 #from dialog import BugDialog
  27 import logging
  28 from operator import itemgetter
  29
  30 #------------------------------------
  31 # import des modules wx
  32 #------------------------------------
  33 import wx
  34 import wx.adv
  35
  36 #------------------------------------
  37 # import des fichiers du projet
  38 #------------------------------------
  39 from configparser import ConfigParser
  40
  41
# Logger shared by the helpers of this module (registered as 'iramuteq').
log = logging.getLogger('iramuteq')


# Labels of similarity indices; presumably the choices offered for
# similarity analyses - TODO confirm against the callers of this list.
indices_simi = ['cooccurrence' ,'pourcentage de cooccurrence','Russel','Jaccard', 'Kulczynski1', 'Kulczynski2', 'Mountford', 'Fager', 'simple matching', 'Hamman', 'Faith', 'Tanimoto', 'Dice', 'Phi', 'Stiles', 'Michael', 'Mozley', 'Yule', 'Yule2', 'Ochiai', 'Simpson', 'Braun-Blanquet','Chi-squared', 'Phi-squared', 'Tschuprow', 'Cramer', 'Pearson', 'binomial']
  46
  47 def open_folder(folder):
  48     if sys.platform == "win32":
  49         os.startfile(folder)
  50     else:
  51         opener ="open" if sys.platform == "darwin" else "xdg-open"
  52         #call([opener, folder])
  53         call(["%s %s &" % (opener, folder)], shell=True)
  54
  55 def normpath_win32(path) :
  56     if not sys.platform == 'win32' :
  57         return path
  58     while '\\\\' in path :
  59         path = path.replace('\\\\', '\\')
  60     if path.startswith('\\') and not path.startswith('\\\\') :
  61         path = '\\' + path
  62     return path
  63
  64 class TGen :
  65     def __init__(self, path = None, encoding = 'utf8'):
  66         self.path = path
  67         self.tgen = {}
  68         self.encoding = encoding
  69
  70     def __getitem__(self, key):
  71         return self.tgen[key]
  72
  73     def read(self, path = None):
  74         if path is None :
  75             path = self.path
  76         with codecs.open(path, 'r', self.encoding) as f :
  77             tgen = f.read()
  78         tgen = [line.split('\t') for line in tgen.splitlines()]
  79         tgen = dict([[line[0], line[1:]] for line in tgen])
  80         self.tgen = tgen
  81         self.path = path
  82
  83     def write(self, path = None):
  84         if path is None :
  85             path = self.path
  86         with open(path, 'w') as f :
  87             f.write('\n'.join(['\t'.join([val] + self.tgen[val]) for val in self.tgen]))
  88
  89     def writetable(self, pathout, tgens, totocc):
  90         etoiles = list(totocc.keys())
  91         etoiles.sort()
  92         with open(pathout, 'w') as f :
  93             line = '\t'.join(['tgens'] + etoiles) + '\n'
  94             f.write(line)
  95             for t in tgens :
  96                 line = '\t'.join([t] + [repr(tgens[t][et]) for et in etoiles]) + '\n'
  97                 f.write(line)
  98             i = 0
  99             totname = 'total'
 100             while totname + repr(i) in tgens :
 101                 i += 1
 102             totname = totname + repr(i)
 103             line = '\t'.join([totname] + [repr(totocc[et]) for et in etoiles]) + '\n'
 104             f.write(line)
 105
 106 class History :
 107     def __init__(self, filein, syscoding = 'utf8') :
 108         self.filein = filein
 109         self.syscoding = syscoding
 110         self.corpus = {}
 111         self.openedcorpus = {}
 112         self.openedmatrix = {}
 113         self.orph = []
 114         self.analyses = {}
 115         self.history = []
 116         self.opened = {}
 117         self.read()
 118
 119     def read(self) :
 120         d = shelve.open(self.filein)
 121         self.history = d.get('history', [])
 122         self.matrix = d.get('matrix', [])
 123         self.ordercorpus = dict([[corpus['uuid'], i] for i, corpus in enumerate(self.history)])
 124         self.corpus = dict([[corpus['uuid'], corpus] for corpus in self.history])
 125         self.analyses = dict([[analyse['uuid'], analyse] for corpus in self.history for analyse in corpus.get('analyses', [])])
 126         self.matrixanalyse = dict([[mat['uuid'], mat] for mat in self.matrix])
 127         self.ordermatrix = dict([[matrix['uuid'], i] for i, matrix in enumerate(self.matrix)])
 128         d.close()
 129
 130     def write(self) :
 131         d = shelve.open(self.filein)
 132         d['history'] = self.history
 133         d['matrix'] = self.matrix
 134         d.close()
 135
 136     def add(self, analyse) :
 137         log.info('add to history %s' % analyse.get('corpus_name', 'pas un corpus'))
 138         tosave = {'uuid' : analyse['uuid'], 'ira': analyse['ira'], 'type' : analyse['type']}
 139         if tosave['uuid'] in self.corpus :
 140             log.info('problem : this uuid is already in history : %s' % tosave['uuid'])
 141             return
 142         if analyse.get('corpus', False) :
 143             if analyse['uuid'] in self.analyses :
 144                 return
 145             tosave['corpus'] = analyse['corpus']
 146             tosave['name'] = analyse['name']
 147             acorpus_uuid =  analyse['corpus']
 148             if acorpus_uuid in self.corpus :
 149                 if 'analyses' in self.history[self.ordercorpus[acorpus_uuid]] :
 150                     self.history[self.ordercorpus[acorpus_uuid]]['analyses'].append(tosave)
 151                 else :
 152                     self.history[self.ordercorpus[acorpus_uuid]]['analyses'] = [tosave]
 153             else :
 154                 self.orph.append(tosave)
 155         else :
 156             tosave['corpus_name'] = analyse['corpus_name']
 157             #self.ordercorpus[tosave['uuid']] = len(history)
 158             #self.corpus[tosave['uuid']] = analyse
 159             self.history.append(tosave)
 160         self.write()
 161         self.read()
 162
 163     def addMatrix(self, analyse) :
 164         tosave = analyse
 165         #tosave['matrix_name'] = analyse['matrix_name']
 166         tosave['analyses'] = []
 167         self.matrix.append(tosave)
 168         self.write()
 169         self.read()
 170
 171     def addMatrixAnalyse(self, analyse) :
 172         tosave = {'uuid' : analyse['uuid'], 'ira': analyse['ira'], 'type' : analyse['type'], 'matrix' : analyse['matrix']}
 173         tosave['name'] = analyse['name']
 174         if tosave['matrix'] in self.ordermatrix :
 175             self.matrix[self.ordermatrix[tosave['matrix']]]['analyses'].append(tosave)
 176         self.write()
 177         self.read()
 178
 179     def addmultiple(self, analyses) :
 180         log.info('add multiple')
 181         for analyse in analyses :
 182             tosave = {'uuid' : analyse['uuid'], 'ira': analyse['ira'], 'type' : analyse['type']}
 183             corpus = analyse['corpus']
 184             tosave['corpus'] = corpus
 185             tosave['name'] = analyse['name']
 186             if corpus in self.corpus :
 187                 if 'analyses' in self.history[self.ordercorpus[corpus]] :
 188                     self.history[self.ordercorpus[corpus]]['analyses'].append(tosave)
 189                 else :
 190                     self.history[self.ordercorpus[corpus]]['analyses'] = [tosave]
 191         self.write()
 192         self.read()
 193
 194     def delete(self, analyse, corpus = False) :
 195         log.info('delete %s' % analyse.get('name', 'noname'))
 196         if corpus :
 197             self.history.pop(self.ordercorpus[analyse['uuid']])
 198             if analyse['uuid'] in self.openedcorpus :
 199                 del self.openedcorpus[analyse['uuid']]
 200             log.info('delete corpus : %s' % analyse['uuid'])
 201         elif analyse['uuid'] in self.analyses :
 202             todel = [i for i, ana in enumerate(self.corpus[analyse['corpus']]['analyses']) if ana['uuid'] == analyse['uuid']][0]
 203             self.history[self.ordercorpus[analyse['corpus']]]['analyses'].pop(todel)
 204         elif analyse['uuid'] in self.matrixanalyse :
 205             self.matrix = [mat for mat in self.matrix if mat['uuid'] != analyse['uuid']]
 206         elif analyse.get('matrix', False) in self.matrixanalyse :
 207             analyses = self.matrix[self.ordermatrix[analyse['matrix']]]['analyses']
 208             topop = [i for i, val in enumerate(analyses) if analyse['uuid'] == val['uuid']][0]
 209             analyses.pop(topop)
 210             self.matrix[self.ordermatrix[analyse['matrix']]]['analyses'] = analyses
 211         self.write()
 212         self.read()
 213
 214     def addtab(self, analyse) :
 215         self.opened[analyse['uuid']] = analyse
 216
 217     def rmtab(self, analyse) :
 218         del self.opened[analyse['uuid']]
 219
 220     def update(self, analyse) :
 221         if 'matrix_name' in analyse :
 222             self.matrixanalyse[analyse['uuid']].update(analyse)
 223         elif 'corpus_name' in analyse :
 224             self.corpus[analyse['uuid']].update(analyse)
 225         elif 'corpus' in analyse :
 226             self.analyses[analyse['uuid']].update(analyse)
 227         else :
 228             toupdate = [an for an in self.matrixanalyse[analyse['matrix']]['analyses'] if an['uuid'] == analyse['uuid']]
 229             toupdate[0].update(analyse)
 230         self.write()
 231         self.read()
 232
 233     def clean(self) :
 234         corpustodel = [corpus for corpus in self.history if not os.path.exists(corpus['ira'])]
 235         print(corpustodel)
 236         for corpus in corpustodel :
 237             print('cleaning :', corpus['corpus_name'])
 238             self.delete(corpus, corpus = True)
 239         anatodel = [analyse for corpus in self.history for analyse in corpus.get('analyses', []) if not os.path.exists(analyse.get('ira', '/'))]
 240         for analyse in anatodel :
 241             print('cleaning :', analyse['name'])
 242             self.delete(analyse)
 243
 244     def dostat(self):
 245         todel = {}
 246         tokens = 0
 247         corpusnb = {}
 248         subnb = 0
 249         analysenb = 0
 250         hours = 0
 251         minutes = 0
 252         secondes = 0
 253         ha = 0
 254         ma = 0
 255         sa = 0
 256         for corpus in self.history :
 257             analysenb += len(corpus.get('analyses', []))
 258             analyses = corpus.get('analyses', [])
 259             for analyse in analyses :
 260                 if os.path.exists(analyse['ira']) :
 261                     ana = DoConf(analyse['ira']).getoptions()
 262                     if 'time' in ana :
 263                         time = ana['time'].split()
 264                         ha += int(time[0].replace('h','')) * 3600
 265                         ma += int(time[1].replace('m','')) * 60
 266                         sa += int(time[2].replace('s',''))
 267             if os.path.exists(corpus['ira']) :
 268                 param = DoConf(corpus['ira']).getoptions()
 269                 time = param.get('time','0h 0m 0s')
 270                 time = time.split()
 271                 hours += int(time[0].replace('h','')) * 3600
 272                 minutes += int(time[1].replace('m','')) * 60
 273                 secondes += int(time[2].replace('s',''))
 274                 if param.get('originalpath', False) :
 275                     if param['originalpath'] in corpusnb :
 276                         corpusnb[param['originalpath']] += 1
 277                         tokens += int(param['occurrences'])
 278                     else :
 279                         corpusnb[param['originalpath']] = 1
 280                     #print param
 281                 else :
 282                     subnb += 1
 283             else :
 284                 if corpus['ira'] in todel :
 285                     todel['ira'] += 1
 286                 else :
 287                     todel['ira'] = 1
 288         print('Nbr total de corpus : %s' % len(self.history))
 289         corpus_nb = len(corpusnb) + len(todel)
 290         print('Nbr de corpus différents : %s' % corpus_nb)
 291         lentodel = len(todel)
 292         print('Nbr de corpus à supprimer : %s' % lentodel)
 293         print('Nbr de sous corpus : %s' % subnb)
 294         print("Nbr total d'occurrences : %s" % tokens)
 295         print('Moyenne occurrences par corpus : %f' % (tokens/corpus_nb))
 296         print('---------------------')
 297         print("Nbr total d'analyses : %s" % analysenb)
 298         print('Temps total indexation : %f h' % ((hours+minutes+secondes) / 3600))
 299         print('Temps total analyses :  %f h' % ((ha+ma+sa) / 3600))
 300
 301     def __str__(self) :
 302         return str(self.history)
 303
 304 class DoConf :
 305     def __init__(self, configfile=None, diff = None, parametres = None) :
 306         self.configfile = configfile
 307         self.conf = ConfigParser(interpolation=None) # pourquoi ce paramètre ???
 308
 309         if configfile is not None :
 310             configfile = normpath_win32(configfile)
 311             self.conf.read_file(codecs.open(configfile, 'r', 'utf8'))
 312         self.parametres = {}
 313         if parametres is not None :
 314             self.doparametres(parametres)
 315
 316     def doparametres(self, parametres) :
 317         return parametres
 318
 319     def getsections(self) :
 320         return self.conf.sections()
 321
 322     def getoptions(self, section = None, diff = None):
 323         parametres = {}
 324         if section is None :
 325             section = self.conf.sections()[0]
 326         for option in self.conf.options(section) :
 327             if self.conf.get(section, option).isdigit() :
 328                 parametres[option] = int(self.conf.get(section, option))
 329             elif self.conf.get(section, option) == 'False' :
 330                 parametres[option] = False
 331             elif self.conf.get(section, option) == 'True' :
 332                 parametres[option] = True
 333             elif self.conf.get(section, option).startswith('(') and self.conf.get(section, option).endswith(')') :
 334                 parametres[option] = ast.literal_eval(self.conf.get(section, option))
 335             elif self.conf.get(section, option).startswith('[') and self.conf.get(section, option).endswith(']') :
 336                 parametres[option] = ast.literal_eval(self.conf.get(section, option))
 337             else :
 338                 parametres[option] = self.conf.get(section, option)
 339         if 'type' not in parametres :
 340             parametres['type'] = section
 341         return parametres
 342
 343     def makeoptions(self, sections, parametres, outfile = None) :
 344         txt = ''
 345         for i, section in enumerate(sections) :
 346             txt += '[%s]\n' % section
 347             if not self.conf.has_section(section) :
 348                 self.conf.add_section(section)
 349             for option in parametres[i] :
 350                 if isinstance(parametres[i][option], int) :
 351                     self.conf.set(section, option, repr(parametres[i][option]))
 352                     txt += '%s = %i\n' % (option, parametres[i][option])
 353                 elif isinstance(parametres[i][option], str) :
 354                     self.conf.set(section, option, parametres[i][option])
 355                     txt += '%s = %s\n' % (option, parametres[i][option])
 356                 elif isinstance(parametres[i][option], wx.Colour) :
 357                     self.conf.set(section, option, str(parametres[i][option]))
 358                     txt += '%s = %s\n' % (option, str(parametres[i][option]))
 359                 elif option == 'analyses' :
 360                     pass
 361                 else :
 362                     self.conf.set(section, option, repr(parametres[i][option]))
 363                     txt += '%s = %s\n' % (option, repr(parametres[i][option]))
 364         if outfile is None :
 365             outfile = self.configfile
 366         outfile = normpath_win32(outfile)
 367         with open(outfile, 'w', encoding="utf-8") as f :
 368             f.write(txt)
 369             #self.conf.write(f)
 370
 371     def totext(self, parametres) :
 372         #txt = ['Corpus']
 373         txt = []
 374         for val in parametres :
 375             if isinstance(parametres[val], int) :
 376                 txt.append(' \t\t: '.join([val, repr(parametres[val])]))
 377             elif isinstance(parametres[val], str) :
 378                 txt.append(' \t\t: '.join([val, parametres[val]]))
 379             elif val in ['listet', 'stars'] :
 380                 pass
 381             else :
 382                 txt.append(' \t\t: '.join([val, repr(parametres[val])]))
 383         return '\n'.join(txt)
 384
 385
 386 def write_tab(tab, fileout) :
 387         csvWriter = csv.writer(open(fileout, 'w'), delimiter=';', quoting = csv.QUOTE_NONNUMERIC)
 388         csvWriter.writerows(tab)
 389
class BugDialog(wx.Dialog):
    """Stay-on-top dialog made of a multi-line text control and an OK button.

    The caller must pass a ``title`` keyword argument: it is read from
    ``kwds`` to set the window title.
    """
    def __init__(self, *args, **kwds):
        # begin wxGlade: MyDialog.__init__
        # Style and size are forced here, overriding any value passed in.
        kwds["style"] = wx.DEFAULT_DIALOG_STYLE | wx.STAY_ON_TOP
        kwds["size"] = wx.Size(500, 200)
        wx.Dialog.__init__(self, *args, **kwds)
        self.SetTitle(kwds['title'])
        # Text area that receives the message to display.
        self.text_ctrl_1 = wx.TextCtrl(self, -1, "", style=wx.TE_MULTILINE)
        self.text_ctrl_1.SetBackgroundColour('#DDE8EB')
        # Button created with the wx.ID_OK identifier and an empty label.
        self.button_1 = wx.Button(self, wx.ID_OK, "")

        self.__set_properties()
        self.__do_layout()
        # end wxGlade

    def __set_properties(self):
        """Set the minimum size of the dialog and of its text control."""
        # begin wxGlade: MyDialog.__set_properties
        self.SetMinSize(wx.Size(500, 200))
        self.text_ctrl_1.SetMinSize(wx.Size(500, 200))

        # end wxGlade

    def __do_layout(self):
        """Stack the text control (expanding) above the centred button."""
        # begin wxGlade: MyDialog.__do_layout
        sizer_1 = wx.BoxSizer(wx.VERTICAL)
        sizer_1.Add(self.text_ctrl_1, 1, wx.EXPAND, 0)
        sizer_1.Add(self.button_1, 0, wx.ALIGN_CENTER_HORIZONTAL, 0)
        self.SetSizer(sizer_1)
        sizer_1.Fit(self)
        self.Layout()
 420
 421
 422 def CreateIraFile(DictPathOut, clusternb, corpname='corpus_name', section = 'analyse'):
 423     AnalyseConf = ConfigParser()
 424     AnalyseConf.read(DictPathOut['ira'])
 425     AnalyseConf.add_section(section)
 426     date = datetime.datetime.now().ctime()
 427     AnalyseConf.set(section, 'date', str(date))
 428     AnalyseConf.set(section, 'clusternb', clusternb)
 429     AnalyseConf.set(section, 'corpus_name', corpname)
 430
 431     fileout = open(DictPathOut['ira'], 'w')
 432     AnalyseConf.write(fileout)
 433     fileout.close()
 434
 435 def multisort(liste2d, ordre, indices_tri):
 436
 437     """
 438     methode destinée à remplacer 'comp' qui a disparu en Python 3
 439         tri de tuples sur l'un des éléments du tuple
 440         en principe, elle doit renvoyer les éléments triés selon le principe d'avant
 441         tel que décrit dans la docstring de 'sortedby'
 442
 443         probablement à améliorer pour la rendre d'usage plus général
 444         en acceptant un nombre variable de parametres ???
 445     """
 446
 447     indices_triTuple = indices_tri.Tuple(int, ...)
 448     for key in reversed(indices_tri):
 449         liste2d.sort(key=attrgetter(key), reverse=ordre)
 450     return liste2d
 451
 452 def sortedby(liste2d, direct, *indices):
 453
 454     """
 455         sortedby: sort a list of lists (e.g. a table) by one or more indices
 456                   (columns of the table) and return the sorted list
 457
 458         e.g.
 459          for list = [[2,3],[1,2],[3,1]]:
 460          sortedby(list,1) will return [[3, 1], [1, 2], [2, 3]],
 461          sortedby(list,0) will return [[1, 2], [2, 3], [3, 1]]
 462
 463          elle n'est pas remplacée par la méthode 'multisort' ???
 464
 465     """
 466
 467 # iramuteq original
 468 #    nlist = map(lambda x, indices=indices:
 469 #                 map(lambda i, x=x: x[i], indices) + [x],
 470 #                 list)
 471
 472 # iramuteq passé à 2to3
 473 #    nlist = list(map(lambda x, indices=indices:
 474 #                 list(map(lambda i, x=x: x[i], indices)) + [x],
 475 #                 liste2d))
 476
 477     for key in reversed(indices):
 478         liste2d.sort(key=itemgetter(key), reverse=(direct==2))
 479     return liste2d
 480
 481
 482 #    if direct == 1:
 483 #        nlist.sort()
 484 #         sorted_list = multisort(liste2d, direct, *indices)
 485
 486 #    elif direct == 2:
 487 #        nlist.sort(reverse=True)
 488 #         sorted_list = multisort(liste2d, direct, *indices)
 489
 490 #    return [l[-1] for l in nlist]
 491 #    return sorted_list
 492
 493 def add_type(line, dictlem):
 494     if line[4] in dictlem:
 495         line.append(dictlem[line[4]])
 496     else :
 497         line.append('')
 498     return line
 499
 500 def treat_line_alceste(i, line) :
 501     if line[0] == '*' or line[0] == '*****' :
 502         return line + ['']
 503     if line[5] == 'NA':
 504         print('NA', line[5])
 505         pass
 506     elif float(line[5].replace(',', '.')) < 0.0001:
 507         line[5] = '< 0,0001'
 508     elif float(line[5].replace(',', '.')) > 0.05:
 509         line[5] = 'NS (%s)' % str(float(line[5].replace(',', '.')))[0:7]
 510     else:
 511         line[5] = str(float(line[5].replace(',', '.')))[0:7]
 512     return [i, int(line[0]), int(line[1]), float(line[2]), float(line[3]), line[6], line[4], line[5]]
 513
 514 def ReadProfileAsDico(File, Alceste=False, encoding = sys.getdefaultencoding()):
 515     dictlem = {}
 516     print('lecture des profiles')
 517     FileReader = codecs.open(File, 'r', encoding)
 518     Filecontent = FileReader.readlines()
 519     FileReader.close()
 520     DictProfile = {}
 521     count = 0
 522     #rows = [row.replace('\n', '').replace("'", '').replace('\"', '').replace(',', '.').replace('\r','').split(';') for row in Filecontent]
 523     rows = [row.replace('\n', '').replace("'", '').replace('\"', '').replace('\r','').split(';') for row in Filecontent]
 524     rows.pop(0)
 525     ClusterNb = rows[0][2]
 526     rows.pop(0)
 527     clusters = [row[2] for row in rows if row[0] == '**']
 528     valclusters = [row[1:4] for row in rows if row[0] == '****']
 529     lp = [i for i, line in enumerate(rows) if line[0] == '****']
 530     prof = [rows[lp[i] + 1:lp[i+1] - 1] for i in range(0, len(lp)-1)] + [rows[lp[-1] + 1:len(rows)]]
 531     if Alceste :
 532         prof = [[add_type(row, dictlem) for row in pr] for pr in prof]
 533         prof = [[treat_line_alceste(i,line) for i, line in enumerate(pr)] for pr in prof]
 534     else :
 535         prof = [[line + [''] for line in pr] for pr in prof]
 536         prof = [[treat_line_alceste(i,line) for i, line in enumerate(pr)] for pr in prof]
 537     for i, cluster in enumerate(clusters):
 538         DictProfile[cluster] = [valclusters[i]] + prof[i]
 539     return DictProfile
 540
 541 def GetTxtProfile(dictprofile, cluster_size) :
 542     proflist = []
 543     for classe in range(0, len(dictprofile)) :
 544         prof = dictprofile[str(classe + 1)]
 545         clinfo = cluster_size[classe]
 546         proflist.append('\n'.join([' '.join(['classe %i' % (classe + 1), '-', '%s uce sur %s - %s%%' % (clinfo[0], clinfo[1], clinfo[2])]), '\n'.join(['%5s|%5s|%6s|%6s|%8s|%8s|%20s\t%10s' % tuple([str(val) for val in line]) for line in prof if len(line)==8])]))
 547     return '\n\n'.join(proflist)
 548
 549 def formatExceptionInfo(maxTBlevel=5):
 550     cla, exc, trbk = sys.exc_info()
 551     try :
 552         excName = cla.__name__
 553     except :
 554         excName = 'None'
 555     try:
 556         excArgs = exc.args[0]
 557     except :
 558         excArgs = "<no args>"
 559     excTb = traceback.format_tb(trbk, maxTBlevel)
 560     return (excName, excArgs, excTb)
 561
 562
 563 #fonction des etudiants de l'iut
 564 def decoupercharact(chaine, longueur, longueurOptimale, separateurs = None) :
 565     """
 566         on part du dernier caractère, et on recule jusqu'au début de la chaîne.
 567         Si on trouve un '$', c'est fini.
 568         Sinon, on cherche le meilleur candidat. C'est-à-dire le rapport poids/distance le plus important.
 569     """
 570     separateurs = [['.', 60.0], ['?', 60.0], ['!', 60.0], ['£$£', 60], [':', 50.0], [';', 40.0], [',', 10.0], [' ', 0.1]]
 571     trouve = False                 # si on a trouvé un bon séparateur
 572     iDecoupe = 0                # indice du caractere ou il faut decouper
 573
 574     # on découpe la chaine pour avoir au maximum 240 caractères
 575     longueur = min(longueur, len(chaine) - 1)
 576     chaineTravail = chaine[:longueur + 1]
 577     nbCar = longueur
 578     meilleur = ['', 0, 0]        # type, poids et position du meilleur separateur
 579
 580     # on vérifie si on ne trouve pas un '$'
 581     indice = chaineTravail.find('$')
 582     if indice > -1:
 583         trouve = True
 584         iDecoupe = indice
 585
 586     # si on ne trouve rien, on cherche le meilleur séparateur
 587     if not trouve:
 588         while nbCar >= 0:
 589             caractere = chaineTravail[nbCar]
 590             distance = abs(longueurOptimale - nbCar) + 1
 591             meilleureDistance = abs(longueurOptimale - meilleur[2]) + 1
 592
 593             # on vérifie si le caractére courant est une marque de ponctuation
 594             for s in separateurs:
 595                 if caractere == s[0]:
 596                     # si c'est une ponctuation
 597
 598                     if s[1] / distance > float(meilleur[1]) / meilleureDistance:
 599                         # print nbCar, s[0]
 600                         meilleur[0] = s[0]
 601                         meilleur[1] = s[1]
 602                         meilleur[2] = nbCar
 603                         trouve = True
 604                         iDecoupe = nbCar
 605
 606                     # et on termine la recherche
 607                     break
 608
 609             # on passe au caractère précédant
 610             nbCar = nbCar - 1
 611
 612     # si on a trouvé
 613     if trouve:
 614         fin = chaine[iDecoupe + 1:]
 615         retour = chaineTravail[:iDecoupe]
 616         return len(retour) > 0, retour.split(), fin
 617     # si on a rien trouvé
 618     return False, chaine.split(), ''
 619
 620
# User-facing messages for the error codes raised as plain ``Exception``s.
# BugReport looks the exception text up here (either the whole text, or its
# first word when the text is made of a code followed by a second word that
# is appended to the message).
exceptions = {'paragrapheOT' : "Un problème de formatage (présence d'un marqueur de paragraphe (-*) en dehors d'un texte) est survenu à la ligne ",
              'EmptyText' : "Texte vide (probablement un problème de formatage du corpus). Le problème est apparu à la ligne ",
              'CorpusEncoding' : "Problème d'encodage.",
              'TextBeforeTextMark' : "Problème de formatage : du texte avant le premier marqueur de texte (****). Le problème est survenu à la ligne ",
              'MissingAnalyse' : 'Aucun fichier à cet emplacement :\n',
}
 627
 628 def BugReport(parent, error = None):
 629     for ch in parent.GetChildren():
 630         if "<class 'wx._windows.ProgressDialog'>" == str(type(ch)):
 631             ch.Destroy()
 632     excName, exc, excTb = formatExceptionInfo()
 633     if excName == 'Exception' :
 634         print(exc)
 635         if len(exc.split()) == 2 :
 636             mss, linenb = exc.split()
 637             if mss in exceptions :
 638                 txt = exceptions[mss] + linenb
 639             else :
 640                 txt = exc
 641         else :
 642             if exc in exceptions :
 643                 txt = exceptions[exc]
 644             else :
 645                 txt = exc
 646         title = "Information"
 647     else :
 648         txt = '\n            !== BUG ==!       \n'
 649         txt += '*************************************\n'
 650         txt += '\n'.join(excTb).replace('    ', ' ')
 651         txt += excName + '\n'
 652         txt += repr(exc)
 653         title = "Bug"
 654
 655     dial = BugDialog(parent, **{'title' : title})
 656     if 'Rerror' in dir(parent) :
 657         txt += parent.Rerror
 658         parent.Rerror = ''
 659     log.info(txt)
 660     dial.text_ctrl_1.write(txt)
 661     dial.CenterOnParent()
 662     dial.ShowModal()
 663     dial.Destroy()
 664
 665 def PlaySound(parent):
 666     if parent.pref.getboolean('iramuteq', 'sound') :
 667         try:
 668             if "gtk2" in wx.PlatformInfo:
 669                 error = Popen(['aplay','-q',os.path.join(parent.AppliPath,'son_fin.wav')])
 670             else :
 671                 sound = wx.adv.Sound(os.path.join(parent.AppliPath, 'son_fin.wav'))
 672                 sound.Play(wx.adv.SOUND_SYNC)
 673         except :
 674             print('pas de son')
 675
 676 def ReadDicoAsDico(dicopath):
 677     with codecs.open(dicopath, 'r', 'UTF8') as f:
 678         content = f.readlines()
 679     lines = [line.rstrip('\n\r').replace('\n', '').replace('"', '').split('\t') for line in content if line != '']
 680     return dict([[line[0], line[1:]] for line in lines])
 681
 682 def ReadLexique(parent, lang = 'french', filein = None):
 683     if lang != 'other' :
 684         if filein is None :
 685             parent.lexique = ReadDicoAsDico(parent.DictPath.get(lang, 'french'))
 686         else :
 687             parent.lexique = ReadDicoAsDico(filein)
 688     else :
 689         if filein is None :
 690             parent.lexique = {}
 691         else :
 692             parent.lexique = ReadDicoAsDico(filein)
 693
 694 def ReadList(filein, encoding = sys.getdefaultencoding(), sep = ';'):
 695     #file = open(filein)
 696     with codecs.open(filein, 'r', encoding) as f :
 697         content = f.read()
 698     content = [line.replace('\n', '').replace('\r','').replace('\"', '').replace(',', '.').split(sep) for line in content.splitlines()]
 699     #file = codecs.open(filein, 'r', encoding)
 700     #content = file.readlines()
 701     #file.close()
 702     first = content.pop(0)
 703     #first = first.replace('\n', '').replace('\r','').replace('\"', '').split(sep)
 704     dict = {}
 705     i = 0
 706     for line in content:
 707         #line = line.replace('\n', '').replace('\r','').replace('\"', '').replace(',', '.')
 708         #line = line.split(';')
 709         nline = [line[0]]
 710         for val in line[1:]:
 711             if val == 'NA' :
 712                 don = ''
 713             else:
 714                 try:
 715                     don = int(val)
 716                 except:
 717                     don = float('%.5f' % float(val))
 718             nline.append(don)
 719         dict[i] = nline
 720         i += 1
 721     return dict, first
 722
 723 def exec_RCMD(rpath, command) :
 724     log.info('R CMD INSTALL %s' % command)
 725     rpath = rpath.replace('\\','\\\\')
 726     error = call(["%s" % rpath, 'CMD', 'INSTALL', "%s" % command])
 727     return error
 728
 729 def exec_rcode(rpath, rcode, wait = True, graph = False):
 730     log.info("R Script : %s" % rcode)
 731     needX11 = False
 732     if sys.platform == 'darwin' :
 733         try :
 734             macversion = platform.mac_ver()[0].split('.')
 735             if int(macversion[1]) < 5 :
 736                 needX11 = True
 737             else :
 738                 needX11 = False
 739         except :
 740             needX11 = False
 741     rpath = rpath.replace('\\','\\\\')
 742     env = os.environ.copy()
 743     if sys.platform == 'darwin' and 'LC_ALL' not in env:
 744         env['LC_ALL'] = 'en_US.UTF-8'
 745     if not graph :
 746         if wait :
 747             if sys.platform == 'win32':
 748                 error = call(["%s" % rpath, "--vanilla","--slave","-f", "%s" % rcode])
 749             else :
 750                 error = call([rpath, '--slave', "--vanilla", "--encoding=UTF-8", "-f %s" % rcode], env = env)
 751             return error
 752         else :
 753             if sys.platform == 'win32':
 754                 pid = Popen(["%s" % rpath, '--vanilla','--slave','-f', "%s" % rcode])
 755             else :
 756                 pid = Popen([rpath, '--slave', "--vanilla", "--encoding=UTF-8", "-f %s" % rcode], stderr = PIPE, env = env, encoding='UTF-8') #PIPE ou STDOUT ?
 757             return pid
 758     else :
 759         if wait :
 760             if sys.platform == 'win32':
 761                 error = call(["%s" % rpath, '--vanilla','--slave','-f', "%s" % rcode])
 762             elif sys.platform == 'darwin' and needX11:
 763                 os.environ['DISPLAY'] = ':0.0'
 764                 error = call([rpath, '--vanilla','--slave', "--encoding=UTF-8","-f %s" % rcode], env = env, encoding='UTF-8')
 765             else :
 766                 error = call([rpath, '--vanilla','--slave', "--encoding=UTF-8","-f %s" % rcode], env = env, encoding='UTF-8')
 767             return error
 768         else :
 769             if sys.platform == 'win32':
 770                 pid = Popen(["%s" % rpath, '--vanilla','--slave','-f', "%s" % rcode])
 771             elif sys.platform == 'darwin' and needX11:
 772                 os.environ['DISPLAY'] = ':0.0'
 773                 pid = Popen([rpath, '--vanilla','--slave', "--encoding=UTF-8","-f %s" % rcode], stderr = PIPE, env = env, encoding='UTF-8')
 774             else :
 775                 pid = Popen([rpath, '--vanilla','--slave', "--encoding=UTF-8","-f %s" % rcode], stderr = PIPE, env = env, encoding='UTF-8')
 776             return pid
 777
 778 def check_Rresult(parent, pid) :
 779     if isinstance(pid, Popen) :
 780         if pid.returncode != 0 :
 781             error = pid.communicate()
 782             error = [str(error[0]), error[1]]
 783             if error[1] is None :
 784                 error[1] = 'None'
 785             parent.Rerror = '\n'.join([str(pid.returncode), '\n'.join(error)])
 786             try :
 787                 raise Exception('\n'.join(['Erreur R', '\n'.join(error[1:])]))
 788             except :
 789                 BugReport(parent)
 790             return False
 791         else :
 792             return True
 793     else :
 794         if pid != 0 :
 795             try :
 796                 raise Exception('Erreur R')
 797             except :
 798                 BugReport(parent)
 799             return False
 800         else :
 801             return True
 802
 803
def launchcommand(mycommand):
    """Start ``mycommand`` as a child process through ``Popen``.

    The ``Popen`` object is discarded: the function does not wait for the
    process and returns None, so the caller gets no exit status or output.
    """
    Popen(mycommand)
 806
 807 def print_liste(filename,liste):
 808     with open(filename,'w') as f :
 809         for graph in liste :
 810             f.write(';'.join(graph) +'\n')
 811 def read_list_file(filename, encoding = sys.getdefaultencoding()):
 812     with codecs.open(filename,'r', encoding) as f:
 813         content=f.readlines()
 814         ncontent=[line.replace('\n','').split(';') for line in content if line.strip() != '']
 815     return ncontent
 816
 817 def progressbar(self, maxi):
 818     ira = wx.GetApp().GetTopWindow()
 819     parent = ira
 820     try:
 821         maxi = int(maxi)
 822     except:
 823         maxi = 1
 824     prog = wx.ProgressDialog("Traitements",
 825                              "Veuillez patienter...",
 826                              maximum=maxi,
 827                              parent=parent,
 828                              style=wx.PD_APP_MODAL | wx.PD_AUTO_HIDE | wx.PD_ELAPSED_TIME | wx.PD_CAN_ABORT
 829                              )
 830                              # parent ???
 831     # le ABORT n'est pas géré à tous les coups ???
 832     prog.SetSize((400,150))
 833     #prog.SetIcon(ira._icon)
 834     return prog
 835
 836 def treat_var_mod(variables) :
 837     var_mod = {}
 838     variables = list(set(variables))
 839     varmod = [variable.split('_') for variable in variables]
 840     vars = list(set([var[0] for var in varmod if len(var) >=2]))
 841     for var in vars :
 842         mods = ['_'.join(v) for v in varmod if v[0] == var]
 843         var_mod[var] = mods
 844
 845 #     for variable in variables :
 846 #         if '_' in variable :
 847 #             forme = variable.split('_')
 848 #             var = forme[0]
 849 #             mod = forme[1]
 850 #             if not var in var_mod :
 851 #                 var_mod[var] = [variable]
 852 #             else :
 853 #                 if not mod in var_mod[var] :
 854 #                     var_mod[var].append(variable)
 855     return var_mod
 856
 857 def doconcorde(corpus, uces, mots, uci = False) :
 858     if not uci :
 859         ucestxt1 = [row for row in corpus.getconcorde(uces)]
 860     else :
 861         ucestxt1 = [row for row in corpus.getuciconcorde(uces)]
 862     ucestxt1 = dict(ucestxt1)
 863     ucestxt = []
 864     ucis_txt = []
 865     listmot = [corpus.getlems()[lem].formes for lem in mots]
 866     listmot = [corpus.getforme(fid).forme for lem in listmot for fid in lem]
 867     mothtml = ['<font color=red><b>%s</b></font>' % mot for mot in listmot]
 868     dmots = dict(list(zip(listmot, mothtml)))
 869     for uce in uces :
 870         ucetxt = ucestxt1[uce].split()
 871         ucetxt = ' '.join([dmots.get(mot, mot) for mot in ucetxt])
 872         if not uci :
 873             uciid = corpus.getucefromid(uce).uci
 874             ucis_txt.append('<p><b>' + ' '.join(corpus.ucis[corpus.getucefromid(uce).uci].etoiles) + '<a href="%i_%i"> *%i_%i</a></b></p>' % (uciid, uce, uciid, uce))
 875         else :
 876             ucis_txt.append('<p><b>' + ' '.join(corpus.ucis[uce].etoiles) + '</b></p>')
 877         ucestxt.append(ucetxt)
 878     return ucis_txt, ucestxt
 879
 880
def getallstcarac(corpus, analyse) :
   """Load the profiles of an analysis and print them (debug helper).

   Builds a ``PathOut`` from ``analyse['ira']``, reads the file registered
   under ``'PROFILE_OUT'`` with ``ReadProfileAsDico`` and prints the result.
   Nothing is returned and ``corpus`` is not used.
   """
   pathout = PathOut(analyse['ira'])
   # NOTE(review): `self` is not a parameter or local of this function, so
   # `self.encoding` raises NameError unless a module-level `self` exists.
   # `Alceste` is not defined in the visible part of the file either -- confirm.
   profils =  ReadProfileAsDico(pathout['PROFILE_OUT'], Alceste, self.encoding)
   print(profils)
 885
 886 def read_chd(filein, fileout):
 887     with open(filein, 'r') as f :
 888         content = f.read()
 889     #content = [line[3:].replace('"',"").replace(' ','') for line in content.splitlines()]
 890     content = [line.split('\t') for line in content.splitlines()]
 891     chd = {'name':1, 'children':[]}
 892     mere={}
 893     for i, line in enumerate(content) :
 894         if i == 0 :
 895             chd['children'] = [{'name': line[1],'size' : content[i+1][0]}, {'name':line[2], 'size': content[i+1][1]}]
 896             mere[line[1]] = chd['children'][0]
 897             mere[line[2]] = chd['children'][1]
 898         elif not i % 2 :
 899             if 'children' in mere[line[0]]:
 900                 mere[line[0]]['children'].append({'name': line[1],'size' : content[i+1][0]})
 901                 mere[line[1]] = mere[line[0]]['children'][-1]
 902                 mere[line[0]]['children'].append({'name': line[2],'size' : content[i+1][1]})
 903                 mere[line[2]] = mere[line[0]]['children'][-1]
 904             else :
 905                 mere[line[0]]['children'] = [{'name': line[1],'size' : content[i+1][0]}, {'name':line[2], 'size': content[i+1][1]}]
 906                 mere[line[1]] = mere[line[0]]['children'][-2]
 907                 mere[line[2]] = mere[line[0]]['children'][-1]
 908     with open(fileout, 'w') as f :
 909         f.write(json.dumps(chd))
 910
 911
 912 translation_languages = {"Afrikaans":"af", "Albanian":"sq", "Amharic":"am", "Arabic":"ar", "Armenian":"hy", "Azeerbaijani":"az", "Basque":"eu", "Belarusian":"be", "Bengali":"bn", "Bosnian":"bs", "Bulgarian":"bg", "Catalan":"ca", "Cebuano":"ceb", "Chichewa":"ny", "Chinese (Simplified)":"zh-CN", "Chinese (Traditional)":"zh-TW", "Corsican":"co", "Croatian":"hr", "Czech":"cs", "Danish":"da", "Dutch":"nl", "English":"en", "Esperanto":"eo", "Estonian":"et", "Filipino":"tl", "Finnish":"fi", "French":"fr", "Frisian":"fy", "Galician":"gl", "Georgian":"ka", "German":"de", "Greek":"el", "Gujarati":"gu", "Haitian Creole":"ht", "Hausa":"ha", "Hawaiian":"haw", "Hebrew":"iw", "Hindi":"hi", "Hmong":"hmn ", "Hungarian":"hu", "Icelandic":"is", "Igbo":"ig", "Indonesian":"id", "Irish":"ga", "Italian":"it", "Japanese":"ja", "Javanese":"jw", "Kannada":"kn", "Kazakh":"kk", "Khmer":"km", "Korean":"ko", "Kurdish":"ku", "Kyrgyz":"ky", "Lao":"lo", "Latin":"la", "Latvian":"lv", "Lithuanian":"lt", "Luxembourgish":"lb", "Macedonian":"mk", "Malagasy":"mg", "Malay":"ms", "Malayalam":"ml", "Maltese":"mt", "Maori":"mi", "Marathi":"mr", "Mongolian":"mn", "Burmese":"my", "Nepali":"ne", "Norwegian":"no", "Pashto":"ps", "Persian":"fa", "Polish":"pl", "Portuguese":"pt", "Punjabi":"ma", "Romanian":"ro", "Russian":"ru", "Samoan":"sm", "Scots Gaelic":"gd", "Serbian":"sr", "Sesotho":"st", "Shona":"sn", "Sindhi":"sd", "Sinhala":"si", "Slovak":"sk", "Slovenian":"sl", "Somali":"so", "Spanish":"es", "Sundanese":"su", "Swahili":"sw", "Swedish":"sv", "Tajik":"tg", "Tamil":"ta", "Telugu":"te", "Thai":"th", "Turkish":"tr", "Ukrainian":"uk", "Urdu":"ur", "Uzbek":"uz", "Vietnamese":"vi", "Welsh":"cy", "Xhosa":"xh", "Yiddish":"yi", "Yoruba":"yo", "Zulu":"zu", }
 913
 914
def gettranslation(words, lf, lt) :
    """Translate ``words`` from language ``lf`` to ``lt`` through an HTTP request.

    The words are joined with newlines, URL-quoted and sent in one GET
    request to the ``translate.googleapis.com`` endpoint below. The first
    element of every entry of ``data[0]`` in the JSON answer is returned
    after normalisation: ``'`` , spaces and ``-`` become ``_``, ``' | '``
    becomes ``'|'`` and newlines are removed.

    NOTE(review): the result is presumably meant to contain one item per
    input word, but that depends on how the service splits its answer --
    confirm. No timeout is set on the request, and network errors raised by
    ``urlopen`` are not caught here.
    """
    import urllib.request, urllib.error, urllib.parse
    import json
    # The backslash continuations keep the leading spaces of each line
    # inside the User-Agent string.
    agent = {'User-Agent':
    "Mozilla/4.0 (\
    compatible;\
    MSIE 6.0;\
    Windows NT 5.1;\
    SV1;\
    .NET CLR 1.1.4322;\
    .NET CLR 2.0.50727;\
    .NET CLR 3.0.04506.30\
    )"}
    base_link = "https://translate.googleapis.com/translate_a/single?client=gtx&sl=%s&tl=%s&dt=t&q=%s"
    # debug output: number of words sent
    print(len(words))
    totrans = urllib.parse.quote('\n'.join(words))
    link = base_link % (lf, lt, totrans)
    request = urllib.request.Request(link, headers=agent)
    raw_data = urllib.request.urlopen(request).read()
    data = json.loads(raw_data)
    return [line[0].replace("'", '_').replace(' | ', '|').replace(' ', '_').replace('-','_').replace('\n','') for line in data[0]]
 936
 937 def makenprof(prof, trans, deb=0) :
 938     nprof=[]
 939     if deb == 0 :
 940         nprof.append(prof[0])
 941     for i, val in enumerate(trans) :
 942         line = prof[deb+i+1][:]
 943         line[6] = val
 944         nprof.append(line)
 945     return nprof
 946
 947 def treatempty(val) :
 948     if val.strip() == '' :
 949         return '_'
 950     else :
 951         return val
 952
def translateprofile(corpus, dictprofile, lf='it', lt='fr', maxword = 50) :
    """Translate the forms (column 6) of every class profile.

    ``dictprofile`` is indexed by ``'1'``, ``'2'``, ... (``repr(i+1)``); each
    value is a list of rows whose first row is a header. Two separator rows
    split a profile in up to three parts: ``['*****', '*', ...]`` and
    ``['*', '*', ...]``. The rows before the first separator and the rows
    between the two separators are translated with ``gettranslation`` (at
    most about ``maxword`` rows each); the rows after the ``['*', ...]``
    separator are copied unchanged.
    (Presumably: active forms / supplementary forms / starred variables --
    TODO confirm against the profile writer.)

    Parameters:
        corpus: not used by this function.
        dictprofile: the profiles to translate, as described above.
        lf, lt: source and target language codes passed to ``gettranslation``.
        maxword: upper bound used when slicing the rows to translate.

    Returns:
        ``(nprof, lems)`` where ``nprof`` has the same keys as the translated
        classes and ``lems`` maps each (made unique) translation to the
        original form.
    """
    nprof = {}
    lems = {}
    for i in range(len(dictprofile)) :
        prof = dictprofile[repr(i+1)]
        # lenact: index of the first separator row (or len(prof) if none).
        # lensup starts at -1 when the '*****' separator exists so that the
        # separator row itself is not counted in the second part.
        try :
            lenact = prof.index(['*****', '*', '*', '*', '*', '*', '', ''])
            lensup = -1
        except ValueError:
            try :
                lenact = prof.index(['*', '*', '*', '*', '*', '*', '', ''])
                lensup = 0
            except ValueError:
                lenact = len(prof)
                lensup = 0
        # lensup: number of rows between lenact and the '*' separator
        # (or the end of the profile when that separator is absent)
        try :
            lensup += prof.index(['*', '*', '*', '*', '*', '*', '', ''])
            lensup = lensup - lenact
        except ValueError:
            lensup += len(prof) - lenact
        if lenact != 0 :
            if lenact > maxword :
                nlenact = maxword
            else :
                nlenact = lenact
            # row 0 is the header, hence the slice starting at 1
            actori = [line[6] for line in prof[1:nlenact]]
            act = [val.replace('_', ' ') for val in actori]
            act = gettranslation(act, lf, lt)
            for j, val in enumerate(actori) :
                if act[j] not in lems :
                    lems[act[j]] = val
                else :
                    # translation already used: append '+' until it is unique
                    while act[j] in lems :
                        act[j] = act[j] + "+"
                    lems[act[j]] = val
            nprof[repr(i+1)] = makenprof(prof, act)

        if lensup != 0 :
            if lensup > maxword :
                nlensup = maxword
            else :
                nlensup = lensup
            supori = [line[6] for line in prof[(1+lenact):(lenact+nlensup)]]
            sup = [val.replace('_', ' ') for val in supori]
            # empty forms are replaced by '_' before being sent for translation
            sup = [treatempty(val) for val in sup]
            sup = gettranslation(sup, lf, lt)
            for j, val in enumerate(supori) :
                if sup[j] not in lems :
                    lems[sup[j]] = val
                else :
                    while sup[j] in lems :
                        sup[j] = sup[j] + "+"
                    lems[sup[j]] = val
            # NOTE(review): nprof[repr(i+1)] only exists if the lenact != 0
            # branch ran; otherwise this append raises KeyError.
            nprof[repr(i+1)].append(['*****', '*', '*', '*', '*', '*', '', ''])
            nprof[repr(i+1)] += makenprof(prof, sup, deb=lenact)

        # copy the rows that follow the '*' separator without translating them;
        # the bare except silently skips the step on any error (missing
        # separator, but also a missing nprof entry)
        try :
            lenet = prof.index(['*', '*', '*', '*', '*', '*', '', ''])
            nprof[repr(i+1)].append(['*', '*', '*', '*', '*', '*', '', ''])
            nprof[repr(i+1)] += prof[(lenet+1):]
        except :
            pass
    return nprof, lems
1016
def write_translation_profile(prof, lems, language, dictpathout) :
    """Write a translated profile, its word table and update the index file.

    Three files, all looked up in ``dictpathout``, are involved:
    ``translation_profile_<language>.csv`` (the profile, ';'-separated),
    ``translation_words_<language>.csv`` (one ``translation<TAB>original``
    line per entry of ``lems``) and ``translations.txt``, a tab-separated
    index of the profile/words file pairs, which is rewritten only when the
    profile file is not listed in it yet.
    """
    profile_name = 'translation_profile_%s.csv' % language
    words_name = 'translation_words_%s.csv' % language

    # load the existing index of translations, if any
    translist = []
    if os.path.exists(dictpathout['translations.txt']) :
        with codecs.open(dictpathout['translations.txt'], 'r', 'utf8') as f :
            translist = [entry.split('\t') for entry in f.read().splitlines()]

    toprint = [
        ['','','','','',''],
        ['***', 'nb classes', repr(len(prof)), '***', '', ''],
    ]
    for number in range(1, len(prof) + 1) :
        key = repr(number)
        toprint.append(['**', 'classe', key, '**', '', ''])
        toprint.append(['****'] + prof[key][0] + ['****'])
        for row in prof[key][1:] :
            cells = [repr(row[1]), repr(row[2]), repr(row[3]), repr(row[4]), row[6],
                     row[7].replace('< 0,0001', '0.00009').replace('NS (','').replace(')','')]
            # separator rows are written as plain star markers
            if row[0] == '*' :
                cells = ['*', '*', '*', '*', '*', '*']
            elif row[0] == '*****' :
                cells = ['*****','*','*', '*', '*', '*']
            toprint.append(cells)

    with open(dictpathout[profile_name], 'w') as f :
        f.write('\n'.join(';'.join(cells) for cells in toprint))
    with open(dictpathout[words_name], 'w') as f :
        f.write('\n'.join('\t'.join([translated, lems[translated]]) for translated in lems))

    known = [entry[0] for entry in translist]
    if profile_name not in known :
        translist.append([profile_name, words_name])
        with open(dictpathout['translations.txt'], 'w') as f :
            f.write('\n'.join('\t'.join(entry) for entry in translist))
1045
def makesentidict(infile, language, outfile = '/tmp/tgenemo.csv') :
    """Turn a tab-separated sentiment lexicon into one word list per sentiment.

    The first line of ``infile`` (UTF-8) holds the column titles. It must
    contain the ten sentiment columns listed in ``senti`` and at least one
    column whose title contains ``'(fr)'``; the first such column provides
    the (lower-cased) words. A word belongs to a sentiment when its cell in
    that column is ``'1'``.

    Parameters:
        infile: path of the lexicon.
        language: currently unused; the word column is always the '(fr)' one.
        outfile: path of the file to write. Defaults to the historical
            hard-coded location.

    The output has one tab-separated line per sentiment, in the order of
    ``senti``: the lower-cased sentiment name followed by its words.

    Raises:
        ValueError: if a sentiment column is missing.
        IndexError: if no column title contains '(fr)'.
    """
    with codecs.open(infile,'r', 'utf8') as f :
        content = f.read()
    content = [line.split('\t') for line in content.splitlines()]
    titles = content.pop(0)
    senti = ['Positive', 'Negative', 'Anger', 'Anticipation', 'Disgust', 'Fear', 'Joy', 'Sadness', 'Surprise', 'Trust']
    columns = [titles.index(sent) for sent in senti]
    frtitle = [val for val in titles if '(fr)' in val]
    frid = titles.index(frtitle[0])
    # one output row per sentiment: label first, then every flagged word
    rows = []
    for sent, column in zip(senti, columns) :
        words = [line[frid].lower() for line in content if line[column] == '1']
        rows.append([sent.lower()] + words)
    with open(outfile, 'w') as f :
        for row in rows :
            f.write('\t'.join(row) + '\n')
1071
def countsentfromprof(prof, encoding, sentidict) :
    """Parse a ';'-separated count file and print the intermediate results.

    Each line of ``prof`` is ``label;int;int;...``. The raw fields, the
    ``[label, [ints]]`` pairs and the resulting ``label -> ints`` dict are
    printed in turn. Nothing is returned and ``sentidict`` is not used.
    """
    with codecs.open(prof, 'r', encoding) as f :
        raw = f.read()
    rows = [line.split(';') for line in raw.splitlines()]
    print(rows)
    pairs = [[row[0], [int(cell) for cell in row[1:]]] for row in rows]
    print(pairs)
    counts = dict(pairs)
    print(counts)
1081
def iratolexico(infile, outfile, encoding) :
    # Reads ``infile`` line by line with the given encoding and splits on
    # whitespace every line that starts with '**** '.
    # NOTE(review): in the part of the function visible here the split result
    # is only rebound to ``line`` and ``outfile`` is never written -- the
    # function looks unfinished; confirm before relying on it.
    with codecs.open(infile, 'r', encoding) as f :
        for line in f :
            if line.startswith('**** ') :
                line = line.split()
1087