1 # -*- coding: utf-8 -*-
2 #Author: Pierre Ratinaud
3 #Copyright (c) 2008-2020 Pierre Ratinaud
4 #modification pour python 3 : Laurent Mérat, 6x7 - mai 2020
7 #------------------------------------
8 # import des modules python
9 #------------------------------------
11 from subprocess import Popen, call, PIPE
23 from shutil import copyfile
26 #from dialog import BugDialog
28 from operator import itemgetter
30 #------------------------------------
31 # import des modules wx
32 #------------------------------------
36 #------------------------------------
37 # import des fichiers du projet
38 #------------------------------------
39 from configparser import ConfigParser
# Module-level logger shared by the whole application.
log = logging.getLogger('iramuteq')

# Names of the similarity/association indices offered by the similarity analysis
# (passed to the R backend; order matters for the GUI choice widgets).
indices_simi = ['cooccurrence' ,'pourcentage de cooccurrence','Russel','Jaccard', 'Kulczynski1', 'Kulczynski2', 'Mountford', 'Fager', 'simple matching', 'Hamman', 'Faith', 'Tanimoto', 'Dice', 'Phi', 'Stiles', 'Michael', 'Mozley', 'Yule', 'Yule2', 'Ochiai', 'Simpson', 'Braun-Blanquet','Chi-squared', 'Phi-squared', 'Tschuprow', 'Cramer', 'Pearson', 'binomial']
def open_folder(folder):
    # Open `folder` in the platform's file manager.
    # NOTE(review): the body of the win32 branch (and the else) appears
    # truncated in this file — on win32 this presumably used os.startfile.
    if sys.platform == "win32":
    # 'open' on macOS, 'xdg-open' on other unix-likes
    opener ="open" if sys.platform == "darwin" else "xdg-open"
    #call([opener, folder])
    # shell=True because the command string contains '&' to detach
    call(["%s '%s' &" % (opener, folder)], shell=True)
def normpath_win32(path) :
    # Collapse doubled backslashes in a Windows path; no-op on other platforms.
    # NOTE(review): the return statements appear truncated in this file.
    if not sys.platform == 'win32' :
    while '\\\\' in path :
        path = path.replace('\\\\', '\\')
    # a path starting with a single '\' is presumably a UNC share that lost
    # its leading backslash — TODO confirm against callers
    if path.startswith('\\') and not path.startswith('\\\\') :
    # --- TGen: tab-separated "tgen" lexicon (entry name -> list of words) ---
    # NOTE(review): several original lines are missing in this file view.

    def __init__(self, path = None, encoding = 'utf8'):
        # Remember the file encoding used by read().
        self.encoding = encoding

    def __getitem__(self, key):
        # Dictionary-style access to a tgen entry (body truncated here).

    def read(self, path = None):
        # Load the tgen file into self.tgen as {first column: remaining columns}.
        with codecs.open(path, 'r', self.encoding) as f :
        tgen = [line.split('\t') for line in tgen.splitlines()]
        tgen = dict([[line[0], line[1:]] for line in tgen])

    def write(self, path = None):
        # Serialize self.tgen back to a tab-separated file, one entry per line.
        with open(path, 'w', encoding='utf8') as f :
            f.write('\n'.join(['\t'.join([val] + self.tgen[val]) for val in self.tgen]))

    def writetable(self, pathout, tgens, totocc):
        # Write a table of tgen counts per star variable ("etoile") plus a
        # synthetic total row whose name is made unique with a numeric suffix.
        etoiles = list(totocc.keys())
        with open(pathout, 'w', encoding='utf8') as f :
            line = '\t'.join(['tgens'] + etoiles) + '\n'
            line = '\t'.join([t] + [repr(tgens[t][et]) for et in etoiles]) + '\n'
            while totname + repr(i) in tgens :
            totname = totname + repr(i)
            line = '\t'.join([totname] + [repr(totocc[et]) for et in etoiles]) + '\n'
    # --- History: persistent registry of corpora/matrices/analyses,
    # stored as a JSON file (formerly a shelve, see commented lines). ---
    # NOTE(review): parts of __init__/read/write are missing in this file view.

    def __init__(self, filein, syscoding = 'utf8') :
        self.syscoding = syscoding
        # caches of currently opened items, keyed by uuid
        self.openedcorpus = {}
        self.openedmatrix = {}
        # read(): load the JSON history and build uuid -> record lookup tables
        with open(self.filein, 'r') as fjson :
        # d = shelve.open(self.filein, protocol=1)
        self.history = d.get('history', [])
        self.matrix = d.get('matrix', [])
        self.ordercorpus = dict([[corpus['uuid'], i] for i, corpus in enumerate(self.history)])
        self.corpus = dict([[corpus['uuid'], corpus] for corpus in self.history])
        self.analyses = dict([[analyse['uuid'], analyse] for corpus in self.history for analyse in corpus.get('analyses', [])])
        self.matrixanalyse = dict([[mat['uuid'], mat] for mat in self.matrix])
        self.ordermatrix = dict([[matrix['uuid'], i] for i, matrix in enumerate(self.matrix)])
        # write(): dump history and matrix lists back to the JSON file
        d['history'] = self.history
        d['matrix'] = self.matrix
        with open(self.filein, 'w') as f :
            # default=str so non-JSON-serializable values are stringified
            f.write(json.dumps(d, indent=4, default=str))
        #d = shelve.open(self.filein, protocol=1)
    def add(self, analyse) :
        # Register a corpus or analysis record in the history.
        # NOTE(review): several else/return lines are missing in this file view.
        log.info('add to history %s' % analyse.get('corpus_name', 'pas un corpus'))
        # only a minimal projection of the analysis dict is persisted
        tosave = {'uuid' : analyse['uuid'], 'ira': analyse['ira'], 'type' : analyse['type']}
        if tosave['uuid'] in self.corpus :
            log.info('problem : this uuid is already in history : %s' % tosave['uuid'])
        if analyse.get('corpus', False) :
            if analyse['uuid'] in self.analyses :
            tosave['corpus'] = analyse['corpus']
            tosave['name'] = analyse['name']
            acorpus_uuid = analyse['corpus']
            if acorpus_uuid in self.corpus :
                if 'analyses' in self.history[self.ordercorpus[acorpus_uuid]] :
                    self.history[self.ordercorpus[acorpus_uuid]]['analyses'].append(tosave)
                self.history[self.ordercorpus[acorpus_uuid]]['analyses'] = [tosave]
            # analysis whose parent corpus is unknown -> orphan list
            self.orph.append(tosave)
        tosave['corpus_name'] = analyse['corpus_name']
        #self.ordercorpus[tosave['uuid']] = len(history)
        #self.corpus[tosave['uuid']] = analyse
        self.history.append(tosave)
    def addMatrix(self, analyse) :
        # Register a matrix record (tosave construction truncated in this view).
        #tosave['matrix_name'] = analyse['matrix_name']
        tosave['analyses'] = []
        self.matrix.append(tosave)

    def addMatrixAnalyse(self, analyse) :
        # Attach an analysis record to its parent matrix, if known.
        tosave = {'uuid' : analyse['uuid'], 'ira': analyse['ira'], 'type' : analyse['type'], 'matrix' : analyse['matrix']}
        tosave['name'] = analyse['name']
        if tosave['matrix'] in self.ordermatrix :
            self.matrix[self.ordermatrix[tosave['matrix']]]['analyses'].append(tosave)
    def addmultiple(self, analyses) :
        # Register several analyses at once, attaching each to its parent corpus.
        # NOTE(review): the else branch / save call appear truncated here.
        log.info('add multiple')
        for analyse in analyses :
            tosave = {'uuid' : analyse['uuid'], 'ira': analyse['ira'], 'type' : analyse['type']}
            corpus = analyse['corpus']
            tosave['corpus'] = corpus
            tosave['name'] = analyse['name']
            if corpus in self.corpus :
                if 'analyses' in self.history[self.ordercorpus[corpus]] :
                    self.history[self.ordercorpus[corpus]]['analyses'].append(tosave)
                self.history[self.ordercorpus[corpus]]['analyses'] = [tosave]
    def delete(self, analyse, corpus = False) :
        # Remove a corpus, analysis, matrix or matrix-analysis from the history.
        # NOTE(review): some branch headers are missing in this file view.
        log.info('delete %s' % analyse.get('name', 'noname'))
        self.history.pop(self.ordercorpus[analyse['uuid']])
        if analyse['uuid'] in self.openedcorpus :
            del self.openedcorpus[analyse['uuid']]
        log.info('delete corpus : %s' % analyse['uuid'])
        elif analyse['uuid'] in self.analyses :
            # analysis attached to a corpus: pop it from the corpus record
            todel = [i for i, ana in enumerate(self.corpus[analyse['corpus']]['analyses']) if ana['uuid'] == analyse['uuid']][0]
            self.history[self.ordercorpus[analyse['corpus']]]['analyses'].pop(todel)
        elif analyse['uuid'] in self.matrixanalyse :
            self.matrix = [mat for mat in self.matrix if mat['uuid'] != analyse['uuid']]
        elif analyse.get('matrix', False) in self.matrixanalyse :
            analyses = self.matrix[self.ordermatrix[analyse['matrix']]]['analyses']
            topop = [i for i, val in enumerate(analyses) if analyse['uuid'] == val['uuid']][0]
            self.matrix[self.ordermatrix[analyse['matrix']]]['analyses'] = analyses
    def addtab(self, analyse) :
        # Track an analysis as opened in a GUI tab.
        self.opened[analyse['uuid']] = analyse

    def rmtab(self, analyse) :
        # Forget an analysis when its GUI tab is closed.
        del self.opened[analyse['uuid']]

    def update(self, analyse) :
        # Merge new key/values into the stored record, dispatching on record kind.
        # NOTE(review): the trailing else branch header is missing in this view.
        if 'matrix_name' in analyse :
            self.matrixanalyse[analyse['uuid']].update(analyse)
        elif 'corpus_name' in analyse :
            self.corpus[analyse['uuid']].update(analyse)
        elif 'corpus' in analyse :
            self.analyses[analyse['uuid']].update(analyse)
        # matrix sub-analysis: locate it inside its parent matrix record
        toupdate = [an for an in self.matrixanalyse[analyse['matrix']]['analyses'] if an['uuid'] == analyse['uuid']]
        toupdate[0].update(analyse)
        # clean(): drop history entries whose files no longer exist on disk.
        corpustodel = [corpus for corpus in self.history if not os.path.exists(corpus['ira'])]
        for corpus in corpustodel :
            print('cleaning :', corpus['corpus_name'])
            self.delete(corpus, corpus = True)
        # analyses whose result file vanished ('/' default never exists as a file)
        anatodel = [analyse for corpus in self.history for analyse in corpus.get('analyses', []) if not os.path.exists(analyse.get('ira', '/'))]
        for analyse in anatodel :
            print('cleaning :', analyse['name'])
        # stat(): aggregate usage statistics over the whole history and print
        # a human-readable summary (counts, occurrences, cumulated run times).
        # NOTE(review): accumulator initialisations and several branch headers
        # are missing in this file view.
        for corpus in self.history :
            analysenb += len(corpus.get('analyses', []))
            analyses = corpus.get('analyses', [])
            for analyse in analyses :
                if os.path.exists(analyse['ira']) :
                    ana = DoConf(analyse['ira']).getoptions()
                    # 'time' is stored as "Hh Mm Ss"
                    time = ana['time'].split()
                    ha += int(time[0].replace('h','')) * 3600
                    ma += int(time[1].replace('m','')) * 60
                    sa += int(time[2].replace('s',''))
            if os.path.exists(corpus['ira']) :
                param = DoConf(corpus['ira']).getoptions()
                time = param.get('time','0h 0m 0s')
                hours += int(time[0].replace('h','')) * 3600
                minutes += int(time[1].replace('m','')) * 60
                secondes += int(time[2].replace('s',''))
                if param.get('originalpath', False) :
                    # count re-imports of the same source corpus
                    if param['originalpath'] in corpusnb :
                        corpusnb[param['originalpath']] += 1
                        tokens += int(param['occurrences'])
                    corpusnb[param['originalpath']] = 1
            if corpus['ira'] in todel :
        print('Nbr total de corpus : %s' % len(self.history))
        corpus_nb = len(corpusnb) + len(todel)
        print('Nbr de corpus différents : %s' % corpus_nb)
        lentodel = len(todel)
        print('Nbr de corpus à supprimer : %s' % lentodel)
        print('Nbr de sous corpus : %s' % subnb)
        print("Nbr total d'occurrences : %s" % tokens)
        print('Moyenne occurrences par corpus : %f' % (tokens/corpus_nb))
        print('---------------------')
        print("Nbr total d'analyses : %s" % analysenb)
        print('Temps total indexation : %f h' % ((hours+minutes+secondes) / 3600))
        print('Temps total analyses : %f h' % ((ha+ma+sa) / 3600))
        # __str__(): debug representation of the whole history list
        return str(self.history)
    # --- DoConf: thin wrapper around ConfigParser for .ira config files ---

    def __init__(self, configfile=None, diff = None, parametres = None) :
        self.configfile = configfile
        # interpolation=None so literal '%' characters in values are preserved
        self.conf = ConfigParser(interpolation=None)
        if configfile is not None :
            configfile = normpath_win32(configfile)
            self.conf.read_file(codecs.open(configfile, 'r', 'utf8'))
        if parametres is not None :
            self.doparametres(parametres)

    def doparametres(self, parametres) :
        # Body truncated in this file view.

    def getsections(self) :
        # Return the list of section names of the underlying config.
        return self.conf.sections()
    def getoptions(self, section = None, diff = None):
        # Read one section into a dict, coercing values: digits -> int,
        # 'True'/'False' -> bool, '(..)'/'[..]' -> literal tuple/list,
        # anything else kept as string.
        # NOTE(review): parametres init, default-section guard, the final else
        # and the return appear truncated in this file view.
        section = self.conf.sections()[0]
        for option in self.conf.options(section) :
            if self.conf.get(section, option).isdigit() :
                parametres[option] = int(self.conf.get(section, option))
            elif self.conf.get(section, option) == 'False' :
                parametres[option] = False
            elif self.conf.get(section, option) == 'True' :
                parametres[option] = True
            elif self.conf.get(section, option).startswith('(') and self.conf.get(section, option).endswith(')') :
                parametres[option] = ast.literal_eval(self.conf.get(section, option))
            elif self.conf.get(section, option).startswith('[') and self.conf.get(section, option).endswith(']') :
                parametres[option] = ast.literal_eval(self.conf.get(section, option))
            parametres[option] = self.conf.get(section, option)
        # remember which section these options came from
        if 'type' not in parametres :
            parametres['type'] = section
    def makeoptions(self, sections, parametres, outfile = None) :
        # Write one dict of options per section into the config, mirroring the
        # value coercion done by getoptions(), and persist to `outfile`
        # (defaults to self.configfile).
        # NOTE(review): txt init and parts of the 'analyses' branch are
        # truncated in this file view.
        for i, section in enumerate(sections) :
            txt += '[%s]\n' % section
            if not self.conf.has_section(section) :
                self.conf.add_section(section)
            for option in parametres[i] :
                if isinstance(parametres[i][option], int) :
                    self.conf.set(section, option, repr(parametres[i][option]))
                    txt += '%s = %i\n' % (option, parametres[i][option])
                elif isinstance(parametres[i][option], str) :
                    self.conf.set(section, option, parametres[i][option])
                    txt += '%s = %s\n' % (option, parametres[i][option])
                elif isinstance(parametres[i][option], wx.Colour) :
                    self.conf.set(section, option, str(parametres[i][option]))
                    txt += '%s = %s\n' % (option, str(parametres[i][option]))
                elif option == 'analyses' :
                    self.conf.set(section, option, repr(parametres[i][option]))
                    txt += '%s = %s\n' % (option, repr(parametres[i][option]))
        outfile = self.configfile
        outfile = normpath_win32(outfile)
        with open(outfile, 'w', encoding="utf-8") as f :
    def totext(self, parametres) :
        # Render a parameters dict as human-readable "key \t\t: value" lines.
        # NOTE(review): txt init and one branch body are truncated in this view.
        for val in parametres :
            if isinstance(parametres[val], int) :
                txt.append(' \t\t: '.join([val, repr(parametres[val])]))
            elif isinstance(parametres[val], str) :
                txt.append(' \t\t: '.join([val, parametres[val]]))
            elif val in ['listet', 'stars'] :
            txt.append(' \t\t: '.join([val, repr(parametres[val])]))
        return '\n'.join(txt)
def write_tab(tab, fileout) :
    """Write a 2D table to ``fileout`` as a semicolon-separated CSV file.

    Non-numeric cells are quoted (``csv.QUOTE_NONNUMERIC``) so numbers can be
    told apart from strings on re-read.

    Fix: the original opened the file without ever closing it (the handle was
    left to the garbage collector); a context manager guarantees the buffer is
    flushed and the descriptor released.
    """
    with open(fileout, 'w', newline='', encoding='utf8') as f:
        csvWriter = csv.writer(f, delimiter=';', quoting=csv.QUOTE_NONNUMERIC)
        csvWriter.writerows(tab)
class BugDialog(wx.Dialog):
    """Modal dialog used to display error reports (wxGlade-generated layout).

    NOTE(review): parts of __init__/__set_properties/__do_layout are
    truncated in this file view.
    """

    def __init__(self, *args, **kwds):
        # begin wxGlade: MyDialog.__init__
        kwds["style"] = wx.DEFAULT_DIALOG_STYLE | wx.STAY_ON_TOP
        kwds["size"] = wx.Size(500, 200)
        wx.Dialog.__init__(self, *args, **kwds)
        self.SetTitle(kwds['title'])
        # multiline read/write area holding the traceback text
        self.text_ctrl_1 = wx.TextCtrl(self, -1, "", style=wx.TE_MULTILINE)
        self.text_ctrl_1.SetBackgroundColour('#DDE8EB')
        self.button_1 = wx.Button(self, wx.ID_OK, "")
        self.__set_properties()

    def __set_properties(self):
        # begin wxGlade: MyDialog.__set_properties
        self.SetMinSize(wx.Size(500, 200))
        self.text_ctrl_1.SetMinSize(wx.Size(500, 200))

    def __do_layout(self):
        # begin wxGlade: MyDialog.__do_layout
        sizer_1 = wx.BoxSizer(wx.VERTICAL)
        sizer_1.Add(self.text_ctrl_1, 1, wx.EXPAND, 0)
        sizer_1.Add(self.button_1, 0, wx.ALIGN_CENTER_HORIZONTAL, 0)
        self.SetSizer(sizer_1)
def CreateIraFile(DictPathOut, clusternb, corpname='corpus_name', section = 'analyse'):
    # Write (or extend) the .ira config file describing an analysis result.
    # NOTE(review): one set() call and the file close appear truncated here;
    # the output handle should ideally be a context manager.
    AnalyseConf = ConfigParser()
    AnalyseConf.read(DictPathOut['ira'])
    AnalyseConf.add_section(section)
    date = datetime.datetime.now().ctime()
    AnalyseConf.set(section, 'date', str(date))
    AnalyseConf.set(section, 'clusternb', clusternb)
    AnalyseConf.set(section, 'corpus_name', corpname)
    fileout = open(DictPathOut['ira'], 'w', encoding='utf8')
    AnalyseConf.write(fileout)
def multisort(liste2d, ordre, indices_tri):
    # Multi-key in-place sort of a list of rows (Python 3 replacement for the
    # removed cmp-based sort).
    method intended to replace 'comp' which disappeared in Python 3
    sort of tuples on one of the tuple's elements
    in principle it must return the elements sorted as before,
    as described in the docstring of 'sortedby'
    probably to be improved to make it more general
    by accepting a variable number of parameters ???
    # NOTE(review): this function looks unfinished/broken as written:
    # `indices_tri.Tuple(int, ...)` is not a valid call, and `attrgetter`
    # is not imported here (only `itemgetter` is) — compare with sortedby().
    indices_triTuple = indices_tri.Tuple(int, ...)
    for key in reversed(indices_tri):
        liste2d.sort(key=attrgetter(key), reverse=ordre)
def sortedby(liste2d, direct, *indices):
    # Stable multi-column sort of a table; direct == 2 means descending.
    # NOTE(review): the return statement appears truncated in this file view.
    sortedby: sort a list of lists (e.g. a table) by one or more indices
    (columns of the table) and return the sorted list
    for list = [[2,3],[1,2],[3,1]]:
    sortedby(list,1) will return [[3, 1], [1, 2], [2, 3]],
    sortedby(list,0) will return [[1, 2], [2, 3], [3, 1]]
    isn't it replaced by the 'multisort' method ???
    # nlist = map(lambda x, indices=indices:
    # map(lambda i, x=x: x[i], indices) + [x],
    # iramuteq run through 2to3
    # nlist = list(map(lambda x, indices=indices:
    # list(map(lambda i, x=x: x[i], indices)) + [x],
    # sorting on the last key first keeps the sort stable across keys
    for key in reversed(indices):
        liste2d.sort(key=itemgetter(key), reverse=(direct==2))
    # sorted_list = multisort(liste2d, direct, *indices)
    # nlist.sort(reverse=True)
    # sorted_list = multisort(liste2d, direct, *indices)
    # return [l[-1] for l in nlist]
def add_type(line, dictlem):
    # Append the grammatical type of the lemma (column 4) when known.
    # NOTE(review): the else branch and return are truncated in this view.
    if line[4] in dictlem:
        line.append(dictlem[line[4]])

def treat_line_alceste(i, line) :
    # Normalize one Alceste profile row: reorder columns, convert counts to
    # int, percentages/chi2 to float, and format the p-value (column 5).
    # NOTE(review): the star-row branch bodies are truncated in this view.
    if line[0] == '*' or line[0] == '*****' :
    elif float(line[5].replace(',', '.')) < 0.0001:
    elif float(line[5].replace(',', '.')) > 0.05:
        # non-significant p-values are flagged NS
        line[5] = 'NS (%s)' % str(float(line[5].replace(',', '.')))[0:7]
        line[5] = str(float(line[5].replace(',', '.')))[0:7]
    return [i, int(line[0]), int(line[1]), float(line[2]), float(line[3]), line[6], line[4], line[5]]
def ReadProfileAsDico(File, Alceste=False, encoding = 'utf8'):
    # Parse a profile CSV produced by the R scripts into
    # {cluster label: [cluster stats] + rows}.
    # NOTE(review): dictlem/DictProfile init and the Alceste/else branch
    # headers are truncated in this file view; the reader handle is not
    # explicitly closed.
    print('lecture des profiles')
    FileReader = open(File, 'r', encoding='utf8')
    Filecontent = FileReader.readlines()
    #rows = [row.replace('\n', '').replace("'", '').replace('\"', '').replace(',', '.').replace('\r','').split(';') for row in Filecontent]
    rows = [row.replace('\n', '').replace("'", '').replace('\"', '').replace('\r','').split(';') for row in Filecontent]
    ClusterNb = rows[0][2]
    # '**' rows carry cluster labels, '****' rows carry cluster statistics
    clusters = [row[2] for row in rows if row[0] == '**']
    valclusters = [row[1:4] for row in rows if row[0] == '****']
    lp = [i for i, line in enumerate(rows) if line[0] == '****']
    # slice the row list into one chunk per cluster
    prof = [rows[lp[i] + 1:lp[i+1] - 1] for i in range(0, len(lp)-1)] + [rows[lp[-1] + 1:len(rows)]]
    prof = [[add_type(row, dictlem) for row in pr] for pr in prof]
    prof = [[treat_line_alceste(i,line) for i, line in enumerate(pr)] for pr in prof]
    prof = [[line + [''] for line in pr] for pr in prof]
    prof = [[treat_line_alceste(i,line) for i, line in enumerate(pr)] for pr in prof]
    for i, cluster in enumerate(clusters):
        DictProfile[cluster] = [valclusters[i]] + prof[i]
def GetTxtProfile(dictprofile, cluster_size) :
    # Render all cluster profiles as a fixed-width plain-text report.
    # NOTE(review): the proflist initialisation is truncated in this view.
    for classe in range(0, len(dictprofile)) :
        prof = dictprofile[str(classe + 1)]
        clinfo = cluster_size[classe]
        proflist.append('\n'.join([' '.join(['classe %i' % (classe + 1), '-', '%s uce sur %s - %s%%' % (clinfo[0], clinfo[1], clinfo[2])]), '\n'.join(['%5s|%5s|%6s|%6s|%8s|%8s|%20s\t%10s' % tuple([str(val) for val in line]) for line in prof if len(line)==8])]))
    return '\n\n'.join(proflist)
def formatExceptionInfo(maxTBlevel=5):
    # Return (exception name, first argument or placeholder, formatted
    # traceback lines) for the exception currently being handled.
    # NOTE(review): the try/except wrapper around exc.args is truncated here.
    cla, exc, trbk = sys.exc_info()
    excName = cla.__name__
    excArgs = exc.args[0]
    excArgs = "<no args>"
    excTb = traceback.format_tb(trbk, maxTBlevel)
    return (excName, excArgs, excTb)
# function written by the IUT students
def decoupercharact(chaine, longueur, longueurOptimale, separateurs = None) :
    # Split `chaine` near `longueurOptimale` characters at the best separator.
    # Returns (found, head words, remaining tail).
    # NOTE(review): the search loop header and several branch lines are
    # truncated in this file view.
    we start from the last character and walk back to the start of the string.
    If a '$' is found, we are done.
    Otherwise we look for the best candidate, i.e. the highest weight/distance ratio.
    separateurs = [['.', 60.0], ['?', 60.0], ['!', 60.0], ['£$£', 60], [':', 50.0], [';', 40.0], [',', 10.0], [' ', 0.1]]
    trouve = False                 # whether a good separator was found
    iDecoupe = 0                   # index of the character at which to cut
    # cut the string so we work on at most `longueur` characters
    longueur = min(longueur, len(chaine) - 1)
    chaineTravail = chaine[:longueur + 1]
    meilleur = ['', 0, 0]          # type, weight and position of the best separator
    # first check whether a '$' is present
    indice = chaineTravail.find('$')
    # if nothing was found, look for the best separator
    caractere = chaineTravail[nbCar]
    distance = abs(longueurOptimale - nbCar) + 1
    meilleureDistance = abs(longueurOptimale - meilleur[2]) + 1
    # check whether the current character is a punctuation mark
    for s in separateurs:
        if caractere == s[0]:
            # it is a punctuation mark
            if s[1] / distance > float(meilleur[1]) / meilleureDistance:
    # and stop the search
    # move to the previous character
    fin = chaine[iDecoupe + 1:]
    retour = chaineTravail[:iDecoupe]
    return len(retour) > 0, retour.split(), fin
    # if nothing was found
    return False, chaine.split(), ''
# User-facing (French) error messages keyed by internal exception tag;
# used by BugReport() below.  NOTE(review): the closing brace of this dict
# appears truncated in this file view.
exceptions = {'paragrapheOT' : "Un problème de formatage (présence d'un marqueur de paragraphe (-*) en dehors d'un texte) est survenu à la ligne ",
              'EmptyText' : "Texte vide (probablement un problème de formatage du corpus). Le problème est apparu à la ligne ",
              'CorpusEncoding' : "Problème d'encodage.",
              'TextBeforeTextMark' : "Problème de formatage : du texte avant le premier marqueur de texte (****). Le problème est survenu à la ligne ",
              'MissingAnalyse' : 'Aucun fichier à cet emplacement :\n',
def BugReport(parent, error = None):
    # Show the current exception in a BugDialog, translating known internal
    # exception tags into user-friendly French messages.
    # NOTE(review): several branch bodies (dialog destruction, else branches,
    # ShowModal) are truncated in this file view.
    for ch in parent.GetChildren():
        if "<class 'wx._windows.ProgressDialog'>" == str(type(ch)):
    excName, exc, excTb = formatExceptionInfo()
    if excName == 'Exception' :
        # internal exceptions carry "tag linenumber" payloads
        if len(exc.split()) == 2 :
            mss, linenb = exc.split()
            if mss in exceptions :
                txt = exceptions[mss] + linenb
        if exc in exceptions :
            txt = exceptions[exc]
        title = "Information"
        # unexpected exception: dump the full traceback
        txt = '\n !== BUG ==! \n'
        txt += '*************************************\n'
        txt += '\n'.join(excTb).replace(' ', ' ')
        txt += excName + '\n'
    dial = BugDialog(parent, **{'title' : title})
    if 'Rerror' in dir(parent) :
    dial.text_ctrl_1.write(txt)
    dial.CenterOnParent()
def PlaySound(parent):
    # Play the "finished" notification sound if enabled in the preferences.
    # NOTE(review): the try/except wrapper appears truncated in this view.
    if parent.pref.getboolean('iramuteq', 'sound') :
        if "gtk2" in wx.PlatformInfo:
            # on GTK, delegate to the ALSA command-line player
            error = Popen(['aplay','-q',os.path.join(parent.AppliPath,'son_fin.wav')])
        sound = wx.adv.Sound(os.path.join(parent.AppliPath, 'son_fin.wav'))
        sound.Play(wx.adv.SOUND_SYNC)
def ReadDicoAsDico(dicopath):
    """Load a tab-separated dictionary file into a dict.

    Each non-empty line is split on tabs after stripping the line ending and
    any double quotes; the first field becomes the key and the remaining
    fields the value list.  Later duplicate keys override earlier ones.
    """
    with open(dicopath, 'r', encoding='UTF8') as handle:
        raw_lines = handle.readlines()
    dico = {}
    for raw in raw_lines:
        if raw == '':
            continue
        fields = raw.rstrip('\n\r').replace('\n', '').replace('"', '').split('\t')
        dico[fields[0]] = fields[1:]
    return dico
def ReadLexique(parent, lang = 'french', filein = None):
    # Load the lexicon for `lang` (or an explicit file) into parent.lexique.
    # NOTE(review): the branch structure (lang vs filein vs fallback) is
    # truncated in this file view.
    parent.lexique = ReadDicoAsDico(parent.DictPath.get(lang, 'french'))
    parent.lexique = ReadDicoAsDico(filein)
    parent.lexique = ReadDicoAsDico(filein)
def ReadList(filein, encoding = 'utf8', sep = ';'):
    # Read a separated-values file into a table; decimal commas are converted
    # to dots so numeric cells can be parsed as floats.
    # NOTE(review): the header handling, row loop and return are truncated in
    # this file view.
    with open(filein, 'r', encoding='utf8') as f :
    content = [line.replace('\n', '').replace('\r','').replace('\"', '').replace(',', '.').split(sep) for line in content.splitlines()]
    #file = codecs.open(filein, 'r', encoding)
    #content = file.readlines()
    first = content.pop(0)
    #first = first.replace('\n', '').replace('\r','').replace('\"', '').split(sep)
    #line = line.replace('\n', '').replace('\r','').replace('\"', '').replace(',', '.')
    #line = line.split(';')
    don = float('%.5f' % float(val))
def exec_RCMD(rpath, command) :
    # Install an R package via `R CMD INSTALL <command>`.
    # NOTE(review): the return of the exit status appears truncated here.
    log.info('R CMD INSTALL %s' % command)
    # escape backslashes for Windows paths
    rpath = rpath.replace('\\','\\\\')
    error = call(["%s" % rpath, 'CMD', 'INSTALL', "%s" % command])
def exec_rcode(rpath, rcode, wait = True, graph = False):
    # Run an R script, either blocking (call) or detached (Popen), with
    # special-casing for win32 and for old macOS versions that need X11.
    # NOTE(review): the needX11 setup and the wait/graph branch headers are
    # truncated in this file view.
    log.info("R Script : %s" % rcode)
    if sys.platform == 'darwin' :
        macversion = platform.mac_ver()[0].split('.')
        # macOS < 10.5 needs X11 for R graphics
        if int(macversion[1]) < 5 :
    rpath = rpath.replace('\\','\\\\')
    env = os.environ.copy()
    if sys.platform == 'darwin' and 'LC_ALL' not in env:
        env['LC_ALL'] = 'en_US.UTF-8'
    # --- blocking, no graphics ---
    if sys.platform == 'win32':
        error = call(["%s" % rpath, "--vanilla","--slave","-f", "%s" % rcode])
        error = call([rpath, '--slave', "--vanilla", "--encoding=UTF-8", "-f %s" % rcode], env = env)
    # --- non-blocking, no graphics ---
    if sys.platform == 'win32':
        pid = Popen(["%s" % rpath, '--vanilla','--slave','-f', "%s" % rcode])
        pid = Popen([rpath, '--slave', "--vanilla", "--encoding=UTF-8", "-f %s" % rcode], stderr = PIPE, env = env, encoding='UTF-8') # PIPE or STDOUT ?
    # --- blocking, with graphics ---
    if sys.platform == 'win32':
        error = call(["%s" % rpath, '--vanilla','--slave','-f', "%s" % rcode])
    elif sys.platform == 'darwin' and needX11:
        os.environ['DISPLAY'] = ':0.0'
        error = call([rpath, '--vanilla','--slave', "--encoding=UTF-8","-f %s" % rcode], env = env, encoding='UTF-8')
        error = call([rpath, '--vanilla','--slave', "--encoding=UTF-8","-f %s" % rcode], env = env, encoding='UTF-8')
    # --- non-blocking, with graphics ---
    if sys.platform == 'win32':
        pid = Popen(["%s" % rpath, '--vanilla','--slave','-f', "%s" % rcode])
    elif sys.platform == 'darwin' and needX11:
        os.environ['DISPLAY'] = ':0.0'
        pid = Popen([rpath, '--vanilla','--slave', "--encoding=UTF-8","-f %s" % rcode], stderr = PIPE, env = env, encoding='UTF-8')
        pid = Popen([rpath, '--vanilla','--slave', "--encoding=UTF-8","-f %s" % rcode], stderr = PIPE, env = env, encoding='UTF-8')
def check_Rresult(parent, pid) :
    # Inspect the result of an R run (Popen handle or exit code) and raise
    # an Exception carrying the R error output on failure.
    # NOTE(review): the non-Popen branch and several lines are truncated in
    # this file view.
    if isinstance(pid, Popen) :
        if pid.returncode != 0 :
            error = pid.communicate()
            error = [str(error[0]), error[1]]
            if error[1] is None :
            # expose the R error to the GUI via the parent frame
            parent.Rerror = '\n'.join([str(pid.returncode), '\n'.join(error)])
            raise Exception('\n'.join(['Erreur R', '\n'.join(error[1:])]))
    raise Exception('Erreur R')
def launchcommand(mycommand):
    # Launch an external command (body truncated in this file view).

def print_liste(filename,liste):
    # Write a list of rows to `filename`, one ';'-joined line per row.
    # NOTE(review): the loop over `liste` is truncated in this view.
    with open(filename,'w', encoding='utf8') as f :
        f.write(';'.join(graph) +'\n')

def read_list_file(filename, encoding = 'utf8'):
    # Read back a ';'-separated list file, skipping blank lines.
    # NOTE(review): the return statement appears truncated in this view.
    with open(filename,'r', encoding='utf8') as f:
        content=f.readlines()
        ncontent=[line.replace('\n','').split(';') for line in content if line.strip() != '']
def progressbar(self, maxi):
    # Build a modal wx progress dialog for long-running treatments.
    # NOTE(review): the maxi validation and the argument list of the
    # ProgressDialog call are truncated in this file view.
    ira = wx.GetApp().GetTopWindow()
    prog = wx.ProgressDialog("Traitements",
                             "Veuillez patienter...",
                             style=wx.PD_APP_MODAL | wx.PD_AUTO_HIDE | wx.PD_ELAPSED_TIME | wx.PD_CAN_ABORT
    # NOTE: ABORT is not always handled ???
    prog.SetSize((400,150))
    #prog.SetIcon(ira._icon)
def treat_var_mod(variables) :
    # Group "variable_modality" strings into {variable: [modalities]}.
    # NOTE(review): the result dict init, the loop over vars and the return
    # are truncated in this file view.
    variables = list(set(variables))
    varmod = [variable.split('_') for variable in variables]
    # variable names are the part before the first '_'
    vars = list(set([var[0] for var in varmod if len(var) >=2]))
    mods = ['_'.join(v) for v in varmod if v[0] == var]
    # for variable in variables :
    # if '_' in variable :
    # forme = variable.split('_')
    # if not var in var_mod :
    # var_mod[var] = [variable]
    # if not mod in var_mod[var] :
    # var_mod[var].append(variable)
def doconcorde(corpus, uces, mots, uci = False, fontsize=16) :
    # Build HTML concordances: for each text segment (uce) or text (uci),
    # return the star line and the segment text with target words highlighted.
    # NOTE(review): branch headers and list initialisations are truncated in
    # this file view.
    ucestxt1 = [row for row in corpus.getconcorde(uces)]
    ucestxt1 = [row for row in corpus.getuciconcorde(uces)]
    ucestxt1 = dict(ucestxt1)
    # expand each lemma into all its surface forms
    listmot = [corpus.getlems()[lem].formes for lem in mots]
    listmot = [corpus.getforme(fid).forme for lem in listmot for fid in lem]
    mothtml = ['<font color=red><b>%s</b></font>' % mot for mot in listmot]
    dmots = dict(list(zip(listmot, mothtml)))
    presfont = '<p><b><font size="%i">' % fontsize
    font = '<font size="%i">' % fontsize
    ucetxt = ucestxt1[uce].split()
    # replace each occurrence of a target form by its highlighted version
    ucetxt = ' '.join([dmots.get(mot, mot) for mot in ucetxt])
    uciid = corpus.getucefromid(uce).uci
    ucis_txt.append(presfont + ' '.join(corpus.ucis[corpus.getucefromid(uce).uci].etoiles) + '<a href="%i_%i"> *%i_%i</a></font></b></p>' % (uciid, uce, uciid, uce))
    ucis_txt.append(presfont + ' '.join(corpus.ucis[uce].etoiles) + '</font></b></p>')
    ucestxt.append(font + ucetxt + '</font>')
    return ucis_txt, ucestxt
def getallstcarac(corpus, analyse) :
    # Load the characteristic-segments profile of an analysis.
    # NOTE(review): the rest of the body is truncated in this file view.
    pathout = PathOut(analyse['ira'])
    profils = ReadProfileAsDico(pathout['PROFILE_OUT'], Alceste, 'utf8')
def read_chd(filein, fileout):
    # Convert a tab-separated CHD tree description into a nested
    # {'name': ..., 'size': ..., 'children': [...]} JSON structure.
    # NOTE(review): the file read, the `mere` dict init and the if/elif/else
    # headers of the loop are truncated in this file view.
    with open(filein, 'r') as f :
    #content = [line[3:].replace('"',"").replace(' ','') for line in content.splitlines()]
    content = [line.split('\t') for line in content.splitlines()]
    chd = {'name':1, 'children':[]}
    for i, line in enumerate(content) :
        # root node: its two children are given by the next row's sizes
        chd['children'] = [{'name': line[1],'size' : content[i+1][0]}, {'name':line[2], 'size': content[i+1][1]}]
        mere[line[1]] = chd['children'][0]
        mere[line[2]] = chd['children'][1]
        if 'children' in mere[line[0]]:
            mere[line[0]]['children'].append({'name': line[1],'size' : content[i+1][0]})
            mere[line[1]] = mere[line[0]]['children'][-1]
            mere[line[0]]['children'].append({'name': line[2],'size' : content[i+1][1]})
            mere[line[2]] = mere[line[0]]['children'][-1]
        mere[line[0]]['children'] = [{'name': line[1],'size' : content[i+1][0]}, {'name':line[2], 'size': content[i+1][1]}]
        mere[line[1]] = mere[line[0]]['children'][-2]
        mere[line[2]] = mere[line[0]]['children'][-1]
    with open(fileout, 'w') as f :
        f.write(json.dumps(chd))
920 translation_languages = {"Afrikaans":"af", "Albanian":"sq", "Amharic":"am", "Arabic":"ar", "Armenian":"hy", "Azeerbaijani":"az", "Basque":"eu", "Belarusian":"be", "Bengali":"bn", "Bosnian":"bs", "Bulgarian":"bg", "Catalan":"ca", "Cebuano":"ceb", "Chichewa":"ny", "Chinese (Simplified)":"zh-CN", "Chinese (Traditional)":"zh-TW", "Corsican":"co", "Croatian":"hr", "Czech":"cs", "Danish":"da", "Dutch":"nl", "English":"en", "Esperanto":"eo", "Estonian":"et", "Filipino":"tl", "Finnish":"fi", "French":"fr", "Frisian":"fy", "Galician":"gl", "Georgian":"ka", "German":"de", "Greek":"el", "Gujarati":"gu", "Haitian Creole":"ht", "Hausa":"ha", "Hawaiian":"haw", "Hebrew":"iw", "Hindi":"hi", "Hmong":"hmn ", "Hungarian":"hu", "Icelandic":"is", "Igbo":"ig", "Indonesian":"id", "Irish":"ga", "Italian":"it", "Japanese":"ja", "Javanese":"jw", "Kannada":"kn", "Kazakh":"kk", "Khmer":"km", "Korean":"ko", "Kurdish":"ku", "Kyrgyz":"ky", "Lao":"lo", "Latin":"la", "Latvian":"lv", "Lithuanian":"lt", "Luxembourgish":"lb", "Macedonian":"mk", "Malagasy":"mg", "Malay":"ms", "Malayalam":"ml", "Maltese":"mt", "Maori":"mi", "Marathi":"mr", "Mongolian":"mn", "Burmese":"my", "Nepali":"ne", "Norwegian":"no", "Pashto":"ps", "Persian":"fa", "Polish":"pl", "Portuguese":"pt", "Punjabi":"ma", "Romanian":"ro", "Russian":"ru", "Samoan":"sm", "Scots Gaelic":"gd", "Serbian":"sr", "Sesotho":"st", "Shona":"sn", "Sindhi":"sd", "Sinhala":"si", "Slovak":"sk", "Slovenian":"sl", "Somali":"so", "Spanish":"es", "Sundanese":"su", "Swahili":"sw", "Swedish":"sv", "Tajik":"tg", "Tamil":"ta", "Telugu":"te", "Thai":"th", "Turkish":"tr", "Ukrainian":"uk", "Urdu":"ur", "Uzbek":"uz", "Vietnamese":"vi", "Welsh":"cy", "Xhosa":"xh", "Yiddish":"yi", "Yoruba":"yo", "Zulu":"zu", }
def gettranslation(words, lf, lt) :
    # Translate a list of words from language `lf` to `lt` via the public
    # Google Translate endpoint; spaces/quotes in results are normalized to
    # underscores so the output stays valid as lemma labels.
    # NOTE(review): the User-Agent dict literal is truncated in this view.
    import urllib.request, urllib.error, urllib.parse
    agent = {'User-Agent':
    .NET CLR 3.0.04506.30\
    base_link = "https://translate.googleapis.com/translate_a/single?client=gtx&sl=%s&tl=%s&dt=t&q=%s"
    # all words are sent in one request, separated by newlines
    totrans = urllib.parse.quote('\n'.join(words))
    link = base_link % (lf, lt, totrans)
    request = urllib.request.Request(link, headers=agent)
    raw_data = urllib.request.urlopen(request).read()
    data = json.loads(raw_data)
    return [line[0].replace("'", '_').replace(' | ', '|').replace(' ', '_').replace('-','_').replace('\n','') for line in data[0]]
def makenprof(prof, trans, deb=0) :
    # Rebuild profile rows substituting translated forms, starting at row
    # offset `deb`.  NOTE(review): nprof init, the substitution line and the
    # return are truncated in this file view.
    nprof.append(prof[0])
    for i, val in enumerate(trans) :
        line = prof[deb+i+1][:]

def treatempty(val) :
    # Return a placeholder for blank values (else branch truncated here).
    if val.strip() == '' :
def translateprofile(corpus, dictprofile, lf='it', lt='fr', maxword = 50) :
    # Translate cluster profiles (active then supplementary forms, capped at
    # `maxword` each), de-duplicating translations by appending '+'.
    # Returns are truncated in this view; presumably (nprof, lems).
    # NOTE(review): nprof/lems init and several try/except and else lines are
    # missing from this file view.
    for i in range(len(dictprofile)) :
        prof = dictprofile[repr(i+1)]
        # '*****' separates active forms from supplementary forms,
        # '*' marks the star-variables section
        lenact = prof.index(['*****', '*', '*', '*', '*', '*', '', ''])
        lenact = prof.index(['*', '*', '*', '*', '*', '*', '', ''])
        lensup += prof.index(['*', '*', '*', '*', '*', '*', '', ''])
        lensup = lensup - lenact
        lensup += len(prof) - lenact
        if lenact > maxword :
        # translate active forms
        actori = [line[6] for line in prof[1:nlenact]]
        act = [val.replace('_', ' ') for val in actori]
        act = gettranslation(act, lf, lt)
        for j, val in enumerate(actori) :
            if act[j] not in lems :
            # disambiguate duplicated translations with '+' suffixes
            while act[j] in lems :
                act[j] = act[j] + "+"
        nprof[repr(i+1)] = makenprof(prof, act)
        if lensup > maxword :
        # translate supplementary forms
        supori = [line[6] for line in prof[(1+lenact):(lenact+nlensup)]]
        sup = [val.replace('_', ' ') for val in supori]
        sup = [treatempty(val) for val in sup]
        sup = gettranslation(sup, lf, lt)
        for j, val in enumerate(supori) :
            if sup[j] not in lems :
            while sup[j] in lems :
                sup[j] = sup[j] + "+"
        nprof[repr(i+1)].append(['*****', '*', '*', '*', '*', '*', '', ''])
        nprof[repr(i+1)] += makenprof(prof, sup, deb=lenact)
        # copy the star-variables section unchanged
        lenet = prof.index(['*', '*', '*', '*', '*', '*', '', ''])
        nprof[repr(i+1)].append(['*', '*', '*', '*', '*', '*', '', ''])
        nprof[repr(i+1)] += prof[(lenet+1):]
def write_translation_profile(prof, lems, language, dictpathout) :
    # Persist a translated profile: the profile CSV, the word mapping, and an
    # index of available translations (translations.txt).
    # NOTE(review): the else branch creating a fresh translist and a couple of
    # condition lines are truncated in this file view.
    if os.path.exists(dictpathout['translations.txt']) :
        with open(dictpathout['translations.txt'], 'r', encoding='utf8') as f :
            translist = f.read()
        translist = [line.split('\t') for line in translist.splitlines()]
    toprint.append(['','','','','',''])
    toprint.append(['***', 'nb classes', repr(len(prof)), '***', '', ''])
    for i in range(len(prof)) :
        toprint.append(['**', 'classe', repr(i+1), '**', '', ''])
        toprint.append(['****'] + prof[repr(i+1)][0] + ['****'])
        # normalize p-values back to plain numbers for the CSV
        rest = [[repr(line[1]), repr(line[2]), repr(line[3]), repr(line[4]), line[6], line[7].replace('< 0,0001', '0.00009').replace('NS (','').replace(')','')] for line in prof[repr(i+1)][1:]]
        for i, line in enumerate(prof[repr(i+1)][1:]) :
            rest[i] = ['*', '*', '*', '*', '*', '*']
            elif line[0] == '*****' :
                rest[i] = ['*****','*','*', '*', '*', '*']
    with open(dictpathout['translation_profile_%s.csv' % language], 'w', encoding='utf8') as f :
        f.write('\n'.join([';'.join(line) for line in toprint]))
    with open(dictpathout['translation_words_%s.csv' % language], 'w', encoding='utf8') as f :
        f.write('\n'.join(['\t'.join([val, lems[val]]) for val in lems]))
    if 'translation_profile_%s.csv' % language not in [val[0] for val in translist] :
        translist.append(['translation_profile_%s.csv' % language, 'translation_words_%s.csv' % language])
        with open(dictpathout['translations.txt'], 'w', encoding='utf8') as f :
            f.write('\n'.join(['\t'.join(line) for line in translist]))
def makesentidict(infile, language) :
    # Build sentiment word lists (NRC-style lexicon) from a tab-separated
    # file and dump them as one row per sentiment to /tmp/tgenemo.csv.
    # NOTE(review): the file read and sentid init/loop header are truncated
    # in this file view; the hard-coded /tmp path is unix-only.
    with codecs.open(infile,'r', 'utf8') as f :
    content = [line.split('\t') for line in content.splitlines()]
    titles = content.pop(0)
    senti = ['Positive', 'Negative', 'Anger', 'Anticipation', 'Disgust', 'Fear', 'Joy', 'Sadness', 'Surprise', 'Trust']
    sentid[sent] = titles.index(sent)
    # locate the column holding the French translation
    frtitle = [val for val in titles if '(fr)' in val]
    frid = titles.index(frtitle[0])
    sentidict = [[line[frid].lower(), [line[sentid[sent]] for sent in senti]] for line in content]
    pos = ['positive'] + [line[0] for line in sentidict if line[1][0] == '1']
    neg = ['negative'] + [line[0] for line in sentidict if line[1][1] == '1']
    anger = ['anger'] + [line[0] for line in sentidict if line[1][2] == '1']
    anticipation = ['anticipation'] + [line[0] for line in sentidict if line[1][3] == '1']
    disgust = ['disgust'] + [line[0] for line in sentidict if line[1][4] == '1']
    fear = ['fear'] + [line[0] for line in sentidict if line[1][5] == '1']
    joy = ['joy'] + [line[0] for line in sentidict if line[1][6] == '1']
    sadness = ['sadness'] + [line[0] for line in sentidict if line[1][7] == '1']
    surprise = ['surprise'] + [line[0] for line in sentidict if line[1][8] == '1']
    trust = ['trust'] + [line[0] for line in sentidict if line[1][9] == '1']
    with open('/tmp/tgenemo.csv', 'w') as f :
        for val in [pos, neg, anger, anticipation, disgust, fear, joy, sadness, surprise, trust] :
            f.write('\t'.join(val) + '\n')
def countsentfromprof(prof, encoding, sentidict) :
    # Count sentiment hits in a profile CSV (first column = word, remaining
    # columns = integer counts).
    # NOTE(review): the file read and the counting logic are truncated in
    # this file view.
    with codecs.open(prof, 'r', encoding) as f :
    content = [line.split(';') for line in content.splitlines()]
    content = [[line[0], [int(val) for val in line[1:]]] for line in content]
    content = dict(content)

def iratolexico(infile, outfile, encoding) :
    # Convert an IRaMuTeQ corpus to Lexico format; '**** ' lines are the
    # text delimiters.  NOTE(review): body mostly truncated in this view.
    with codecs.open(infile, 'r', encoding) as f :
        if line.startswith('**** ') :