iramuteq.org Git - iramuteq/blob - functions.py

   1 #!/bin/env python
   2 # -*- coding: utf-8 -*-
   3 #Author: Pierre Ratinaud
   4 #Copyright (c) 2008-2012 Pierre Ratinaud
   5 #License: GNU/GPL
   6
   7 import wx
   8 import re
   9 from ConfigParser import ConfigParser
  10 from subprocess import Popen, call, PIPE
  11 import thread
  12 import os
  13 import ast
  14 import sys
  15 import csv
  16 import platform
  17 import traceback
  18 import codecs
  19 import locale
  20 import datetime
  21 from copy import copy
  22 from shutil import copyfile
  23 import shelve
  24 import json
  25 #from dialog import BugDialog
  26 import logging
  27
  28 log = logging.getLogger('iramuteq')
  29
  30
  31 indices_simi = [u'cooccurrence' ,'pourcentage de cooccurrence',u'Russel',u'Jaccard', 'Kulczynski1', 'Kulczynski2', 'Mountford', 'Fager', 'simple matching', 'Hamman', 'Faith', 'Tanimoto', 'Dice', 'Phi', 'Stiles', 'Michael', 'Mozley', 'Yule', 'Yule2', 'Ochiai', 'Simpson', 'Braun-Blanquet','Chi-squared', 'Phi-squared', 'Tschuprow', 'Cramer', 'Pearson', 'binomial']
  32
  33
  34
  35 def open_folder(folder):
  36     if sys.platform == "win32":
  37         os.startfile(folder)
  38     else:
  39         opener ="open" if sys.platform == "darwin" else "xdg-open"
  40         call([opener, folder])
  41
  42 def normpath_win32(path) :
  43     if not sys.platform == 'win32' :
  44         return path
  45     while '\\\\' in path :
  46         path = path.replace('\\\\', '\\')
  47     if path.startswith('\\') and not path.startswith('\\\\') :
  48         path = '\\' + path
  49     return path
  50
  51 class TGen :
  52     def __init__(self, path = None, encoding = 'utf8'):
  53         self.path = path
  54         self.tgen = {}
  55         self.encoding = encoding
  56
  57     def __getitem__(self, key):
  58         return self.tgen[key]
  59
  60     def read(self, path = None):
  61         if path is None :
  62             path = self.path
  63         with codecs.open(path, 'r', self.encoding) as f :
  64             tgen = f.read()
  65         tgen = [line.split('\t') for line in tgen.splitlines()]
  66         tgen = dict([[line[0], line[1:]] for line in tgen])
  67         self.tgen = tgen
  68         self.path = path
  69
  70     def write(self, path = None):
  71         if path is None :
  72             path = self.path
  73         with open(path, 'w') as f :
  74             f.write('\n'.join(['\t'.join([val] + self.tgen[val]) for val in self.tgen]).encode(self.encoding))
  75
  76     def writetable(self, pathout, tgens, totocc):
  77         etoiles = totocc.keys()
  78         etoiles.sort()
  79         with open(pathout, 'w') as f :
  80             line = '\t'.join([u'tgens'] + etoiles) + '\n'
  81             f.write(line.encode(self.encoding))
  82             for t in tgens :
  83                 line = '\t'.join([t] + [`tgens[t][et]` for et in etoiles]) + '\n'
  84                 f.write(line.encode(self.encoding))
  85             i = 0
  86             totname = 'total'
  87             while totname + `i` in tgens :
  88                 i += 1
  89             totname = totname + `i`
  90             line = '\t'.join([totname] + [`totocc[et]` for et in etoiles]) + '\n'
  91             f.write(line.encode(self.encoding))
  92
  93 class History :
  94     def __init__(self, filein, syscoding = 'utf8') :
  95         self.filein = filein
  96         self.syscoding = syscoding
  97         self.corpus = {}
  98         self.openedcorpus = {}
  99         self.openedmatrix = {}
 100         self.orph = []
 101         self.analyses = {}
 102         self.history = []
 103         self.opened = {}
 104         self.read()
 105
 106     def read(self) :
 107         d = shelve.open(self.filein)
 108         self.history = d.get('history', [])
 109         self.matrix = d.get('matrix', [])
 110         self.ordercorpus = dict([[corpus['uuid'], i] for i, corpus in enumerate(self.history)])
 111         self.corpus = dict([[corpus['uuid'], corpus] for corpus in self.history])
 112         self.analyses = dict([[analyse['uuid'], analyse] for corpus in self.history for analyse in corpus.get('analyses', [])])
 113         self.matrixanalyse = dict([[mat['uuid'], mat] for mat in self.matrix])
 114         self.ordermatrix = dict([[matrix['uuid'], i] for i, matrix in enumerate(self.matrix)])
 115         d.close()
 116
 117     def write(self) :
 118         d = shelve.open(self.filein)
 119         d['history'] = self.history
 120         d['matrix'] = self.matrix
 121         d.close()
 122
 123     def add(self, analyse) :
 124         log.info('add to history %s' % analyse.get('corpus_name', 'pas un corpus'))
 125         tosave = {'uuid' : analyse['uuid'], 'ira': analyse['ira'], 'type' : analyse['type']}
 126         if tosave['uuid'] in self.corpus :
 127             log.info('problem : this uuid is already in history : %s' % tosave['uuid'])
 128             return
 129         if analyse.get('corpus', False) :
 130             if analyse['uuid'] in self.analyses :
 131                 return
 132             tosave['corpus'] = analyse['corpus']
 133             tosave['name'] = analyse['name']
 134             acorpus_uuid =  analyse['corpus']
 135             if acorpus_uuid in self.corpus :
 136                 if 'analyses' in self.history[self.ordercorpus[acorpus_uuid]] :
 137                     self.history[self.ordercorpus[acorpus_uuid]]['analyses'].append(tosave)
 138                 else :
 139                     self.history[self.ordercorpus[acorpus_uuid]]['analyses'] = [tosave]
 140             else :
 141                 self.orph.append(tosave)
 142         else :
 143             tosave['corpus_name'] = analyse['corpus_name']
 144             #self.ordercorpus[tosave['uuid']] = len(history)
 145             #self.corpus[tosave['uuid']] = analyse
 146             self.history.append(tosave)
 147         self.write()
 148         self.read()
 149
 150     def addMatrix(self, analyse) :
 151         tosave = analyse
 152         #tosave['matrix_name'] = analyse['matrix_name']
 153         tosave['analyses'] = []
 154         self.matrix.append(tosave)
 155         self.write()
 156         self.read()
 157
 158     def addMatrixAnalyse(self, analyse) :
 159         tosave = {'uuid' : analyse['uuid'], 'ira': analyse['ira'], 'type' : analyse['type'], 'matrix' : analyse['matrix']}
 160         tosave['name'] = analyse['name']
 161         if tosave['matrix'] in self.ordermatrix :
 162             self.matrix[self.ordermatrix[tosave['matrix']]]['analyses'].append(tosave)
 163         self.write()
 164         self.read()
 165
 166     def addmultiple(self, analyses) :
 167         log.info('add multiple')
 168         for analyse in analyses :
 169             tosave = {'uuid' : analyse['uuid'], 'ira': analyse['ira'], 'type' : analyse['type']}
 170             corpus = analyse['corpus']
 171             tosave['corpus'] = corpus
 172             tosave['name'] = analyse['name']
 173             if corpus in self.corpus :
 174                 if 'analyses' in self.history[self.ordercorpus[corpus]] :
 175                     self.history[self.ordercorpus[corpus]]['analyses'].append(tosave)
 176                 else :
 177                     self.history[self.ordercorpus[corpus]]['analyses'] = [tosave]
 178         self.write()
 179         self.read()
 180
 181     def delete(self, analyse, corpus = False) :
 182         log.info('delete %s' % analyse.get('name', 'noname'))
 183         if corpus :
 184             self.history.pop(self.ordercorpus[analyse['uuid']])
 185             if analyse['uuid'] in self.openedcorpus :
 186                 del self.openedcorpus[analyse['uuid']]
 187             log.info('delete corpus : %s' % analyse['uuid'])
 188         elif analyse['uuid'] in self.analyses :
 189             todel = [i for i, ana in enumerate(self.corpus[analyse['corpus']]['analyses']) if ana['uuid'] == analyse['uuid']][0]
 190             self.history[self.ordercorpus[analyse['corpus']]]['analyses'].pop(todel)
 191         elif analyse['uuid'] in self.matrixanalyse :
 192             self.matrix = [mat for mat in self.matrix if mat['uuid'] != analyse['uuid']]
 193         self.write()
 194         self.read()
 195
 196     def addtab(self, analyse) :
 197         self.opened[analyse['uuid']] = analyse
 198
 199     def rmtab(self, analyse) :
 200         del self.opened[analyse['uuid']]
 201
 202     def update(self, analyse) :
 203         if 'matrix_name' in analyse :
 204             self.matrixanalyse[analyse['uuid']].update(analyse)
 205         elif 'corpus_name' in analyse :
 206             self.corpus[analyse['uuid']].update(analyse)
 207         elif 'corpus' in analyse :
 208             self.analyses[analyse['uuid']].update(analyse)
 209         else :
 210             toupdate = [an for an in self.matrixanalyse[analyse['matrix']]['analyses'] if an['uuid'] == analyse['uuid']]
 211             toupdate[0].update(analyse)
 212         self.write()
 213         self.read()
 214
 215     def clean(self) :
 216         corpustodel = [corpus for corpus in self.history if not os.path.exists(corpus['ira'])]
 217         print corpustodel
 218         for corpus in corpustodel :
 219             print 'cleaning :', corpus['corpus_name']
 220             self.delete(corpus, corpus = True)
 221         anatodel = [analyse for corpus in self.history for analyse in corpus.get('analyses', []) if not os.path.exists(analyse.get('ira', '/'))]
 222         for analyse in anatodel :
 223             print 'cleaning :', analyse['name']
 224             self.delete(analyse)
 225
 226     def __str__(self) :
 227         return str(self.history)
 228
 229 class DoConf :
 230     def __init__(self, configfile=None, diff = None, parametres = None) :
 231         self.configfile = configfile
 232         self.conf = ConfigParser()
 233
 234         if configfile is not None :
 235             configfile = normpath_win32(configfile)
 236             self.conf.readfp(codecs.open(configfile, 'r', 'utf8'))
 237         self.parametres = {}
 238         if parametres is not None :
 239             self.doparametres(parametres)
 240
 241     def doparametres(self, parametres) :
 242         return parametres
 243
 244     def getsections(self) :
 245         return self.conf.sections()
 246
 247     def getoptions(self, section = None, diff = None):
 248         parametres = {}
 249         if section is None :
 250             section = self.conf.sections()[0]
 251         for option in self.conf.options(section) :
 252             if self.conf.get(section, option).isdigit() :
 253                 parametres[option] = int(self.conf.get(section, option))
 254             elif self.conf.get(section, option) == 'False' :
 255                 parametres[option] = False
 256             elif self.conf.get(section, option) == 'True' :
 257                 parametres[option] = True
 258             elif self.conf.get(section, option).startswith('(') and self.conf.get(section, option).endswith(')') :
 259                 parametres[option] = ast.literal_eval(self.conf.get(section, option))
 260             elif self.conf.get(section, option).startswith('[') and self.conf.get(section, option).endswith(']') :
 261                 parametres[option] = ast.literal_eval(self.conf.get(section, option))
 262             else :
 263                 parametres[option] = self.conf.get(section, option)
 264         if 'type' not in parametres :
 265             parametres['type'] = section
 266         return parametres
 267
 268     def makeoptions(self, sections, parametres, outfile = None) :
 269         txt = ''
 270         for i, section in enumerate(sections) :
 271             txt += '[%s]\n' % section
 272             if not self.conf.has_section(section) :
 273                 self.conf.add_section(section)
 274             for option in parametres[i] :
 275                 if isinstance(parametres[i][option], int) :
 276                     self.conf.set(section, option, `parametres[i][option]`)
 277                     txt += '%s = %i\n' % (option, parametres[i][option])
 278                 elif isinstance(parametres[i][option], basestring) :
 279                     self.conf.set(section, option, parametres[i][option].encode('utf8'))
 280                     txt += '%s = %s\n' % (option, parametres[i][option])
 281                 elif isinstance(parametres[i][option], wx.Colour) :
 282                     self.conf.set(section, option, str(parametres[i][option]))
 283                     txt += '%s = %s\n' % (option, str(parametres[i][option]))
 284                 elif option == 'analyses' :
 285                     pass
 286                 else :
 287                     self.conf.set(section, option, `parametres[i][option]`)
 288                     txt += '%s = %s\n' % (option, `parametres[i][option]`)
 289         if outfile is None :
 290             outfile = self.configfile
 291         outfile = normpath_win32(outfile)
 292         with open(outfile, 'w') as f :
 293             f.write(txt.encode('utf8'))
 294             #self.conf.write(f)
 295
 296     def totext(self, parametres) :
 297         #txt = ['Corpus']
 298         txt = []
 299         for val in parametres :
 300             if isinstance(parametres[val], int) :
 301                 txt.append(' \t\t: '.join([val, `parametres[val]`]))
 302             elif isinstance(parametres[val], basestring) :
 303                 txt.append(' \t\t: '.join([val, parametres[val]]))
 304             elif val in ['listet', 'stars'] :
 305                 pass
 306             else :
 307                 txt.append(' \t\t: '.join([val, `parametres[val]`]))
 308         return '\n'.join(txt)
 309
 310
 311 def write_tab(tab, fileout) :
 312         writer = csv.writer(open(fileout, 'wb'), delimiter=';', quoting = csv.QUOTE_NONNUMERIC)
 313         writer.writerows(tab)
 314
 315 class BugDialog(wx.Dialog):
 316     def __init__(self, *args, **kwds):
 317         # begin wxGlade: MyDialog.__init__
 318         kwds["style"] = wx.DEFAULT_DIALOG_STYLE | wx.STAY_ON_TOP
 319         kwds["size"] = wx.Size(500, 200)
 320         wx.Dialog.__init__(self, *args, **kwds)
 321         self.SetTitle(kwds['title'])
 322         self.text_ctrl_1 = wx.TextCtrl(self, -1, "", style=wx.TE_MULTILINE)
 323         self.text_ctrl_1.SetBackgroundColour('#DDE8EB')
 324         self.button_1 = wx.Button(self, wx.ID_OK, "")
 325
 326         self.__set_properties()
 327         self.__do_layout()
 328         # end wxGlade
 329
 330     def __set_properties(self):
 331         # begin wxGlade: MyDialog.__set_properties
 332         self.SetMinSize(wx.Size(500, 200))
 333         self.text_ctrl_1.SetMinSize(wx.Size(500, 200))
 334
 335         # end wxGlade
 336
 337     def __do_layout(self):
 338         # begin wxGlade: MyDialog.__do_layout
 339         sizer_1 = wx.BoxSizer(wx.VERTICAL)
 340         sizer_1.Add(self.text_ctrl_1, 1, wx.EXPAND, 0)
 341         sizer_1.Add(self.button_1, 0, wx.ALIGN_CENTER_HORIZONTAL, 0)
 342         self.SetSizer(sizer_1)
 343         sizer_1.Fit(self)
 344         self.Layout()
 345
 346
 347 def CreateIraFile(DictPathOut, clusternb, corpname='corpus_name', section = 'analyse'):
 348     AnalyseConf = ConfigParser()
 349     AnalyseConf.read(DictPathOut['ira'])
 350     AnalyseConf.add_section(section)
 351     date = datetime.datetime.now().ctime()
 352     AnalyseConf.set(section, 'date', str(date))
 353     AnalyseConf.set(section, 'clusternb', clusternb)
 354     AnalyseConf.set(section, 'corpus_name', corpname)
 355
 356     fileout = open(DictPathOut['ira'], 'w')
 357     AnalyseConf.write(fileout)
 358     fileout.close()
 359
 360 def sortedby(list, direct, *indices):
 361
 362     """
 363         sortedby: sort a list of lists (e.g. a table) by one or more indices
 364                   (columns of the table) and return the sorted list
 365
 366         e.g.
 367          for list = [[2,3],[1,2],[3,1]]:
 368          sortedby(list,1) will return [[3, 1], [1, 2], [2, 3]],
 369          sortedby(list,0) will return [[1, 2], [2, 3], [3, 1]]
 370     """
 371
 372     nlist = map(lambda x, indices=indices:
 373                  map(lambda i, x=x: x[i], indices) + [x],
 374                  list)
 375     if direct == 1:
 376         nlist.sort()
 377     elif direct == 2:
 378         nlist.sort(reverse=True)
 379     return map(lambda l: l[-1], nlist)
 380
 381 def add_type(line, dictlem):
 382     if line[4] in dictlem:
 383         line.append(dictlem[line[4]])
 384     else :
 385         line.append('')
 386     return line
 387
 388 def treat_line_alceste(i, line) :
 389     if line[0] == '*' or line[0] == '*****' :
 390         return line + ['']
 391     if line[5] == 'NA':
 392         print 'NA', line[5]
 393         pass
 394     elif float(line[5].replace(',', '.')) < 0.0001:
 395         line[5] = '< 0,0001'
 396     elif float(line[5].replace(',', '.')) > 0.05:
 397         line[5] = 'NS (%s)' % str(float(line[5].replace(',', '.')))[0:7]
 398     else:
 399         line[5] = str(float(line[5].replace(',', '.')))[0:7]
 400     return [i, int(line[0]), int(line[1]), float(line[2]), float(line[3]), line[6], line[4], line[5]]
 401
 402 def ReadProfileAsDico(File, Alceste=False, encoding = sys.getdefaultencoding()):
 403     dictlem = {}
 404     print 'lecture des profiles'
 405     FileReader = codecs.open(File, 'r', encoding)
 406     Filecontent = FileReader.readlines()
 407     FileReader.close()
 408     DictProfile = {}
 409     count = 0
 410     #rows = [row.replace('\n', '').replace("'", '').replace('\"', '').replace(',', '.').replace('\r','').split(';') for row in Filecontent]
 411     rows = [row.replace('\n', '').replace("'", '').replace('\"', '').replace('\r','').split(';') for row in Filecontent]
 412     rows.pop(0)
 413     ClusterNb = rows[0][2]
 414     rows.pop(0)
 415     clusters = [row[2] for row in rows if row[0] == u'**']
 416     valclusters = [row[1:4] for row in rows if row[0] == u'****']
 417     lp = [i for i, line in enumerate(rows) if line[0] == u'****']
 418     prof = [rows[lp[i] + 1:lp[i+1] - 1] for i in range(0, len(lp)-1)] + [rows[lp[-1] + 1:len(rows)]]
 419     if Alceste :
 420         prof = [[add_type(row, dictlem) for row in pr] for pr in prof]
 421         prof = [[treat_line_alceste(i,line) for i, line in enumerate(pr)] for pr in prof]
 422     else :
 423         prof = [[line + [''] for line in pr] for pr in prof]
 424         prof = [[treat_line_alceste(i,line) for i, line in enumerate(pr)] for pr in prof]
 425     for i, cluster in enumerate(clusters):
 426         DictProfile[cluster] = [valclusters[i]] + prof[i]
 427     return DictProfile
 428
 429 def GetTxtProfile(dictprofile, cluster_size) :
 430     proflist = []
 431     for classe in range(0, len(dictprofile)) :
 432         prof = dictprofile[str(classe + 1)]
 433         clinfo = cluster_size[classe]
 434         proflist.append('\n'.join([' '.join(['classe %i' % (classe + 1), '-', '%s uce sur %s - %s%%' % (clinfo[0], clinfo[1], clinfo[2])]), '\n'.join(['%5s|%5s|%6s|%6s|%8s|%8s|%20s\t%10s' % tuple([str(val) for val in line]) for line in prof if len(line)==8])]))
 435     return '\n\n'.join(proflist)
 436
 437 def formatExceptionInfo(maxTBlevel=5):
 438     cla, exc, trbk = sys.exc_info()
 439     try :
 440         excName = cla.__name__
 441     except :
 442         excName = 'None'
 443     try:
 444         excArgs = exc.args[0]
 445     except :
 446         excArgs = "<no args>"
 447     excTb = traceback.format_tb(trbk, maxTBlevel)
 448     return (excName, excArgs, excTb)
 449
 450
 451 #fonction des etudiants de l'iut
 452 def decoupercharact(chaine, longueur, longueurOptimale, separateurs = None) :
 453     """
 454         on part du dernier caractère, et on recule jusqu'au début de la chaîne.
 455         Si on trouve un '$', c'est fini.
 456         Sinon, on cherche le meilleur candidat. C'est-à-dire le rapport poids/distance le plus important.
 457     """
 458     separateurs = [[u'.', 60.0], [u'?', 60.0], [u'!', 60.0], [u'£$£', 60], [u':', 50.0], [u';', 40.0], [u',', 10.0], [u' ', 0.1]]
 459     trouve = False                 # si on a trouvé un bon séparateur
 460     iDecoupe = 0                # indice du caractere ou il faut decouper
 461
 462     # on découpe la chaine pour avoir au maximum 240 caractères
 463     longueur = min(longueur, len(chaine) - 1)
 464     chaineTravail = chaine[:longueur + 1]
 465     nbCar = longueur
 466     meilleur = ['', 0, 0]        # type, poids et position du meilleur separateur
 467
 468     # on vérifie si on ne trouve pas un '$'
 469     indice = chaineTravail.find(u'$')
 470     if indice > -1:
 471         trouve = True
 472         iDecoupe = indice
 473
 474     # si on ne trouve rien, on cherche le meilleur séparateur
 475     if not trouve:
 476         while nbCar >= 0:
 477             caractere = chaineTravail[nbCar]
 478             distance = abs(longueurOptimale - nbCar) + 1
 479             meilleureDistance = abs(longueurOptimale - meilleur[2]) + 1
 480
 481             # on vérifie si le caractére courant est une marque de ponctuation
 482             for s in separateurs:
 483                 if caractere == s[0]:
 484                     # si c'est une ponctuation
 485
 486                     if s[1] / distance > float(meilleur[1]) / meilleureDistance:
 487                         # print nbCar, s[0]
 488                         meilleur[0] = s[0]
 489                         meilleur[1] = s[1]
 490                         meilleur[2] = nbCar
 491                         trouve = True
 492                         iDecoupe = nbCar
 493
 494                     # et on termine la recherche
 495                     break
 496
 497             # on passe au caractère précédant
 498             nbCar = nbCar - 1
 499
 500     # si on a trouvé
 501     if trouve:
 502         fin = chaine[iDecoupe + 1:]
 503         retour = chaineTravail[:iDecoupe]
 504         return len(retour) > 0, retour.split(), fin
 505     # si on a rien trouvé
 506     return False, chaine.split(), ''
 507
 508
 509 exceptions = {'paragrapheOT' : u"Un problème de formatage (présence d'un marqueur de paragraphe (-*) en dehors d'un texte) est survenu à la ligne ",
 510               'EmptyText' : u"Texte vide (probablement un problème de formatage du corpus). Le problème est apparu à la ligne ",
 511               'CorpusEncoding' : u"Problème d'encodage.",
 512               'TextBeforeTextMark' : u"Problème de formatage : du texte avant le premier marqueur de texte (****). Le problème est survenu à la ligne ",
 513               'MissingAnalyse' : u'Aucun fichier à cet emplacement :\n',
 514 }
 515
 516 def BugReport(parent, error = None):
 517     for ch in parent.GetChildren():
 518         if "<class 'wx._windows.ProgressDialog'>" == str(type(ch)):
 519             ch.Destroy()
 520     excName, exc, excTb = formatExceptionInfo()
 521     if excName == 'Exception' :
 522         print exc
 523         if len(exc.split()) == 2 :
 524             mss, linenb = exc.split()
 525             if mss in exceptions :
 526                 txt = exceptions[mss] + linenb
 527             else :
 528                 txt = exc
 529         else :
 530             if exc in exceptions :
 531                 txt = exceptions[exc]
 532             else :
 533                 txt = exc
 534         title = "Information"
 535     else :
 536         txt = u'            !== BUG ==!       \n'
 537         txt += u'*************************************\n'
 538         txt += '\n'.join(excTb).replace('    ', ' ')
 539         txt += excName + '\n'
 540         txt += `exc`
 541         title = "Bug"
 542
 543     dial = BugDialog(parent, **{'title' : title})
 544     if 'Rerror' in dir(parent) :
 545         txt += parent.Rerror
 546         parent.Rerror = ''
 547     log.info(txt)
 548     dial.text_ctrl_1.write(txt)
 549     dial.CenterOnParent()
 550     dial.ShowModal()
 551     dial.Destroy()
 552
 553 def PlaySound(parent):
 554     if parent.pref.getboolean('iramuteq', 'sound') :
 555         try:
 556             if "gtk2" in wx.PlatformInfo:
 557                 error = Popen(['aplay','-q',os.path.join(parent.AppliPath,'son_fin.wav')])
 558             else :
 559                 sound = wx.Sound(os.path.join(parent.AppliPath, 'son_fin.wav'))
 560                 sound.Play(wx.SOUND_SYNC)
 561         except :
 562             print 'pas de son'
 563
 564 def ReadDicoAsDico(dicopath):
 565     with codecs.open(dicopath, 'r', 'UTF8') as f:
 566         content = f.readlines()
 567     lines = [line.rstrip('\n\r').replace(u'\n', '').replace('"', '').split('\t') for line in content if line != u'']
 568     return dict([[line[0], line[1:]] for line in lines])
 569
 570 def ReadLexique(parent, lang = 'french', filein = None):
 571     if lang != 'other' :
 572         if filein is None :
 573             parent.lexique = ReadDicoAsDico(parent.DictPath.get(lang, 'french'))
 574         else :
 575             parent.lexique = ReadDicoAsDico(filein)
 576     else :
 577         if filein is None :
 578             parent.lexique = {}
 579         else :
 580             parent.lexique = ReadDicoAsDico(filein)
 581
 582 def ReadList(filein, encoding = sys.getdefaultencoding(), sep = ';'):
 583     #file = open(filein)
 584     with codecs.open(filein, 'r', encoding) as f :
 585         content = f.read()
 586     content = [line.replace('\n', '').replace('\r','').replace('\"', '').replace(',', '.').split(sep) for line in content.splitlines()]
 587     #file = codecs.open(filein, 'r', encoding)
 588     #content = file.readlines()
 589     #file.close()
 590     first = content.pop(0)
 591     #first = first.replace('\n', '').replace('\r','').replace('\"', '').split(sep)
 592     dict = {}
 593     i = 0
 594     for line in content:
 595         #line = line.replace('\n', '').replace('\r','').replace('\"', '').replace(',', '.')
 596         #line = line.split(';')
 597         nline = [line[0]]
 598         for val in line[1:]:
 599             if val == u'NA' :
 600                 don = ''
 601             else:
 602                 try:
 603                     don = int(val)
 604                 except:
 605                     don = float('%.5f' % float(val))
 606             nline.append(don)
 607         dict[i] = nline
 608         i += 1
 609     return dict, first
 610
 611 def exec_RCMD(rpath, command) :
 612     log.info('R CMD INSTALL %s' % command)
 613     rpath = rpath.replace('\\','\\\\')
 614     error = call(["%s" % rpath, 'CMD', 'INSTALL', "%s" % command])
 615     return error
 616
 617 def exec_rcode(rpath, rcode, wait = True, graph = False):
 618     log.info("R Script : %s" % rcode)
 619     needX11 = False
 620     if sys.platform == 'darwin' :
 621         try :
 622             macversion = platform.mac_ver()[0].split('.')
 623             if int(macversion[1]) < 5 :
 624                 needX11 = True
 625             else :
 626                 needX11 = False
 627         except :
 628             needX11 = False
 629
 630     rpath = rpath.replace('\\','\\\\')
 631     env = os.environ.copy()
 632     if sys.platform == 'darwin' and 'LC_ALL' not in env:
 633         env['LC_ALL'] = 'en_US.UTF-8'
 634     if not graph :
 635         if wait :
 636             if sys.platform == 'win32':
 637                 error = call(["%s" % rpath, "--vanilla","--slave","-f", "%s" % rcode])
 638             else :
 639                 error = call([rpath, '--slave', "--vanilla", "-f %s" % rcode, "--encoding=UTF-8"], env = env)
 640             return error
 641         else :
 642             if sys.platform == 'win32':
 643                 pid = Popen(["%s" % rpath, '--vanilla','--slave','-f', "%s" % rcode])
 644             else :
 645                 pid = Popen([rpath, '--slave', "--vanilla", "-f %s" % rcode, "--encoding=UTF-8"], stderr = PIPE, env = env)
 646             return pid
 647     else :
 648         if wait :
 649             if sys.platform == 'win32':
 650                 error = call(["%s" % rpath, '--vanilla','--slave','-f', "%s" % rcode])
 651             elif sys.platform == 'darwin' and needX11:
 652                 os.environ['DISPLAY'] = ':0.0'
 653                 error = call([rpath, '--vanilla','--slave',"-f %s" % rcode, "--encoding=UTF-8"], env = env)
 654             else :
 655                 error = call([rpath, '--vanilla','--slave',"-f %s" % rcode, "--encoding=UTF-8"], env = env)
 656             return error
 657         else :
 658             if sys.platform == 'win32':
 659                 pid = Popen(["%s" % rpath, '--vanilla','--slave','-f', "%s" % rcode])
 660             elif sys.platform == 'darwin' and needX11:
 661                 os.environ['DISPLAY'] = ':0.0'
 662                 pid = Popen([rpath, '--vanilla','--slave',"-f %s" % rcode, "--encoding=UTF-8"], stderr = PIPE, env = env)
 663             else :
 664                 pid = Popen([rpath, '--vanilla','--slave',"-f %s" % rcode, "--encoding=UTF-8"], stderr = PIPE, env = env)
 665             return pid
 666
 667 def check_Rresult(parent, pid) :
 668     if isinstance(pid, Popen) :
 669         if pid.returncode != 0 :
 670             error = pid.communicate()
 671             error = [str(error[0]), error[1]]
 672             if error[1] is None :
 673                 error[1] = 'None'
 674             parent.Rerror = '\n'.join([str(pid.returncode), '\n'.join(error)])
 675             try :
 676                 raise Exception('\n'.join([u'Erreur R', '\n'.join(error[1:])]))
 677             except :
 678                 BugReport(parent)
 679             return False
 680         else :
 681             return True
 682     else :
 683         if pid != 0 :
 684             try :
 685                 raise Exception(u'Erreur R')
 686             except :
 687                 BugReport(parent)
 688             return False
 689         else :
 690             return True
 691
 692
 693 def launchcommand(mycommand):
 694     Popen(mycommand)
 695
 696 def print_liste(filename,liste):
 697     with open(filename,'w') as f :
 698         for graph in liste :
 699             f.write(';'.join(graph).encode(sys.getdefaultencoding())+'\n')
 700
 701 def read_list_file(filename, encoding = sys.getdefaultencoding()):
 702     with codecs.open(filename,'rU', encoding) as f :
 703         content=f.readlines()
 704         ncontent=[line.replace('\n','').split(';') for line in content if line.strip() != '']
 705     return ncontent
 706
 707 def progressbar(self, maxi) :
 708     ira = wx.GetApp().GetTopWindow()
 709     parent = ira
 710     try :
 711         maxi = int(maxi)
 712     except :
 713         maxi = 1
 714     prog = wx.ProgressDialog("Traitements",
 715                              "Veuillez patienter...",
 716                              maximum=maxi,
 717                              parent=parent,
 718                              style=wx.PD_APP_MODAL | wx.PD_AUTO_HIDE | wx.PD_ELAPSED_TIME | wx.PD_CAN_ABORT
 719                              )
 720     prog.SetSize((400,150))
 721     #prog.SetIcon(ira._icon)
 722     return prog
 723
 724 def treat_var_mod(variables) :
 725     var_mod = {}
 726     variables = list(set(variables))
 727     varmod = [variable.split('_') for variable in variables]
 728     vars = list(set([var[0] for var in varmod if len(var) >=2]))
 729     for var in vars :
 730         mods = ['_'.join(v) for v in varmod if v[0] == var]
 731         var_mod[var] = mods
 732
 733 #     for variable in variables :
 734 #         if u'_' in variable :
 735 #             forme = variable.split(u'_')
 736 #             var = forme[0]
 737 #             mod = forme[1]
 738 #             if not var in var_mod :
 739 #                 var_mod[var] = [variable]
 740 #             else :
 741 #                 if not mod in var_mod[var] :
 742 #                     var_mod[var].append(variable)
 743     return var_mod
 744
 745 def doconcorde(corpus, uces, mots, uci = False) :
 746     if not uci :
 747         ucestxt1 = [row for row in corpus.getconcorde(uces)]
 748     else :
 749         ucestxt1 = [row for row in corpus.getuciconcorde(uces)]
 750     ucestxt1 = dict(ucestxt1)
 751     ucestxt = []
 752     ucis_txt = []
 753     listmot = [corpus.getlems()[lem].formes for lem in mots]
 754     listmot = [corpus.getforme(fid).forme for lem in listmot for fid in lem]
 755     mothtml = ['<font color=red><b>%s</b></font>' % mot for mot in listmot]
 756     dmots = dict(zip(listmot, mothtml))
 757     for uce in uces :
 758         ucetxt = ucestxt1[uce].split()
 759         ucetxt = ' '.join([dmots.get(mot, mot) for mot in ucetxt])
 760         if not uci :
 761             ucis_txt.append('<p><b>' + ' '.join(corpus.ucis[corpus.getucefromid(uce).uci].etoiles) + '</b></p>')
 762         else :
 763             ucis_txt.append('<p><b>' + ' '.join(corpus.ucis[uce].etoiles) + '</b></p>')
 764         ucestxt.append(ucetxt)
 765     return ucis_txt, ucestxt
 766
 767
 768 def getallstcarac(corpus, analyse) :
 769    pathout = PathOut(analyse['ira'])
 770    profils =  ReadProfileAsDico(pathout['PROFILE_OUT'], Alceste, self.encoding)
 771    print profils
 772
 773 def read_chd(filein, fileout):
 774     with open(filein, 'r') as f :
 775         content = f.read()
 776     #content = [line[3:].replace('"',"").replace(' ','') for line in content.splitlines()]
 777     content = [line.split('\t') for line in content.splitlines()]
 778     chd = {'name':1, 'children':[]}
 779     mere={}
 780     for i, line in enumerate(content) :
 781         if i == 0 :
 782             chd['children'] = [{'name': line[1],'size' : content[i+1][0]}, {'name':line[2], 'size': content[i+1][1]}]
 783             mere[line[1]] = chd['children'][0]
 784             mere[line[2]] = chd['children'][1]
 785         elif not i % 2 :
 786             if 'children' in mere[line[0]]:
 787                 mere[line[0]]['children'].append({'name': line[1],'size' : content[i+1][0]})
 788                 mere[line[1]] = mere[line[0]]['children'][-1]
 789                 mere[line[0]]['children'].append({'name': line[2],'size' : content[i+1][1]})
 790                 mere[line[2]] = mere[line[0]]['children'][-1]
 791             else :
 792                 mere[line[0]]['children'] = [{'name': line[1],'size' : content[i+1][0]}, {'name':line[2], 'size': content[i+1][1]}]
 793                 mere[line[1]] = mere[line[0]]['children'][-2]
 794                 mere[line[2]] = mere[line[0]]['children'][-1]
 795     with open(fileout, 'w') as f :
 796         f.write(json.dumps(chd))
 797
 798
 799 translation_languages = {"Afrikaans":"af", "Albanian":"sq", "Amharic":"am", "Arabic":"ar", "Armenian":"hy", "Azeerbaijani":"az", "Basque":"eu", "Belarusian":"be", "Bengali":"bn", "Bosnian":"bs", "Bulgarian":"bg", "Catalan":"ca", "Cebuano":"ceb", "Chichewa":"ny", "Chinese (Simplified)":"zh-CN", "Chinese (Traditional)":"zh-TW", "Corsican":"co", "Croatian":"hr", "Czech":"cs", "Danish":"da", "Dutch":"nl", "English":"en", "Esperanto":"eo", "Estonian":"et", "Filipino":"tl", "Finnish":"fi", "French":"fr", "Frisian":"fy", "Galician":"gl", "Georgian":"ka", "German":"de", "Greek":"el", "Gujarati":"gu", "Haitian Creole":"ht", "Hausa":"ha", "Hawaiian":"haw", "Hebrew":"iw", "Hindi":"hi", "Hmong":"hmn ", "Hungarian":"hu", "Icelandic":"is", "Igbo":"ig", "Indonesian":"id", "Irish":"ga", "Italian":"it", "Japanese":"ja", "Javanese":"jw", "Kannada":"kn", "Kazakh":"kk", "Khmer":"km", "Korean":"ko", "Kurdish":"ku", "Kyrgyz":"ky", "Lao":"lo", "Latin":"la", "Latvian":"lv", "Lithuanian":"lt", "Luxembourgish":"lb", "Macedonian":"mk", "Malagasy":"mg", "Malay":"ms", "Malayalam":"ml", "Maltese":"mt", "Maori":"mi", "Marathi":"mr", "Mongolian":"mn", "Burmese":"my", "Nepali":"ne", "Norwegian":"no", "Pashto":"ps", "Persian":"fa", "Polish":"pl", "Portuguese":"pt", "Punjabi":"ma", "Romanian":"ro", "Russian":"ru", "Samoan":"sm", "Scots Gaelic":"gd", "Serbian":"sr", "Sesotho":"st", "Shona":"sn", "Sindhi":"sd", "Sinhala":"si", "Slovak":"sk", "Slovenian":"sl", "Somali":"so", "Spanish":"es", "Sundanese":"su", "Swahili":"sw", "Swedish":"sv", "Tajik":"tg", "Tamil":"ta", "Telugu":"te", "Thai":"th", "Turkish":"tr", "Ukrainian":"uk", "Urdu":"ur", "Uzbek":"uz", "Vietnamese":"vi", "Welsh":"cy", "Xhosa":"xh", "Yiddish":"yi", "Yoruba":"yo", "Zulu":"zu", }
 800
 801
 802 def gettranslation(words, lf, lt) :
 803     import urllib2
 804     import json
 805     agent = {'User-Agent':
 806     "Mozilla/4.0 (\
 807     compatible;\
 808     MSIE 6.0;\
 809     Windows NT 5.1;\
 810     SV1;\
 811     .NET CLR 1.1.4322;\
 812     .NET CLR 2.0.50727;\
 813     .NET CLR 3.0.04506.30\
 814     )"}
 815     base_link = "https://translate.googleapis.com/translate_a/single?client=gtx&sl=%s&tl=%s&dt=t&q=%s"
 816     print len(words)
 817     totrans = urllib2.quote('\n'.join(words).encode('utf8'))
 818     link = base_link % (lf, lt, totrans)
 819     request = urllib2.Request(link, headers=agent)
 820     raw_data = urllib2.urlopen(request).read()
 821     data = json.loads(raw_data)
 822     return [line[0].decode('utf8').replace(u"'", u'_').replace(u' | ', u'|').replace(u' ', u'_').replace(u'-',u'_').replace(u'\n','') for line in data[0]]
 823
 824 def makenprof(prof, trans, deb=0) :
 825     nprof=[]
 826     if deb == 0 :
 827         nprof.append(prof[0])
 828     for i, val in enumerate(trans) :
 829         line = prof[deb+i+1][:]
 830         line[6] = val
 831         nprof.append(line)
 832     return nprof
 833
 834 def treatempty(val) :
 835     if val.strip() == '' :
 836         return '_'
 837     else :
 838         return val
 839
 840 def translateprofile(corpus, dictprofile, lf='it', lt='fr') :
 841     nprof = {}
 842     lems = {}
 843     for i in range(len(dictprofile)) :
 844         prof = dictprofile[`i+1`]
 845         try :
 846             lenact = prof.index([u'*****', u'*', u'*', u'*', u'*', u'*', '', ''])
 847             lensup = -1
 848         except ValueError:
 849             try :
 850                 lenact = prof.index([u'*', u'*', u'*', u'*', u'*', u'*', '', ''])
 851                 lensup = 0
 852             except ValueError:
 853                 lenact = len(prof)
 854                 lensup = 0
 855         try :
 856             lensup += prof.index([u'*', u'*', u'*', u'*', u'*', u'*', '', ''])
 857             lensup = lensup - lenact
 858         except ValueError:
 859             lensup += len(prof) - lenact
 860         if lenact != 0 :
 861             if lenact > 400 :
 862                 nlenact = 400
 863             else :
 864                 nlenact = lenact
 865             actori = [line[6] for line in prof[1:nlenact]]
 866             act = [val.replace(u'_', u' ') for val in actori]
 867             act = gettranslation(act, lf, lt)
 868             for j, val in enumerate(actori) :
 869                 if act[j] not in lems :
 870                     lems[act[j]] = val
 871                 else :
 872                     while act[j] in lems :
 873                         act[j] = act[j] + u"+"
 874                     lems[act[j]] = val
 875             nprof[`i+1`] = makenprof(prof, act)
 876
 877         if lensup != 0 :
 878             if lensup > 400 :
 879                 nlensup = 400
 880             else :
 881                 nlensup = lensup
 882             supori = [line[6] for line in prof[(1+lenact):(lenact+nlensup)]]
 883             sup = [val.replace(u'_', u' ') for val in supori]
 884             sup = [treatempty(val) for val in sup]
 885             sup = gettranslation(sup, lf, lt)
 886             for j, val in enumerate(supori) :
 887                 if sup[j] not in lems :
 888                     lems[sup[j]] = val
 889                 else :
 890                     while sup[j] in lems :
 891                         sup[j] = sup[j] + u"+"
 892                     lems[sup[j]] = val
 893             nprof[`i+1`].append([u'*****', u'*', u'*', u'*', u'*', u'*', '', ''])
 894             nprof[`i+1`] += makenprof(prof, sup, deb=lenact)
 895
 896         try :
 897             lenet = prof.index([u'*', u'*', u'*', u'*', u'*', u'*', '', ''])
 898             nprof[`i+1`].append([u'*', u'*', u'*', u'*', u'*', u'*', '', ''])
 899             nprof[`i+1`] += prof[(lenet+1):]
 900         except :
 901             pass
 902     return nprof, lems
 903
 904
 905 def write_translation_profile(prof, lems, language, dictpathout) :
 906     if os.path.exists(dictpathout['translations.txt']) :
 907         with codecs.open(dictpathout['translations.txt'], 'r', 'utf8') as f :
 908             translist = f.read()
 909         translist = [line.split('\t') for line in translist.splitlines()]
 910     else :
 911         translist = []
 912     toprint = []
 913     toprint.append(['','','','','',''])
 914     toprint.append([u'***', u'nb classes', `len(prof)`, u'***', '', ''])
 915     for i in range(len(prof)) :
 916         toprint.append([u'**', u'classe', `i+1`, u'**', '', ''])
 917         toprint.append([u'****'] + prof[`i+1`][0] + [u'****'])
 918         rest = [[`line[1]`, `line[2]`, `line[3]`, `line[4]`, line[6], line[7].replace('< 0,0001', '0.00009').replace('NS (','').replace(')','')] for line in prof[`i+1`][1:]]
 919         for i, line in enumerate(prof[`i+1`][1:]) :
 920             if line[0] == u'*' :
 921                 rest[i] = [u'*', u'*', u'*', u'*', u'*', u'*']
 922             elif line[0] == u'*****' :
 923                 rest[i] = [u'*****',u'*',u'*', u'*', u'*', u'*']
 924         toprint += rest
 925     with open(dictpathout['translation_profile_%s.csv' % language], 'w') as f :
 926         f.write('\n'.join([';'.join(line) for line in toprint]).encode('utf8'))
 927     with open(dictpathout['translation_words_%s.csv' % language], 'w') as f :
 928         f.write('\n'.join(['\t'.join([val, lems[val]]) for val in lems]).encode('utf8'))
 929     if 'translation_profile_%s.csv' % language not in [val[0] for val in translist] :
 930         translist.append(['translation_profile_%s.csv' % language, 'translation_words_%s.csv' % language])
 931         with open(dictpathout['translations.txt'], 'w') as f :
 932             f.write('\n'.join(['\t'.join(line) for line in translist]).encode('utf8'))