iramuteq.org Git - iramuteq/blob - functions.py

   1 #!/bin/env python
   2 # -*- coding: utf-8 -*-
   3 #Author: Pierre Ratinaud
   4 #Copyright (c) 2008-2012 Pierre Ratinaud
   5 #License: GNU/GPL
   6
   7 import wx
   8 import re
   9 from ConfigParser import ConfigParser
  10 from subprocess import Popen, call, PIPE
  11 import thread
  12 import os
  13 import ast
  14 import sys
  15 import csv
  16 import platform
  17 import traceback
  18 import codecs
  19 import locale
  20 import datetime
  21 from copy import copy
  22 from shutil import copyfile
  23 import shelve
  24 #from dialog import BugDialog
  25 import logging
  26
  27 log = logging.getLogger('iramuteq')
  28
  29
  30 indices_simi = [u'cooccurrence' ,'pourcentage de cooccurrence',u'Russel',u'Jaccard', 'Kulczynski1', 'Kulczynski2', 'Mountford', 'Fager', 'simple matching', 'Hamman', 'Faith', 'Tanimoto', 'Dice', 'Phi', 'Stiles', 'Michael', 'Mozley', 'Yule', 'Yule2', 'Ochiai', 'Simpson', 'Braun-Blanquet','Chi-squared', 'Phi-squared', 'Tschuprow', 'Cramer', 'Pearson', 'binomial']
  31
  32
  33 def normpath_win32(path) :
  34     if not sys.platform == 'win32' :
  35         return path
  36     while '\\\\' in path :
  37         path = path.replace('\\\\', '\\')
  38     if path.startswith('\\') and not path.startswith('\\\\') :
  39         path = '\\' + path
  40     return path
  41
  42 class TGen :
  43     def __init__(self, path = None, encoding = 'utf8'):
  44         self.path = path
  45         self.tgen = {}
  46         self.encoding = encoding
  47
  48     def __getitem__(self, key):
  49         return self.tgen[key]
  50
  51     def read(self, path = None):
  52         if path is None :
  53             path = self.path
  54         with codecs.open(path, 'r', self.encoding) as f :
  55             tgen = f.read()
  56         tgen = [line.split('\t') for line in tgen.splitlines()]
  57         tgen = dict([[line[0], line[1:]] for line in tgen])
  58         self.tgen = tgen
  59         self.path = path
  60
  61     def write(self, path = None):
  62         if path is None :
  63             path = self.path
  64         with open(path, 'w') as f :
  65             f.write('\n'.join(['\t'.join([val] + self.tgen[val]) for val in self.tgen]).encode(self.encoding))
  66
  67     def writetable(self, pathout, tgens, totocc):
  68         etoiles = totocc.keys()
  69         etoiles.sort()
  70         with open(pathout, 'w') as f :
  71             line = '\t'.join([u'tgens'] + etoiles) + '\n'
  72             f.write(line.encode(self.encoding))
  73             for t in tgens :
  74                 line = '\t'.join([t] + [`tgens[t][et]` for et in etoiles]) + '\n'
  75                 f.write(line.encode(self.encoding))
  76             i = 0
  77             totname = 'total'
  78             while totname + `i` in tgens :
  79                 i += 1
  80             totname = totname + `i`
  81             line = '\t'.join([totname] + [`totocc[et]` for et in etoiles]) + '\n'
  82             f.write(line.encode(self.encoding))
  83
  84 class History :
  85     def __init__(self, filein, syscoding = 'utf8') :
  86         self.filein = filein
  87         self.syscoding = syscoding
  88         self.corpus = {}
  89         self.openedcorpus = {}
  90         self.openedmatrix = {}
  91         self.orph = []
  92         self.analyses = {}
  93         self.history = []
  94         self.opened = {}
  95         self.read()
  96
  97     def read(self) :
  98         d = shelve.open(self.filein)
  99         self.history = d.get('history', [])
 100         self.matrix = d.get('matrix', [])
 101         self.ordercorpus = dict([[corpus['uuid'], i] for i, corpus in enumerate(self.history)])
 102         self.corpus = dict([[corpus['uuid'], corpus] for corpus in self.history])
 103         self.analyses = dict([[analyse['uuid'], analyse] for corpus in self.history for analyse in corpus.get('analyses', [])])
 104         self.matrixanalyse = dict([[mat['uuid'], mat] for mat in self.matrix])
 105         self.ordermatrix = dict([[matrix['uuid'], i] for i, matrix in enumerate(self.matrix)])
 106         d.close()
 107
 108     def write(self) :
 109         d = shelve.open(self.filein)
 110         d['history'] = self.history
 111         d['matrix'] = self.matrix
 112         d.close()
 113
 114     def add(self, analyse) :
 115         log.info('add to history %s' % analyse.get('corpus_name', 'pas un corpus'))
 116         tosave = {'uuid' : analyse['uuid'], 'ira': analyse['ira'], 'type' : analyse['type']}
 117         if tosave['uuid'] in self.corpus :
 118             log.info('problem : this uuid is already in history : %s' % tosave['uuid'])
 119             return
 120         if analyse.get('corpus', False) :
 121             if analyse['uuid'] in self.analyses :
 122                 return
 123             tosave['corpus'] = analyse['corpus']
 124             tosave['name'] = analyse['name']
 125             acorpus_uuid =  analyse['corpus']
 126             if acorpus_uuid in self.corpus :
 127                 if 'analyses' in self.history[self.ordercorpus[acorpus_uuid]] :
 128                     self.history[self.ordercorpus[acorpus_uuid]]['analyses'].append(tosave)
 129                 else :
 130                     self.history[self.ordercorpus[acorpus_uuid]]['analyses'] = [tosave]
 131             else :
 132                 self.orph.append(tosave)
 133         else :
 134             tosave['corpus_name'] = analyse['corpus_name']
 135             #self.ordercorpus[tosave['uuid']] = len(history)
 136             #self.corpus[tosave['uuid']] = analyse
 137             self.history.append(tosave)
 138         self.write()
 139         self.read()
 140
 141     def addMatrix(self, analyse) :
 142         tosave = analyse
 143         #tosave['matrix_name'] = analyse['matrix_name']
 144         tosave['analyses'] = []
 145         self.matrix.append(tosave)
 146         self.write()
 147         self.read()
 148
 149     def addMatrixAnalyse(self, analyse) :
 150         tosave = {'uuid' : analyse['uuid'], 'ira': analyse['ira'], 'type' : analyse['type'], 'matrix' : analyse['matrix']}
 151         tosave['name'] = analyse['name']
 152         if tosave['matrix'] in self.ordermatrix :
 153             self.matrix[self.ordermatrix[tosave['matrix']]]['analyses'].append(tosave)
 154         self.write()
 155         self.read()
 156
 157     def addmultiple(self, analyses) :
 158         log.info('add multiple')
 159         for analyse in analyses :
 160             tosave = {'uuid' : analyse['uuid'], 'ira': analyse['ira'], 'type' : analyse['type']}
 161             corpus = analyse['corpus']
 162             tosave['corpus'] = corpus
 163             tosave['name'] = analyse['name']
 164             if corpus in self.corpus :
 165                 if 'analyses' in self.history[self.ordercorpus[corpus]] :
 166                     self.history[self.ordercorpus[corpus]]['analyses'].append(tosave)
 167                 else :
 168                     self.history[self.ordercorpus[corpus]]['analyses'] = [tosave]
 169         self.write()
 170         self.read()
 171
 172     def delete(self, analyse, corpus = False) :
 173         log.info('delete %s' % analyse.get('name', 'noname'))
 174         if corpus :
 175             self.history.pop(self.ordercorpus[analyse['uuid']])
 176             if analyse['uuid'] in self.openedcorpus :
 177                 del self.openedcorpus[analyse['uuid']]
 178             log.info('delete corpus : %s' % analyse['uuid'])
 179         elif analyse['uuid'] in self.analyses :
 180             todel = [i for i, ana in enumerate(self.corpus[analyse['corpus']]['analyses']) if ana['uuid'] == analyse['uuid']][0]
 181             self.history[self.ordercorpus[analyse['corpus']]]['analyses'].pop(todel)
 182         elif analyse['uuid'] in self.matrixanalyse :
 183             self.matrix = [mat for mat in self.matrix if mat['uuid'] != analyse['uuid']]
 184         self.write()
 185         self.read()
 186
 187     def addtab(self, analyse) :
 188         self.opened[analyse['uuid']] = analyse
 189
 190     def rmtab(self, analyse) :
 191         del self.opened[analyse['uuid']]
 192
 193     def update(self, analyse) :
 194         if 'matrix_name' in analyse :
 195             self.matrixanalyse[analyse['uuid']].update(analyse)
 196         elif 'corpus_name' in analyse :
 197             self.corpus[analyse['uuid']].update(analyse)
 198         elif 'corpus' in analyse :
 199             self.analyses[analyse['uuid']].update(analyse)
 200         else :
 201             toupdate = [an for an in self.matrixanalyse[analyse['matrix']]['analyses'] if an['uuid'] == analyse['uuid']]
 202             toupdate[0].update(analyse)
 203         self.write()
 204         self.read()
 205
 206     def clean(self) :
 207         corpustodel = [corpus for corpus in self.history if not os.path.exists(corpus['ira'])]
 208         print corpustodel
 209         for corpus in corpustodel :
 210             print 'cleaning :', corpus['corpus_name']
 211             self.delete(corpus, corpus = True)
 212         anatodel = [analyse for corpus in self.history for analyse in corpus.get('analyses', []) if not os.path.exists(analyse.get('ira', '/'))]
 213         for analyse in anatodel :
 214             print 'cleaning :', analyse['name']
 215             self.delete(analyse)
 216
 217     def __str__(self) :
 218         return str(self.history)
 219
 220 class DoConf :
 221     def __init__(self, configfile=None, diff = None, parametres = None) :
 222         self.configfile = configfile
 223         self.conf = ConfigParser()
 224
 225         if configfile is not None :
 226             configfile = normpath_win32(configfile)
 227             self.conf.readfp(codecs.open(configfile, 'r', 'utf8'))
 228         self.parametres = {}
 229         if parametres is not None :
 230             self.doparametres(parametres)
 231
 232     def doparametres(self, parametres) :
 233         return parametres
 234
 235     def getsections(self) :
 236         return self.conf.sections()
 237
 238     def getoptions(self, section = None, diff = None):
 239         parametres = {}
 240         if section is None :
 241             section = self.conf.sections()[0]
 242         for option in self.conf.options(section) :
 243             if self.conf.get(section, option).isdigit() :
 244                 parametres[option] = int(self.conf.get(section, option))
 245             elif self.conf.get(section, option) == 'False' :
 246                 parametres[option] = False
 247             elif self.conf.get(section, option) == 'True' :
 248                 parametres[option] = True
 249             elif self.conf.get(section, option).startswith('(') and self.conf.get(section, option).endswith(')') :
 250                 parametres[option] = ast.literal_eval(self.conf.get(section, option))
 251             elif self.conf.get(section, option).startswith('[') and self.conf.get(section, option).endswith(']') :
 252                 parametres[option] = ast.literal_eval(self.conf.get(section, option))
 253             else :
 254                 parametres[option] = self.conf.get(section, option)
 255         if 'type' not in parametres :
 256             parametres['type'] = section
 257         return parametres
 258
 259     def makeoptions(self, sections, parametres, outfile = None) :
 260         txt = ''
 261         for i, section in enumerate(sections) :
 262             txt += '[%s]\n' % section
 263             if not self.conf.has_section(section) :
 264                 self.conf.add_section(section)
 265             for option in parametres[i] :
 266                 if isinstance(parametres[i][option], int) :
 267                     self.conf.set(section, option, `parametres[i][option]`)
 268                     txt += '%s = %i\n' % (option, parametres[i][option])
 269                 elif isinstance(parametres[i][option], basestring) :
 270                     self.conf.set(section, option, parametres[i][option].encode('utf8'))
 271                     txt += '%s = %s\n' % (option, parametres[i][option])
 272                 elif isinstance(parametres[i][option], wx.Colour) :
 273                     self.conf.set(section, option, str(parametres[i][option]))
 274                     txt += '%s = %s\n' % (option, str(parametres[i][option]))
 275                 elif option == 'analyses' :
 276                     pass
 277                 else :
 278                     self.conf.set(section, option, `parametres[i][option]`)
 279                     txt += '%s = %s\n' % (option, `parametres[i][option]`)
 280         if outfile is None :
 281             outfile = self.configfile
 282         outfile = normpath_win32(outfile)
 283         with open(outfile, 'w') as f :
 284             f.write(txt.encode('utf8'))
 285             #self.conf.write(f)
 286
 287     def totext(self, parametres) :
 288         #txt = ['Corpus']
 289         txt = []
 290         for val in parametres :
 291             if isinstance(parametres[val], int) :
 292                 txt.append(' \t\t: '.join([val, `parametres[val]`]))
 293             elif isinstance(parametres[val], basestring) :
 294                 txt.append(' \t\t: '.join([val, parametres[val]]))
 295             elif val in ['listet', 'stars'] :
 296                 pass
 297             else :
 298                 txt.append(' \t\t: '.join([val, `parametres[val]`]))
 299         return '\n'.join(txt)
 300
 301
 302 def write_tab(tab, fileout) :
 303         writer = csv.writer(open(fileout, 'wb'), delimiter=';', quoting = csv.QUOTE_NONNUMERIC)
 304         writer.writerows(tab)
 305
 306 class BugDialog(wx.Dialog):
 307     def __init__(self, *args, **kwds):
 308         # begin wxGlade: MyDialog.__init__
 309         kwds["style"] = wx.DEFAULT_DIALOG_STYLE | wx.STAY_ON_TOP
 310         kwds["size"] = wx.Size(500, 200)
 311         wx.Dialog.__init__(self, *args, **kwds)
 312         self.SetTitle(kwds['title'])
 313         self.text_ctrl_1 = wx.TextCtrl(self, -1, "", style=wx.TE_MULTILINE)
 314         self.text_ctrl_1.SetBackgroundColour('#DDE8EB')
 315         self.button_1 = wx.Button(self, wx.ID_OK, "")
 316
 317         self.__set_properties()
 318         self.__do_layout()
 319         # end wxGlade
 320
 321     def __set_properties(self):
 322         # begin wxGlade: MyDialog.__set_properties
 323         self.SetMinSize(wx.Size(500, 200))
 324         self.text_ctrl_1.SetMinSize(wx.Size(500, 200))
 325
 326         # end wxGlade
 327
 328     def __do_layout(self):
 329         # begin wxGlade: MyDialog.__do_layout
 330         sizer_1 = wx.BoxSizer(wx.VERTICAL)
 331         sizer_1.Add(self.text_ctrl_1, 1, wx.EXPAND, 0)
 332         sizer_1.Add(self.button_1, 0, wx.ALIGN_CENTER_HORIZONTAL, 0)
 333         self.SetSizer(sizer_1)
 334         sizer_1.Fit(self)
 335         self.Layout()
 336
 337
 338 def CreateIraFile(DictPathOut, clusternb, corpname='corpus_name', section = 'analyse'):
 339     AnalyseConf = ConfigParser()
 340     AnalyseConf.read(DictPathOut['ira'])
 341     AnalyseConf.add_section(section)
 342     date = datetime.datetime.now().ctime()
 343     AnalyseConf.set(section, 'date', str(date))
 344     AnalyseConf.set(section, 'clusternb', clusternb)
 345     AnalyseConf.set(section, 'corpus_name', corpname)
 346
 347     fileout = open(DictPathOut['ira'], 'w')
 348     AnalyseConf.write(fileout)
 349     fileout.close()
 350
 351 def sortedby(list, direct, *indices):
 352
 353     """
 354         sortedby: sort a list of lists (e.g. a table) by one or more indices
 355                   (columns of the table) and return the sorted list
 356
 357         e.g.
 358          for list = [[2,3],[1,2],[3,1]]:
 359          sortedby(list,1) will return [[3, 1], [1, 2], [2, 3]],
 360          sortedby(list,0) will return [[1, 2], [2, 3], [3, 1]]
 361     """
 362
 363     nlist = map(lambda x, indices=indices:
 364                  map(lambda i, x=x: x[i], indices) + [x],
 365                  list)
 366     if direct == 1:
 367         nlist.sort()
 368     elif direct == 2:
 369         nlist.sort(reverse=True)
 370     return map(lambda l: l[-1], nlist)
 371
 372 def add_type(line, dictlem):
 373     if line[4] in dictlem:
 374         line.append(dictlem[line[4]])
 375     else :
 376         line.append('')
 377     return line
 378
 379 def treat_line_alceste(i, line) :
 380     if line[0] == '*' or line[0] == '*****' :
 381         return line + ['']
 382     if line[5] == 'NA':
 383         print 'NA', line[5]
 384         pass
 385     elif float(line[5].replace(',', '.')) < 0.0001:
 386         line[5] = '< 0,0001'
 387     elif float(line[5].replace(',', '.')) > 0.05:
 388         line[5] = 'NS (%s)' % str(float(line[5].replace(',', '.')))[0:7]
 389     else:
 390         line[5] = str(float(line[5].replace(',', '.')))[0:7]
 391     return [i, int(line[0]), int(line[1]), float(line[2]), float(line[3]), line[6], line[4], line[5]]
 392
 393 def ReadProfileAsDico(File, Alceste=False, encoding = sys.getdefaultencoding()):
 394     dictlem = {}
 395     print 'lecture des profiles'
 396     FileReader = codecs.open(File, 'r', encoding)
 397     Filecontent = FileReader.readlines()
 398     FileReader.close()
 399     DictProfile = {}
 400     count = 0
 401     #rows = [row.replace('\n', '').replace("'", '').replace('\"', '').replace(',', '.').replace('\r','').split(';') for row in Filecontent]
 402     rows = [row.replace('\n', '').replace("'", '').replace('\"', '').replace('\r','').split(';') for row in Filecontent]
 403     rows.pop(0)
 404     ClusterNb = rows[0][2]
 405     rows.pop(0)
 406     clusters = [row[2] for row in rows if row[0] == u'**']
 407     valclusters = [row[1:4] for row in rows if row[0] == u'****']
 408     lp = [i for i, line in enumerate(rows) if line[0] == u'****']
 409     prof = [rows[lp[i] + 1:lp[i+1] - 1] for i in range(0, len(lp)-1)] + [rows[lp[-1] + 1:len(rows)]]
 410     if Alceste :
 411         prof = [[add_type(row, dictlem) for row in pr] for pr in prof]
 412         prof = [[treat_line_alceste(i,line) for i, line in enumerate(pr)] for pr in prof]
 413     else :
 414         prof = [[line + [''] for line in pr] for pr in prof]
 415         prof = [[treat_line_alceste(i,line) for i, line in enumerate(pr)] for pr in prof]
 416     for i, cluster in enumerate(clusters):
 417         DictProfile[cluster] = [valclusters[i]] + prof[i]
 418     return DictProfile
 419
 420 def GetTxtProfile(dictprofile, cluster_size) :
 421     proflist = []
 422     for classe in range(0, len(dictprofile)) :
 423         prof = dictprofile[str(classe + 1)]
 424         clinfo = cluster_size[classe]
 425         proflist.append('\n'.join([' '.join(['classe %i' % (classe + 1), '-', '%s uce sur %s - %s%%' % (clinfo[0], clinfo[1], clinfo[2])]), '\n'.join(['%5s|%5s|%6s|%6s|%8s|%8s|%20s\t%10s' % tuple([str(val) for val in line]) for line in prof if len(line)==8])]))
 426     return '\n\n'.join(proflist)
 427
 428 def formatExceptionInfo(maxTBlevel=5):
 429     cla, exc, trbk = sys.exc_info()
 430     try :
 431         excName = cla.__name__
 432     except :
 433         excName = 'None'
 434     try:
 435         excArgs = exc.args[0]
 436     except :
 437         excArgs = "<no args>"
 438     excTb = traceback.format_tb(trbk, maxTBlevel)
 439     return (excName, excArgs, excTb)
 440
 441
 442 #fonction des etudiants de l'iut
 443 def decoupercharact(chaine, longueur, longueurOptimale, separateurs = None) :
 444     """
 445         on part du dernier caractère, et on recule jusqu'au début de la chaîne.
 446         Si on trouve un '$', c'est fini.
 447         Sinon, on cherche le meilleur candidat. C'est-à-dire le rapport poids/distance le plus important.
 448     """
 449     separateurs = [[u'.', 60.0], [u'?', 60.0], [u'!', 60.0], [u'£$£', 60], [u':', 50.0], [u';', 40.0], [u',', 10.0], [u' ', 0.1]]
 450     trouve = False                 # si on a trouvé un bon séparateur
 451     iDecoupe = 0                # indice du caractere ou il faut decouper
 452
 453     # on découpe la chaine pour avoir au maximum 240 caractères
 454     longueur = min(longueur, len(chaine) - 1)
 455     chaineTravail = chaine[:longueur + 1]
 456     nbCar = longueur
 457     meilleur = ['', 0, 0]        # type, poids et position du meilleur separateur
 458
 459     # on vérifie si on ne trouve pas un '$'
 460     indice = chaineTravail.find(u'$')
 461     if indice > -1:
 462         trouve = True
 463         iDecoupe = indice
 464
 465     # si on ne trouve rien, on cherche le meilleur séparateur
 466     if not trouve:
 467         while nbCar >= 0:
 468             caractere = chaineTravail[nbCar]
 469             distance = abs(longueurOptimale - nbCar) + 1
 470             meilleureDistance = abs(longueurOptimale - meilleur[2]) + 1
 471
 472             # on vérifie si le caractére courant est une marque de ponctuation
 473             for s in separateurs:
 474                 if caractere == s[0]:
 475                     # si c'est une ponctuation
 476
 477                     if s[1] / distance > float(meilleur[1]) / meilleureDistance:
 478                         # print nbCar, s[0]
 479                         meilleur[0] = s[0]
 480                         meilleur[1] = s[1]
 481                         meilleur[2] = nbCar
 482                         trouve = True
 483                         iDecoupe = nbCar
 484
 485                     # et on termine la recherche
 486                     break
 487
 488             # on passe au caractère précédant
 489             nbCar = nbCar - 1
 490
 491     # si on a trouvé
 492     if trouve:
 493         fin = chaine[iDecoupe + 1:]
 494         retour = chaineTravail[:iDecoupe]
 495         return len(retour) > 0, retour.split(), fin
 496     # si on a rien trouvé
 497     return False, chaine.split(), ''
 498
 499
 500 exceptions = {'paragrapheOT' : u"Un problème de formatage (présence d'un marqueur de paragraphe (-*) en dehors d'un texte) est survenu à la ligne ",
 501               'EmptyText' : u"Texte vide (probablement un problème de formatage du corpus). Le problème est apparu à la ligne ",
 502               'CorpusEncoding' : u"Problème d'encodage.",
 503               'TextBeforeTextMark' : u"Problème de formatage : du texte avant le premier marqueur de texte (****). Le problème est survenu à la ligne ",
 504               'MissingAnalyse' : u'Aucun fichier à cet emplacement :\n',
 505 }
 506
 507 def BugReport(parent, error = None):
 508     for ch in parent.GetChildren():
 509         if "<class 'wx._windows.ProgressDialog'>" == str(type(ch)):
 510             ch.Destroy()
 511     excName, exc, excTb = formatExceptionInfo()
 512     if excName == 'Exception' :
 513         print exc
 514         if len(exc.split()) == 2 :
 515             mss, linenb = exc.split()
 516             if mss in exceptions :
 517                 txt = exceptions[mss] + linenb
 518             else :
 519                 txt = exc
 520         else :
 521             if exc in exceptions :
 522                 txt = exceptions[exc]
 523             else :
 524                 txt = exc
 525         title = "Information"
 526     else :
 527         txt = u'            !== BUG ==!       \n'
 528         txt += u'*************************************\n'
 529         txt += '\n'.join(excTb).replace('    ', ' ')
 530         txt += excName + '\n'
 531         txt += `exc`
 532         title = "Bug"
 533
 534     dial = BugDialog(parent, **{'title' : title})
 535     if 'Rerror' in dir(parent) :
 536         txt += parent.Rerror
 537         parent.Rerror = ''
 538     log.info(txt)
 539     dial.text_ctrl_1.write(txt)
 540     dial.CenterOnParent()
 541     dial.ShowModal()
 542     dial.Destroy()
 543
 544 def PlaySound(parent):
 545     if parent.pref.getboolean('iramuteq', 'sound') :
 546         try:
 547             if "gtk2" in wx.PlatformInfo:
 548                 error = Popen(['aplay','-q',os.path.join(parent.AppliPath,'son_fin.wav')])
 549             else :
 550                 sound = wx.Sound(os.path.join(parent.AppliPath, 'son_fin.wav'))
 551                 sound.Play(wx.SOUND_SYNC)
 552         except :
 553             print 'pas de son'
 554
 555 def ReadDicoAsDico(dicopath):
 556     with codecs.open(dicopath, 'r', 'UTF8') as f:
 557         content = f.readlines()
 558     lines = [line.rstrip('\n\r').replace(u'\n', '').replace('"', '').split('\t') for line in content if line != u'']
 559     return dict([[line[0], line[1:]] for line in lines])
 560
 561 def ReadLexique(parent, lang = 'french', filein = None):
 562     if lang != 'other' :
 563         if filein is None :
 564             parent.lexique = ReadDicoAsDico(parent.DictPath.get(lang, 'french'))
 565         else :
 566             parent.lexique = ReadDicoAsDico(filein)
 567     else :
 568         if filein is None :
 569             parent.lexique = {}
 570         else :
 571             parent.lexique = ReadDicoAsDico(filein)
 572
 573 def ReadList(filein, encoding = sys.getdefaultencoding(), sep = ';'):
 574     #file = open(filein)
 575     with codecs.open(filein, 'r', encoding) as f :
 576         content = f.read()
 577     content = [line.replace('\n', '').replace('\r','').replace('\"', '').replace(',', '.').split(sep) for line in content.splitlines()]
 578     #file = codecs.open(filein, 'r', encoding)
 579     #content = file.readlines()
 580     #file.close()
 581     first = content.pop(0)
 582     #first = first.replace('\n', '').replace('\r','').replace('\"', '').split(sep)
 583     dict = {}
 584     i = 0
 585     for line in content:
 586         #line = line.replace('\n', '').replace('\r','').replace('\"', '').replace(',', '.')
 587         #line = line.split(';')
 588         nline = [line[0]]
 589         for val in line[1:]:
 590             if val == u'NA' :
 591                 don = ''
 592             else:
 593                 try:
 594                     don = int(val)
 595                 except:
 596                     don = float('%.5f' % float(val))
 597             nline.append(don)
 598         dict[i] = nline
 599         i += 1
 600     return dict, first
 601
 602 def exec_RCMD(rpath, command) :
 603     log.info('R CMD INSTALL %s' % command)
 604     rpath = rpath.replace('\\','\\\\')
 605     error = call(["%s" % rpath, 'CMD', 'INSTALL', "%s" % command])
 606     return error
 607
 608 def exec_rcode(rpath, rcode, wait = True, graph = False):
 609     log.info("R Script : %s" % rcode)
 610     needX11 = False
 611     if sys.platform == 'darwin' :
 612         try :
 613             macversion = platform.mac_ver()[0].split('.')
 614             if int(macversion[1]) < 5 :
 615                 needX11 = True
 616             else :
 617                 needX11 = False
 618         except :
 619             needX11 = False
 620
 621     rpath = rpath.replace('\\','\\\\')
 622     env = os.environ.copy()
 623     if sys.platform == 'darwin' and 'LC_ALL' not in env:
 624         env['LC_ALL'] = 'en_US.UTF-8'
 625     if not graph :
 626         if wait :
 627             if sys.platform == 'win32':
 628                 error = call(["%s" % rpath, "--vanilla","--slave","-f", "%s" % rcode])
 629             else :
 630                 error = call([rpath, '--slave', "--vanilla", "-f %s" % rcode, "--encoding=UTF-8"], env = env)
 631             return error
 632         else :
 633             if sys.platform == 'win32':
 634                 pid = Popen(["%s" % rpath, '--vanilla','--slave','-f', "%s" % rcode])
 635             else :
 636                 pid = Popen([rpath, '--slave', "--vanilla", "-f %s" % rcode, "--encoding=UTF-8"], stderr = PIPE, env = env)
 637             return pid
 638     else :
 639         if wait :
 640             if sys.platform == 'win32':
 641                 error = call(["%s" % rpath, '--vanilla','--slave','-f', "%s" % rcode])
 642             elif sys.platform == 'darwin' and needX11:
 643                 os.environ['DISPLAY'] = ':0.0'
 644                 error = call([rpath, '--vanilla','--slave',"-f %s" % rcode, "--encoding=UTF-8"], env = env)
 645             else :
 646                 error = call([rpath, '--vanilla','--slave',"-f %s" % rcode, "--encoding=UTF-8"], env = env)
 647             return error
 648         else :
 649             if sys.platform == 'win32':
 650                 pid = Popen(["%s" % rpath, '--vanilla','--slave','-f', "%s" % rcode])
 651             elif sys.platform == 'darwin' and needX11:
 652                 os.environ['DISPLAY'] = ':0.0'
 653                 pid = Popen([rpath, '--vanilla','--slave',"-f %s" % rcode, "--encoding=UTF-8"], stderr = PIPE, env = env)
 654             else :
 655                 pid = Popen([rpath, '--vanilla','--slave',"-f %s" % rcode, "--encoding=UTF-8"], stderr = PIPE, env = env)
 656             return pid
 657
 658 def check_Rresult(parent, pid) :
 659     if isinstance(pid, Popen) :
 660         if pid.returncode != 0 :
 661             error = pid.communicate()
 662             error = [str(error[0]), error[1]]
 663             if error[1] is None :
 664                 error[1] = 'None'
 665             parent.Rerror = '\n'.join([str(pid.returncode), '\n'.join(error)])
 666             try :
 667                 raise Exception('\n'.join([u'Erreur R', '\n'.join(error[1:])]))
 668             except :
 669                 BugReport(parent)
 670             return False
 671         else :
 672             return True
 673     else :
 674         if pid != 0 :
 675             try :
 676                 raise Exception(u'Erreur R')
 677             except :
 678                 BugReport(parent)
 679             return False
 680         else :
 681             return True
 682
 683 def print_liste(filename,liste):
 684     with open(filename,'w') as f :
 685         for graph in liste :
 686             f.write(';'.join(graph)+'\n')
 687
 688 def read_list_file(filename, encoding = sys.getdefaultencoding()):
 689     with codecs.open(filename,'rU', encoding) as f :
 690         content=f.readlines()
 691         ncontent=[line.replace('\n','').split(';') for line in content if line.strip() != '']
 692     return ncontent
 693
 694
 695
 696
 697 def progressbar(self, maxi) :
 698     ira = wx.GetApp().GetTopWindow()
 699     parent = ira
 700     try :
 701         maxi = int(maxi)
 702     except :
 703         maxi = 1
 704     prog = wx.ProgressDialog("Traitements",
 705                              "Veuillez patienter...",
 706                              maximum=maxi,
 707                              parent=parent,
 708                              style=wx.PD_APP_MODAL | wx.PD_AUTO_HIDE | wx.PD_ELAPSED_TIME | wx.PD_CAN_ABORT
 709                              )
 710     prog.SetSize((400,150))
 711     prog.SetIcon(ira._icon)
 712     return prog
 713
 714 def treat_var_mod(variables) :
 715     var_mod = {}
 716     variables = list(set(variables))
 717     varmod = [variable.split('_') for variable in variables]
 718     vars = list(set([var[0] for var in varmod if len(var) >=2]))
 719     for var in vars :
 720         mods = ['_'.join(v) for v in varmod if v[0] == var]
 721         var_mod[var] = mods
 722
 723 #     for variable in variables :
 724 #         if u'_' in variable :
 725 #             forme = variable.split(u'_')
 726 #             var = forme[0]
 727 #             mod = forme[1]
 728 #             if not var in var_mod :
 729 #                 var_mod[var] = [variable]
 730 #             else :
 731 #                 if not mod in var_mod[var] :
 732 #                     var_mod[var].append(variable)
 733     return var_mod
 734
 735 def doconcorde(corpus, uces, mots, uci = False) :
 736     if not uci :
 737         ucestxt1 = [row for row in corpus.getconcorde(uces)]
 738     else :
 739         ucestxt1 = [row for row in corpus.getuciconcorde(uces)]
 740     ucestxt1 = dict(ucestxt1)
 741     ucestxt = []
 742     ucis_txt = []
 743     listmot = [corpus.getlems()[lem].formes for lem in mots]
 744     listmot = [corpus.getforme(fid).forme for lem in listmot for fid in lem]
 745     mothtml = ['<font color=red><b>%s</b></font>' % mot for mot in listmot]
 746     dmots = dict(zip(listmot, mothtml))
 747     for uce in uces :
 748         ucetxt = ucestxt1[uce].split()
 749         ucetxt = ' '.join([dmots.get(mot, mot) for mot in ucetxt])
 750         if not uci :
 751             ucis_txt.append('<p><b>' + ' '.join(corpus.ucis[corpus.getucefromid(uce).uci].etoiles) + '</b></p>')
 752         else :
 753             ucis_txt.append('<p><b>' + ' '.join(corpus.ucis[uce].etoiles) + '</b></p>')
 754         ucestxt.append(ucetxt)
 755     return ucis_txt, ucestxt
 756
 757
def getallstcarac(corpus, analyse) :
   # Reads the profiles of the analysis whose 'ira' path is in ``analyse``
   # and prints them.  ``corpus`` is not used by the lines below.
   # NOTE(review): PathOut is not defined or imported in the visible part
   # of this module - confirm where it comes from.
   pathout = PathOut(analyse['ira'])
   # NOTE(review): Alceste and self are not defined in this function, so
   # this line raises NameError as written - looks like unfinished code
   # moved out of a method; confirm the intended arguments.
   profils =  ReadProfileAsDico(pathout['PROFILE_OUT'], Alceste, self.encoding)
   print profils