iramuteq.org Git - iramuteq/blob - functions.py

   1 #!/bin/env python
   2 # -*- coding: utf-8 -*-
   3 #Author: Pierre Ratinaud
   4 #Copyright (c) 2008-2012 Pierre Ratinaud
   5 #License: GNU/GPL
   6
   7 import wx
   8 import re
   9 from ConfigParser import ConfigParser
  10 from subprocess import Popen, call, PIPE
  11 import thread
  12 import os
  13 import ast
  14 import sys
  15 import csv
  16 import platform
  17 import traceback
  18 import codecs
  19 import locale
  20 import datetime
  21 from copy import copy
  22 from shutil import copyfile
  23 import shelve
  24 #from dialog import BugDialog
  25 import logging
  26
# Shared module-level logger, registered under the name 'iramuteq'.
log = logging.getLogger('iramuteq')


# Ordered list of index labels.  Judging by the name these are the similarity
# indices offered for similarity analyses -- TODO confirm against the callers.
indices_simi = [u'cooccurrence' ,'pourcentage de cooccurrence',u'Russel',u'Jaccard', 'Kulczynski1', 'Kulczynski2', 'Mountford', 'Fager', 'simple matching', 'Hamman', 'Faith', 'Tanimoto', 'Dice', 'Phi', 'Stiles', 'Michael', 'Mozley', 'Yule', 'Yule2', 'Ochiai', 'Simpson', 'Braun-Blanquet','Chi-squared', 'Phi-squared', 'Tschuprow', 'Cramer', 'Pearson', 'binomial']
  31
  32
  33 def normpath_win32(path) :
  34     if not sys.platform == 'win32' :
  35         return path
  36     while '\\\\' in path :
  37         path = path.replace('\\\\', '\\')
  38     if sys.platform == 'win32' and path.startswith('\\') and not path.startswith('\\\\') :
  39         path = '\\' + path
  40     return path
  41
  42 class TGen :
  43     def __init__(self, path = None, encoding = 'utf8'):
  44         self.path = path
  45         self.tgen = {}
  46         self.encoding = encoding
  47
  48     def __getitem__(self, key):
  49         return self.tgen[key]
  50
  51     def read(self, path):
  52         with codecs.open(path, 'r', self.encoding) as f :
  53             tgen = f.read()
  54         tgen = [line.split('\t') for line in tgen.splitlines()]
  55         tgen = dict([[line[0], line[1:]] for line in tgen])
  56         self.tgen = tgen
  57         self.path = path
  58
  59     def write(self, path = None):
  60         if path is None :
  61             path = self.path
  62         with open(path, 'w') as f :
  63             f.write('\n'.join(['\t'.join([val] + self.tgen[val]) for val in self.tgen]).encode(self.encoding))
  64
  65     def writetable(self, pathout, tgens, totocc):
  66         etoiles = totocc.keys()
  67         etoiles.sort()
  68         with open(pathout, 'w') as f :
  69             line = '\t'.join([u'tgens'] + etoiles) + '\n'
  70             f.write(line.encode(self.encoding))
  71             for t in tgens :
  72                 line = '\t'.join([t] + [`tgens[t][et]` for et in etoiles]) + '\n'
  73                 f.write(line.encode(self.encoding))
  74             i = 0
  75             totname = 'total'
  76             while totname + `i` in tgens :
  77                 i += 1
  78             totname = totname + `i`
  79             line = '\t'.join([totname] + [`totocc[et]` for et in etoiles]) + '\n'
  80             f.write(line.encode(self.encoding))
  81
  82 class History :
  83     def __init__(self, filein, syscoding = 'utf8') :
  84         self.filein = filein
  85         self.syscoding = syscoding
  86         self.corpus = {}
  87         self.openedcorpus = {}
  88         self.openedmatrix = {}
  89         self.orph = []
  90         self.analyses = {}
  91         self.history = []
  92         self.opened = {}
  93         self.read()
  94
  95     def read(self) :
  96         d = shelve.open(self.filein)
  97         self.history = d.get('history', [])
  98         self.matrix = d.get('matrix', [])
  99         self.ordercorpus = dict([[corpus['uuid'], i] for i, corpus in enumerate(self.history)])
 100         self.corpus = dict([[corpus['uuid'], corpus] for corpus in self.history])
 101         self.analyses = dict([[analyse['uuid'], analyse] for corpus in self.history for analyse in corpus.get('analyses', [])])
 102         self.matrixanalyse = dict([[mat['uuid'], mat] for mat in self.matrix])
 103         self.ordermatrix = dict([[matrix['uuid'], i] for i, matrix in enumerate(self.matrix)])
 104         d.close()
 105
 106     def write(self) :
 107         d = shelve.open(self.filein)
 108         d['history'] = self.history
 109         d['matrix'] = self.matrix
 110         d.close()
 111
 112     def add(self, analyse) :
 113         log.info('add to history %s' % analyse.get('corpus_name', 'pas un corpus'))
 114         tosave = {'uuid' : analyse['uuid'], 'ira': analyse['ira'], 'type' : analyse['type']}
 115         if tosave['uuid'] in self.corpus :
 116             log.info('problem : this uuid is already in history : %s' % tosave['uuid'])
 117             return
 118         if analyse.get('corpus', False) :
 119             if analyse['uuid'] in self.analyses :
 120                 return
 121             tosave['corpus'] = analyse['corpus']
 122             tosave['name'] = analyse['name']
 123             acorpus_uuid =  analyse['corpus']
 124             if acorpus_uuid in self.corpus :
 125                 if 'analyses' in self.history[self.ordercorpus[acorpus_uuid]] :
 126                     self.history[self.ordercorpus[acorpus_uuid]]['analyses'].append(tosave)
 127                 else :
 128                     self.history[self.ordercorpus[acorpus_uuid]]['analyses'] = [tosave]
 129             else :
 130                 self.orph.append(tosave)
 131         else :
 132             tosave['corpus_name'] = analyse['corpus_name']
 133             #self.ordercorpus[tosave['uuid']] = len(history)
 134             #self.corpus[tosave['uuid']] = analyse
 135             self.history.append(tosave)
 136         self.write()
 137         self.read()
 138
 139     def addMatrix(self, analyse) :
 140         tosave = analyse
 141         #tosave['matrix_name'] = analyse['matrix_name']
 142         tosave['analyses'] = []
 143         self.matrix.append(tosave)
 144         self.write()
 145         self.read()
 146
 147     def addMatrixAnalyse(self, analyse) :
 148         tosave = {'uuid' : analyse['uuid'], 'ira': analyse['ira'], 'type' : analyse['type'], 'matrix' : analyse['matrix']}
 149         tosave['name'] = analyse['name']
 150         if tosave['matrix'] in self.ordermatrix :
 151             self.matrix[self.ordermatrix[tosave['matrix']]]['analyses'].append(tosave)
 152         self.write()
 153         self.read()
 154
 155     def addmultiple(self, analyses) :
 156         log.info('add multiple')
 157         for analyse in analyses :
 158             tosave = {'uuid' : analyse['uuid'], 'ira': analyse['ira'], 'type' : analyse['type']}
 159             corpus = analyse['corpus']
 160             tosave['corpus'] = corpus
 161             tosave['name'] = analyse['name']
 162             if corpus in self.corpus :
 163                 if 'analyses' in self.history[self.ordercorpus[corpus]] :
 164                     self.history[self.ordercorpus[corpus]]['analyses'].append(tosave)
 165                 else :
 166                     self.history[self.ordercorpus[corpus]]['analyses'] = [tosave]
 167         self.write()
 168         self.read()
 169
 170     def delete(self, analyse, corpus = False) :
 171         log.info('delete %s' % analyse.get('name', 'noname'))
 172         if corpus :
 173             self.history.pop(self.ordercorpus[analyse['uuid']])
 174             if analyse['uuid'] in self.openedcorpus :
 175                 del self.openedcorpus[analyse['uuid']]
 176             log.info('delete corpus : %s' % analyse['uuid'])
 177         elif analyse['uuid'] in self.analyses :
 178             todel = [i for i, ana in enumerate(self.corpus[analyse['corpus']]['analyses']) if ana['uuid'] == analyse['uuid']][0]
 179             self.history[self.ordercorpus[analyse['corpus']]]['analyses'].pop(todel)
 180         elif analyse['uuid'] in self.matrixanalyse :
 181             self.matrix = [mat for mat in self.matrix if mat['uuid'] != analyse['uuid']]
 182         self.write()
 183         self.read()
 184
 185     def addtab(self, analyse) :
 186         self.opened[analyse['uuid']] = analyse
 187
 188     def rmtab(self, analyse) :
 189         del self.opened[analyse['uuid']]
 190
 191     def clean(self) :
 192         corpustodel = [corpus for corpus in self.history if not os.path.exists(corpus['ira'])]
 193         print corpustodel
 194         for corpus in corpustodel :
 195             print 'cleaning :', corpus['corpus_name']
 196             self.delete(corpus, corpus = True)
 197         anatodel = [analyse for corpus in self.history for analyse in corpus.get('analyses', []) if not os.path.exists(analyse.get('ira', '/'))]
 198         for analyse in anatodel :
 199             print 'cleaning :', analyse['name']
 200             self.delete(analyse)
 201
 202     def __str__(self) :
 203         return str(self.history)
 204
 205 class DoConf :
 206     def __init__(self, configfile=None, diff = None, parametres = None) :
 207         self.configfile = configfile
 208         self.conf = ConfigParser()
 209
 210         if configfile is not None :
 211             configfile = normpath_win32(configfile)
 212             self.conf.readfp(codecs.open(configfile, 'r', 'utf8'))
 213         self.parametres = {}
 214         if parametres is not None :
 215             self.doparametres(parametres)
 216
 217     def doparametres(self, parametres) :
 218         return parametres
 219
 220     def getsections(self) :
 221         return self.conf.sections()
 222
 223     def getoptions(self, section = None, diff = None):
 224         parametres = {}
 225         if section is None :
 226             section = self.conf.sections()[0]
 227         for option in self.conf.options(section) :
 228             if self.conf.get(section, option).isdigit() :
 229                 parametres[option] = int(self.conf.get(section, option))
 230             elif self.conf.get(section, option) == 'False' :
 231                 parametres[option] = False
 232             elif self.conf.get(section, option) == 'True' :
 233                 parametres[option] = True
 234             elif self.conf.get(section, option).startswith('(') and self.conf.get(section, option).endswith(')') :
 235                 parametres[option] = ast.literal_eval(self.conf.get(section, option))
 236             elif self.conf.get(section, option).startswith('[') and self.conf.get(section, option).endswith(']') :
 237                 parametres[option] = ast.literal_eval(self.conf.get(section, option))
 238             else :
 239                 parametres[option] = self.conf.get(section, option)
 240         if 'type' not in parametres :
 241             parametres['type'] = section
 242         return parametres
 243
 244     def makeoptions(self, sections, parametres, outfile = None) :
 245         txt = ''
 246         for i, section in enumerate(sections) :
 247             txt += '[%s]\n' % section
 248             if not self.conf.has_section(section) :
 249                 self.conf.add_section(section)
 250             for option in parametres[i] :
 251                 if isinstance(parametres[i][option], int) :
 252                     self.conf.set(section, option, `parametres[i][option]`)
 253                     txt += '%s = %i\n' % (option, parametres[i][option])
 254                 elif isinstance(parametres[i][option], basestring) :
 255                     self.conf.set(section, option, parametres[i][option].encode('utf8'))
 256                     txt += '%s = %s\n' % (option, parametres[i][option])
 257                 elif isinstance(parametres[i][option], wx.Colour) :
 258                     self.conf.set(section, option, str(parametres[i][option]))
 259                     txt += '%s = %s\n' % (option, str(parametres[i][option]))
 260                 elif option == 'analyses' :
 261                     pass
 262                 else :
 263                     self.conf.set(section, option, `parametres[i][option]`)
 264                     txt += '%s = %s\n' % (option, `parametres[i][option]`)
 265         if outfile is None :
 266             outfile = self.configfile
 267         outfile = normpath_win32(outfile)
 268         with open(outfile, 'w') as f :
 269             f.write(txt.encode('utf8'))
 270             #self.conf.write(f)
 271
 272     def totext(self, parametres) :
 273         #txt = ['Corpus']
 274         txt = []
 275         for val in parametres :
 276             if isinstance(parametres[val], int) :
 277                 txt.append(' \t\t: '.join([val, `parametres[val]`]))
 278             elif isinstance(parametres[val], basestring) :
 279                 txt.append(' \t\t: '.join([val, parametres[val]]))
 280             elif val in ['listet', 'stars'] :
 281                 pass
 282             else :
 283                 txt.append(' \t\t: '.join([val, `parametres[val]`]))
 284         return '\n'.join(txt)
 285
 286
 287 def write_tab(tab, fileout) :
 288         writer = csv.writer(open(fileout, 'wb'), delimiter=';', quoting = csv.QUOTE_NONNUMERIC)
 289         writer.writerows(tab)
 290
 291 class BugDialog(wx.Dialog):
 292     def __init__(self, *args, **kwds):
 293         # begin wxGlade: MyDialog.__init__
 294         kwds["style"] = wx.DEFAULT_DIALOG_STYLE
 295         kwds["size"] = wx.Size(500, 200)
 296         wx.Dialog.__init__(self, *args, **kwds)
 297         self.SetTitle(kwds['title'])
 298         self.text_ctrl_1 = wx.TextCtrl(self, -1, "", style=wx.TE_MULTILINE)
 299         self.text_ctrl_1.SetBackgroundColour('#DDE8EB')
 300         self.button_1 = wx.Button(self, wx.ID_OK, "")
 301
 302         self.__set_properties()
 303         self.__do_layout()
 304         # end wxGlade
 305
 306     def __set_properties(self):
 307         # begin wxGlade: MyDialog.__set_properties
 308         self.SetMinSize(wx.Size(500, 200))
 309         self.text_ctrl_1.SetMinSize(wx.Size(500, 200))
 310
 311         # end wxGlade
 312
 313     def __do_layout(self):
 314         # begin wxGlade: MyDialog.__do_layout
 315         sizer_1 = wx.BoxSizer(wx.VERTICAL)
 316         sizer_1.Add(self.text_ctrl_1, 1, wx.EXPAND, 0)
 317         sizer_1.Add(self.button_1, 0, wx.ALIGN_CENTER_HORIZONTAL, 0)
 318         self.SetSizer(sizer_1)
 319         sizer_1.Fit(self)
 320         self.Layout()
 321
 322
 323 def CreateIraFile(DictPathOut, clusternb, corpname='corpus_name', section = 'analyse'):
 324     AnalyseConf = ConfigParser()
 325     AnalyseConf.read(DictPathOut['ira'])
 326     AnalyseConf.add_section(section)
 327     date = datetime.datetime.now().ctime()
 328     AnalyseConf.set(section, 'date', str(date))
 329     AnalyseConf.set(section, 'clusternb', clusternb)
 330     AnalyseConf.set(section, 'corpus_name', corpname)
 331
 332     fileout = open(DictPathOut['ira'], 'w')
 333     AnalyseConf.write(fileout)
 334     fileout.close()
 335
 336 def sortedby(list, direct, *indices):
 337
 338     """
 339         sortedby: sort a list of lists (e.g. a table) by one or more indices
 340                   (columns of the table) and return the sorted list
 341
 342         e.g.
 343          for list = [[2,3],[1,2],[3,1]]:
 344          sortedby(list,1) will return [[3, 1], [1, 2], [2, 3]],
 345          sortedby(list,0) will return [[1, 2], [2, 3], [3, 1]]
 346     """
 347
 348     nlist = map(lambda x, indices=indices:
 349                  map(lambda i, x=x: x[i], indices) + [x],
 350                  list)
 351     if direct == 1:
 352         nlist.sort()
 353     elif direct == 2:
 354         nlist.sort(reverse=True)
 355     return map(lambda l: l[-1], nlist)
 356
 357 def add_type(line, dictlem):
 358     if line[4] in dictlem:
 359         line.append(dictlem[line[4]])
 360     else :
 361         line.append('')
 362     return line
 363
 364 def treat_line_alceste(i, line) :
 365     if line[0] == '*' or line[0] == '*****' :
 366         return line + ['']
 367     if line[5] == 'NA':
 368         print 'NA', line[5]
 369         pass
 370     elif float(line[5].replace(',', '.')) < 0.0001:
 371         line[5] = '< 0,0001'
 372     elif float(line[5].replace(',', '.')) > 0.05:
 373         line[5] = 'NS (%s)' % str(float(line[5].replace(',', '.')))[0:7]
 374     else:
 375         line[5] = str(float(line[5].replace(',', '.')))[0:7]
 376     return [i, int(line[0]), int(line[1]), float(line[2]), float(line[3]), line[6], line[4], line[5]]
 377
 378 def ReadProfileAsDico(File, Alceste=False, encoding = sys.getdefaultencoding()):
 379     #print 'lecture des profils : ReadProfileAsDico'
 380     #if Alceste :
 381     #    print 'lecture du dictionnaire de type'
 382     #    dictlem = {}
 383     #    for line in parent.corpus.lem_type_list :
 384     #        dictlem[line[0]] = line[1]
 385     dictlem = {}
 386     print 'lecture des profiles'
 387     #encoding = sys.getdefaultencoding()
 388     FileReader = codecs.open(File, 'r', encoding)
 389     Filecontent = FileReader.readlines()
 390     FileReader.close()
 391     DictProfile = {}
 392     count = 0
 393     #rows = [row.replace('\n', '').replace("'", '').replace('\"', '').replace(',', '.').replace('\r','').split(';') for row in Filecontent]
 394     rows = [row.replace('\n', '').replace("'", '').replace('\"', '').replace('\r','').split(';') for row in Filecontent]
 395     rows.pop(0)
 396     ClusterNb = rows[0][2]
 397     rows.pop(0)
 398     clusters = [row[2] for row in rows if row[0] == u'**']
 399     valclusters = [row[1:4] for row in rows if row[0] == u'****']
 400     lp = [i for i, line in enumerate(rows) if line[0] == u'****']
 401     prof = [rows[lp[i] + 1:lp[i+1] - 1] for i in range(0, len(lp)-1)] + [rows[lp[-1] + 1:len(rows)]]
 402     if Alceste :
 403         prof = [[add_type(row, dictlem) for row in pr] for pr in prof]
 404         prof = [[treat_line_alceste(i,line) for i, line in enumerate(pr)] for pr in prof]
 405     else :
 406         prof = [[line + [''] for line in pr] for pr in prof]
 407         prof = [[treat_line_alceste(i,line) for i, line in enumerate(pr)] for pr in prof]
 408     for i, cluster in enumerate(clusters):
 409         DictProfile[cluster] = [valclusters[i]] + prof[i]
 410     return DictProfile
 411
 412 def GetTxtProfile(dictprofile, cluster_size) :
 413     proflist = []
 414     for classe in range(0, len(dictprofile)) :
 415         prof = dictprofile[str(classe + 1)]
 416         clinfo = cluster_size[classe]
 417         proflist.append('\n'.join([' '.join(['classe %i' % (classe + 1), '-', '%s uce sur %s - %s%%' % (clinfo[0], clinfo[1], clinfo[2])]), '\n'.join(['%5s|%5s|%6s|%6s|%8s|%8s|%20s\t%10s' % tuple([str(val) for val in line]) for line in prof if len(line)==8])]))
 418     return '\n\n'.join(proflist)
 419
 420 def formatExceptionInfo(maxTBlevel=5):
 421          cla, exc, trbk = sys.exc_info()
 422          try :
 423             excName = cla.__name__
 424          except :
 425             excName = 'None'
 426          try:
 427              excArgs = exc.args[0]
 428          except :
 429              excArgs = "<no args>"
 430          excTb = traceback.format_tb(trbk, maxTBlevel)
 431          return (excName, excArgs, excTb)
 432
 433
 434 #fonction des etudiants de l'iut
 435 def decoupercharact(chaine, longueur, longueurOptimale, separateurs = None) :
 436     """
 437         on part du dernier caractère, et on recule jusqu'au début de la chaîne.
 438         Si on trouve un '$', c'est fini.
 439         Sinon, on cherche le meilleur candidat. C'est-à-dire le rapport poids/distance le plus important.
 440     """
 441     separateurs = [[u'.', 60.0], [u'?', 60.0], [u'!', 60.0], [u'£$£', 60], [u':', 50.0], [u';', 40.0], [u',', 10.0], [u' ', 0.1]]
 442     trouve = False                 # si on a trouvé un bon séparateur
 443     iDecoupe = 0                # indice du caractere ou il faut decouper
 444
 445     # on découpe la chaine pour avoir au maximum 240 caractères
 446     longueur = min(longueur, len(chaine) - 1)
 447     chaineTravail = chaine[:longueur + 1]
 448     nbCar = longueur
 449     meilleur = ['', 0, 0]        # type, poids et position du meilleur separateur
 450
 451     # on vérifie si on ne trouve pas un '$'
 452     indice = chaineTravail.find(u'$')
 453     if indice > -1:
 454         trouve = True
 455         iDecoupe = indice
 456
 457     # si on ne trouve rien, on cherche le meilleur séparateur
 458     if not trouve:
 459         while nbCar >= 0:
 460             caractere = chaineTravail[nbCar]
 461             distance = abs(longueurOptimale - nbCar) + 1
 462             meilleureDistance = abs(longueurOptimale - meilleur[2]) + 1
 463
 464             # on vérifie si le caractére courant est une marque de ponctuation
 465             for s in separateurs:
 466                 if caractere == s[0]:
 467                     # si c'est une ponctuation
 468
 469                     if s[1] / distance > float(meilleur[1]) / meilleureDistance:
 470                         # print nbCar, s[0]
 471                         meilleur[0] = s[0]
 472                         meilleur[1] = s[1]
 473                         meilleur[2] = nbCar
 474                         trouve = True
 475                         iDecoupe = nbCar
 476
 477                     # et on termine la recherche
 478                     break
 479
 480             # on passe au caractère précédant
 481             nbCar = nbCar - 1
 482
 483     # si on a trouvé
 484     if trouve:
 485         fin = chaine[iDecoupe + 1:]
 486         retour = chaineTravail[:iDecoupe]
 487         return len(retour) > 0, retour.split(), fin
 488     # si on a rien trouvé
 489     return False, chaine.split(), ''
 490
 491
# User-facing (French) messages for the error keys recognised by BugReport.
# BugReport looks up the text of a raised Exception here; when that text is
# "<key> <line number>", the line number is appended to the message.
exceptions = {'paragrapheOT' : u"Un problème de formatage (présence d'un marqueur de paragraphe (-*) en dehors d'un texte) est survenu à la ligne ",
              'EmptyText' : u"Texte vide (probablement un problème de formatage du corpus). Le problème est apparu à la ligne ",
              'CorpusEncoding' : u"Problème d'encodage.",
              'TextBeforeTextMark' : u"Problème de formatage : du texte avant le premier marqueur de texte (****). Le problème est survenu à la ligne ",
              'MissingAnalyse' : u'Aucun fichier à cet emplacement :\n',
}
 498
 499 def BugReport(parent, error = None):
 500     for ch in parent.GetChildren():
 501         if "<class 'wx._windows.ProgressDialog'>" == str(type(ch)):
 502             ch.Destroy()
 503     excName, exc, excTb = formatExceptionInfo()
 504     if excName == 'Exception' :
 505         print exc
 506         if len(exc.split()) == 2 :
 507             mss, linenb = exc.split()
 508             if mss in exceptions :
 509                 txt = exceptions[mss] + linenb
 510             else :
 511                 txt = exc
 512         else :
 513             if exc in exceptions :
 514                 txt = exceptions[exc]
 515             else :
 516                 txt = exc
 517         title = "Information"
 518     else :
 519         txt = u'            !== BUG ==!       \n'
 520         txt += u'*************************************\n'
 521         txt += '\n'.join(excTb).replace('    ', ' ')
 522         txt += excName + '\n'
 523         txt += `exc`
 524         title = "Bug"
 525
 526     dial = BugDialog(parent, **{'title' : title})
 527     if 'Rerror' in dir(parent) :
 528         txt += parent.Rerror
 529         parent.Rerror = ''
 530     log.info(txt)
 531     dial.text_ctrl_1.write(txt)
 532     dial.CenterOnParent()
 533     dial.ShowModal()
 534     dial.Destroy()
 535
 536 def PlaySound(parent):
 537     if parent.pref.getboolean('iramuteq', 'sound') :
 538         try:
 539             if "gtk2" in wx.PlatformInfo:
 540                 error = Popen(['aplay','-q',os.path.join(parent.AppliPath,'son_fin.wav')])
 541             else :
 542                 sound = wx.Sound(os.path.join(parent.AppliPath, 'son_fin.wav'))
 543                 sound.Play(wx.SOUND_SYNC)
 544         except :
 545             print 'pas de son'
 546
 547 def ReadDicoAsDico(dicopath):
 548     with codecs.open(dicopath, 'r', 'UTF8') as f:
 549         content = f.readlines()
 550     lines = [line.rstrip('\n\r').replace(u'\n', '').replace('"', '').split('\t') for line in content if line != u'']
 551     return dict([[line[0], line[1:]] for line in lines])
 552
 553 def ReadLexique(parent, lang = 'french', filein = None):
 554     if lang != 'other' :
 555         if filein is None :
 556             parent.lexique = ReadDicoAsDico(parent.DictPath.get(lang, 'french'))
 557         else :
 558             parent.lexique = ReadDicoAsDico(filein)
 559     else :
 560         if filein is None :
 561             parent.lexique = {}
 562         else :
 563             parent.lexique = ReadDicoAsDico(filein)
 564
 565 def ReadList(filein, encoding = sys.getdefaultencoding(), sep = ';'):
 566     #file = open(filein)
 567     with codecs.open(filein, 'r', encoding) as f :
 568         content = f.read()
 569     content = [line.replace('\n', '').replace('\r','').replace('\"', '').replace(',', '.').split(sep) for line in content.splitlines()]
 570     #file = codecs.open(filein, 'r', encoding)
 571     #content = file.readlines()
 572     #file.close()
 573     first = content.pop(0)
 574     #first = first.replace('\n', '').replace('\r','').replace('\"', '').split(sep)
 575     dict = {}
 576     i = 0
 577     for line in content:
 578         #line = line.replace('\n', '').replace('\r','').replace('\"', '').replace(',', '.')
 579         #line = line.split(';')
 580         nline = [line[0]]
 581         for val in line[1:]:
 582             if val == u'NA' :
 583                 don = ''
 584             else:
 585                 try:
 586                     don = int(val)
 587                 except:
 588                     don = float('%.5f' % float(val))
 589             nline.append(don)
 590         dict[i] = nline
 591         i += 1
 592     return dict, first
 593
 594 def exec_RCMD(rpath, command) :
 595     log.info('R CMD INSTALL %s' % command)
 596     rpath = rpath.replace('\\','\\\\')
 597     error = call(["%s" % rpath, 'CMD', 'INSTALL', "%s" % command])
 598     return error
 599
 600 def exec_rcode(rpath, rcode, wait = True, graph = False):
 601     log.info("R Script : %s" % rcode)
 602     needX11 = False
 603     if sys.platform == 'darwin' :
 604         try :
 605             macversion = platform.mac_ver()[0].split('.')
 606             if int(macversion[1]) < 5 :
 607                 needX11 = True
 608             else :
 609                 needX11 = False
 610         except :
 611             needX11 = False
 612
 613     rpath = rpath.replace('\\','\\\\')
 614     env = os.environ.copy()
 615     if sys.platform == 'darwin' and 'LC_ALL' not in env:
 616         env['LC_ALL'] = 'en_US.UTF-8'
 617     if not graph :
 618         if wait :
 619             if sys.platform == 'win32':
 620                 error = call(["%s" % rpath, "--vanilla","--slave","-f", "%s" % rcode])
 621             else :
 622                 error = call([rpath, '--slave', "--vanilla", "-f %s" % rcode, "--encoding=UTF-8"], env = env)
 623             return error
 624         else :
 625             if sys.platform == 'win32':
 626                 pid = Popen(["%s" % rpath, '--vanilla','--slave','-f', "%s" % rcode])
 627             else :
 628                 pid = Popen([rpath, '--slave', "--vanilla", "-f %s" % rcode, "--encoding=UTF-8"], stderr = PIPE, env = env)
 629             return pid
 630     else :
 631         if wait :
 632             if sys.platform == 'win32':
 633                 error = call(["%s" % rpath, '--vanilla','--slave','-f', "%s" % rcode])
 634             elif sys.platform == 'darwin' and needX11:
 635                 os.environ['DISPLAY'] = ':0.0'
 636                 error = call([rpath, '--vanilla','--slave',"-f %s" % rcode, "--encoding=UTF-8"], env = env)
 637             else :
 638                 error = call([rpath, '--vanilla','--slave',"-f %s" % rcode, "--encoding=UTF-8"], env = env)
 639             return error
 640         else :
 641             if sys.platform == 'win32':
 642                 pid = Popen(["%s" % rpath, '--vanilla','--slave','-f', "%s" % rcode])
 643             elif sys.platform == 'darwin' and needX11:
 644                 os.environ['DISPLAY'] = ':0.0'
 645                 pid = Popen([rpath, '--vanilla','--slave',"-f %s" % rcode, "--encoding=UTF-8"], stderr = PIPE, env = env)
 646             else :
 647                 pid = Popen([rpath, '--vanilla','--slave',"-f %s" % rcode, "--encoding=UTF-8"], stderr = PIPE, env = env)
 648             return pid
 649
 650 def check_Rresult(parent, pid) :
 651     if isinstance(pid, Popen) :
 652         if pid.returncode != 0 :
 653             error = pid.communicate()
 654             error = [str(error[0]), error[1]]
 655             if error[1] is None :
 656                 error[1] = 'None'
 657             parent.Rerror = '\n'.join([str(pid.returncode), '\n'.join(error)])
 658             try :
 659                 raise Exception('\n'.join([u'Erreur R', '\n'.join(error[1:])]))
 660             except :
 661                 BugReport(parent)
 662             return False
 663         else :
 664             return True
 665     else :
 666         if pid != 0 :
 667             try :
 668                 raise Exception(u'Erreur R')
 669             except :
 670                 BugReport(parent)
 671             return False
 672         else :
 673             return True
 674
 675 def print_liste(filename,liste):
 676     with open(filename,'w') as f :
 677         for graph in liste :
 678             f.write(';'.join(graph)+'\n')
 679
 680 def read_list_file(filename, encoding = sys.getdefaultencoding()):
 681     with codecs.open(filename,'rU', encoding) as f :
 682         content=f.readlines()
 683         ncontent=[line.replace('\n','').split(';') for line in content if line.strip() != '']
 684     return ncontent
 685
 686
 687
 688
 689 def progressbar(self, maxi) :
 690     if 'parent' in dir(self) :
 691         parent = self.parent
 692     else :
 693         parent = self
 694     try :
 695         print '###horrible hack progressbar'
 696         maxi = int(maxi)
 697     except :
 698         maxi = 1
 699     return wx.ProgressDialog("Traitements",
 700                              "Veuillez patienter...",
 701                              maximum=maxi,
 702                              parent=parent,
 703                              style=wx.PD_APP_MODAL | wx.PD_AUTO_HIDE | wx.PD_ELAPSED_TIME | wx.PD_CAN_ABORT
 704                              )
 705
 706
 707 def treat_var_mod(variables) :
 708     var_mod = {}
 709     variables = list(set(variables))
 710     varmod = [variable.split('_') for variable in variables]
 711     vars = list(set([var[0] for var in varmod if len(var) >=2]))
 712     for var in vars :
 713         mods = ['_'.join(v) for v in varmod if v[0] == var]
 714         var_mod[var] = mods
 715
 716 #     for variable in variables :
 717 #         if u'_' in variable :
 718 #             forme = variable.split(u'_')
 719 #             var = forme[0]
 720 #             mod = forme[1]
 721 #             if not var in var_mod :
 722 #                 var_mod[var] = [variable]
 723 #             else :
 724 #                 if not mod in var_mod[var] :
 725 #                     var_mod[var].append(variable)
 726     return var_mod
 727
 728 def doconcorde(corpus, uces, mots, uci = False) :
 729     if not uci :
 730         ucestxt1 = [row for row in corpus.getconcorde(uces)]
 731     else :
 732         ucestxt1 = [row for row in corpus.getuciconcorde(uces)]
 733     ucestxt1 = dict(ucestxt1)
 734     ucestxt = []
 735     ucis_txt = []
 736     listmot = [corpus.getlems()[lem].formes for lem in mots]
 737     listmot = [corpus.getforme(fid).forme for lem in listmot for fid in lem]
 738     mothtml = ['<font color=red><b>%s</b></font>' % mot for mot in listmot]
 739     dmots = dict(zip(listmot, mothtml))
 740     for uce in uces :
 741         ucetxt = ucestxt1[uce].split()
 742         ucetxt = ' '.join([dmots.get(mot, mot) for mot in ucetxt])
 743         if not uci :
 744             ucis_txt.append('<p><b>' + ' '.join(corpus.ucis[corpus.getucefromid(uce).uci].etoiles) + '</b></p>')
 745         else :
 746             ucis_txt.append('<p><b>' + ' '.join(corpus.ucis[uce].etoiles) + '</b></p>')
 747         ucestxt.append(ucetxt)
 748     return ucis_txt, ucestxt
 749
 750
 751 def getallstcarac(corpus, analyse) :
 752    pathout = PathOut(analyse['ira'])
 753    profils =  ReadProfileAsDico(pathout['PROFILE_OUT'], Alceste, self.encoding)
 754    print profils