iramuteq.org Git - iramuteq/blob - functions.py

   1 #!/bin/env python
   2 # -*- coding: utf-8 -*-
   3 #Author: Pierre Ratinaud
   4 #Copyright (c) 2008-2012 Pierre Ratinaud
   5 #License: GNU/GPL
   6
   7 import wx
   8 import re
   9 from ConfigParser import ConfigParser
  10 from subprocess import Popen, call, PIPE
  11 import thread
  12 import os
  13 import ast
  14 import sys
  15 import csv
  16 import platform
  17 import traceback
  18 import codecs
  19 import locale
  20 import datetime
  21 from copy import copy
  22 from shutil import copyfile
  23 import shelve
  24 #from dialog import BugDialog
  25 import logging
  26
  27 log = logging.getLogger('iramuteq')
  28
  29
  30 indices_simi = [u'cooccurrence' ,'pourcentage de cooccurrence',u'Russel',u'Jaccard', 'Kulczynski1', 'Kulczynski2', 'Mountford', 'Fager', 'simple matching', 'Hamman', 'Faith', 'Tanimoto', 'Dice', 'Phi', 'Stiles', 'Michael', 'Mozley', 'Yule', 'Yule2', 'Ochiai', 'Simpson', 'Braun-Blanquet','Chi-squared', 'Phi-squared', 'Tschuprow', 'Cramer', 'Pearson', 'binomial']
  31
  32
  33 class TGen :
  34     def __init__(self, path = None, encoding = 'utf8'):
  35         self.path = path
  36         self.tgen = {}
  37         self.encoding = encoding
  38
  39     def __getitem__(self, key):
  40         return self.tgen[key]
  41
  42     def read(self, path):
  43         with codecs.open(path, 'r', self.encoding) as f :
  44             tgen = f.read()
  45         tgen = [line.split('\t') for line in tgen.splitlines()]
  46         tgen = dict([[line[0], line[1:]] for line in tgen])
  47         self.tgen = tgen
  48         self.path = path
  49
  50     def write(self, path = None):
  51         if path is None :
  52             path = self.path
  53         with open(path, 'w') as f :
  54             f.write('\n'.join(['\t'.join([val] + self.tgen[val]) for val in self.tgen]).encode(self.encoding))
  55
  56     def writetable(self, pathout, tgens, totocc):
  57         etoiles = totocc.keys()
  58         etoiles.sort()
  59         with open(pathout, 'w') as f :
  60             line = '\t'.join([u'tgens'] + etoiles) + '\n'
  61             f.write(line.encode(self.encoding))
  62             for t in tgens :
  63                 line = '\t'.join([t] + [`tgens[t][et]` for et in etoiles]) + '\n'
  64                 f.write(line.encode(self.encoding))
  65             i = 0
  66             totname = 'total'
  67             while totname + `i` in tgens :
  68                 i += 1
  69             totname = totname + `i`
  70             line = '\t'.join([totname] + [`totocc[et]` for et in etoiles]) + '\n'
  71             f.write(line.encode(self.encoding))
  72
  73 class History :
  74     def __init__(self, filein, syscoding = 'utf8') :
  75         self.filein = filein
  76         self.syscoding = syscoding
  77         self.corpus = {}
  78         self.openedcorpus = {}
  79         self.openedmatrix = {}
  80         self.orph = []
  81         self.analyses = {}
  82         self.history = []
  83         self.opened = {}
  84         self.read()
  85
  86     def read(self) :
  87         d = shelve.open(self.filein)
  88         self.history = d.get('history', [])
  89         self.matrix = d.get('matrix', [])
  90         self.ordercorpus = dict([[corpus['uuid'], i] for i, corpus in enumerate(self.history)])
  91         self.corpus = dict([[corpus['uuid'], corpus] for corpus in self.history])
  92         self.analyses = dict([[analyse['uuid'], analyse] for corpus in self.history for analyse in corpus.get('analyses', [])])
  93         self.matrixanalyse = dict([[mat['uuid'], mat] for mat in self.matrix])
  94         self.ordermatrix = dict([[matrix['uuid'], i] for i, matrix in enumerate(self.matrix)])
  95         d.close()
  96
  97     def write(self) :
  98         d = shelve.open(self.filein)
  99         d['history'] = self.history
 100         d['matrix'] = self.matrix
 101         d.close()
 102
 103     def add(self, analyse) :
 104         log.info('add to history %s' % analyse.get('corpus_name', 'pas un corpus'))
 105         tosave = {'uuid' : analyse['uuid'], 'ira': analyse['ira'], 'type' : analyse['type']}
 106         if tosave['uuid'] in self.corpus :
 107             log.info('problem : this uuid is already in history : %s' % tosave['uuid'])
 108             return
 109         if analyse.get('corpus', False) :
 110             if analyse['uuid'] in self.analyses :
 111                 return
 112             tosave['corpus'] = analyse['corpus']
 113             tosave['name'] = analyse['name']
 114             acorpus_uuid =  analyse['corpus']
 115             if acorpus_uuid in self.corpus :
 116                 if 'analyses' in self.history[self.ordercorpus[acorpus_uuid]] :
 117                     self.history[self.ordercorpus[acorpus_uuid]]['analyses'].append(tosave)
 118                 else :
 119                     self.history[self.ordercorpus[acorpus_uuid]]['analyses'] = [tosave]
 120             else :
 121                 self.orph.append(tosave)
 122         else :
 123             tosave['corpus_name'] = analyse['corpus_name']
 124             #self.ordercorpus[tosave['uuid']] = len(history)
 125             #self.corpus[tosave['uuid']] = analyse
 126             self.history.append(tosave)
 127         self.write()
 128         self.read()
 129
 130     def addMatrix(self, analyse) :
 131         tosave = analyse
 132         #tosave['matrix_name'] = analyse['matrix_name']
 133         tosave['analyses'] = []
 134         self.matrix.append(tosave)
 135         self.write()
 136         self.read()
 137
 138     def addMatrixAnalyse(self, analyse) :
 139         tosave = {'uuid' : analyse['uuid'], 'ira': analyse['ira'], 'type' : analyse['type'], 'matrix' : analyse['matrix']}
 140         tosave['name'] = analyse['name']
 141         if tosave['matrix'] in self.ordermatrix :
 142             self.matrix[self.ordermatrix[tosave['matrix']]]['analyses'].append(tosave)
 143         self.write()
 144         self.read()
 145
 146     def addmultiple(self, analyses) :
 147         log.info('add multiple')
 148         for analyse in analyses :
 149             tosave = {'uuid' : analyse['uuid'], 'ira': analyse['ira'], 'type' : analyse['type']}
 150             corpus = analyse['corpus']
 151             tosave['corpus'] = corpus
 152             tosave['name'] = analyse['name']
 153             if corpus in self.corpus :
 154                 if 'analyses' in self.history[self.ordercorpus[corpus]] :
 155                     self.history[self.ordercorpus[corpus]]['analyses'].append(tosave)
 156                 else :
 157                     self.history[self.ordercorpus[corpus]]['analyses'] = [tosave]
 158         self.write()
 159         self.read()
 160
 161     def delete(self, analyse, corpus = False) :
 162         log.info('delete %s' % analyse.get('name', 'noname'))
 163         if corpus :
 164             self.history.pop(self.ordercorpus[analyse['uuid']])
 165             if analyse['uuid'] in self.openedcorpus :
 166                 del self.openedcorpus[analyse['uuid']]
 167             log.info('delete corpus : %s' % analyse['uuid'])
 168         elif analyse['uuid'] in self.analyses :
 169             todel = [i for i, ana in enumerate(self.corpus[analyse['corpus']]['analyses']) if ana['uuid'] == analyse['uuid']][0]
 170             self.history[self.ordercorpus[analyse['corpus']]]['analyses'].pop(todel)
 171         elif analyse['uuid'] in self.matrixanalyse :
 172             self.matrix = [mat for mat in self.matrix if mat['uuid'] != analyse['uuid']]
 173         self.write()
 174         self.read()
 175
 176     def addtab(self, analyse) :
 177         self.opened[analyse['uuid']] = analyse
 178
 179     def rmtab(self, analyse) :
 180         del self.opened[analyse['uuid']]
 181
 182     def clean(self) :
 183         corpustodel = [corpus for corpus in self.history if not os.path.exists(corpus['ira'])]
 184         print corpustodel
 185         for corpus in corpustodel :
 186             print 'cleaning :', corpus['corpus_name']
 187             self.delete(corpus, corpus = True)
 188         anatodel = [analyse for corpus in self.history for analyse in corpus.get('analyses', []) if not os.path.exists(analyse.get('ira', '/'))]
 189         for analyse in anatodel :
 190             print 'cleaning :', analyse['name']
 191             self.delete(analyse)
 192
 193     def __str__(self) :
 194         return str(self.history)
 195
 196 class DoConf :
 197     def __init__(self, configfile=None, diff = None, parametres = None) :
 198         self.configfile = configfile
 199         self.conf = ConfigParser()
 200         if configfile is not None :
 201             self.conf.readfp(codecs.open(configfile, 'r', 'utf8'))
 202         self.parametres = {}
 203         if parametres is not None :
 204             self.doparametres(parametres)
 205
 206     def doparametres(self, parametres) :
 207         return parametres
 208
 209     def getsections(self) :
 210         return self.conf.sections()
 211
 212     def getoptions(self, section = None, diff = None):
 213         parametres = {}
 214         if section is None :
 215             section = self.conf.sections()[0]
 216         for option in self.conf.options(section) :
 217             if self.conf.get(section, option).isdigit() :
 218                 parametres[option] = int(self.conf.get(section, option))
 219             elif self.conf.get(section, option) == 'False' :
 220                 parametres[option] = False
 221             elif self.conf.get(section, option) == 'True' :
 222                 parametres[option] = True
 223             elif self.conf.get(section, option).startswith('(') and self.conf.get(section, option).endswith(')') :
 224                 parametres[option] = ast.literal_eval(self.conf.get(section, option))
 225             elif self.conf.get(section, option).startswith('[') and self.conf.get(section, option).endswith(']') :
 226                 parametres[option] = ast.literal_eval(self.conf.get(section, option))
 227             else :
 228                 parametres[option] = self.conf.get(section, option)
 229         if 'type' not in parametres :
 230             parametres['type'] = section
 231         return parametres
 232
 233     def makeoptions(self, sections, parametres, outfile = None) :
 234         txt = ''
 235         for i, section in enumerate(sections) :
 236             txt += '[%s]\n' % section
 237             if not self.conf.has_section(section) :
 238                 self.conf.add_section(section)
 239             for option in parametres[i] :
 240                 if isinstance(parametres[i][option], int) :
 241                     self.conf.set(section, option, `parametres[i][option]`)
 242                     txt += '%s = %i\n' % (option, parametres[i][option])
 243                 elif isinstance(parametres[i][option], basestring) :
 244                     self.conf.set(section, option, parametres[i][option].encode('utf8'))
 245                     txt += '%s = %s\n' % (option, parametres[i][option])
 246                 elif isinstance(parametres[i][option], wx.Colour) :
 247                     self.conf.set(section, option, str(parametres[i][option]))
 248                     txt += '%s = %s\n' % (option, str(parametres[i][option]))
 249                 elif option == 'analyses' :
 250                     pass
 251                 else :
 252                     self.conf.set(section, option, `parametres[i][option]`)
 253                     txt += '%s = %s\n' % (option, `parametres[i][option]`)
 254         if outfile is None :
 255             outfile = self.configfile
 256         with codecs.open(outfile, 'w', 'utf8') as f :
 257             f.write(txt)
 258             #self.conf.write(f)
 259
 260     def totext(self, parametres) :
 261         #txt = ['Corpus']
 262         txt = []
 263         for val in parametres :
 264             if isinstance(parametres[val], int) :
 265                 txt.append(' \t\t: '.join([val, `parametres[val]`]))
 266             elif isinstance(parametres[val], basestring) :
 267                 txt.append(' \t\t: '.join([val, parametres[val]]))
 268             elif val in ['listet', 'stars'] :
 269                 pass
 270             else :
 271                 txt.append(' \t\t: '.join([val, `parametres[val]`]))
 272         return '\n'.join(txt)
 273
 274
 275 def write_tab(tab, fileout) :
 276         writer = csv.writer(open(fileout, 'wb'), delimiter=';', quoting = csv.QUOTE_NONNUMERIC)
 277         writer.writerows(tab)
 278
 279 class BugDialog(wx.Dialog):
 280     def __init__(self, *args, **kwds):
 281         # begin wxGlade: MyDialog.__init__
 282         kwds["style"] = wx.DEFAULT_DIALOG_STYLE
 283         kwds["size"] = wx.Size(500, 200)
 284         wx.Dialog.__init__(self, *args, **kwds)
 285         self.SetTitle(kwds['title'])
 286         self.text_ctrl_1 = wx.TextCtrl(self, -1, "", style=wx.TE_MULTILINE)
 287         self.text_ctrl_1.SetBackgroundColour('#DDE8EB')
 288         self.button_1 = wx.Button(self, wx.ID_OK, "")
 289
 290         self.__set_properties()
 291         self.__do_layout()
 292         # end wxGlade
 293
 294     def __set_properties(self):
 295         # begin wxGlade: MyDialog.__set_properties
 296         self.SetMinSize(wx.Size(500, 200))
 297         self.text_ctrl_1.SetMinSize(wx.Size(500, 200))
 298
 299         # end wxGlade
 300
 301     def __do_layout(self):
 302         # begin wxGlade: MyDialog.__do_layout
 303         sizer_1 = wx.BoxSizer(wx.VERTICAL)
 304         sizer_1.Add(self.text_ctrl_1, 1, wx.EXPAND, 0)
 305         sizer_1.Add(self.button_1, 0, wx.ALIGN_CENTER_HORIZONTAL, 0)
 306         self.SetSizer(sizer_1)
 307         sizer_1.Fit(self)
 308         self.Layout()
 309
 310
 311 def CreateIraFile(DictPathOut, clusternb, corpname='corpus_name', section = 'analyse'):
 312     AnalyseConf = ConfigParser()
 313     AnalyseConf.read(DictPathOut['ira'])
 314     AnalyseConf.add_section(section)
 315     date = datetime.datetime.now().ctime()
 316     AnalyseConf.set(section, 'date', str(date))
 317     AnalyseConf.set(section, 'clusternb', clusternb)
 318     AnalyseConf.set(section, 'corpus_name', corpname)
 319
 320     fileout = open(DictPathOut['ira'], 'w')
 321     AnalyseConf.write(fileout)
 322     fileout.close()
 323
 324 def sortedby(list, direct, *indices):
 325
 326     """
 327         sortedby: sort a list of lists (e.g. a table) by one or more indices
 328                   (columns of the table) and return the sorted list
 329
 330         e.g.
 331          for list = [[2,3],[1,2],[3,1]]:
 332          sortedby(list,1) will return [[3, 1], [1, 2], [2, 3]],
 333          sortedby(list,0) will return [[1, 2], [2, 3], [3, 1]]
 334     """
 335
 336     nlist = map(lambda x, indices=indices:
 337                  map(lambda i, x=x: x[i], indices) + [x],
 338                  list)
 339     if direct == 1:
 340         nlist.sort()
 341     elif direct == 2:
 342         nlist.sort(reverse=True)
 343     return map(lambda l: l[-1], nlist)
 344
 345 def add_type(line, dictlem):
 346     if line[4] in dictlem:
 347         line.append(dictlem[line[4]])
 348     else :
 349         line.append('')
 350     return line
 351
 352 def treat_line_alceste(i, line) :
 353     if line[0] == '*' or line[0] == '*****' :
 354         return line + ['']
 355     if line[5] == 'NA':
 356         print 'NA', line[5]
 357         pass
 358     elif float(line[5].replace(',', '.')) < 0.0001:
 359         line[5] = '< 0,0001'
 360     elif float(line[5].replace(',', '.')) > 0.05:
 361         line[5] = 'NS (%s)' % str(float(line[5].replace(',', '.')))[0:7]
 362     else:
 363         line[5] = str(float(line[5].replace(',', '.')))[0:7]
 364     return [i, int(line[0]), int(line[1]), float(line[2]), float(line[3]), line[6], line[4], line[5]]
 365
 366 def ReadProfileAsDico(File, Alceste=False, encoding = sys.getdefaultencoding()):
 367     #print 'lecture des profils : ReadProfileAsDico'
 368     #if Alceste :
 369     #    print 'lecture du dictionnaire de type'
 370     #    dictlem = {}
 371     #    for line in parent.corpus.lem_type_list :
 372     #        dictlem[line[0]] = line[1]
 373     dictlem = {}
 374     print 'lecture des profiles'
 375     #encoding = sys.getdefaultencoding()
 376     FileReader = codecs.open(File, 'r', encoding)
 377     Filecontent = FileReader.readlines()
 378     FileReader.close()
 379     DictProfile = {}
 380     count = 0
 381     #rows = [row.replace('\n', '').replace("'", '').replace('\"', '').replace(',', '.').replace('\r','').split(';') for row in Filecontent]
 382     rows = [row.replace('\n', '').replace("'", '').replace('\"', '').replace('\r','').split(';') for row in Filecontent]
 383     rows.pop(0)
 384     ClusterNb = rows[0][2]
 385     rows.pop(0)
 386     clusters = [row[2] for row in rows if row[0] == u'**']
 387     valclusters = [row[1:4] for row in rows if row[0] == u'****']
 388     lp = [i for i, line in enumerate(rows) if line[0] == u'****']
 389     prof = [rows[lp[i] + 1:lp[i+1] - 1] for i in range(0, len(lp)-1)] + [rows[lp[-1] + 1:len(rows)]]
 390     if Alceste :
 391         prof = [[add_type(row, dictlem) for row in pr] for pr in prof]
 392         prof = [[treat_line_alceste(i,line) for i, line in enumerate(pr)] for pr in prof]
 393     else :
 394         prof = [[line + [''] for line in pr] for pr in prof]
 395         prof = [[treat_line_alceste(i,line) for i, line in enumerate(pr)] for pr in prof]
 396     for i, cluster in enumerate(clusters):
 397         DictProfile[cluster] = [valclusters[i]] + prof[i]
 398     return DictProfile
 399
 400 def GetTxtProfile(dictprofile, cluster_size) :
 401     proflist = []
 402     for classe in range(0, len(dictprofile)) :
 403         prof = dictprofile[str(classe + 1)]
 404         clinfo = cluster_size[classe]
 405         proflist.append('\n'.join([' '.join(['classe %i' % (classe + 1), '-', '%s uce sur %s - %s%%' % (clinfo[0], clinfo[1], clinfo[2])]), '\n'.join(['%5s|%5s|%6s|%6s|%8s|%8s|%20s\t%10s' % tuple([str(val) for val in line]) for line in prof if len(line)==8])]))
 406     return '\n\n'.join(proflist)
 407
 408 def formatExceptionInfo(maxTBlevel=5):
 409          cla, exc, trbk = sys.exc_info()
 410          try :
 411             excName = cla.__name__
 412          except :
 413             excName = 'None'
 414          try:
 415              excArgs = exc.args[0]
 416          except :
 417              excArgs = "<no args>"
 418          excTb = traceback.format_tb(trbk, maxTBlevel)
 419          return (excName, excArgs, excTb)
 420
 421
 422 #fonction des etudiants de l'iut
 423 def decoupercharact(chaine, longueur, longueurOptimale, separateurs = None) :
 424     """
 425         on part du dernier caractère, et on recule jusqu'au début de la chaîne.
 426         Si on trouve un '$', c'est fini.
 427         Sinon, on cherche le meilleur candidat. C'est-à-dire le rapport poids/distance le plus important.
 428     """
 429     separateurs = [[u'.', 60.0], [u'?', 60.0], [u'!', 60.0], [u'£$£', 60], [u':', 50.0], [u';', 40.0], [u',', 10.0], [u' ', 0.1]]
 430     trouve = False                 # si on a trouvé un bon séparateur
 431     iDecoupe = 0                # indice du caractere ou il faut decouper
 432
 433     # on découpe la chaine pour avoir au maximum 240 caractères
 434     longueur = min(longueur, len(chaine) - 1)
 435     chaineTravail = chaine[:longueur + 1]
 436     nbCar = longueur
 437     meilleur = ['', 0, 0]        # type, poids et position du meilleur separateur
 438
 439     # on vérifie si on ne trouve pas un '$'
 440     indice = chaineTravail.find(u'$')
 441     if indice > -1:
 442         trouve = True
 443         iDecoupe = indice
 444
 445     # si on ne trouve rien, on cherche le meilleur séparateur
 446     if not trouve:
 447         while nbCar >= 0:
 448             caractere = chaineTravail[nbCar]
 449             distance = abs(longueurOptimale - nbCar) + 1
 450             meilleureDistance = abs(longueurOptimale - meilleur[2]) + 1
 451
 452             # on vérifie si le caractére courant est une marque de ponctuation
 453             for s in separateurs:
 454                 if caractere == s[0]:
 455                     # si c'est une ponctuation
 456
 457                     if s[1] / distance > float(meilleur[1]) / meilleureDistance:
 458                         # print nbCar, s[0]
 459                         meilleur[0] = s[0]
 460                         meilleur[1] = s[1]
 461                         meilleur[2] = nbCar
 462                         trouve = True
 463                         iDecoupe = nbCar
 464
 465                     # et on termine la recherche
 466                     break
 467
 468             # on passe au caractère précédant
 469             nbCar = nbCar - 1
 470
 471     # si on a trouvé
 472     if trouve:
 473         fin = chaine[iDecoupe + 1:]
 474         retour = chaineTravail[:iDecoupe]
 475         return len(retour) > 0, retour.split(), fin
 476     # si on a rien trouvé
 477     return False, chaine.split(), ''
 478
 479
 480 exceptions = {'paragrapheOT' : u"Un problème de formatage (présence d'un marqueur de paragraphe (-*) en dehors d'un texte) est survenu à la ligne ",
 481               'EmptyText' : u"Texte vide (probablement un problème de formatage du corpus). Le problème est apparu à la ligne ",
 482               'CorpusEncoding' : u"Problème d'encodage.",
 483               'TextBeforeTextMark' : u"Problème de formatage : du texte avant le premier marqueur de texte (****). Le problème est survenu à la ligne ",
 484 }
 485
 486 def BugReport(parent, error = None):
 487     for ch in parent.GetChildren():
 488         if "<class 'wx._windows.ProgressDialog'>" == str(type(ch)):
 489             ch.Destroy()
 490     excName, exc, excTb = formatExceptionInfo()
 491     if excName == 'Exception' :
 492         print exc
 493         if len(exc.split()) == 2 :
 494             mss, linenb = exc.split()
 495             if mss in exceptions :
 496                 txt = exceptions[mss] + linenb
 497             else :
 498                 txt = exc
 499         else :
 500             if exc in exceptions :
 501                 txt = exceptions[exc]
 502             else :
 503                 txt = exc
 504         title = "Information"
 505     else :
 506         txt = u'            !== BUG ==!       \n'
 507         txt += u'*************************************\n'
 508         txt += '\n'.join(excTb).replace('    ', ' ')
 509         txt += excName + '\n'
 510         txt += exc
 511         title = "Bug"
 512
 513     dial = BugDialog(parent, **{'title' : title})
 514     if 'Rerror' in dir(parent) :
 515         txt += parent.Rerror
 516         parent.Rerror = ''
 517     log.info(txt)
 518     dial.text_ctrl_1.write(txt)
 519     dial.CenterOnParent()
 520     dial.ShowModal()
 521     dial.Destroy()
 522
 523 def PlaySound(parent):
 524     if parent.pref.getboolean('iramuteq', 'sound') :
 525         try:
 526             if "gtk2" in wx.PlatformInfo:
 527                 error = Popen(['aplay','-q',os.path.join(parent.AppliPath,'son_fin.wav')])
 528             else :
 529                 sound = wx.Sound(os.path.join(parent.AppliPath, 'son_fin.wav'))
 530                 sound.Play(wx.SOUND_SYNC)
 531         except :
 532             print 'pas de son'
 533
 534 def ReadDicoAsDico(dicopath):
 535     with codecs.open(dicopath, 'r', 'UTF8') as f:
 536         content = f.readlines()
 537     lines = [line.rstrip('\n\r').replace(u'\n', '').replace('"', '').split('\t') for line in content if line != u'']
 538     return dict([[line[0], line[1:]] for line in lines])
 539
 540 def ReadLexique(parent, lang = 'french', filein = None):
 541     if lang != 'other' :
 542         if filein is None :
 543             parent.lexique = ReadDicoAsDico(parent.DictPath.get(lang, 'french'))
 544         else :
 545             parent.lexique = ReadDicoAsDico(filein)
 546     else :
 547         parent.lexique = {}
 548
 549 def ReadList(filein, encoding = sys.getdefaultencoding(), sep = ';'):
 550     #file = open(filein)
 551     with codecs.open(filein, 'r', encoding) as f :
 552         content = f.read()
 553     content = [line.replace('\n', '').replace('\r','').replace('\"', '').replace(',', '.').split(sep) for line in content.splitlines()]
 554     #file = codecs.open(filein, 'r', encoding)
 555     #content = file.readlines()
 556     #file.close()
 557     first = content.pop(0)
 558     #first = first.replace('\n', '').replace('\r','').replace('\"', '').split(sep)
 559     dict = {}
 560     i = 0
 561     for line in content:
 562         #line = line.replace('\n', '').replace('\r','').replace('\"', '').replace(',', '.')
 563         #line = line.split(';')
 564         nline = [line[0]]
 565         for val in line[1:]:
 566             if val == u'NA' :
 567                 don = ''
 568             else:
 569                 try:
 570                     don = int(val)
 571                 except:
 572                     don = float('%.5f' % float(val))
 573             nline.append(don)
 574         dict[i] = nline
 575         i += 1
 576     return dict, first
 577
 578 def exec_RCMD(rpath, command) :
 579     log.info('R CMD INSTALL %s' % command)
 580     rpath = rpath.replace('\\','\\\\')
 581     error = call(["%s" % rpath, 'CMD', 'INSTALL', "%s" % command])
 582     return error
 583
 584 def exec_rcode(rpath, rcode, wait = True, graph = False):
 585     log.info("R Script : %s" % rcode)
 586     needX11 = False
 587     if sys.platform == 'darwin' :
 588         try :
 589             macversion = platform.mac_ver()[0].split('.')
 590             if int(macversion[1]) < 5 :
 591                 needX11 = True
 592             else :
 593                 needX11 = False
 594         except :
 595             needX11 = False
 596
 597     rpath = rpath.replace('\\','\\\\')
 598     env = os.environ.copy()
 599     if sys.platform == 'darwin' and 'LC_ALL' not in env:
 600         env['LC_ALL'] = 'en_US.UTF-8'
 601     if not graph :
 602         if wait :
 603             if sys.platform == 'win32':
 604                 error = call(["%s" % rpath, "--vanilla","--slave","-f", "%s" % rcode])
 605             else :
 606                 error = call([rpath, '--slave', "--vanilla", "-f %s" % rcode, "--encoding=UTF-8"], env = env)
 607             return error
 608         else :
 609             if sys.platform == 'win32':
 610                 pid = Popen(["%s" % rpath, '--vanilla','--slave','-f', "%s" % rcode])
 611             else :
 612                 pid = Popen([rpath, '--slave', "--vanilla", "-f %s" % rcode, "--encoding=UTF-8"], stderr = PIPE, env = env)
 613             return pid
 614     else :
 615         if wait :
 616             if sys.platform == 'win32':
 617                 error = call(["%s" % rpath, '--vanilla','--slave','-f', "%s" % rcode])
 618             elif sys.platform == 'darwin' and needX11:
 619                 os.environ['DISPLAY'] = ':0.0'
 620                 error = call([rpath, '--vanilla','--slave',"-f %s" % rcode, "--encoding=UTF-8"], env = env)
 621             else :
 622                 error = call([rpath, '--vanilla','--slave',"-f %s" % rcode, "--encoding=UTF-8"], env = env)
 623             return error
 624         else :
 625             if sys.platform == 'win32':
 626                 pid = Popen(["%s" % rpath, '--vanilla','--slave','-f', "%s" % rcode])
 627             elif sys.platform == 'darwin' and needX11:
 628                 os.environ['DISPLAY'] = ':0.0'
 629                 pid = Popen([rpath, '--vanilla','--slave',"-f %s" % rcode, "--encoding=UTF-8"], stderr = PIPE, env = env)
 630             else :
 631                 pid = Popen([rpath, '--vanilla','--slave',"-f %s" % rcode, "--encoding=UTF-8"], stderr = PIPE, env = env)
 632             return pid
 633
 634 def check_Rresult(parent, pid) :
 635     if isinstance(pid, Popen) :
 636         if pid.returncode != 0 :
 637             error = pid.communicate()
 638             error = [str(error[0]), error[1]]
 639             if error[1] is None :
 640                 error[1] = 'None'
 641             parent.Rerror = '\n'.join([str(pid.returncode), '\n'.join(error)])
 642             try :
 643                 raise Exception('\n'.join([u'Erreur R', '\n'.join(error[1:])]))
 644             except :
 645                 BugReport(parent)
 646             return False
 647         else :
 648             return True
 649     else :
 650         if pid != 0 :
 651             try :
 652                 raise Exception(u'Erreur R')
 653             except :
 654                 BugReport(parent)
 655             return False
 656         else :
 657             return True
 658
 659 def print_liste(filename,liste):
 660     with open(filename,'w') as f :
 661         for graph in liste :
 662             f.write(';'.join(graph)+'\n')
 663
 664 def read_list_file(filename, encoding = sys.getdefaultencoding()):
 665     with codecs.open(filename,'rU', encoding) as f :
 666         content=f.readlines()
 667         ncontent=[line.replace('\n','').split(';') for line in content if line.strip() != '']
 668     return ncontent
 669
 670
 671
 672
 673 def progressbar(self, maxi) :
 674     if 'parent' in dir(self) :
 675         parent = self.parent
 676     else :
 677         parent = self
 678     try :
 679         print '###horrible hack progressbar'
 680         maxi = int(maxi)
 681     except :
 682         maxi = 1
 683     return wx.ProgressDialog("Traitements",
 684                              "Veuillez patienter...",
 685                              maximum=maxi,
 686                              parent=parent,
 687                              style=wx.PD_APP_MODAL | wx.PD_AUTO_HIDE | wx.PD_ELAPSED_TIME | wx.PD_CAN_ABORT
 688                              )
 689
 690
 691 def treat_var_mod(variables) :
 692     var_mod = {}
 693     variables = list(set(variables))
 694     varmod = [variable.split('_') for variable in variables]
 695     vars = list(set([var[0] for var in varmod if len(var) >=2]))
 696     for var in vars :
 697         mods = ['_'.join(v) for v in varmod if v[0] == var]
 698         var_mod[var] = mods
 699
 700 #     for variable in variables :
 701 #         if u'_' in variable :
 702 #             forme = variable.split(u'_')
 703 #             var = forme[0]
 704 #             mod = forme[1]
 705 #             if not var in var_mod :
 706 #                 var_mod[var] = [variable]
 707 #             else :
 708 #                 if not mod in var_mod[var] :
 709 #                     var_mod[var].append(variable)
 710     return var_mod
 711
 712 def doconcorde(corpus, uces, mots, uci = False) :
 713     if not uci :
 714         ucestxt1 = [row for row in corpus.getconcorde(uces)]
 715     else :
 716         ucestxt1 = [row for row in corpus.getuciconcorde(uces)]
 717     ucestxt1 = dict(ucestxt1)
 718     ucestxt = []
 719     ucis_txt = []
 720     listmot = [corpus.getlems()[lem].formes for lem in mots]
 721     listmot = [corpus.getforme(fid).forme for lem in listmot for fid in lem]
 722     mothtml = ['<font color=red><b>%s</b></font>' % mot for mot in listmot]
 723     dmots = dict(zip(listmot, mothtml))
 724     for uce in uces :
 725         ucetxt = ucestxt1[uce].split()
 726         ucetxt = ' '.join([dmots.get(mot, mot) for mot in ucetxt])
 727         if not uci :
 728             ucis_txt.append('<p><b>' + ' '.join(corpus.ucis[corpus.getucefromid(uce).uci].etoiles) + '</b></p>')
 729         else :
 730             ucis_txt.append('<p><b>' + ' '.join(corpus.ucis[uce].etoiles) + '</b></p>')
 731         ucestxt.append(ucetxt)
 732     return ucis_txt, ucestxt
 733
 734
 735 def getallstcarac(corpus, analyse) :
 736    pathout = PathOut(analyse['ira'])
 737    profils =  ReadProfileAsDico(pathout['PROFILE_OUT'], Alceste, self.encoding)
 738    print profils