iramuteq.org Git - iramuteq/blob - functions.py

   1 #!/bin/env python
   2 # -*- coding: utf-8 -*-
   3 #Author: Pierre Ratinaud
   4 #Copyright (c) 2008-2012 Pierre Ratinaud
   5 #License: GNU/GPL
   6
   7 import wx
   8 import re
   9 from ConfigParser import ConfigParser
  10 from subprocess import Popen, call, PIPE
  11 import thread
  12 import os
  13 import ast
  14 import sys
  15 import csv
  16 import platform
  17 import traceback
  18 import codecs
  19 import locale
  20 import datetime
  21 from copy import copy
  22 from shutil import copyfile
  23 import shelve
  24 #from dialog import BugDialog
  25 import logging
  26
# Module-wide logger; the helpers below report through it.
log = logging.getLogger('iramuteq')


# Names of the similarity indices known to the application.
# NOTE(review): presumably callers rely on the position of each name in this
# list to select an index - confirm before reordering.
indices_simi = [u'cooccurrence' ,'pourcentage de cooccurrence',u'Russel',u'Jaccard', 'Kulczynski1', 'Kulczynski2', 'Mountford', 'Fager', 'simple matching', 'Hamman', 'Faith', 'Tanimoto', 'Dice', 'Phi', 'Stiles', 'Michael', 'Mozley', 'Yule', 'Yule2', 'Ochiai', 'Simpson', 'Braun-Blanquet','Chi-squared', 'Phi-squared', 'Tschuprow', 'Cramer', 'Pearson', 'binomial']
  31
  32
  33 class TGen :
  34     def __init__(self, path = None, encoding = 'utf8'):
  35         self.path = path
  36         self.tgen = {}
  37         self.encoding = encoding
  38
  39     def __getitem__(self, key):
  40         return self.tgen[key]
  41
  42     def read(self, path):
  43         with codecs.open(path, 'r', self.encoding) as f :
  44             tgen = f.read()
  45         tgen = [line.split('\t') for line in tgen.splitlines()]
  46         tgen = dict([[line[0], line[1:]] for line in tgen])
  47         self.tgen = tgen
  48         self.path = path
  49
  50     def write(self, path = None):
  51         if path is None :
  52             path = self.path
  53         with open(path, 'w') as f :
  54             f.write('\n'.join(['\t'.join([val] + self.tgen[val]) for val in self.tgen]).encode(self.encoding))
  55
  56     def writetable(self, pathout, tgens, totocc):
  57         etoiles = totocc.keys()
  58         with open(pathout, 'w') as f :
  59             line = '\t'.join([u'tgens'] + etoiles) + '\n'
  60             f.write(line.encode(self.encoding))
  61             for t in tgens :
  62                 line = '\t'.join([t] + [`tgens[t][et]` for et in etoiles]) + '\n'
  63                 f.write(line.encode(self.encoding))
  64             i = 0
  65             totname = 'total'
  66             while totname + `i` in tgens :
  67                 i += 1
  68             totname = totname + `i`
  69             line = '\t'.join([totname] + [`totocc[et]` for et in etoiles])
  70             f.write(line.encode(self.encoding))
  71
  72 class History :
  73     def __init__(self, filein, syscoding = 'utf8') :
  74         self.filein = filein
  75         self.syscoding = syscoding
  76         self.corpus = {}
  77         self.openedcorpus = {}
  78         self.openedmatrix = {}
  79         self.orph = []
  80         self.analyses = {}
  81         self.history = []
  82         self.opened = {}
  83         self.read()
  84
  85     def read(self) :
  86         d = shelve.open(self.filein)
  87         self.history = d.get('history', [])
  88         self.matrix = d.get('matrix', [])
  89         self.ordercorpus = dict([[corpus['uuid'], i] for i, corpus in enumerate(self.history)])
  90         self.corpus = dict([[corpus['uuid'], corpus] for corpus in self.history])
  91         self.analyses = dict([[analyse['uuid'], analyse] for corpus in self.history for analyse in corpus.get('analyses', [])])
  92         self.matrixanalyse = dict([[mat['uuid'], mat] for mat in self.matrix])
  93         self.ordermatrix = dict([[matrix['uuid'], i] for i, matrix in enumerate(self.matrix)])
  94         d.close()
  95
  96     def write(self) :
  97         d = shelve.open(self.filein)
  98         d['history'] = self.history
  99         d['matrix'] = self.matrix
 100         d.close()
 101
 102     def add(self, analyse) :
 103         log.info('add to history %s' % analyse.get('corpus_name', 'pas un corpus'))
 104         tosave = {'uuid' : analyse['uuid'], 'ira': analyse['ira'], 'type' : analyse['type']}
 105         if tosave['uuid'] in self.corpus :
 106             log.info('problem : this uuid is already in history : %s' % tosave['uuid'])
 107             return
 108         if analyse.get('corpus', False) :
 109             if analyse['uuid'] in self.analyses :
 110                 return
 111             tosave['corpus'] = analyse['corpus']
 112             tosave['name'] = analyse['name']
 113             acorpus_uuid =  analyse['corpus']
 114             if acorpus_uuid in self.corpus :
 115                 if 'analyses' in self.history[self.ordercorpus[acorpus_uuid]] :
 116                     self.history[self.ordercorpus[acorpus_uuid]]['analyses'].append(tosave)
 117                 else :
 118                     self.history[self.ordercorpus[acorpus_uuid]]['analyses'] = [tosave]
 119             else :
 120                 self.orph.append(tosave)
 121         else :
 122             tosave['corpus_name'] = analyse['corpus_name']
 123             #self.ordercorpus[tosave['uuid']] = len(history)
 124             #self.corpus[tosave['uuid']] = analyse
 125             self.history.append(tosave)
 126         self.write()
 127         self.read()
 128
 129     def addMatrix(self, analyse) :
 130         tosave = analyse
 131         #tosave['matrix_name'] = analyse['matrix_name']
 132         tosave['analyses'] = []
 133         self.matrix.append(tosave)
 134         self.write()
 135         self.read()
 136
 137     def addMatrixAnalyse(self, analyse) :
 138         tosave = {'uuid' : analyse['uuid'], 'ira': analyse['ira'], 'type' : analyse['type'], 'matrix' : analyse['matrix']}
 139         tosave['name'] = analyse['name']
 140         if tosave['matrix'] in self.ordermatrix :
 141             self.matrix[self.ordermatrix[tosave['matrix']]]['analyses'].append(tosave)
 142         self.write()
 143         self.read()
 144
 145     def addmultiple(self, analyses) :
 146         log.info('add multiple')
 147         for analyse in analyses :
 148             tosave = {'uuid' : analyse['uuid'], 'ira': analyse['ira'], 'type' : analyse['type']}
 149             corpus = analyse['corpus']
 150             tosave['corpus'] = corpus
 151             tosave['name'] = analyse['name']
 152             if corpus in self.corpus :
 153                 if 'analyses' in self.history[self.ordercorpus[corpus]] :
 154                     self.history[self.ordercorpus[corpus]]['analyses'].append(tosave)
 155                 else :
 156                     self.history[self.ordercorpus[corpus]]['analyses'] = [tosave]
 157         self.write()
 158         self.read()
 159
 160     def delete(self, analyse, corpus = False) :
 161         log.info('delete %s' % analyse.get('name', 'noname'))
 162         if corpus :
 163             self.history.pop(self.ordercorpus[analyse['uuid']])
 164             if analyse['uuid'] in self.openedcorpus :
 165                 del self.openedcorpus[analyse['uuid']]
 166             log.info('delete corpus : %s' % analyse['uuid'])
 167         elif analyse['uuid'] in self.analyses :
 168             todel = [i for i, ana in enumerate(self.corpus[analyse['corpus']]['analyses']) if ana['uuid'] == analyse['uuid']][0]
 169             self.history[self.ordercorpus[analyse['corpus']]]['analyses'].pop(todel)
 170         elif analyse['uuid'] in self.matrixanalyse :
 171             self.matrix = [mat for mat in self.matrix if mat['uuid'] != analyse['uuid']]
 172         self.write()
 173         self.read()
 174
 175     def addtab(self, analyse) :
 176         self.opened[analyse['uuid']] = analyse
 177
 178     def rmtab(self, analyse) :
 179         del self.opened[analyse['uuid']]
 180
 181     def clean(self) :
 182         corpustodel = [corpus for corpus in self.history if not os.path.exists(corpus['ira'])]
 183         print corpustodel
 184         for corpus in corpustodel :
 185             print 'cleaning :', corpus['corpus_name']
 186             self.delete(corpus, corpus = True)
 187         anatodel = [analyse for corpus in self.history for analyse in corpus.get('analyses', []) if not os.path.exists(analyse.get('ira', '/'))]
 188         for analyse in anatodel :
 189             print 'cleaning :', analyse['name']
 190             self.delete(analyse)
 191
 192     def __str__(self) :
 193         return str(self.history)
 194
 195 class DoConf :
 196     def __init__(self, configfile=None, diff = None, parametres = None) :
 197         self.configfile = configfile
 198         self.conf = ConfigParser()
 199         if configfile is not None :
 200             self.conf.readfp(codecs.open(configfile, 'r', 'utf8'))
 201         self.parametres = {}
 202         if parametres is not None :
 203             self.doparametres(parametres)
 204
 205     def doparametres(self, parametres) :
 206         return parametres
 207
 208     def getsections(self) :
 209         return self.conf.sections()
 210
 211     def getoptions(self, section = None, diff = None):
 212         parametres = {}
 213         if section is None :
 214             section = self.conf.sections()[0]
 215         for option in self.conf.options(section) :
 216             if self.conf.get(section, option).isdigit() :
 217                 parametres[option] = int(self.conf.get(section, option))
 218             elif self.conf.get(section, option) == 'False' :
 219                 parametres[option] = False
 220             elif self.conf.get(section, option) == 'True' :
 221                 parametres[option] = True
 222             elif self.conf.get(section, option).startswith('(') and self.conf.get(section, option).endswith(')') :
 223                 parametres[option] = ast.literal_eval(self.conf.get(section, option))
 224             elif self.conf.get(section, option).startswith('[') and self.conf.get(section, option).endswith(']') :
 225                 parametres[option] = ast.literal_eval(self.conf.get(section, option))
 226             else :
 227                 parametres[option] = self.conf.get(section, option)
 228         if 'type' not in parametres :
 229             parametres['type'] = section
 230         return parametres
 231
 232     def makeoptions(self, sections, parametres, outfile = None) :
 233         txt = ''
 234         for i, section in enumerate(sections) :
 235             txt += '[%s]\n' % section
 236             if not self.conf.has_section(section) :
 237                 self.conf.add_section(section)
 238             for option in parametres[i] :
 239                 if isinstance(parametres[i][option], int) :
 240                     self.conf.set(section, option, `parametres[i][option]`)
 241                     txt += '%s = %i\n' % (option, parametres[i][option])
 242                 elif isinstance(parametres[i][option], basestring) :
 243                     self.conf.set(section, option, parametres[i][option].encode('utf8'))
 244                     txt += '%s = %s\n' % (option, parametres[i][option])
 245                 elif isinstance(parametres[i][option], wx.Colour) :
 246                     self.conf.set(section, option, str(parametres[i][option]))
 247                     txt += '%s = %s\n' % (option, str(parametres[i][option]))
 248                 elif option == 'analyses' :
 249                     pass
 250                 else :
 251                     self.conf.set(section, option, `parametres[i][option]`)
 252                     txt += '%s = %s\n' % (option, `parametres[i][option]`)
 253         if outfile is None :
 254             outfile = self.configfile
 255         with codecs.open(outfile, 'w', 'utf8') as f :
 256             f.write(txt)
 257             #self.conf.write(f)
 258
 259     def totext(self, parametres) :
 260         #txt = ['Corpus']
 261         txt = []
 262         for val in parametres :
 263             if isinstance(parametres[val], int) :
 264                 txt.append(' \t\t: '.join([val, `parametres[val]`]))
 265             elif isinstance(parametres[val], basestring) :
 266                 txt.append(' \t\t: '.join([val, parametres[val]]))
 267             elif val in ['listet', 'stars'] :
 268                 pass
 269             else :
 270                 txt.append(' \t\t: '.join([val, `parametres[val]`]))
 271         return '\n'.join(txt)
 272
 273
 274 def write_tab(tab, fileout) :
 275         writer = csv.writer(open(fileout, 'wb'), delimiter=';', quoting = csv.QUOTE_NONNUMERIC)
 276         writer.writerows(tab)
 277
 278 class BugDialog(wx.Dialog):
 279     def __init__(self, *args, **kwds):
 280         # begin wxGlade: MyDialog.__init__
 281         kwds["style"] = wx.DEFAULT_DIALOG_STYLE
 282         kwds["size"] = wx.Size(500, 200)
 283         wx.Dialog.__init__(self, *args, **kwds)
 284         self.SetTitle(kwds['title'])
 285         self.text_ctrl_1 = wx.TextCtrl(self, -1, "", style=wx.TE_MULTILINE)
 286         self.text_ctrl_1.SetBackgroundColour('#DDE8EB')
 287         self.button_1 = wx.Button(self, wx.ID_OK, "")
 288
 289         self.__set_properties()
 290         self.__do_layout()
 291         # end wxGlade
 292
 293     def __set_properties(self):
 294         # begin wxGlade: MyDialog.__set_properties
 295         self.SetMinSize(wx.Size(500, 200))
 296         self.text_ctrl_1.SetMinSize(wx.Size(500, 200))
 297
 298         # end wxGlade
 299
 300     def __do_layout(self):
 301         # begin wxGlade: MyDialog.__do_layout
 302         sizer_1 = wx.BoxSizer(wx.VERTICAL)
 303         sizer_1.Add(self.text_ctrl_1, 1, wx.EXPAND, 0)
 304         sizer_1.Add(self.button_1, 0, wx.ALIGN_CENTER_HORIZONTAL, 0)
 305         self.SetSizer(sizer_1)
 306         sizer_1.Fit(self)
 307         self.Layout()
 308
 309
 310 def CreateIraFile(DictPathOut, clusternb, corpname='corpus_name', section = 'analyse'):
 311     AnalyseConf = ConfigParser()
 312     AnalyseConf.read(DictPathOut['ira'])
 313     AnalyseConf.add_section(section)
 314     date = datetime.datetime.now().ctime()
 315     AnalyseConf.set(section, 'date', str(date))
 316     AnalyseConf.set(section, 'clusternb', clusternb)
 317     AnalyseConf.set(section, 'corpus_name', corpname)
 318
 319     fileout = open(DictPathOut['ira'], 'w')
 320     AnalyseConf.write(fileout)
 321     fileout.close()
 322
 323 def sortedby(list, direct, *indices):
 324
 325     """
 326         sortedby: sort a list of lists (e.g. a table) by one or more indices
 327                   (columns of the table) and return the sorted list
 328
 329         e.g.
 330          for list = [[2,3],[1,2],[3,1]]:
 331          sortedby(list,1) will return [[3, 1], [1, 2], [2, 3]],
 332          sortedby(list,0) will return [[1, 2], [2, 3], [3, 1]]
 333     """
 334
 335     nlist = map(lambda x, indices=indices:
 336                  map(lambda i, x=x: x[i], indices) + [x],
 337                  list)
 338     if direct == 1:
 339         nlist.sort()
 340     elif direct == 2:
 341         nlist.sort(reverse=True)
 342     return map(lambda l: l[-1], nlist)
 343
 344 def add_type(line, dictlem):
 345     if line[4] in dictlem:
 346         line.append(dictlem[line[4]])
 347     else :
 348         line.append('')
 349     return line
 350
 351 def treat_line_alceste(i, line) :
 352     if line[0] == '*' or line[0] == '*****' :
 353         return line + ['']
 354     if line[5] == 'NA':
 355         print 'NA', line[5]
 356         pass
 357     elif float(line[5].replace(',', '.')) < 0.0001:
 358         line[5] = '< 0,0001'
 359     elif float(line[5].replace(',', '.')) > 0.05:
 360         line[5] = 'NS (%s)' % str(float(line[5].replace(',', '.')))[0:7]
 361     else:
 362         line[5] = str(float(line[5].replace(',', '.')))[0:7]
 363     return [i, int(line[0]), int(line[1]), float(line[2]), float(line[3]), line[6], line[4], line[5]]
 364
 365 def ReadProfileAsDico(File, Alceste=False, encoding = sys.getdefaultencoding()):
 366     #print 'lecture des profils : ReadProfileAsDico'
 367     #if Alceste :
 368     #    print 'lecture du dictionnaire de type'
 369     #    dictlem = {}
 370     #    for line in parent.corpus.lem_type_list :
 371     #        dictlem[line[0]] = line[1]
 372     dictlem = {}
 373     print 'lecture des profiles'
 374     #encoding = sys.getdefaultencoding()
 375     FileReader = codecs.open(File, 'r', encoding)
 376     Filecontent = FileReader.readlines()
 377     FileReader.close()
 378     DictProfile = {}
 379     count = 0
 380     #rows = [row.replace('\n', '').replace("'", '').replace('\"', '').replace(',', '.').replace('\r','').split(';') for row in Filecontent]
 381     rows = [row.replace('\n', '').replace("'", '').replace('\"', '').replace('\r','').split(';') for row in Filecontent]
 382     rows.pop(0)
 383     ClusterNb = rows[0][2]
 384     rows.pop(0)
 385     clusters = [row[2] for row in rows if row[0] == u'**']
 386     valclusters = [row[1:4] for row in rows if row[0] == u'****']
 387     lp = [i for i, line in enumerate(rows) if line[0] == u'****']
 388     prof = [rows[lp[i] + 1:lp[i+1] - 1] for i in range(0, len(lp)-1)] + [rows[lp[-1] + 1:len(rows)]]
 389     if Alceste :
 390         prof = [[add_type(row, dictlem) for row in pr] for pr in prof]
 391         prof = [[treat_line_alceste(i,line) for i, line in enumerate(pr)] for pr in prof]
 392     else :
 393         prof = [[line + [''] for line in pr] for pr in prof]
 394         prof = [[treat_line_alceste(i,line) for i, line in enumerate(pr)] for pr in prof]
 395     for i, cluster in enumerate(clusters):
 396         DictProfile[cluster] = [valclusters[i]] + prof[i]
 397     return DictProfile
 398
 399 def GetTxtProfile(dictprofile, cluster_size) :
 400     proflist = []
 401     for classe in range(0, len(dictprofile)) :
 402         prof = dictprofile[str(classe + 1)]
 403         clinfo = cluster_size[classe]
 404         proflist.append('\n'.join([' '.join(['classe %i' % (classe + 1), '-', '%s uce sur %s - %s%%' % (clinfo[0], clinfo[1], clinfo[2])]), '\n'.join(['%5s|%5s|%6s|%6s|%8s|%8s|%20s\t%10s' % tuple([str(val) for val in line]) for line in prof if len(line)==8])]))
 405     return '\n\n'.join(proflist)
 406
 407 def formatExceptionInfo(maxTBlevel=5):
 408          cla, exc, trbk = sys.exc_info()
 409          try :
 410             excName = cla.__name__
 411          except :
 412             excName = 'None'
 413          try:
 414              excArgs = exc.args[0]
 415          except :
 416              excArgs = "<no args>"
 417          excTb = traceback.format_tb(trbk, maxTBlevel)
 418          return (excName, excArgs, excTb)
 419
 420
# function written by the students of the IUT
 422 def decoupercharact(chaine, longueur, longueurOptimale, separateurs = None) :
 423     """
 424         on part du dernier caractère, et on recule jusqu'au début de la chaîne.
 425         Si on trouve un '$', c'est fini.
 426         Sinon, on cherche le meilleur candidat. C'est-à-dire le rapport poids/distance le plus important.
 427     """
 428     separateurs = [[u'.', 60.0], [u'?', 60.0], [u'!', 60.0], [u'£$£', 60], [u':', 50.0], [u';', 40.0], [u',', 10.0], [u' ', 0.1]]
 429     trouve = False                 # si on a trouvé un bon séparateur
 430     iDecoupe = 0                # indice du caractere ou il faut decouper
 431
 432     # on découpe la chaine pour avoir au maximum 240 caractères
 433     longueur = min(longueur, len(chaine) - 1)
 434     chaineTravail = chaine[:longueur + 1]
 435     nbCar = longueur
 436     meilleur = ['', 0, 0]        # type, poids et position du meilleur separateur
 437
 438     # on vérifie si on ne trouve pas un '$'
 439     indice = chaineTravail.find(u'$')
 440     if indice > -1:
 441         trouve = True
 442         iDecoupe = indice
 443
 444     # si on ne trouve rien, on cherche le meilleur séparateur
 445     if not trouve:
 446         while nbCar >= 0:
 447             caractere = chaineTravail[nbCar]
 448             distance = abs(longueurOptimale - nbCar) + 1
 449             meilleureDistance = abs(longueurOptimale - meilleur[2]) + 1
 450
 451             # on vérifie si le caractére courant est une marque de ponctuation
 452             for s in separateurs:
 453                 if caractere == s[0]:
 454                     # si c'est une ponctuation
 455
 456                     if s[1] / distance > float(meilleur[1]) / meilleureDistance:
 457                         # print nbCar, s[0]
 458                         meilleur[0] = s[0]
 459                         meilleur[1] = s[1]
 460                         meilleur[2] = nbCar
 461                         trouve = True
 462                         iDecoupe = nbCar
 463
 464                     # et on termine la recherche
 465                     break
 466
 467             # on passe au caractère précédant
 468             nbCar = nbCar - 1
 469
 470     # si on a trouvé
 471     if trouve:
 472         fin = chaine[iDecoupe + 1:]
 473         retour = chaineTravail[:iDecoupe]
 474         return len(retour) > 0, retour.split(), fin
 475     # si on a rien trouvé
 476     return False, chaine.split(), ''
 477
 478
# Messages shown to the user by BugReport() : the key is the code carried by
# the exception, the value is the text displayed. BugReport() appends a line
# number to the message when the exception carries one after the code.
exceptions = {'paragrapheOT' : u"Un problème de formatage (présence d'un marqueur de paragraphe (-*) en dehors d'un texte) est survenu à la ligne ",
              'EmptyText' : u"Texte vide (probablement un problème de formatage du corpus). Le problème est apparu à la ligne ",
              'CorpusEncoding' : u"Problème d'encodage.",
              'TextBeforeTextMark' : u"Problème de formatage : du texte avant le premier marqueur de texte (****). Le problème est survenu à la ligne ",
}
 484
 485 def BugReport(parent, error = None):
 486     for ch in parent.GetChildren():
 487         if "<class 'wx._windows.ProgressDialog'>" == str(type(ch)):
 488             ch.Destroy()
 489     excName, exc, excTb = formatExceptionInfo()
 490     if excName == 'Exception' :
 491         print exc
 492         if len(exc.split()) == 2 :
 493             mss, linenb = exc.split()
 494             if mss in exceptions :
 495                 txt = exceptions[mss] + linenb
 496             else :
 497                 txt = exc
 498         else :
 499             if exc in exceptions :
 500                 txt = exceptions[exc]
 501             else :
 502                 txt = exc
 503         title = "Information"
 504     else :
 505         txt = u'            !== BUG ==!       \n'
 506         txt += u'*************************************\n'
 507         txt += '\n'.join(excTb).replace('    ', ' ')
 508         txt += excName + '\n'
 509         txt += exc
 510         title = "Bug"
 511
 512     dial = BugDialog(parent, **{'title' : title})
 513     if 'Rerror' in dir(parent) :
 514         txt += parent.Rerror
 515         parent.Rerror = ''
 516     log.info(txt)
 517     dial.text_ctrl_1.write(txt)
 518     dial.CenterOnParent()
 519     dial.ShowModal()
 520     dial.Destroy()
 521
 522 def PlaySound(parent):
 523     if parent.pref.getboolean('iramuteq', 'sound') :
 524         try:
 525             if "gtk2" in wx.PlatformInfo:
 526                 error = Popen(['aplay','-q',os.path.join(parent.AppliPath,'son_fin.wav')])
 527             else :
 528                 sound = wx.Sound(os.path.join(parent.AppliPath, 'son_fin.wav'))
 529                 sound.Play(wx.SOUND_SYNC)
 530         except :
 531             print 'pas de son'
 532
 533 def ReadDicoAsDico(dicopath):
 534     with codecs.open(dicopath, 'r', 'UTF8') as f:
 535         content = f.readlines()
 536     lines = [line.rstrip('\n\r').replace(u'\n', '').replace('"', '').split('\t') for line in content if line != u'']
 537     return dict([[line[0], line[1:]] for line in lines])
 538
 539 def ReadLexique(parent, lang = 'french', filein = None):
 540     if lang != 'other' :
 541         if filein is None :
 542             parent.lexique = ReadDicoAsDico(parent.DictPath.get(lang, 'french'))
 543         else :
 544             parent.lexique = ReadDicoAsDico(filein)
 545     else :
 546         parent.lexique = {}
 547
 548 def ReadList(filein, encoding = sys.getdefaultencoding(), sep = ';'):
 549     #file = open(filein)
 550     with codecs.open(filein, 'r', encoding) as f :
 551         content = f.read()
 552     content = [line.replace('\n', '').replace('\r','').replace('\"', '').replace(',', '.').split(sep) for line in content.splitlines()]
 553     #file = codecs.open(filein, 'r', encoding)
 554     #content = file.readlines()
 555     #file.close()
 556     first = content.pop(0)
 557     #first = first.replace('\n', '').replace('\r','').replace('\"', '').split(sep)
 558     dict = {}
 559     i = 0
 560     for line in content:
 561         #line = line.replace('\n', '').replace('\r','').replace('\"', '').replace(',', '.')
 562         #line = line.split(';')
 563         nline = [line[0]]
 564         for val in line[1:]:
 565             if val == u'NA' :
 566                 don = ''
 567             else:
 568                 try:
 569                     don = int(val)
 570                 except:
 571                     don = float('%.5f' % float(val))
 572             nline.append(don)
 573         dict[i] = nline
 574         i += 1
 575     return dict, first
 576
 577 def exec_RCMD(rpath, command) :
 578     log.info('R CMD INSTALL %s' % command)
 579     rpath = rpath.replace('\\','\\\\')
 580     error = call(["%s" % rpath, 'CMD', 'INSTALL', "%s" % command])
 581     return error
 582
 583 def exec_rcode(rpath, rcode, wait = True, graph = False):
 584     log.info("R Script : %s" % rcode)
 585     needX11 = False
 586     if sys.platform == 'darwin' :
 587         try :
 588             macversion = platform.mac_ver()[0].split('.')
 589             if int(macversion[1]) < 5 :
 590                 needX11 = True
 591             else :
 592                 needX11 = False
 593         except :
 594             needX11 = False
 595
 596     rpath = rpath.replace('\\','\\\\')
 597     env = os.environ.copy()
 598     if sys.platform == 'darwin' and 'LC_ALL' not in env:
 599         env['LC_ALL'] = 'en_US.UTF-8'
 600     if not graph :
 601         if wait :
 602             if sys.platform == 'win32':
 603                 error = call(["%s" % rpath, "--vanilla","--slave","-f", "%s" % rcode])
 604             else :
 605                 error = call([rpath, '--slave', "--vanilla", "-f %s" % rcode, "--encoding=UTF-8"], env = env)
 606             return error
 607         else :
 608             if sys.platform == 'win32':
 609                 pid = Popen(["%s" % rpath, '--vanilla','--slave','-f', "%s" % rcode])
 610             else :
 611                 pid = Popen([rpath, '--slave', "--vanilla", "-f %s" % rcode, "--encoding=UTF-8"], stderr = PIPE, env = env)
 612             return pid
 613     else :
 614         if wait :
 615             if sys.platform == 'win32':
 616                 error = call(["%s" % rpath, '--vanilla','--slave','-f', "%s" % rcode])
 617             elif sys.platform == 'darwin' and needX11:
 618                 os.environ['DISPLAY'] = ':0.0'
 619                 error = call([rpath, '--vanilla','--slave',"-f %s" % rcode, "--encoding=UTF-8"], env = env)
 620             else :
 621                 error = call([rpath, '--vanilla','--slave',"-f %s" % rcode, "--encoding=UTF-8"], env = env)
 622             return error
 623         else :
 624             if sys.platform == 'win32':
 625                 pid = Popen(["%s" % rpath, '--vanilla','--slave','-f', "%s" % rcode])
 626             elif sys.platform == 'darwin' and needX11:
 627                 os.environ['DISPLAY'] = ':0.0'
 628                 pid = Popen([rpath, '--vanilla','--slave',"-f %s" % rcode, "--encoding=UTF-8"], stderr = PIPE, env = env)
 629             else :
 630                 pid = Popen([rpath, '--vanilla','--slave',"-f %s" % rcode, "--encoding=UTF-8"], stderr = PIPE, env = env)
 631             return pid
 632
 633 def check_Rresult(parent, pid) :
 634     if isinstance(pid, Popen) :
 635         if pid.returncode != 0 :
 636             error = pid.communicate()
 637             error = [str(error[0]), error[1]]
 638             if error[1] is None :
 639                 error[1] = 'None'
 640             parent.Rerror = '\n'.join([str(pid.returncode), '\n'.join(error)])
 641             try :
 642                 raise Exception('\n'.join([u'Erreur R', '\n'.join(error[1:])]))
 643             except :
 644                 BugReport(parent)
 645             return False
 646         else :
 647             return True
 648     else :
 649         if pid != 0 :
 650             try :
 651                 raise Exception(u'Erreur R')
 652             except :
 653                 BugReport(parent)
 654             return False
 655         else :
 656             return True
 657
 658 def print_liste(filename,liste):
 659     with open(filename,'w') as f :
 660         for graph in liste :
 661             f.write(';'.join(graph)+'\n')
 662
 663 def read_list_file(filename, encoding = sys.getdefaultencoding()):
 664     with codecs.open(filename,'rU', encoding) as f :
 665         content=f.readlines()
 666         ncontent=[line.replace('\n','').split(';') for line in content if line.strip() != '']
 667     return ncontent
 668
 669
 670
 671
 672 def progressbar(self, maxi) :
 673     if 'parent' in dir(self) :
 674         parent = self.parent
 675     else :
 676         parent = self
 677     return wx.ProgressDialog("Traitements",
 678                              "Veuillez patienter...",
 679                              maximum=maxi,
 680                              parent=parent,
 681                              style=wx.PD_APP_MODAL | wx.PD_AUTO_HIDE | wx.PD_ELAPSED_TIME | wx.PD_CAN_ABORT
 682                              )
 683
 684
 685 def treat_var_mod(variables) :
 686     var_mod = {}
 687     variables = list(set(variables))
 688     varmod = [variable.split('_') for variable in variables]
 689     vars = list(set([var[0] for var in varmod if len(var) >=2]))
 690     for var in vars :
 691         mods = ['_'.join(v) for v in varmod if v[0] == var]
 692         var_mod[var] = mods
 693
 694 #     for variable in variables :
 695 #         if u'_' in variable :
 696 #             forme = variable.split(u'_')
 697 #             var = forme[0]
 698 #             mod = forme[1]
 699 #             if not var in var_mod :
 700 #                 var_mod[var] = [variable]
 701 #             else :
 702 #                 if not mod in var_mod[var] :
 703 #                     var_mod[var].append(variable)
 704     return var_mod
 705
 706 def doconcorde(corpus, uces, mots, uci = False) :
 707     if not uci :
 708         ucestxt1 = [row for row in corpus.getconcorde(uces)]
 709     else :
 710         ucestxt1 = [row for row in corpus.getuciconcorde(uces)]
 711     ucestxt1 = dict(ucestxt1)
 712     ucestxt = []
 713     ucis_txt = []
 714     listmot = [corpus.getlems()[lem].formes for lem in mots]
 715     listmot = [corpus.getforme(fid).forme for lem in listmot for fid in lem]
 716     mothtml = ['<font color=red><b>%s</b></font>' % mot for mot in listmot]
 717     dmots = dict(zip(listmot, mothtml))
 718     for uce in uces :
 719         ucetxt = ucestxt1[uce].split()
 720         ucetxt = ' '.join([dmots.get(mot, mot) for mot in ucetxt])
 721         if not uci :
 722             ucis_txt.append('<p><b>' + ' '.join(corpus.ucis[corpus.getucefromid(uce).uci].etoiles) + '</b></p>')
 723         else :
 724             ucis_txt.append('<p><b>' + ' '.join(corpus.ucis[uce].etoiles) + '</b></p>')
 725         ucestxt.append(ucetxt)
 726     return ucis_txt, ucestxt
 727
 728
 729 def getallstcarac(corpus, analyse) :
 730    pathout = PathOut(analyse['ira'])
 731    profils =  ReadProfileAsDico(pathout['PROFILE_OUT'], Alceste, self.encoding)
 732    print profils