iramuteq.org Git - iramuteq/blob - functions.py

   1 #!/bin/env python
   2 # -*- coding: utf-8 -*-
   3 #Author: Pierre Ratinaud
   4 #Copyright (c) 2008-2012 Pierre Ratinaud
   5 #License: GNU/GPL
   6
   7 import wx
   8 import re
   9 from ConfigParser import ConfigParser
  10 from subprocess import Popen, call, PIPE
  11 import thread
  12 import os
  13 import ast
  14 import sys
  15 import csv
  16 import platform
  17 import traceback
  18 import codecs
  19 import locale
  20 import datetime
  21 from copy import copy
  22 from shutil import copyfile
  23 import shelve
  24 #from dialog import BugDialog
  25 import logging
  26
  27 log = logging.getLogger('iramuteq')
  28
  29
  30 indices_simi = [u'cooccurrence' ,'pourcentage de cooccurrence',u'Russel',u'Jaccard', 'Kulczynski1', 'Kulczynski2', 'Mountford', 'Fager', 'simple matching', 'Hamman', 'Faith', 'Tanimoto', 'Dice', 'Phi', 'Stiles', 'Michael', 'Mozley', 'Yule', 'Yule2', 'Ochiai', 'Simpson', 'Braun-Blanquet','Chi-squared', 'Phi-squared', 'Tschuprow', 'Cramer', 'Pearson', 'binomial']
  31
  32
  33 def normpath_win32(path) :
  34     if not sys.platform == 'win32' :
  35         return path
  36     while '\\\\' in path :
  37         path = path.replace('\\\\', '\\')
  38     if sys.platform == 'win32' and path.startswith('\\') and not path.startswith('\\\\') :
  39         path = '\\' + path
  40     return path
  41
  42 class TGen :
  43     def __init__(self, path = None, encoding = 'utf8'):
  44         self.path = path
  45         self.tgen = {}
  46         self.encoding = encoding
  47
  48     def __getitem__(self, key):
  49         return self.tgen[key]
  50
  51     def read(self, path = None):
  52         if path is None :
  53             path = self.path
  54         with codecs.open(path, 'r', self.encoding) as f :
  55             tgen = f.read()
  56         tgen = [line.split('\t') for line in tgen.splitlines()]
  57         tgen = dict([[line[0], line[1:]] for line in tgen])
  58         self.tgen = tgen
  59         self.path = path
  60
  61     def write(self, path = None):
  62         if path is None :
  63             path = self.path
  64         with open(path, 'w') as f :
  65             f.write('\n'.join(['\t'.join([val] + self.tgen[val]) for val in self.tgen]).encode(self.encoding))
  66
  67     def writetable(self, pathout, tgens, totocc):
  68         etoiles = totocc.keys()
  69         etoiles.sort()
  70         with open(pathout, 'w') as f :
  71             line = '\t'.join([u'tgens'] + etoiles) + '\n'
  72             f.write(line.encode(self.encoding))
  73             for t in tgens :
  74                 line = '\t'.join([t] + [`tgens[t][et]` for et in etoiles]) + '\n'
  75                 f.write(line.encode(self.encoding))
  76             i = 0
  77             totname = 'total'
  78             while totname + `i` in tgens :
  79                 i += 1
  80             totname = totname + `i`
  81             line = '\t'.join([totname] + [`totocc[et]` for et in etoiles]) + '\n'
  82             f.write(line.encode(self.encoding))
  83
  84 class History :
  85     def __init__(self, filein, syscoding = 'utf8') :
  86         self.filein = filein
  87         self.syscoding = syscoding
  88         self.corpus = {}
  89         self.openedcorpus = {}
  90         self.openedmatrix = {}
  91         self.orph = []
  92         self.analyses = {}
  93         self.history = []
  94         self.opened = {}
  95         self.read()
  96
  97     def read(self) :
  98         d = shelve.open(self.filein)
  99         self.history = d.get('history', [])
 100         self.matrix = d.get('matrix', [])
 101         self.ordercorpus = dict([[corpus['uuid'], i] for i, corpus in enumerate(self.history)])
 102         self.corpus = dict([[corpus['uuid'], corpus] for corpus in self.history])
 103         self.analyses = dict([[analyse['uuid'], analyse] for corpus in self.history for analyse in corpus.get('analyses', [])])
 104         self.matrixanalyse = dict([[mat['uuid'], mat] for mat in self.matrix])
 105         self.ordermatrix = dict([[matrix['uuid'], i] for i, matrix in enumerate(self.matrix)])
 106         d.close()
 107
 108     def write(self) :
 109         d = shelve.open(self.filein)
 110         d['history'] = self.history
 111         d['matrix'] = self.matrix
 112         d.close()
 113
 114     def add(self, analyse) :
 115         log.info('add to history %s' % analyse.get('corpus_name', 'pas un corpus'))
 116         tosave = {'uuid' : analyse['uuid'], 'ira': analyse['ira'], 'type' : analyse['type']}
 117         if tosave['uuid'] in self.corpus :
 118             log.info('problem : this uuid is already in history : %s' % tosave['uuid'])
 119             return
 120         if analyse.get('corpus', False) :
 121             if analyse['uuid'] in self.analyses :
 122                 return
 123             tosave['corpus'] = analyse['corpus']
 124             tosave['name'] = analyse['name']
 125             acorpus_uuid =  analyse['corpus']
 126             if acorpus_uuid in self.corpus :
 127                 if 'analyses' in self.history[self.ordercorpus[acorpus_uuid]] :
 128                     self.history[self.ordercorpus[acorpus_uuid]]['analyses'].append(tosave)
 129                 else :
 130                     self.history[self.ordercorpus[acorpus_uuid]]['analyses'] = [tosave]
 131             else :
 132                 self.orph.append(tosave)
 133         else :
 134             tosave['corpus_name'] = analyse['corpus_name']
 135             #self.ordercorpus[tosave['uuid']] = len(history)
 136             #self.corpus[tosave['uuid']] = analyse
 137             self.history.append(tosave)
 138         self.write()
 139         self.read()
 140
 141     def addMatrix(self, analyse) :
 142         tosave = analyse
 143         #tosave['matrix_name'] = analyse['matrix_name']
 144         tosave['analyses'] = []
 145         self.matrix.append(tosave)
 146         self.write()
 147         self.read()
 148
 149     def addMatrixAnalyse(self, analyse) :
 150         tosave = {'uuid' : analyse['uuid'], 'ira': analyse['ira'], 'type' : analyse['type'], 'matrix' : analyse['matrix']}
 151         tosave['name'] = analyse['name']
 152         if tosave['matrix'] in self.ordermatrix :
 153             self.matrix[self.ordermatrix[tosave['matrix']]]['analyses'].append(tosave)
 154         self.write()
 155         self.read()
 156
 157     def addmultiple(self, analyses) :
 158         log.info('add multiple')
 159         for analyse in analyses :
 160             tosave = {'uuid' : analyse['uuid'], 'ira': analyse['ira'], 'type' : analyse['type']}
 161             corpus = analyse['corpus']
 162             tosave['corpus'] = corpus
 163             tosave['name'] = analyse['name']
 164             if corpus in self.corpus :
 165                 if 'analyses' in self.history[self.ordercorpus[corpus]] :
 166                     self.history[self.ordercorpus[corpus]]['analyses'].append(tosave)
 167                 else :
 168                     self.history[self.ordercorpus[corpus]]['analyses'] = [tosave]
 169         self.write()
 170         self.read()
 171
 172     def delete(self, analyse, corpus = False) :
 173         log.info('delete %s' % analyse.get('name', 'noname'))
 174         if corpus :
 175             self.history.pop(self.ordercorpus[analyse['uuid']])
 176             if analyse['uuid'] in self.openedcorpus :
 177                 del self.openedcorpus[analyse['uuid']]
 178             log.info('delete corpus : %s' % analyse['uuid'])
 179         elif analyse['uuid'] in self.analyses :
 180             todel = [i for i, ana in enumerate(self.corpus[analyse['corpus']]['analyses']) if ana['uuid'] == analyse['uuid']][0]
 181             self.history[self.ordercorpus[analyse['corpus']]]['analyses'].pop(todel)
 182         elif analyse['uuid'] in self.matrixanalyse :
 183             self.matrix = [mat for mat in self.matrix if mat['uuid'] != analyse['uuid']]
 184         self.write()
 185         self.read()
 186
 187     def addtab(self, analyse) :
 188         self.opened[analyse['uuid']] = analyse
 189
 190     def rmtab(self, analyse) :
 191         del self.opened[analyse['uuid']]
 192
 193     def clean(self) :
 194         corpustodel = [corpus for corpus in self.history if not os.path.exists(corpus['ira'])]
 195         print corpustodel
 196         for corpus in corpustodel :
 197             print 'cleaning :', corpus['corpus_name']
 198             self.delete(corpus, corpus = True)
 199         anatodel = [analyse for corpus in self.history for analyse in corpus.get('analyses', []) if not os.path.exists(analyse.get('ira', '/'))]
 200         for analyse in anatodel :
 201             print 'cleaning :', analyse['name']
 202             self.delete(analyse)
 203
 204     def __str__(self) :
 205         return str(self.history)
 206
 207 class DoConf :
 208     def __init__(self, configfile=None, diff = None, parametres = None) :
 209         self.configfile = configfile
 210         self.conf = ConfigParser()
 211
 212         if configfile is not None :
 213             configfile = normpath_win32(configfile)
 214             self.conf.readfp(codecs.open(configfile, 'r', 'utf8'))
 215         self.parametres = {}
 216         if parametres is not None :
 217             self.doparametres(parametres)
 218
 219     def doparametres(self, parametres) :
 220         return parametres
 221
 222     def getsections(self) :
 223         return self.conf.sections()
 224
 225     def getoptions(self, section = None, diff = None):
 226         parametres = {}
 227         if section is None :
 228             section = self.conf.sections()[0]
 229         for option in self.conf.options(section) :
 230             if self.conf.get(section, option).isdigit() :
 231                 parametres[option] = int(self.conf.get(section, option))
 232             elif self.conf.get(section, option) == 'False' :
 233                 parametres[option] = False
 234             elif self.conf.get(section, option) == 'True' :
 235                 parametres[option] = True
 236             elif self.conf.get(section, option).startswith('(') and self.conf.get(section, option).endswith(')') :
 237                 parametres[option] = ast.literal_eval(self.conf.get(section, option))
 238             elif self.conf.get(section, option).startswith('[') and self.conf.get(section, option).endswith(']') :
 239                 parametres[option] = ast.literal_eval(self.conf.get(section, option))
 240             else :
 241                 parametres[option] = self.conf.get(section, option)
 242         if 'type' not in parametres :
 243             parametres['type'] = section
 244         return parametres
 245
 246     def makeoptions(self, sections, parametres, outfile = None) :
 247         txt = ''
 248         for i, section in enumerate(sections) :
 249             txt += '[%s]\n' % section
 250             if not self.conf.has_section(section) :
 251                 self.conf.add_section(section)
 252             for option in parametres[i] :
 253                 if isinstance(parametres[i][option], int) :
 254                     self.conf.set(section, option, `parametres[i][option]`)
 255                     txt += '%s = %i\n' % (option, parametres[i][option])
 256                 elif isinstance(parametres[i][option], basestring) :
 257                     self.conf.set(section, option, parametres[i][option].encode('utf8'))
 258                     txt += '%s = %s\n' % (option, parametres[i][option])
 259                 elif isinstance(parametres[i][option], wx.Colour) :
 260                     self.conf.set(section, option, str(parametres[i][option]))
 261                     txt += '%s = %s\n' % (option, str(parametres[i][option]))
 262                 elif option == 'analyses' :
 263                     pass
 264                 else :
 265                     self.conf.set(section, option, `parametres[i][option]`)
 266                     txt += '%s = %s\n' % (option, `parametres[i][option]`)
 267         if outfile is None :
 268             outfile = self.configfile
 269         outfile = normpath_win32(outfile)
 270         with open(outfile, 'w') as f :
 271             f.write(txt.encode('utf8'))
 272             #self.conf.write(f)
 273
 274     def totext(self, parametres) :
 275         #txt = ['Corpus']
 276         txt = []
 277         for val in parametres :
 278             if isinstance(parametres[val], int) :
 279                 txt.append(' \t\t: '.join([val, `parametres[val]`]))
 280             elif isinstance(parametres[val], basestring) :
 281                 txt.append(' \t\t: '.join([val, parametres[val]]))
 282             elif val in ['listet', 'stars'] :
 283                 pass
 284             else :
 285                 txt.append(' \t\t: '.join([val, `parametres[val]`]))
 286         return '\n'.join(txt)
 287
 288
 289 def write_tab(tab, fileout) :
 290         writer = csv.writer(open(fileout, 'wb'), delimiter=';', quoting = csv.QUOTE_NONNUMERIC)
 291         writer.writerows(tab)
 292
 293 class BugDialog(wx.Dialog):
 294     def __init__(self, *args, **kwds):
 295         # begin wxGlade: MyDialog.__init__
 296         kwds["style"] = wx.DEFAULT_DIALOG_STYLE
 297         kwds["size"] = wx.Size(500, 200)
 298         wx.Dialog.__init__(self, *args, **kwds)
 299         self.SetTitle(kwds['title'])
 300         self.text_ctrl_1 = wx.TextCtrl(self, -1, "", style=wx.TE_MULTILINE)
 301         self.text_ctrl_1.SetBackgroundColour('#DDE8EB')
 302         self.button_1 = wx.Button(self, wx.ID_OK, "")
 303
 304         self.__set_properties()
 305         self.__do_layout()
 306         # end wxGlade
 307
 308     def __set_properties(self):
 309         # begin wxGlade: MyDialog.__set_properties
 310         self.SetMinSize(wx.Size(500, 200))
 311         self.text_ctrl_1.SetMinSize(wx.Size(500, 200))
 312
 313         # end wxGlade
 314
 315     def __do_layout(self):
 316         # begin wxGlade: MyDialog.__do_layout
 317         sizer_1 = wx.BoxSizer(wx.VERTICAL)
 318         sizer_1.Add(self.text_ctrl_1, 1, wx.EXPAND, 0)
 319         sizer_1.Add(self.button_1, 0, wx.ALIGN_CENTER_HORIZONTAL, 0)
 320         self.SetSizer(sizer_1)
 321         sizer_1.Fit(self)
 322         self.Layout()
 323
 324
 325 def CreateIraFile(DictPathOut, clusternb, corpname='corpus_name', section = 'analyse'):
 326     AnalyseConf = ConfigParser()
 327     AnalyseConf.read(DictPathOut['ira'])
 328     AnalyseConf.add_section(section)
 329     date = datetime.datetime.now().ctime()
 330     AnalyseConf.set(section, 'date', str(date))
 331     AnalyseConf.set(section, 'clusternb', clusternb)
 332     AnalyseConf.set(section, 'corpus_name', corpname)
 333
 334     fileout = open(DictPathOut['ira'], 'w')
 335     AnalyseConf.write(fileout)
 336     fileout.close()
 337
 338 def sortedby(list, direct, *indices):
 339
 340     """
 341         sortedby: sort a list of lists (e.g. a table) by one or more indices
 342                   (columns of the table) and return the sorted list
 343
 344         e.g.
 345          for list = [[2,3],[1,2],[3,1]]:
 346          sortedby(list,1) will return [[3, 1], [1, 2], [2, 3]],
 347          sortedby(list,0) will return [[1, 2], [2, 3], [3, 1]]
 348     """
 349
 350     nlist = map(lambda x, indices=indices:
 351                  map(lambda i, x=x: x[i], indices) + [x],
 352                  list)
 353     if direct == 1:
 354         nlist.sort()
 355     elif direct == 2:
 356         nlist.sort(reverse=True)
 357     return map(lambda l: l[-1], nlist)
 358
 359 def add_type(line, dictlem):
 360     if line[4] in dictlem:
 361         line.append(dictlem[line[4]])
 362     else :
 363         line.append('')
 364     return line
 365
 366 def treat_line_alceste(i, line) :
 367     if line[0] == '*' or line[0] == '*****' :
 368         return line + ['']
 369     if line[5] == 'NA':
 370         print 'NA', line[5]
 371         pass
 372     elif float(line[5].replace(',', '.')) < 0.0001:
 373         line[5] = '< 0,0001'
 374     elif float(line[5].replace(',', '.')) > 0.05:
 375         line[5] = 'NS (%s)' % str(float(line[5].replace(',', '.')))[0:7]
 376     else:
 377         line[5] = str(float(line[5].replace(',', '.')))[0:7]
 378     return [i, int(line[0]), int(line[1]), float(line[2]), float(line[3]), line[6], line[4], line[5]]
 379
 380 def ReadProfileAsDico(File, Alceste=False, encoding = sys.getdefaultencoding()):
 381     dictlem = {}
 382     print 'lecture des profiles'
 383     FileReader = codecs.open(File, 'r', encoding)
 384     Filecontent = FileReader.readlines()
 385     FileReader.close()
 386     DictProfile = {}
 387     count = 0
 388     #rows = [row.replace('\n', '').replace("'", '').replace('\"', '').replace(',', '.').replace('\r','').split(';') for row in Filecontent]
 389     rows = [row.replace('\n', '').replace("'", '').replace('\"', '').replace('\r','').split(';') for row in Filecontent]
 390     rows.pop(0)
 391     ClusterNb = rows[0][2]
 392     rows.pop(0)
 393     clusters = [row[2] for row in rows if row[0] == u'**']
 394     valclusters = [row[1:4] for row in rows if row[0] == u'****']
 395     lp = [i for i, line in enumerate(rows) if line[0] == u'****']
 396     prof = [rows[lp[i] + 1:lp[i+1] - 1] for i in range(0, len(lp)-1)] + [rows[lp[-1] + 1:len(rows)]]
 397     if Alceste :
 398         prof = [[add_type(row, dictlem) for row in pr] for pr in prof]
 399         prof = [[treat_line_alceste(i,line) for i, line in enumerate(pr)] for pr in prof]
 400     else :
 401         prof = [[line + [''] for line in pr] for pr in prof]
 402         prof = [[treat_line_alceste(i,line) for i, line in enumerate(pr)] for pr in prof]
 403     for i, cluster in enumerate(clusters):
 404         DictProfile[cluster] = [valclusters[i]] + prof[i]
 405     return DictProfile
 406
 407 def GetTxtProfile(dictprofile, cluster_size) :
 408     proflist = []
 409     for classe in range(0, len(dictprofile)) :
 410         prof = dictprofile[str(classe + 1)]
 411         clinfo = cluster_size[classe]
 412         proflist.append('\n'.join([' '.join(['classe %i' % (classe + 1), '-', '%s uce sur %s - %s%%' % (clinfo[0], clinfo[1], clinfo[2])]), '\n'.join(['%5s|%5s|%6s|%6s|%8s|%8s|%20s\t%10s' % tuple([str(val) for val in line]) for line in prof if len(line)==8])]))
 413     return '\n\n'.join(proflist)
 414
 415 def formatExceptionInfo(maxTBlevel=5):
 416     cla, exc, trbk = sys.exc_info()
 417     try :
 418         excName = cla.__name__
 419     except :
 420         excName = 'None'
 421     try:
 422         excArgs = exc.args[0]
 423     except :
 424         excArgs = "<no args>"
 425     excTb = traceback.format_tb(trbk, maxTBlevel)
 426     return (excName, excArgs, excTb)
 427
 428
 429 #fonction des etudiants de l'iut
 430 def decoupercharact(chaine, longueur, longueurOptimale, separateurs = None) :
 431     """
 432         on part du dernier caractère, et on recule jusqu'au début de la chaîne.
 433         Si on trouve un '$', c'est fini.
 434         Sinon, on cherche le meilleur candidat. C'est-à-dire le rapport poids/distance le plus important.
 435     """
 436     separateurs = [[u'.', 60.0], [u'?', 60.0], [u'!', 60.0], [u'£$£', 60], [u':', 50.0], [u';', 40.0], [u',', 10.0], [u' ', 0.1]]
 437     trouve = False                 # si on a trouvé un bon séparateur
 438     iDecoupe = 0                # indice du caractere ou il faut decouper
 439
 440     # on découpe la chaine pour avoir au maximum 240 caractères
 441     longueur = min(longueur, len(chaine) - 1)
 442     chaineTravail = chaine[:longueur + 1]
 443     nbCar = longueur
 444     meilleur = ['', 0, 0]        # type, poids et position du meilleur separateur
 445
 446     # on vérifie si on ne trouve pas un '$'
 447     indice = chaineTravail.find(u'$')
 448     if indice > -1:
 449         trouve = True
 450         iDecoupe = indice
 451
 452     # si on ne trouve rien, on cherche le meilleur séparateur
 453     if not trouve:
 454         while nbCar >= 0:
 455             caractere = chaineTravail[nbCar]
 456             distance = abs(longueurOptimale - nbCar) + 1
 457             meilleureDistance = abs(longueurOptimale - meilleur[2]) + 1
 458
 459             # on vérifie si le caractére courant est une marque de ponctuation
 460             for s in separateurs:
 461                 if caractere == s[0]:
 462                     # si c'est une ponctuation
 463
 464                     if s[1] / distance > float(meilleur[1]) / meilleureDistance:
 465                         # print nbCar, s[0]
 466                         meilleur[0] = s[0]
 467                         meilleur[1] = s[1]
 468                         meilleur[2] = nbCar
 469                         trouve = True
 470                         iDecoupe = nbCar
 471
 472                     # et on termine la recherche
 473                     break
 474
 475             # on passe au caractère précédant
 476             nbCar = nbCar - 1
 477
 478     # si on a trouvé
 479     if trouve:
 480         fin = chaine[iDecoupe + 1:]
 481         retour = chaineTravail[:iDecoupe]
 482         return len(retour) > 0, retour.split(), fin
 483     # si on a rien trouvé
 484     return False, chaine.split(), ''
 485
 486
# User-facing messages keyed by short error code.  BugReport looks up the
# message of a raised Exception here: either the whole message, or -- for
# two-word messages -- the first word, the second word being appended to
# the text found.
exceptions = {'paragrapheOT' : u"Un problème de formatage (présence d'un marqueur de paragraphe (-*) en dehors d'un texte) est survenu à la ligne ",
              'EmptyText' : u"Texte vide (probablement un problème de formatage du corpus). Le problème est apparu à la ligne ",
              'CorpusEncoding' : u"Problème d'encodage.",
              'TextBeforeTextMark' : u"Problème de formatage : du texte avant le premier marqueur de texte (****). Le problème est survenu à la ligne ",
              'MissingAnalyse' : u'Aucun fichier à cet emplacement :\n',
}
 493
 494 def BugReport(parent, error = None):
 495     for ch in parent.GetChildren():
 496         if "<class 'wx._windows.ProgressDialog'>" == str(type(ch)):
 497             ch.Destroy()
 498     excName, exc, excTb = formatExceptionInfo()
 499     if excName == 'Exception' :
 500         print exc
 501         if len(exc.split()) == 2 :
 502             mss, linenb = exc.split()
 503             if mss in exceptions :
 504                 txt = exceptions[mss] + linenb
 505             else :
 506                 txt = exc
 507         else :
 508             if exc in exceptions :
 509                 txt = exceptions[exc]
 510             else :
 511                 txt = exc
 512         title = "Information"
 513     else :
 514         txt = u'            !== BUG ==!       \n'
 515         txt += u'*************************************\n'
 516         txt += '\n'.join(excTb).replace('    ', ' ')
 517         txt += excName + '\n'
 518         txt += `exc`
 519         title = "Bug"
 520
 521     dial = BugDialog(parent, **{'title' : title})
 522     if 'Rerror' in dir(parent) :
 523         txt += parent.Rerror
 524         parent.Rerror = ''
 525     log.info(txt)
 526     dial.text_ctrl_1.write(txt)
 527     dial.CenterOnParent()
 528     dial.ShowModal()
 529     dial.Destroy()
 530
 531 def PlaySound(parent):
 532     if parent.pref.getboolean('iramuteq', 'sound') :
 533         try:
 534             if "gtk2" in wx.PlatformInfo:
 535                 error = Popen(['aplay','-q',os.path.join(parent.AppliPath,'son_fin.wav')])
 536             else :
 537                 sound = wx.Sound(os.path.join(parent.AppliPath, 'son_fin.wav'))
 538                 sound.Play(wx.SOUND_SYNC)
 539         except :
 540             print 'pas de son'
 541
 542 def ReadDicoAsDico(dicopath):
 543     with codecs.open(dicopath, 'r', 'UTF8') as f:
 544         content = f.readlines()
 545     lines = [line.rstrip('\n\r').replace(u'\n', '').replace('"', '').split('\t') for line in content if line != u'']
 546     return dict([[line[0], line[1:]] for line in lines])
 547
 548 def ReadLexique(parent, lang = 'french', filein = None):
 549     if lang != 'other' :
 550         if filein is None :
 551             parent.lexique = ReadDicoAsDico(parent.DictPath.get(lang, 'french'))
 552         else :
 553             parent.lexique = ReadDicoAsDico(filein)
 554     else :
 555         if filein is None :
 556             parent.lexique = {}
 557         else :
 558             parent.lexique = ReadDicoAsDico(filein)
 559
 560 def ReadList(filein, encoding = sys.getdefaultencoding(), sep = ';'):
 561     #file = open(filein)
 562     with codecs.open(filein, 'r', encoding) as f :
 563         content = f.read()
 564     content = [line.replace('\n', '').replace('\r','').replace('\"', '').replace(',', '.').split(sep) for line in content.splitlines()]
 565     #file = codecs.open(filein, 'r', encoding)
 566     #content = file.readlines()
 567     #file.close()
 568     first = content.pop(0)
 569     #first = first.replace('\n', '').replace('\r','').replace('\"', '').split(sep)
 570     dict = {}
 571     i = 0
 572     for line in content:
 573         #line = line.replace('\n', '').replace('\r','').replace('\"', '').replace(',', '.')
 574         #line = line.split(';')
 575         nline = [line[0]]
 576         for val in line[1:]:
 577             if val == u'NA' :
 578                 don = ''
 579             else:
 580                 try:
 581                     don = int(val)
 582                 except:
 583                     don = float('%.5f' % float(val))
 584             nline.append(don)
 585         dict[i] = nline
 586         i += 1
 587     return dict, first
 588
 589 def exec_RCMD(rpath, command) :
 590     log.info('R CMD INSTALL %s' % command)
 591     rpath = rpath.replace('\\','\\\\')
 592     error = call(["%s" % rpath, 'CMD', 'INSTALL', "%s" % command])
 593     return error
 594
 595 def exec_rcode(rpath, rcode, wait = True, graph = False):
 596     log.info("R Script : %s" % rcode)
 597     needX11 = False
 598     if sys.platform == 'darwin' :
 599         try :
 600             macversion = platform.mac_ver()[0].split('.')
 601             if int(macversion[1]) < 5 :
 602                 needX11 = True
 603             else :
 604                 needX11 = False
 605         except :
 606             needX11 = False
 607
 608     rpath = rpath.replace('\\','\\\\')
 609     env = os.environ.copy()
 610     if sys.platform == 'darwin' and 'LC_ALL' not in env:
 611         env['LC_ALL'] = 'en_US.UTF-8'
 612     if not graph :
 613         if wait :
 614             if sys.platform == 'win32':
 615                 error = call(["%s" % rpath, "--vanilla","--slave","-f", "%s" % rcode])
 616             else :
 617                 error = call([rpath, '--slave', "--vanilla", "-f %s" % rcode, "--encoding=UTF-8"], env = env)
 618             return error
 619         else :
 620             if sys.platform == 'win32':
 621                 pid = Popen(["%s" % rpath, '--vanilla','--slave','-f', "%s" % rcode])
 622             else :
 623                 pid = Popen([rpath, '--slave', "--vanilla", "-f %s" % rcode, "--encoding=UTF-8"], stderr = PIPE, env = env)
 624             return pid
 625     else :
 626         if wait :
 627             if sys.platform == 'win32':
 628                 error = call(["%s" % rpath, '--vanilla','--slave','-f', "%s" % rcode])
 629             elif sys.platform == 'darwin' and needX11:
 630                 os.environ['DISPLAY'] = ':0.0'
 631                 error = call([rpath, '--vanilla','--slave',"-f %s" % rcode, "--encoding=UTF-8"], env = env)
 632             else :
 633                 error = call([rpath, '--vanilla','--slave',"-f %s" % rcode, "--encoding=UTF-8"], env = env)
 634             return error
 635         else :
 636             if sys.platform == 'win32':
 637                 pid = Popen(["%s" % rpath, '--vanilla','--slave','-f', "%s" % rcode])
 638             elif sys.platform == 'darwin' and needX11:
 639                 os.environ['DISPLAY'] = ':0.0'
 640                 pid = Popen([rpath, '--vanilla','--slave',"-f %s" % rcode, "--encoding=UTF-8"], stderr = PIPE, env = env)
 641             else :
 642                 pid = Popen([rpath, '--vanilla','--slave',"-f %s" % rcode, "--encoding=UTF-8"], stderr = PIPE, env = env)
 643             return pid
 644
 645 def check_Rresult(parent, pid) :
 646     if isinstance(pid, Popen) :
 647         if pid.returncode != 0 :
 648             error = pid.communicate()
 649             error = [str(error[0]), error[1]]
 650             if error[1] is None :
 651                 error[1] = 'None'
 652             parent.Rerror = '\n'.join([str(pid.returncode), '\n'.join(error)])
 653             try :
 654                 raise Exception('\n'.join([u'Erreur R', '\n'.join(error[1:])]))
 655             except :
 656                 BugReport(parent)
 657             return False
 658         else :
 659             return True
 660     else :
 661         if pid != 0 :
 662             try :
 663                 raise Exception(u'Erreur R')
 664             except :
 665                 BugReport(parent)
 666             return False
 667         else :
 668             return True
 669
 670 def print_liste(filename,liste):
 671     with open(filename,'w') as f :
 672         for graph in liste :
 673             f.write(';'.join(graph)+'\n')
 674
 675 def read_list_file(filename, encoding = sys.getdefaultencoding()):
 676     with codecs.open(filename,'rU', encoding) as f :
 677         content=f.readlines()
 678         ncontent=[line.replace('\n','').split(';') for line in content if line.strip() != '']
 679     return ncontent
 680
 681
 682
 683
 684 def progressbar(self, maxi) :
 685     if 'parent' in dir(self) :
 686         parent = self.parent
 687     else :
 688         parent = self
 689     try :
 690         maxi = int(maxi)
 691     except :
 692         maxi = 1
 693     return wx.ProgressDialog("Traitements",
 694                              "Veuillez patienter...",
 695                              maximum=maxi,
 696                              parent=parent,
 697                              style=wx.PD_APP_MODAL | wx.PD_AUTO_HIDE | wx.PD_ELAPSED_TIME | wx.PD_CAN_ABORT
 698                              )
 699
 700
 701 def treat_var_mod(variables) :
 702     var_mod = {}
 703     variables = list(set(variables))
 704     varmod = [variable.split('_') for variable in variables]
 705     vars = list(set([var[0] for var in varmod if len(var) >=2]))
 706     for var in vars :
 707         mods = ['_'.join(v) for v in varmod if v[0] == var]
 708         var_mod[var] = mods
 709
 710 #     for variable in variables :
 711 #         if u'_' in variable :
 712 #             forme = variable.split(u'_')
 713 #             var = forme[0]
 714 #             mod = forme[1]
 715 #             if not var in var_mod :
 716 #                 var_mod[var] = [variable]
 717 #             else :
 718 #                 if not mod in var_mod[var] :
 719 #                     var_mod[var].append(variable)
 720     return var_mod
 721
 722 def doconcorde(corpus, uces, mots, uci = False) :
 723     if not uci :
 724         ucestxt1 = [row for row in corpus.getconcorde(uces)]
 725     else :
 726         ucestxt1 = [row for row in corpus.getuciconcorde(uces)]
 727     ucestxt1 = dict(ucestxt1)
 728     ucestxt = []
 729     ucis_txt = []
 730     listmot = [corpus.getlems()[lem].formes for lem in mots]
 731     listmot = [corpus.getforme(fid).forme for lem in listmot for fid in lem]
 732     mothtml = ['<font color=red><b>%s</b></font>' % mot for mot in listmot]
 733     dmots = dict(zip(listmot, mothtml))
 734     for uce in uces :
 735         ucetxt = ucestxt1[uce].split()
 736         ucetxt = ' '.join([dmots.get(mot, mot) for mot in ucetxt])
 737         if not uci :
 738             ucis_txt.append('<p><b>' + ' '.join(corpus.ucis[corpus.getucefromid(uce).uci].etoiles) + '</b></p>')
 739         else :
 740             ucis_txt.append('<p><b>' + ' '.join(corpus.ucis[uce].etoiles) + '</b></p>')
 741         ucestxt.append(ucetxt)
 742     return ucis_txt, ucestxt
 743
 744
 745 def getallstcarac(corpus, analyse) :
 746    pathout = PathOut(analyse['ira'])
 747    profils =  ReadProfileAsDico(pathout['PROFILE_OUT'], Alceste, self.encoding)
 748    print profils