iramuteq.org Git - iramuteq/blob - functions.py

   1 #!/bin/env python
   2 # -*- coding: utf-8 -*-
   3 #Author: Pierre Ratinaud
   4 #Copyright (c) 2008-2012 Pierre Ratinaud
   5 #License: GNU/GPL
   6
   7 import wx
   8 import re
   9 from ConfigParser import ConfigParser
  10 from subprocess import Popen, call, PIPE
  11 import thread
  12 import os
  13 import ast
  14 import sys
  15 import csv
  16 import platform
  17 import traceback
  18 import codecs
  19 import locale
  20 import datetime
  21 from copy import copy
  22 from shutil import copyfile
  23 import shelve
  24 #from dialog import BugDialog
  25 import logging
  26
  27 log = logging.getLogger('iramuteq')
  28
  29
  30 indices_simi = [u'cooccurrence' ,'pourcentage de cooccurrence',u'Russel',u'Jaccard', 'Kulczynski1', 'Kulczynski2', 'Mountford', 'Fager', 'simple matching', 'Hamman', 'Faith', 'Tanimoto', 'Dice', 'Phi', 'Stiles', 'Michael', 'Mozley', 'Yule', 'Yule2', 'Ochiai', 'Simpson', 'Braun-Blanquet','Chi-squared', 'Phi-squared', 'Tschuprow', 'Cramer', 'Pearson', 'binomial']
  31
  32
  33 class TGen :
  34     def __init__(self, path = None, encoding = 'utf8'):
  35         self.path = path
  36         self.tgen = {}
  37         self.encoding = encoding
  38
  39     def __getitem__(self, key):
  40         return self.tgen[key]
  41
  42     def read(self, path):
  43         with codecs.open(path, 'r', self.encoding) as f :
  44             tgen = f.read()
  45         tgen = [line.split('\t') for line in tgen.splitlines()]
  46         tgen = dict([[line[0], line[1:]] for line in tgen])
  47         self.tgen = tgen
  48         self.path = path
  49
  50     def write(self, path = None):
  51         if path is None :
  52             path = self.path
  53         with open(path, 'w') as f :
  54             f.write('\n'.join(['\t'.join([val] + self.tgen[val]) for val in self.tgen]))
  55
  56     def writetable(self, pathout, tgens, totocc):
  57         etoiles = totocc.keys()
  58         with open(pathout, 'w') as f :
  59             line = '\t'.join([u'tgens'] + etoiles) + '\n'
  60             f.write(line.encode(self.encoding))
  61             for t in tgens :
  62                 line = '\t'.join([t] + [`tgens[t][et]` for et in etoiles]) + '\n'
  63                 f.write(line.encode(self.encoding))
  64             i = 0
  65             totname = 'total'
  66             while totname + `i` in tgens :
  67                 i += 1
  68             totname = totname + `i`
  69             line = '\t'.join([totname] + [`totocc[et]` for et in etoiles])
  70             f.write(line.encode(self.encoding))
  71
  72 class History :
  73     def __init__(self, filein, syscoding = 'utf8') :
  74         self.filein = filein
  75         self.syscoding = syscoding
  76         self.corpora = {}
  77         self.openedcorpus = {}
  78         self.openedmatrix = {}
  79         self.orph = []
  80         self.analyses = {}
  81         self.history = []
  82         self.opened = {}
  83         self.read()
  84
  85     def read(self) :
  86         d = shelve.open(self.filein)
  87         self.history = d.get('history', [])
  88         self.matrix = d.get('matrix', [])
  89         self.ordercorpus = dict([[corpus['uuid'], i] for i, corpus in enumerate(self.history)])
  90         self.corpus = dict([[corpus['uuid'], corpus] for corpus in self.history])
  91         self.analyses = dict([[analyse['uuid'], analyse] for corpus in self.history for analyse in corpus.get('analyses', [])])
  92         self.matrixanalyse = dict([[mat['uuid'], mat] for mat in self.matrix])
  93         self.ordermatrix = dict([[matrix['uuid'], i] for i, matrix in enumerate(self.matrix)])
  94         d.close()
  95
  96     def write(self) :
  97         d = shelve.open(self.filein)
  98         d['history'] = self.history
  99         d['matrix'] = self.matrix
 100         d.close()
 101
 102     def add(self, analyse) :
 103         log.info('add to history %s' % analyse.get('corpus_name', 'pas un corpus'))
 104         tosave = {'uuid' : analyse['uuid'], 'ira': analyse['ira'], 'type' : analyse['type']}
 105         if analyse.get('corpus', False) :
 106             if analyse['uuid'] in self.analyses :
 107                 return
 108             tosave['corpus'] = analyse['corpus']
 109             tosave['name'] = analyse['name']
 110             acorpus_uuid =  analyse['corpus']
 111             if acorpus_uuid in self.corpus :
 112                 if 'analyses' in self.history[self.ordercorpus[acorpus_uuid]] :
 113                     self.history[self.ordercorpus[acorpus_uuid]]['analyses'].append(tosave)
 114                 else :
 115                     self.history[self.ordercorpus[acorpus_uuid]]['analyses'] = [tosave]
 116             else :
 117                 self.orph.append(tosave)
 118         else :
 119             tosave['corpus_name'] = analyse['corpus_name']
 120             self.history.append(tosave)
 121         self.write()
 122         self.read()
 123
 124     def addMatrix(self, analyse) :
 125         tosave = analyse
 126         #tosave['matrix_name'] = analyse['matrix_name']
 127         tosave['analyses'] = []
 128         self.matrix.append(tosave)
 129         self.write()
 130         self.read()
 131
 132     def addMatrixAnalyse(self, analyse) :
 133         tosave = {'uuid' : analyse['uuid'], 'ira': analyse['ira'], 'type' : analyse['type'], 'matrix' : analyse['matrix']}
 134         tosave['name'] = analyse['name']
 135         if tosave['matrix'] in self.ordermatrix :
 136             self.matrix[self.ordermatrix[tosave['matrix']]]['analyses'].append(tosave)
 137         self.write()
 138         self.read()
 139
 140     def addmultiple(self, analyses) :
 141         log.info('add multiple')
 142         for analyse in analyses :
 143             tosave = {'uuid' : analyse['uuid'], 'ira': analyse['ira'], 'type' : analyse['type']}
 144             corpus = analyse['corpus']
 145             tosave['corpus'] = corpus
 146             tosave['name'] = analyse['name']
 147             if corpus in self.corpus :
 148                 if 'analyses' in self.history[self.ordercorpus[corpus]] :
 149                     self.history[self.ordercorpus[corpus]]['analyses'].append(tosave)
 150                 else :
 151                     self.history[self.ordercorpus[corpus]]['analyses'] = [tosave]
 152         self.write()
 153         self.read()
 154
 155     def delete(self, analyse, corpus = False) :
 156         log.info('delete %s' % analyse.get('name', 'noname'))
 157         if corpus :
 158             self.history.pop(self.ordercorpus[analyse['uuid']])
 159             if analyse['uuid'] in self.openedcorpus :
 160                 del self.openedcorpus[analyse['uuid']]
 161         elif analyse['uuid'] in self.analyses :
 162             todel = [i for i, ana in enumerate(self.corpus[analyse['corpus']]['analyses']) if ana['uuid'] == analyse['uuid']][0]
 163             self.history[self.ordercorpus[analyse['corpus']]]['analyses'].pop(todel)
 164         elif analyse['uuid'] in self.matrixanalyse :
 165             self.matrix = [mat for mat in self.matrix if mat['uuid'] != analyse['uuid']]
 166         self.write()
 167         self.read()
 168
 169     def addtab(self, analyse) :
 170         self.opened[analyse['uuid']] = analyse
 171
 172     def rmtab(self, analyse) :
 173         del self.opened[analyse['uuid']]
 174
 175     def clean(self) :
 176         corpustodel = [corpus for corpus in self.history if not os.path.exists(corpus['ira'])]
 177         print corpustodel
 178         for corpus in corpustodel :
 179             print 'cleaning :', corpus['corpus_name']
 180             self.delete(corpus, corpus = True)
 181         anatodel = [analyse for corpus in self.history for analyse in corpus.get('analyses', []) if not os.path.exists(analyse.get('ira', '/'))]
 182         for analyse in anatodel :
 183             print 'cleaning :', analyse['name']
 184             self.delete(analyse)
 185
 186     def __str__(self) :
 187         return str(self.history)
 188
 189 class DoConf :
 190     def __init__(self, configfile=None, diff = None, parametres = None) :
 191         self.configfile = configfile
 192         self.conf = ConfigParser()
 193         if configfile is not None :
 194             self.conf.readfp(codecs.open(configfile, 'r', 'utf8'))
 195         self.parametres = {}
 196         if parametres is not None :
 197             self.doparametres(parametres)
 198
 199     def doparametres(self, parametres) :
 200         return parametres
 201
 202     def getsections(self) :
 203         return self.conf.sections()
 204
 205     def getoptions(self, section = None, diff = None):
 206         parametres = {}
 207         if section is None :
 208             section = self.conf.sections()[0]
 209         for option in self.conf.options(section) :
 210             if self.conf.get(section, option).isdigit() :
 211                 parametres[option] = int(self.conf.get(section, option))
 212             elif self.conf.get(section, option) == 'False' :
 213                 parametres[option] = False
 214             elif self.conf.get(section, option) == 'True' :
 215                 parametres[option] = True
 216             elif self.conf.get(section, option).startswith('(') and self.conf.get(section, option).endswith(')') :
 217                 parametres[option] = ast.literal_eval(self.conf.get(section, option))
 218             elif self.conf.get(section, option).startswith('[') and self.conf.get(section, option).endswith(']') :
 219                 parametres[option] = ast.literal_eval(self.conf.get(section, option))
 220             else :
 221                 parametres[option] = self.conf.get(section, option)
 222         if 'type' not in parametres :
 223             parametres['type'] = section
 224         return parametres
 225
 226     def makeoptions(self, sections, parametres, outfile = None) :
 227         txt = ''
 228         for i, section in enumerate(sections) :
 229             txt += '[%s]\n' % section
 230             if not self.conf.has_section(section) :
 231                 self.conf.add_section(section)
 232             for option in parametres[i] :
 233                 if isinstance(parametres[i][option], int) :
 234                     self.conf.set(section, option, `parametres[i][option]`)
 235                     txt += '%s = %i\n' % (option, parametres[i][option])
 236                 elif isinstance(parametres[i][option], basestring) :
 237                     self.conf.set(section, option, parametres[i][option].encode('utf8'))
 238                     txt += '%s = %s\n' % (option, parametres[i][option])
 239                 elif isinstance(parametres[i][option], wx.Colour) :
 240                     self.conf.set(section, option, str(parametres[i][option]))
 241                     txt += '%s = %s\n' % (option, str(parametres[i][option]))
 242                 elif option == 'analyses' :
 243                     pass
 244                 else :
 245                     self.conf.set(section, option, `parametres[i][option]`)
 246                     txt += '%s = %s\n' % (option, `parametres[i][option]`)
 247         if outfile is None :
 248             outfile = self.configfile
 249         with codecs.open(outfile, 'w', 'utf8') as f :
 250             f.write(txt)
 251             #self.conf.write(f)
 252
 253     def totext(self, parametres) :
 254         #txt = ['Corpus']
 255         txt = []
 256         for val in parametres :
 257             if isinstance(parametres[val], int) :
 258                 txt.append(' \t\t: '.join([val, `parametres[val]`]))
 259             elif isinstance(parametres[val], basestring) :
 260                 txt.append(' \t\t: '.join([val, parametres[val]]))
 261             elif val in ['listet', 'stars'] :
 262                 pass
 263             else :
 264                 txt.append(' \t\t: '.join([val, `parametres[val]`]))
 265         return '\n'.join(txt)
 266
 267
 268 def write_tab(tab, fileout) :
 269         writer = csv.writer(open(fileout, 'wb'), delimiter=';', quoting = csv.QUOTE_NONNUMERIC)
 270         writer.writerows(tab)
 271
 272 class BugDialog(wx.Dialog):
 273     def __init__(self, *args, **kwds):
 274         # begin wxGlade: MyDialog.__init__
 275         kwds["style"] = wx.DEFAULT_DIALOG_STYLE
 276         kwds["size"] = wx.Size(500, 200)
 277         wx.Dialog.__init__(self, *args, **kwds)
 278         self.SetTitle(kwds['title'])
 279         self.text_ctrl_1 = wx.TextCtrl(self, -1, "", style=wx.TE_MULTILINE)
 280         self.text_ctrl_1.SetBackgroundColour('#DDE8EB')
 281         self.button_1 = wx.Button(self, wx.ID_OK, "")
 282
 283         self.__set_properties()
 284         self.__do_layout()
 285         # end wxGlade
 286
 287     def __set_properties(self):
 288         # begin wxGlade: MyDialog.__set_properties
 289         self.SetMinSize(wx.Size(500, 200))
 290         self.text_ctrl_1.SetMinSize(wx.Size(500, 200))
 291
 292         # end wxGlade
 293
 294     def __do_layout(self):
 295         # begin wxGlade: MyDialog.__do_layout
 296         sizer_1 = wx.BoxSizer(wx.VERTICAL)
 297         sizer_1.Add(self.text_ctrl_1, 1, wx.EXPAND, 0)
 298         sizer_1.Add(self.button_1, 0, wx.ALIGN_CENTER_HORIZONTAL, 0)
 299         self.SetSizer(sizer_1)
 300         sizer_1.Fit(self)
 301         self.Layout()
 302
 303
 304 def CreateIraFile(DictPathOut, clusternb, corpname='corpus_name', section = 'analyse'):
 305     AnalyseConf = ConfigParser()
 306     AnalyseConf.read(DictPathOut['ira'])
 307     AnalyseConf.add_section(section)
 308     date = datetime.datetime.now().ctime()
 309     AnalyseConf.set(section, 'date', str(date))
 310     AnalyseConf.set(section, 'clusternb', clusternb)
 311     AnalyseConf.set(section, 'corpus_name', corpname)
 312
 313     fileout = open(DictPathOut['ira'], 'w')
 314     AnalyseConf.write(fileout)
 315     fileout.close()
 316
 317 def sortedby(list, direct, *indices):
 318
 319     """
 320         sortedby: sort a list of lists (e.g. a table) by one or more indices
 321                   (columns of the table) and return the sorted list
 322
 323         e.g.
 324          for list = [[2,3],[1,2],[3,1]]:
 325          sortedby(list,1) will return [[3, 1], [1, 2], [2, 3]],
 326          sortedby(list,0) will return [[1, 2], [2, 3], [3, 1]]
 327     """
 328
 329     nlist = map(lambda x, indices=indices:
 330                  map(lambda i, x=x: x[i], indices) + [x],
 331                  list)
 332     if direct == 1:
 333         nlist.sort()
 334     elif direct == 2:
 335         nlist.sort(reverse=True)
 336     return map(lambda l: l[-1], nlist)
 337
 338 def add_type(line, dictlem):
 339     if line[4] in dictlem:
 340         line.append(dictlem[line[4]])
 341     else :
 342         line.append('')
 343     return line
 344
 345 def treat_line_alceste(i, line) :
 346     if line[0] == '*' or line[0] == '*****' :
 347         return line + ['']
 348     if line[5] == 'NA':
 349         print 'NA', line[5]
 350         pass
 351     elif float(line[5].replace(',', '.')) < 0.0001:
 352         line[5] = '< 0,0001'
 353     elif float(line[5].replace(',', '.')) > 0.05:
 354         line[5] = 'NS (%s)' % str(float(line[5].replace(',', '.')))[0:7]
 355     else:
 356         line[5] = str(float(line[5].replace(',', '.')))[0:7]
 357     return [i, int(line[0]), int(line[1]), float(line[2]), float(line[3]), line[6], line[4], line[5]]
 358
 359 def ReadProfileAsDico(File, Alceste=False, encoding = sys.getdefaultencoding()):
 360     #print 'lecture des profils : ReadProfileAsDico'
 361     #if Alceste :
 362     #    print 'lecture du dictionnaire de type'
 363     #    dictlem = {}
 364     #    for line in parent.corpus.lem_type_list :
 365     #        dictlem[line[0]] = line[1]
 366     dictlem = {}
 367     print 'lecture des profiles'
 368     #encoding = sys.getdefaultencoding()
 369     FileReader = codecs.open(File, 'r', encoding)
 370     Filecontent = FileReader.readlines()
 371     FileReader.close()
 372     DictProfile = {}
 373     count = 0
 374     #rows = [row.replace('\n', '').replace("'", '').replace('\"', '').replace(',', '.').replace('\r','').split(';') for row in Filecontent]
 375     rows = [row.replace('\n', '').replace("'", '').replace('\"', '').replace('\r','').split(';') for row in Filecontent]
 376     rows.pop(0)
 377     ClusterNb = rows[0][2]
 378     rows.pop(0)
 379     clusters = [row[2] for row in rows if row[0] == u'**']
 380     valclusters = [row[1:4] for row in rows if row[0] == u'****']
 381     lp = [i for i, line in enumerate(rows) if line[0] == u'****']
 382     prof = [rows[lp[i] + 1:lp[i+1] - 1] for i in range(0, len(lp)-1)] + [rows[lp[-1] + 1:len(rows)]]
 383     if Alceste :
 384         prof = [[add_type(row, dictlem) for row in pr] for pr in prof]
 385         prof = [[treat_line_alceste(i,line) for i, line in enumerate(pr)] for pr in prof]
 386     else :
 387         prof = [[line + [''] for line in pr] for pr in prof]
 388         prof = [[treat_line_alceste(i,line) for i, line in enumerate(pr)] for pr in prof]
 389     for i, cluster in enumerate(clusters):
 390         DictProfile[cluster] = [valclusters[i]] + prof[i]
 391     return DictProfile
 392
 393 def GetTxtProfile(dictprofile, cluster_size) :
 394     proflist = []
 395     for classe in range(0, len(dictprofile)) :
 396         prof = dictprofile[str(classe + 1)]
 397         clinfo = cluster_size[classe]
 398         proflist.append('\n'.join([' '.join(['classe %i' % (classe + 1), '-', '%s uce sur %s - %s%%' % (clinfo[0], clinfo[1], clinfo[2])]), '\n'.join(['%5s|%5s|%6s|%6s|%8s|%8s|%20s\t%10s' % tuple([str(val) for val in line]) for line in prof if len(line)==8])]))
 399     return '\n\n'.join(proflist)
 400
 401 def formatExceptionInfo(maxTBlevel=5):
 402          cla, exc, trbk = sys.exc_info()
 403          try :
 404             excName = cla.__name__
 405          except :
 406             excName = 'None'
 407          try:
 408              excArgs = exc.args[0]
 409          except :
 410              excArgs = "<no args>"
 411          excTb = traceback.format_tb(trbk, maxTBlevel)
 412          return (excName, excArgs, excTb)
 413
 414
 415 #fonction des etudiants de l'iut
 416 def decoupercharact(chaine, longueur, longueurOptimale, separateurs = None) :
 417     """
 418         on part du dernier caractère, et on recule jusqu'au début de la chaîne.
 419         Si on trouve un '$', c'est fini.
 420         Sinon, on cherche le meilleur candidat. C'est-à-dire le rapport poids/distance le plus important.
 421     """
 422     separateurs = [[u'.', 60.0], [u'?', 60.0], [u'!', 60.0], [u'£$£', 60], [u':', 50.0], [u';', 40.0], [u',', 10.0], [u' ', 0.1]]
 423     trouve = False                 # si on a trouvé un bon séparateur
 424     iDecoupe = 0                # indice du caractere ou il faut decouper
 425
 426     # on découpe la chaine pour avoir au maximum 240 caractères
 427     longueur = min(longueur, len(chaine) - 1)
 428     chaineTravail = chaine[:longueur + 1]
 429     nbCar = longueur
 430     meilleur = ['', 0, 0]        # type, poids et position du meilleur separateur
 431
 432     # on vérifie si on ne trouve pas un '$'
 433     indice = chaineTravail.find(u'$')
 434     if indice > -1:
 435         trouve = True
 436         iDecoupe = indice
 437
 438     # si on ne trouve rien, on cherche le meilleur séparateur
 439     if not trouve:
 440         while nbCar >= 0:
 441             caractere = chaineTravail[nbCar]
 442             distance = abs(longueurOptimale - nbCar) + 1
 443             meilleureDistance = abs(longueurOptimale - meilleur[2]) + 1
 444
 445             # on vérifie si le caractére courant est une marque de ponctuation
 446             for s in separateurs:
 447                 if caractere == s[0]:
 448                     # si c'est une ponctuation
 449
 450                     if s[1] / distance > float(meilleur[1]) / meilleureDistance:
 451                         # print nbCar, s[0]
 452                         meilleur[0] = s[0]
 453                         meilleur[1] = s[1]
 454                         meilleur[2] = nbCar
 455                         trouve = True
 456                         iDecoupe = nbCar
 457
 458                     # et on termine la recherche
 459                     break
 460
 461             # on passe au caractère précédant
 462             nbCar = nbCar - 1
 463
 464     # si on a trouvé
 465     if trouve:
 466         fin = chaine[iDecoupe + 1:]
 467         retour = chaineTravail[:iDecoupe]
 468         return len(retour) > 0, retour.split(), fin
 469     # si on a rien trouvé
 470     return False, chaine.split(), ''
 471
 472
 473 exceptions = {'paragrapheOT' : u"Un problème de formatage (présence d'un marqueur de paragraphe (-*) en dehors d'un texte) est survenu à la ligne ",
 474               'EmptyText' : u"Texte vide (probablement un problème de formatage du corpus). Le problème est apparu à la ligne ",
 475               'CorpusEncoding' : u"Problème d'encodage.",
 476               'TextBeforeTextMark' : u"Problème de formatage : du texte avant le premier marqueur de texte (****). Le problème est survenu à la ligne ",
 477 }
 478
 479 def BugReport(parent, error = None):
 480     for ch in parent.GetChildren():
 481         if "<class 'wx._windows.ProgressDialog'>" == str(type(ch)):
 482             ch.Destroy()
 483     excName, exc, excTb = formatExceptionInfo()
 484     if excName == 'Exception' :
 485         print exc
 486         if len(exc.split()) == 2 :
 487             mss, linenb = exc.split()
 488             if mss in exceptions :
 489                 txt = exceptions[mss] + linenb
 490             else :
 491                 txt = exc
 492         else :
 493             if exc in exceptions :
 494                 txt = exceptions[exc]
 495             else :
 496                 txt = exc
 497         title = "Information"
 498     else :
 499         txt = u'            !== BUG ==!       \n'
 500         txt += u'*************************************\n'
 501         txt += '\n'.join(excTb).replace('    ', ' ')
 502         txt += excName + '\n'
 503         txt += exc
 504         title = "Bug"
 505
 506     dial = BugDialog(parent, **{'title' : title})
 507     if 'Rerror' in dir(parent) :
 508         txt += parent.Rerror
 509         parent.Rerror = ''
 510     log.info(txt)
 511     dial.text_ctrl_1.write(txt)
 512     dial.CenterOnParent()
 513     dial.ShowModal()
 514     dial.Destroy()
 515
 516 def PlaySound(parent):
 517     if parent.pref.getboolean('iramuteq', 'sound') :
 518         try:
 519             if "gtk2" in wx.PlatformInfo:
 520                 error = Popen(['aplay','-q',os.path.join(parent.AppliPath,'son_fin.wav')])
 521             else :
 522                 sound = wx.Sound(os.path.join(parent.AppliPath, 'son_fin.wav'))
 523                 sound.Play(wx.SOUND_SYNC)
 524         except :
 525             print 'pas de son'
 526
 527 def ReadDicoAsDico(dicopath):
 528     with codecs.open(dicopath, 'r', 'UTF8') as f:
 529         content = f.readlines()
 530     lines = [line.rstrip('\n\r').replace(u'\n', '').replace('"', '').split('\t') for line in content if line != u'']
 531     return dict([[line[0], line[1:]] for line in lines])
 532
 533 def ReadLexique(parent, lang = 'french', filein = None):
 534     if lang != 'other' :
 535         if filein is None :
 536             parent.lexique = ReadDicoAsDico(parent.DictPath.get(lang, 'french'))
 537         else :
 538             parent.lexique = ReadDicoAsDico(filein)
 539     else :
 540         parent.lexique = {}
 541
 542 def ReadList(filein, encoding = sys.getdefaultencoding(), sep = ';'):
 543     #file = open(filein)
 544     with codecs.open(filein, 'r', encoding) as f :
 545         content = f.read()
 546     content = [line.replace('\n', '').replace('\r','').replace('\"', '').replace(',', '.').split(sep) for line in content.splitlines()]
 547     #file = codecs.open(filein, 'r', encoding)
 548     #content = file.readlines()
 549     #file.close()
 550     first = content.pop(0)
 551     #first = first.replace('\n', '').replace('\r','').replace('\"', '').split(sep)
 552     dict = {}
 553     i = 0
 554     for line in content:
 555         #line = line.replace('\n', '').replace('\r','').replace('\"', '').replace(',', '.')
 556         #line = line.split(';')
 557         nline = [line[0]]
 558         for val in line[1:]:
 559             if val == u'NA' :
 560                 don = ''
 561             else:
 562                 try:
 563                     don = int(val)
 564                 except:
 565                     don = float('%.5f' % float(val))
 566             nline.append(don)
 567         dict[i] = nline
 568         i += 1
 569     return dict, first
 570
 571 def exec_RCMD(rpath, command) :
 572     log.info('R CMD INSTALL %s' % command)
 573     rpath = rpath.replace('\\','\\\\')
 574     error = call(["%s" % rpath, 'CMD', 'INSTALL', "%s" % command])
 575     return error
 576
 577 def exec_rcode(rpath, rcode, wait = True, graph = False):
 578     log.info("R Script : %s" % rcode)
 579     needX11 = False
 580     if sys.platform == 'darwin' :
 581         try :
 582             macversion = platform.mac_ver()[0].split('.')
 583             if int(macversion[1]) < 5 :
 584                 needX11 = True
 585             else :
 586                 needX11 = False
 587         except :
 588             needX11 = False
 589
 590     rpath = rpath.replace('\\','\\\\')
 591     env = os.environ.copy()
 592     if sys.platform == 'darwin' and 'LC_ALL' not in env:
 593         env['LC_ALL'] = 'en_US.UTF-8'
 594     if not graph :
 595         if wait :
 596             if sys.platform == 'win32':
 597                 error = call(["%s" % rpath, "--vanilla","--slave","-f", "%s" % rcode])
 598             else :
 599                 error = call([rpath, '--slave', "--vanilla", "-f %s" % rcode, "--encoding=UTF-8"], env = env)
 600             return error
 601         else :
 602             if sys.platform == 'win32':
 603                 pid = Popen(["%s" % rpath, '--vanilla','--slave','-f', "%s" % rcode])
 604             else :
 605                 pid = Popen([rpath, '--slave', "--vanilla", "-f %s" % rcode, "--encoding=UTF-8"], stderr = PIPE, env = env)
 606             return pid
 607     else :
 608         if wait :
 609             if sys.platform == 'win32':
 610                 error = call(["%s" % rpath, '--vanilla','--slave','-f', "%s" % rcode])
 611             elif sys.platform == 'darwin' and needX11:
 612                 os.environ['DISPLAY'] = ':0.0'
 613                 error = call([rpath, '--vanilla','--slave',"-f %s" % rcode, "--encoding=UTF-8"], env = env)
 614             else :
 615                 error = call([rpath, '--vanilla','--slave',"-f %s" % rcode, "--encoding=UTF-8"], env = env)
 616             return error
 617         else :
 618             if sys.platform == 'win32':
 619                 pid = Popen(["%s" % rpath, '--vanilla','--slave','-f', "%s" % rcode])
 620             elif sys.platform == 'darwin' and needX11:
 621                 os.environ['DISPLAY'] = ':0.0'
 622                 pid = Popen([rpath, '--vanilla','--slave',"-f %s" % rcode, "--encoding=UTF-8"], stderr = PIPE, env = env)
 623             else :
 624                 pid = Popen([rpath, '--vanilla','--slave',"-f %s" % rcode, "--encoding=UTF-8"], stderr = PIPE, env = env)
 625             return pid
 626
 627 def check_Rresult(parent, pid) :
 628     if isinstance(pid, Popen) :
 629         if pid.returncode != 0 :
 630             error = pid.communicate()
 631             error = [str(error[0]), error[1]]
 632             if error[1] is None :
 633                 error[1] = 'None'
 634             parent.Rerror = '\n'.join([str(pid.returncode), '\n'.join(error)])
 635             try :
 636                 raise Exception('\n'.join([u'Erreur R', '\n'.join(error[1:])]))
 637             except :
 638                 BugReport(parent)
 639             return False
 640         else :
 641             return True
 642     else :
 643         if pid != 0 :
 644             try :
 645                 raise Exception(u'Erreur R')
 646             except :
 647                 BugReport(parent)
 648             return False
 649         else :
 650             return True
 651
 652 def print_liste(filename,liste):
 653     with open(filename,'w') as f :
 654         for graph in liste :
 655             f.write(';'.join(graph)+'\n')
 656
 657 def read_list_file(filename, encoding = sys.getdefaultencoding()):
 658     with codecs.open(filename,'rU', encoding) as f :
 659         content=f.readlines()
 660         ncontent=[line.replace('\n','').split(';') for line in content if line.strip() != '']
 661     return ncontent
 662
 663 class MessageImage(wx.Frame):
 664     def __init__(self, parent,title, size):
 665         wx.Frame.__init__ ( self, parent, id = wx.ID_ANY, title = title, pos = wx.DefaultPosition, size = size, style = wx.DEFAULT_FRAME_STYLE )
 666         self.SetSizeHintsSz( wx.DefaultSize, wx.DefaultSize )
 667         self.imageFile = False
 668         self.imagename = u"chi_classe.png"
 669         self.HtmlPage = wx.html.HtmlWindow(self, -1)
 670         self.HtmlPage.SetMinSize(size)
 671         if "gtk2" in wx.PlatformInfo:
 672             self.HtmlPage.SetStandardFonts()
 673         self.HtmlPage.SetFonts('Courier', 'Courier')
 674
 675         self.button_1 = wx.Button(self, wx.ID_CANCEL)
 676         self.button_2 = wx.Button(self, wx.ID_SAVE)
 677         self.Bind(wx.EVT_BUTTON, self.OnCloseMe, self.button_1)
 678         self.Bind(wx.EVT_BUTTON, self.OnSaveImage, self.button_2)
 679         self.do_layout()
 680
 681     def do_layout(self):
 682         self.sizer_1 = wx.BoxSizer(wx.VERTICAL)
 683         self.sizer_2 = wx.BoxSizer(wx.HORIZONTAL)
 684         self.sizer_1.Add(self.HtmlPage, 2, wx.EXPAND, 0)
 685
 686         self.m_sdbSizer1 = wx.StdDialogButtonSizer()
 687         self.m_sdbSizer1.AddButton(  self.button_2 )
 688         self.m_sdbSizer1.AddButton(  self.button_1 )
 689         self.m_sdbSizer1.Realize()
 690         self.sizer_1.Add(self.m_sdbSizer1, 0, wx.EXPAND, 5)
 691         self.SetSizer(self.sizer_1)
 692         self.Layout()
 693         self.sizer_1.Fit( self )
 694
 695     def addsaveimage(self, imageFile) :
 696         self.imageFile = imageFile
 697
 698     def OnCloseMe(self, event):
 699         self.Destroy()
 700
 701     def OnSaveImage(self, event) :
 702         dlg = wx.FileDialog(
 703             self, message="Enregistrer sous...", defaultDir=os.getcwd(),
 704             defaultFile= self.imagename, wildcard="png|*.png", style=wx.SAVE | wx.OVERWRITE_PROMPT
 705             )
 706         dlg.SetFilterIndex(2)
 707         dlg.CenterOnParent()
 708         if dlg.ShowModal() == wx.ID_OK:
 709             path = dlg.GetPath()
 710             copyfile(self.imageFile, path)
 711
 712
 713 def progressbar(self, maxi) :
 714     if 'parent' in dir(self) :
 715         parent = self.parent
 716     else :
 717         parent = self
 718     return wx.ProgressDialog("Traitements",
 719                              "Veuillez patienter...",
 720                              maximum=maxi,
 721                              parent=parent,
 722                              style=wx.PD_APP_MODAL | wx.PD_AUTO_HIDE | wx.PD_ELAPSED_TIME | wx.PD_CAN_ABORT
 723                              )
 724
 725
 726 def treat_var_mod(variables) :
 727     var_mod = {}
 728     variables = list(set(variables))
 729     varmod = [variable.split('_') for variable in variables]
 730     vars = list(set([var[0] for var in varmod if len(var) >=2]))
 731     for var in vars :
 732         mods = ['_'.join(v) for v in varmod if v[0] == var]
 733         var_mod[var] = mods
 734
 735 #     for variable in variables :
 736 #         if u'_' in variable :
 737 #             forme = variable.split(u'_')
 738 #             var = forme[0]
 739 #             mod = forme[1]
 740 #             if not var in var_mod :
 741 #                 var_mod[var] = [variable]
 742 #             else :
 743 #                 if not mod in var_mod[var] :
 744 #                     var_mod[var].append(variable)
 745     return var_mod
 746
 747 def doconcorde(corpus, uces, mots, uci = False) :
 748     if not uci :
 749         ucestxt1 = [row for row in corpus.getconcorde(uces)]
 750     else :
 751         ucestxt1 = [row for row in corpus.getuciconcorde(uces)]
 752     ucestxt1 = dict(ucestxt1)
 753     ucestxt = []
 754     ucis_txt = []
 755     listmot = [corpus.getlems()[lem].formes for lem in mots]
 756     listmot = [corpus.getforme(fid).forme for lem in listmot for fid in lem]
 757     mothtml = ['<font color=red><b>%s</b></font>' % mot for mot in listmot]
 758     dmots = dict(zip(listmot, mothtml))
 759     for uce in uces :
 760         ucetxt = ucestxt1[uce].split()
 761         ucetxt = ' '.join([dmots.get(mot, mot) for mot in ucetxt])
 762         if not uci :
 763             ucis_txt.append('<p><b>' + ' '.join(corpus.ucis[corpus.getucefromid(uce).uci].etoiles) + '</b></p>')
 764         else :
 765             ucis_txt.append('<p><b>' + ' '.join(corpus.ucis[uce].etoiles) + '</b></p>')
 766         ucestxt.append(ucetxt)
 767     return ucis_txt, ucestxt
 768
 769
 770 def getallstcarac(corpus, analyse) :
 771    pathout = PathOut(analyse['ira'])
 772    profils =  ReadProfileAsDico(pathout['PROFILE_OUT'], Alceste, self.encoding)
 773    print profils