-#!/bin/env python
# -*- coding: utf-8 -*-
#Author: Pierre Ratinaud
-#Copyright (c) 2008-2010 Pierre Ratinaud
+#Copyright (c) 2008-2020 Pierre Ratinaud
+#modification pour python 3 : Laurent Mérat, 6x7 - mai 2020
#License: GNU/GPL
+#------------------------------------
+# import des modules python
+#------------------------------------
import xml.dom.minidom
-import wx.lib.filebrowsebutton as filebrowse
import os
import codecs
import re
+
+#------------------------------------
+# import des modules wx
+#------------------------------------
import wx
+import wx.lib.filebrowsebutton as filebrowse
+
+#------------------------------------
+# import des fichiers du projet
+#------------------------------------
from parse_factiva_mail import ParseFactivaMail
from parse_factiva_txt import ParseFactivaPaste
+from parse_europress import ParseEuropress
from import_txm import TXM2IRA
+from functions import BugReport
+
def ParseDocument(filename) :
# Parse one XML file and return a list with one row per <article> element.
# Each row is a list of five strings, in this order: headline, source name,
# publication date, lead paragraph, tail paragraphs.
- print filename
# The file is read as UTF-8 text.
with codecs.open(filename, 'r', 'utf-8') as f :
content = f.read()
# Replace the <hlt> and </hlt> markers with spaces before the text is parsed.
content = content.replace('<hlt>', ' ').replace('</hlt>', ' ')
# The new revision passes the text to minidom as a str; the removed revision
# encoded it to UTF-8 bytes first.
- dom = xml.dom.minidom.parseString(content.encode("utf-8"))
+ dom = xml.dom.minidom.parseString(content)
result = []
articles = dom.getElementsByTagName("article")
for article in articles :
# NOTE(review): the part of the loop body that fills val_headline,
# val_sourceName, val_publicationDate and val_leadParagraph is not visible
# in this excerpt.
else :
val_tailParagraphs = []
# val_sourceName is used as is; the four other values are joined with spaces.
inter = [' '.join(val_headline), val_sourceName,' '.join(val_publicationDate), ' '.join(val_leadParagraph), ' '.join(val_tailParagraphs)]
# Collapse every run of spaces, double quotes, CR and LF into one space.
# After that substitution the chained replace() calls find nothing left to replace.
- inter = [re.sub(ur'[ "\n\r]+', ' ', val).replace('"',' ').replace('\n', ' ').replace('\r', ' ') for val in inter]
+ inter = [re.sub(r'[ "\n\r]+', ' ', val).replace('"',' ').replace('\n', ' ').replace('\r', ' ') for val in inter]
#inter = ['"' + val +'"' for val in inter]
result.append(inter)
return result
-
+
def getcorpus_from_xml(xmldir, corpus_out):
# Build a corpus text from the '.xml' files found in xmldir and write it to an
# output file; the visible path returns the string 'ok'.
files = os.listdir(xmldir)
# Keep only the entries whose extension is exactly '.xml' (case-sensitive),
# as paths joined to xmldir.
files = [os.path.join(xmldir,f) for f in files if os.path.splitext(f)[1] == '.xml']
# NOTE(review): the statements that build `rs` and open `fileout` are not
# visible in this excerpt.
#dates = [[date[0],date[1],date[2].split('T')[0]] for date in dates]
#txt = '\n'.join(['\n'.join([' '.join([u'****', '*%s' % row[1].replace(' ','_').replace('\'','_'), '*%s' % row[2].replace('-','_')]), row[3], row[4]]) for row in rs])
#with the date split into its components
# One entry per row of rs: a header line made of '****', '*s_' + row[1] with
# spaces and apostrophes removed, then '*annee_', '*mois_' and '*jour_' taken
# from row[2] split on '-' (the day part is cut at 'T'); row[3] and row[4]
# follow on their own lines.
- txt = '\n'.join(['\n'.join([' '.join([u'****', '*s_%s' % row[1].replace(' ','').replace('\'',''), '*annee_%s' % row[2].split('-')[0], '*mois_%s' % row[2].split('-')[1], '*jour_%s' % row[2].split('-')[2].split('T')[0]]), row[3], row[4]]) for row in rs])
+ txt = '\n'.join(['\n'.join([' '.join(['****', '*s_%s' % row[1].replace(' ','').replace('\'',''), '*annee_%s' % row[2].split('-')[0], '*mois_%s' % row[2].split('-')[1], '*jour_%s' % row[2].split('-')[2].split('T')[0]]), row[3], row[4]]) for row in rs])
# The text is written followed by two newlines, then the file is closed explicitly.
fileout.write(txt+'\n\n')
fileout.close()
return 'ok'
+
class PrefImport(wx.Dialog):
+
def __init__(self, parent, size=wx.DefaultSize, pos=wx.DefaultPosition, style=wx.DEFAULT_DIALOG_STYLE, methode = 'mail'):
    """Build the dialog that asks for an import source and an output file.

    Args:
        parent: parent window; also stored as ``self.parent``.
        size: dialog size, forwarded to ``wx.Dialog.Create``.
        pos: dialog position, forwarded to ``wx.Dialog.Create``.
        style: dialog style flags, forwarded to ``wx.Dialog.Create``.
        methode: import flavour. ``'xml'`` and ``'txm'`` ask for a directory
            of xml files, ``'euro'`` for a directory of html files,
            ``'dmi'`` for a single csv file, and any other value for a
            directory of txt files.
    """
    # Two-phase creation: the extended style is set on the not-yet-created
    # dialog, then Create() builds the window. pos, size and style are
    # forwarded so the constructor arguments are honoured instead of being
    # silently ignored.
    wx.Dialog.__init__(self)
    self.SetExtraStyle(wx.DIALOG_EX_CONTEXTHELP)
    self.Create(parent, -1, '', pos=pos, size=size, style=style)
    self.methode = methode
    self.parent = parent

    # Prompt shown above the source selector.
    if methode in ['xml', 'txm']:
        txt = _('Select a directory of xml files')
    elif methode == 'euro':
        txt = _('Select a directory of html files')
    elif methode == 'dmi':
        txt = _('Select a csv file')
    else:
        txt = _('Select a directory of txt files')
    self.txt1 = wx.StaticText(self, -1, txt)

    # Source selector: a single file for 'dmi', a directory otherwise.
    # Either way fbbCallback runs whenever its value changes.
    if methode != 'dmi':
        self.dbb = filebrowse.DirBrowseButton(self, -1, size=(450, -1), changeCallback = self.fbbCallback)
    else:
        self.dbb = filebrowse.FileBrowseButton(self, -1, size=(450, -1), fileMode = 2, changeCallback = self.fbbCallback)
    self.dbb.SetLabel("")

    # Output file selector.
    self.txt2 = wx.StaticText(self, -1, _('Output file'))
    self.fbb = filebrowse.FileBrowseButton(self, -1, size=(450, -1), fileMode = 2)
    self.fbb.SetLabel("")

    # Standard OK / Cancel button row.
    self.btnsizer = wx.StdDialogButtonSizer()
    btn_ok = wx.Button(self, wx.ID_OK)
    btn = wx.Button(self, wx.ID_CANCEL)
    self.btnsizer.AddButton(btn_ok)
    self.btnsizer.AddButton(btn)
    self.btnsizer.Realize()

    # One dialog-level binding routes both OK and Cancel through checkfile,
    # which tells them apart by event id; a second binding restricted to
    # btn_ok would only duplicate it.
    self.Bind(wx.EVT_BUTTON, self.checkfile)

    self.__do_layout()
    self.SetMinSize(self.GetSize())
-
+
def __do_layout(self):
# Arrange the dialog's widgets with box sizers, then size the dialog to fit.
sizer = wx.BoxSizer(wx.VERTICAL)
grid_sizer_1 = wx.BoxSizer(wx.HORIZONTAL)
# NOTE(review): the statements that add widgets to the sizers and attach the
# sizer to the dialog are not visible in this excerpt.
sizer.Fit(self)
self.Layout()
-
def fbbCallback(self, evt):
    """Suggest an output path when the source selection changes.

    Does nothing if the output field already holds a value. Otherwise the
    field is set to ``corpus.txt`` inside the selected directory, or, for
    the 'dmi' method, inside the directory that contains the selected file.
    ``evt`` is not used.
    """
    if self.fbb.GetValue() != "":
        return
    selection = self.dbb.GetValue()
    folder = os.path.dirname(selection) if self.methode == 'dmi' else selection
    self.fbb.SetValue(os.path.join(folder, 'corpus.txt'))
def checkfile(self, evt) :
    """Validate the dialog when one of its buttons is pressed.

    Any button other than OK closes the dialog with ``wx.ID_CANCEL``.
    On OK:

    * an empty source field shows a warning and keeps the dialog open;
    * with the 'dmi' method, a source file that does not exist triggers a
      Yes/No question, and answering No keeps the dialog open;
    * an output file that already exists triggers a Yes/No question, and
      answering No keeps the dialog open;
    * otherwise the dialog closes with ``wx.ID_OK``.

    ``EndModal`` is called at most once per invocation, and every message
    dialog is destroyed after use.
    """
    def ask(message, style) :
        # Show a modal message box centred on this dialog and return the
        # id of the button that closed it; the box is always destroyed.
        dlg = wx.MessageDialog(self, message, 'ATTENTION', style)
        dlg.CenterOnParent()
        try :
            return dlg.ShowModal()
        finally :
            dlg.Destroy()

    if evt.GetId() != wx.ID_OK :
        self.EndModal(wx.ID_CANCEL)
        return

    source = self.dbb.GetValue()
    if source == "" :
        ask("Vous devez choisir le répertoire contenant le ou les fichier(s) xml", wx.OK | wx.ICON_WARNING)
        return

    declined = [wx.ID_NO, wx.ID_CANCEL]

    # Only the 'dmi' method selects a single file, so only then is the
    # source checked for existence. Declining must stop here; it used to
    # fall through to the output-file check and could still accept.
    if self.methode == 'dmi' and not os.path.exists(source) :
        question = ' : '.join([source, _("this file doesn't exist")])
        if ask(question, wx.NO | wx.YES | wx.ICON_WARNING) in declined :
            return

    target = self.fbb.GetValue()
    if os.path.exists(target) :
        question = "%s\nCe fichier existe, continuer quand même ?" % target
        if ask(question, wx.NO | wx.YES | wx.ICON_WARNING) in declined :
            return

    self.EndModal(wx.ID_OK)
-
class ImportFactiva():
# Runs a whole import from its constructor: shows a PrefImport dialog, then
# converts the selected input into a corpus file with the parser that matches
# `methode`.
+
def __init__(self, parent, methode):
self.dial = PrefImport(parent, methode=methode)
self.dial.CenterOnParent()
# NOTE(review): the statement that assigns `val` is not visible in this
# excerpt; presumably it holds the dialog's modal result - confirm.
if val == wx.ID_OK :
# Input location and output path as entered in the dialog.
xmldir = self.dial.dbb.GetValue()
corp_out = self.dial.fbb.GetValue()
- if methode == 'xml' :
- res = getcorpus_from_xml(xmldir, corp_out)
- elif methode == 'mail' :
- res = ParseFactivaMail(xmldir, corp_out, 'utf8', parent.syscoding)
- elif methode == 'txt' :
- res = ParseFactivaPaste(xmldir, corp_out, 'utf8', parent.syscoding)
- elif methode == 'txm' :
- res = TXM2IRA(xmldir, corp_out, 'utf8', parent.syscoding)
- if res == 'nofile' :
- dlg = wx.MessageDialog(parent, u"Pas de fichier \'.xml\' dans %s" % xmldir, 'ATTENTION', wx.OK | wx.NO_DEFAULT | wx.ICON_WARNING)
- dlg.CenterOnParent()
- dlg.ShowModal()
- #else :
- # parent.filename = corp_out
- # parent.OpenText()
# The new revision destroys the dialog once both values have been read from it.
+ self.dial.Destroy()
# A busy notice is created before the conversion starts; presumably
# wx.SafeYield() is called so that it gets drawn before the blocking call.
+ busy = wx.BusyInfo(_("Please wait..."))
+ wx.SafeYield()
# NOTE(review): only 'xml', 'mail', 'txt', 'txm' and 'euro' assign `res`.
# Any other value (PrefImport also accepts 'dmi') reaches the test on `res`
# with the name unbound and ends in the except branch.
+ try :
+ if methode == 'xml' :
+ res = getcorpus_from_xml(xmldir, corp_out)
+ elif methode == 'mail' :
+ res = ParseFactivaMail(xmldir, corp_out, 'utf8', parent.syscoding)
+ elif methode == 'txt' :
+ res = ParseFactivaPaste(xmldir, corp_out, 'utf8', parent.syscoding)
+ elif methode == 'txm' :
+ res = TXM2IRA(xmldir, corp_out, 'utf8', parent.syscoding)
+ elif methode == 'euro' :
+ res = ParseEuropress(xmldir, corp_out, 'utf8', 'utf8')
# Assuming this statement sits after the if/elif chain (indentation is not
# visible here): the busy notice is released before any message box is shown.
+ del busy
# 'nofile' is reported with a warning box; any other result leads to the
# question whether the new corpus should be opened.
+ if res == 'nofile' :
+ dlg = wx.MessageDialog(parent, "Pas de fichiers dans %s" % xmldir, 'ATTENTION', wx.OK | wx.ICON_WARNING)
+ dlg.CenterOnParent()
+ dlg.ShowModal()
+ dlg.Destroy()
+ else :
+ msg = '\n'.join([_("Corpus created :"), corp_out, _("Do you want to open it in IRaMuTeQ ?")])
+ dlg = wx.MessageDialog(parent, msg, _('Information'), wx.YES_NO | wx.ICON_INFORMATION | wx.STAY_ON_TOP)
+ dlg.CenterOnParent()
+ val = dlg.ShowModal()
# On Yes the corpus path is stored on the parent as an absolute path and
# parent.OpenText() is called; the message box is destroyed in both cases.
+ if val == wx.ID_YES :
+ dlg.Destroy()
+ parent.filename = os.path.abspath(corp_out)
+ parent.OpenText()
+ else :
+ dlg.Destroy()
# NOTE(review): bare except. `busy` has already been deleted above on the
# success path, so an exception raised after that point (for example inside
# parent.OpenText()) makes this second `del busy` fail on an unbound name
# before BugReport(parent) can run. A try/finally around the conversion would
# avoid the double delete.
+ except :
+ del busy
+ BugReport(parent)
# Any other dialog result only destroys the dialog.
+ else :
+ self.dial.Destroy()