From 40ae5805c4286656d9c6c17e3231d0ea24cdcaa7 Mon Sep 17 00:00:00 2001 From: Pierre Date: Fri, 11 Jan 2013 09:32:07 +0100 Subject: [PATCH] tools --- configuration/corpus.cfg | 1 + corpusNG.py | 12 ------ dialog.py | 104 ++++++++++++++++++++++++++++++++++++++++++++++- iracmd.py | 10 +++-- iramuteq.py | 29 ++++++++++--- textstat.py | 4 +- 6 files changed, 137 insertions(+), 23 deletions(-) diff --git a/configuration/corpus.cfg b/configuration/corpus.cfg index eabc42d..190e613 100644 --- a/configuration/corpus.cfg +++ b/configuration/corpus.cfg @@ -23,3 +23,4 @@ apos = 1 tiret = 1 firstclean = 1 charact = 1 +lem = 1 diff --git a/corpusNG.py b/corpusNG.py index fa13a8b..e043707 100644 --- a/corpusNG.py +++ b/corpusNG.py @@ -196,18 +196,6 @@ class Corpus : res = self.getalluces() return [len(uce[1].split()) for uce in res] -# def getlemseff(self) : -# if self.idformes is None : -# self.make_idformes() -# return dict([[lem, sum([self.idformes[forme].freq for forme in self.lems[lem]])] for lem in self.lems]) - -# def getlemsefftype(self) : -# if self.idformes is None : -# self.make_idformes() -# if self.lems is None : -# self.make_lems() -# return dict([[lem, [sum([self.idformes[forme].freq for forme in self.lems[lem]]), '', self.idformes[self.lems[lem].keys()[0]].gram]] for lem in self.lems]) - def getconcorde(self, uces) : return self.cuces.execute('select * from uces where id IN (%s);' % ', '.join([`i` for i in uces])) diff --git a/dialog.py b/dialog.py index 60eb10d..3a6917a 100755 --- a/dialog.py +++ b/dialog.py @@ -2457,7 +2457,7 @@ class CorpusPref ( wx.Dialog ): class message(wx.Dialog): def __init__(self, parent, title, size, save = True): wx.Dialog.__init__ ( self, parent, id = wx.ID_ANY, title = title, pos = wx.DefaultPosition, size = size, style = wx.DEFAULT_DIALOG_STYLE ) - self.save = save + self.save = save self.SetSizeHintsSz( wx.DefaultSize, wx.DefaultSize ) self.html = "" @@ -2506,3 +2506,105 @@ class message(wx.Dialog): def OnCloseWindow(self, event): self.Destroy() + +class ExtractDialog ( wx.Dialog ): + + def __init__( self, parent, option ): + wx.Dialog.__init__ ( self, parent, id = wx.ID_ANY, title = wx.EmptyString, pos = wx.DefaultPosition, size = wx.DefaultSize, style = wx.DEFAULT_DIALOG_STYLE ) + + self.option = option + + self.SetSizeHintsSz( wx.DefaultSize, wx.DefaultSize ) + + fgSizer1 = wx.FlexGridSizer( 0, 2, 0, 0 ) + fgSizer1.SetFlexibleDirection( wx.BOTH ) + fgSizer1.SetNonFlexibleGrowMode( wx.FLEX_GROWMODE_SPECIFIED ) + + self.m_staticText1 = wx.StaticText( self, wx.ID_ANY, u"Corpus", wx.DefaultPosition, wx.DefaultSize, 0 ) + self.m_staticText1.Wrap( -1 ) + fgSizer1.Add( self.m_staticText1, 0, wx.ALIGN_CENTER_VERTICAL|wx.ALIGN_LEFT|wx.ALL, 5 ) + + self.corpusfile = wx.FilePickerCtrl( self, wx.ID_ANY, wx.EmptyString, u"Select a file", u"*.txt", wx.DefaultPosition, wx.Size( -1,-1 ), wx.FLP_DEFAULT_STYLE|wx.FLP_FILE_MUST_EXIST|wx.FLP_OPEN ) + self.corpusfile.SetMinSize( wx.Size( 500,-1 ) ) + + fgSizer1.Add( self.corpusfile, 0, wx.ALIGN_CENTER_VERTICAL|wx.ALL|wx.EXPAND, 5 ) + + self.m_staticText2 = wx.StaticText( self, wx.ID_ANY, u"Encodage", wx.DefaultPosition, wx.DefaultSize, 0 ) + self.m_staticText2.Wrap( -1 ) + fgSizer1.Add( self.m_staticText2, 0, wx.ALIGN_CENTER_VERTICAL|wx.ALL, 5 ) + + encodageChoices = [' - '.join(encodage) for encodage in encodages] + self.encodage = wx.Choice( self, wx.ID_ANY, wx.DefaultPosition, wx.DefaultSize, encodageChoices, 0 ) + self.encodage.SetSelection( 0 ) + self.encodage.SetMinSize( wx.Size( 200,-1 ) ) + + fgSizer1.Add( self.encodage, 0, wx.ALIGN_CENTER_VERTICAL|wx.ALL|wx.EXPAND, 5 ) + + if option == 'splitvar' : + self.m_staticText3 = wx.StaticText( self, wx.ID_ANY, u"Variable (avec * mais sans le _ )", wx.DefaultPosition, wx.DefaultSize, 0 ) + self.m_staticText3.Wrap( -1 ) + fgSizer1.Add( self.m_staticText3, 0, wx.ALIGN_CENTER_VERTICAL|wx.ALL, 5 ) + + self.txtvar = wx.TextCtrl( self, wx.ID_ANY, wx.EmptyString, wx.DefaultPosition, wx.DefaultSize, 0 ) + self.txtvar.SetMinSize( wx.Size( 200,-1 ) ) + + fgSizer1.Add( self.txtvar, 0, wx.ALIGN_CENTER_VERTICAL|wx.ALL|wx.EXPAND, 5 ) + + if option == 'mods' : + self.m_staticText4 = wx.StaticText( self, wx.ID_ANY, u"Modalités (une par ligne, avec * )", wx.DefaultPosition, wx.DefaultSize, 0 ) + self.m_staticText4.Wrap( -1 ) + fgSizer1.Add( self.m_staticText4, 0, wx.ALIGN_CENTER_VERTICAL|wx.ALL, 5 ) + + self.txtmods = wx.TextCtrl( self, wx.ID_ANY, wx.EmptyString, wx.DefaultPosition, wx.DefaultSize, wx.TE_MULTILINE ) + self.txtmods.SetMinSize( wx.Size( 200,150 ) ) + + fgSizer1.Add( self.txtmods, 0, wx.ALL|wx.EXPAND, 5 ) + + self.m_staticText5 = wx.StaticText( self, wx.ID_ANY, u"Format de l'extraction", wx.DefaultPosition, wx.DefaultSize, 0 ) + self.m_staticText5.Wrap( -1 ) + fgSizer1.Add( self.m_staticText5, 0, wx.ALIGN_CENTER_VERTICAL|wx.ALL, 5 ) + + extractformatChoices = [ u"Un seul fichier", u"Un fichier par modalité" ] + self.extractformat = wx.RadioBox( self, wx.ID_ANY, wx.EmptyString, wx.DefaultPosition, wx.DefaultSize, extractformatChoices, 1, wx.RA_SPECIFY_COLS ) + self.extractformat.SetSelection( 0 ) + fgSizer1.Add( self.extractformat, 0, wx.ALIGN_CENTER_VERTICAL|wx.ALL, 5 ) + + + fgSizer1.AddSpacer( ( 0, 0), 1, wx.EXPAND, 5 ) + + m_sdbSizer1 = wx.StdDialogButtonSizer() + self.m_sdbSizer1OK = wx.Button( self, wx.ID_OK ) + m_sdbSizer1.AddButton( self.m_sdbSizer1OK ) + self.m_sdbSizer1Cancel = wx.Button( self, wx.ID_CANCEL ) + m_sdbSizer1.AddButton( self.m_sdbSizer1Cancel ) + m_sdbSizer1.Realize(); + + fgSizer1.Add( m_sdbSizer1, 1, wx.EXPAND, 5 ) + + + self.SetSizer( fgSizer1 ) + self.Layout() + fgSizer1.Fit( self ) + + self.Centre( wx.BOTH ) + + def make_param(self) : + parametres = {} + le = [enc[0].lower() for enc in encodages] + parametres['filein'] = self.corpusfile.GetPath() + encodage = le[self.encodage.GetSelection()] + parametres['encodein'] = encodage + if self.option == 'splitvar' : + parametres['var'] = self.txtvar.GetValue() + if self.option == 'mods' : + parametres['mods'] = self.txtmods.GetValue().splitlines() + if self.extractformat.GetSelection() == 0 : + parametres['onefile'] = True + else : + parametres['onefile'] = False + parametres['encodeout'] = locale.getpreferredencoding() + print parametres + return parametres + + def __del__( self ): + pass diff --git a/iracmd.py b/iracmd.py index e0fd3da..abf8ce2 100644 --- a/iracmd.py +++ b/iracmd.py @@ -20,6 +20,7 @@ from ConfigParser import * #from textafcuci import AfcUci from textaslexico import Lexico from textstat import Stat +from tools import SubCorpus import tempfile ###################################### import logging @@ -56,7 +57,7 @@ class CmdLine : self.RPath = self.PathPath.get('PATHS', 'rpath') self.pref = RawConfigParser() self.pref.read(self.ConfigPath['preferences']) - self.history = History(self.ConfigPath['history']) + self.history = History(os.path.join(UserConfigPath, 'history.db')) parser = OptionParser() @@ -124,14 +125,17 @@ class CmdLine : #with codecs.open(self.filename, 'r', self.corpus_encodage) as f: elif options.read : corpus = Corpus(self, parametres = DoConf(options.read).getoptions('corpus'), read = options.read) - corpus.parametres['path'] = os.path.abspath(options.read) + corpus.parametres['pathout'] = os.path.dirname(os.path.abspath(options.read)) pathout = os.path.dirname(os.path.dirname(os.path.abspath(options.read))) self.corpus = corpus if corpus is not None : corpus.conn_all() + corpus = SubCorpus(self, corpus, [0,1,2,3,4,5,6,7]) + corpus.conn_all() corpus.make_lems() corpus.parse_active(gramact, gramsup) + print corpus.getlemconcorde('de').fetchall() # log.warning('ATTENTION gethapaxuces') # MakeUciStat(corpus) # qfqsdf @@ -159,7 +163,7 @@ class CmdLine : elif options.type_analyse == 'afcuci' : self.Text = AfcUci(self, cmd = True) elif options.type_analyse == 'stat' : - self.Text = Stat(self, corpus, parametres = config) + self.Text = Stat(self, corpus, parametres = {'type':'stat'}) elif options.type_analyse == 'spec' : self.Text = Lexico(self, corpus, config = {'type' : 'spec'}) #print self.Text.corpus.hours, 'h', self.Text.corpus.minutes,'min', self.Text.corpus.seconds, 's' diff --git a/iramuteq.py b/iramuteq.py index 479278d..d729c40 100644 --- a/iramuteq.py +++ b/iramuteq.py @@ -64,6 +64,7 @@ from sheet import MySheet from checkinstall import CreateIraDirectory, CheckRPath, FindRPAthWin32, FindRPathNix, CheckRPackages, IsNew, UpgradeConf, CopyConf, RLibsAreInstalled from chemins import ConstructRscriptsPath, ConstructConfigPath, ConstructDicoPath, ConstructGlobalPath, PathOut from parse_factiva_xml import ImportFactiva +from tools import Extract from tree import LeftTree ########################################################## @@ -196,13 +197,20 @@ class IraFrame(wx.Frame): menuFactiva.AppendItem(fact_from_xml) menuFactiva.AppendItem(fact_from_mail) menuFactiva.AppendItem(fact_from_txt) - file_menu.AppendMenu(-1, _(u"Import from factiva"), menuFactiva) - #item1.Enable(True) - + + menuTools = wx.Menu() + splitvar = wx.MenuItem(menuTools, wx.ID_ANY, _(u"Split from variable")) + extractmod = wx.MenuItem(menuTools, wx.ID_ANY, _(u"Extract mods")) + menuTools.AppendItem(splitvar) + menuTools.AppendItem(extractmod) + self.ID_splitvar = splitvar.GetId() + self.ID_extractmod = extractmod.GetId() + file_menu.AppendMenu(-1, _(u"Tools"), menuTools) + + item = wx.MenuItem(file_menu, ID_SaveTab, _(u"Save tab as..."), _(u"Save tab as...")) item.SetBitmap(wx.ArtProvider_GetBitmap(wx.ART_FILE_SAVE_AS)) - #item.Enable(False) file_menu.AppendItem(item) file_menu.Append(wx.ID_EXIT, _(u"Exit")) @@ -343,6 +351,8 @@ class IraFrame(wx.Frame): self.Bind(wx.EVT_MENU, self.import_factiva_xml, fact_from_xml) self.Bind(wx.EVT_MENU, self.import_factiva_mail, fact_from_mail) self.Bind(wx.EVT_MENU, self.import_factiva_txt, fact_from_txt) + self.Bind(wx.EVT_MENU, self.ExtractTools, splitvar) + self.Bind(wx.EVT_MENU, self.ExtractTools, extractmod) self.Bind(wx.EVT_MENU, self.OnFreq, id=ID_Freq) self.Bind(wx.EVT_MENU, self.OnChi2, id=ID_Chi2) self.Bind(wx.EVT_MENU, self.OnStudent, id=ID_Student) @@ -475,7 +485,9 @@ vous devez signaler le chemin de l'éxecutable de R dans les préférences.""" #Disable SaveTabAs file_menu = self.mb.GetMenu(0) items = file_menu.GetMenuItems() - items[4].Enable(DISEN) + for item in items : + if item.GetId() == ID_SaveTab : + item.Enable(DISEN) def ShowMenu(self, menu, Show=True): menu_pos = self.mb.FindMenu(menu) @@ -914,6 +926,13 @@ Voulez-vous fermer quand même ?""" except : BugReport(self) + def ExtractTools(self, evt) : + ID = evt.GetId() + if ID == self.ID_splitvar : + Extract(self, 'splitvar') + else : + Extract(self, 'mods') + def OnTextAlceste(self, event, corpus = None): try: #print('ATTENTION : PLUS DE BUG SUR ALCESTE') diff --git a/textstat.py b/textstat.py index bf28af1..d14522f 100644 --- a/textstat.py +++ b/textstat.py @@ -124,8 +124,8 @@ class Stat(AnalyseText) : else : with open(self.pathout['%s.txt' % 'glob'], 'w') as f : f.write(self.result['glob'].encode(self.parent.syscoding)) - self.parametres['pathout'] = self.pathout['Analyse.ira'] - DoConf().makeoptions(['stat'],[self.parametres], self.pathout['Analyse.ira']) + #self.parametres['pathout'] = self.pathout['Analyse.ira'] + #DoConf().makeoptions(['stat'],[self.parametres], self.pathout['Analyse.ira']) #class Stat(): -- 2.7.4