From 6919f2ef8d85c176c7be824b606c4b71142e10fd Mon Sep 17 00:00:00 2001 From: Pierre Ratinaud Date: Sat, 19 Jul 2014 01:27:46 +0200 Subject: [PATCH] matrix --- iramuteq.py | 274 +++++++++++++++++------------- layout.py | 109 ++++++++++-- listlex.py | 94 ++++++++--- openanalyse.py | 76 ++++++--- parse_factiva_txt.py | 47 +++++- tabchdalc.py | 136 +++++++-------- tabchddist.py | 20 +-- tabchi2.py | 459 +++++++++++++++++++++++++++++++++++++++++++-------- tabfrequence.py | 196 +++++++++------------- tableau.py | 132 +++++++++------ tabrsimple.py | 2 +- tabsimi.py | 154 +++++++---------- tabstudent.py | 2 +- tabverges.py | 21 +-- textafcuci.py | 2 +- textaslexico.py | 39 ++++- 16 files changed, 1148 insertions(+), 615 deletions(-) diff --git a/iramuteq.py b/iramuteq.py index 5d64a17..69e0bda 100644 --- a/iramuteq.py +++ b/iramuteq.py @@ -48,17 +48,17 @@ from tabsimi import DoSimi from tabrsimple import InputText from tabverges import Prototypical #from textafcuci import AfcUci -from analysetxt import Alceste from textdist import AnalysePam from textstat import Stat from textaslexico import Lexico from textsimi import SimiTxt, SimiFromCluster from textwordcloud import WordCloud, ClusterCloud +from textreinert import Reinert #from profile_segment import ProfileSegment #from textcheckcorpus import checkcorpus from openanalyse import OpenAnalyse -from corpus import Builder -from sheet import MySheet +from corpus import Builder, SubBuilder +#from sheet import MySheet from checkinstall import CreateIraDirectory, CheckRPath, FindRPAthWin32, FindRPathNix, CheckRPackages, IsNew, UpgradeConf, CopyConf, RLibsAreInstalled from chemins import RscriptsPath, ConstructConfigPath, ConstructDicoPath, ConstructGlobalPath, PathOut from parse_factiva_xml import ImportFactiva @@ -74,11 +74,11 @@ ID_Freq = wx.NewId() ID_Chi2 = wx.NewId() ID_Student = wx.NewId() ID_CHDSIM = wx.NewId() -ID_CHDAlceste = wx.NewId() +ID_CHDReinert = wx.NewId() ID_TEXTAFCM = wx.NewId() ID_TEXTSTAT = wx.NewId() ID_ASLEX = wx.NewId() -ID_TEXTALCESTE = wx.NewId() +ID_TEXTREINERT = wx.NewId() ID_TEXTPAM = wx.NewId() ID_CHECKCORPUS = wx.NewId() ID_Tabcontent = wx.NewId() @@ -216,16 +216,19 @@ class IraFrame(wx.Frame): menuTools = wx.Menu() splitvar = wx.MenuItem(menuTools, wx.ID_ANY, _(u"Split from variable").decode('utf8')) extractmod = wx.MenuItem(menuTools, wx.ID_ANY, _(u"Extract mods").decode('utf8')) + extractthem = wx.MenuItem(menuTools, wx.ID_ANY, _(u"Extract thematics").decode('utf8')) menuTools.AppendItem(splitvar) menuTools.AppendItem(extractmod) + menuTools.AppendItem(extractthem) self.ID_splitvar = splitvar.GetId() self.ID_extractmod = extractmod.GetId() + self.ID_extractthem = extractthem.GetId() file_menu.AppendMenu(-1, _(u"Tools"), menuTools) - item = wx.MenuItem(file_menu, ID_SaveTab, _(u"Save tab as...").decode('utf8'), _(u"Save tab as...").decode('utf8')) - item.SetBitmap(wx.ArtProvider_GetBitmap(wx.ART_FILE_SAVE_AS)) - file_menu.AppendItem(item) + #item = wx.MenuItem(file_menu, ID_SaveTab, _(u"Save tab as...").decode('utf8'), _(u"Save tab as...").decode('utf8')) + #item.SetBitmap(wx.ArtProvider_GetBitmap(wx.ART_FILE_SAVE_AS)) + #file_menu.AppendItem(item) file_menu.Append(wx.ID_EXIT, _(u"Exit").decode('utf8')) @@ -243,7 +246,7 @@ class IraFrame(wx.Frame): analyse_menu.Append(ID_Chi2, _(u"Chi2").decode('utf8')) #analyse_menu.Append(ID_Student, u"t de Student") menu_classif = wx.Menu() - menu_classif.Append(ID_CHDAlceste, _(u"Reinert Method").decode('utf8')) + menu_classif.Append(ID_CHDReinert, _(u"Reinert's Method").decode('utf8')) #menu_classif.Append(ID_CHDSIM, u"Par matrice des distances") analyse_menu.AppendMenu(-1, _(u"Clustering").decode('utf8'), menu_classif) #analyse_menu.Append(ID_AFCM, u"AFCM") @@ -258,7 +261,7 @@ class IraFrame(wx.Frame): text_menu.Append(ID_ASLEX, _(u"Specificities and CA").decode('utf8')) #text_menu.Append(ID_TEXTAFCM, u"AFC sur UCI / Vocabulaire") menu_classiftxt = wx.Menu() - menu_classiftxt.Append(ID_TEXTALCESTE, _(u"Reinert Method").decode('utf8')) + menu_classiftxt.Append(ID_TEXTREINERT, _(u"Reinert's Method").decode('utf8')) #menu_classiftxt.Append(ID_TEXTPAM, u"Par matrice des distances") text_menu.AppendMenu(-1, _(u"Clustering").decode('utf8'), menu_classiftxt) text_menu.Append(ID_SimiTxt, _(u"Similarities Analysis").decode('utf8')) @@ -333,9 +336,9 @@ class IraFrame(wx.Frame): self._mgr.AddPane(self.nb, aui.AuiPaneInfo(). Name("Tab_content"). CenterPane()) - self.Sheet = MySheet(self) + #self._mgr.AddPane(self.Sheet, wx.aui.AuiPaneInfo().Name("Data").CenterPane()) - self._mgr.AddPane(self.Sheet, aui.AuiPaneInfo().Name("Data").CenterPane()) + #self._mgr.AddPane(self.Sheet, aui.AuiPaneInfo().Name("Data").CenterPane()) self.nb.Bind(aui.EVT_AUINOTEBOOK_PAGE_CLOSE, self.OnCloseTab) self.nb.Bind(aui.EVT_AUINOTEBOOK_PAGE_CHANGED, self.OnPageChanged) # add the toolbars to the manager @@ -368,11 +371,12 @@ class IraFrame(wx.Frame): self.Bind(wx.EVT_MENU, self.import_factiva_txt, fact_from_txt) self.Bind(wx.EVT_MENU, self.ExtractTools, splitvar) self.Bind(wx.EVT_MENU, self.ExtractTools, extractmod) + self.Bind(wx.EVT_MENU, self.ExtractTools, extractthem) self.Bind(wx.EVT_MENU, self.OnFreq, id=ID_Freq) self.Bind(wx.EVT_MENU, self.OnChi2, id=ID_Chi2) self.Bind(wx.EVT_MENU, self.OnStudent, id=ID_Student) self.Bind(wx.EVT_MENU, self.OnCHDSIM, id=ID_CHDSIM) - self.Bind(wx.EVT_MENU, self.OnCHDAlceste, id=ID_CHDAlceste) + self.Bind(wx.EVT_MENU, self.OnCHDReinert, id=ID_CHDReinert) self.Bind(wx.EVT_MENU, self.OnAFCM, id=ID_AFCM) self.Bind(wx.EVT_MENU, self.OnProto, id=ID_proto) self.Bind(wx.EVT_MENU, self.OnRCode, id=ID_RCODE) @@ -380,13 +384,13 @@ class IraFrame(wx.Frame): self.Bind(wx.EVT_MENU, self.OnTextStat, id=ID_TEXTSTAT) self.Bind(wx.EVT_MENU, self.OnTextSpec, id=ID_ASLEX) self.Bind(wx.EVT_MENU, self.OnTextAfcm, id=ID_TEXTAFCM) - self.Bind(wx.EVT_MENU, self.OnTextAlceste, id=ID_TEXTALCESTE) + self.Bind(wx.EVT_MENU, self.OnTextReinert, id=ID_TEXTREINERT) self.Bind(wx.EVT_MENU, self.OnPamSimple, id=ID_TEXTPAM) self.Bind(wx.EVT_MENU, self.OnSimiTxt, id=ID_SimiTxt) self.Bind(wx.EVT_MENU, self.OnWordCloud, id=ID_WC) - self.Bind(wx.EVT_MENU, self.OnSimi, id=ID_SIMI) + self.Bind(wx.EVT_MENU, self.OnSimiTab, id=ID_SIMI) self.Bind(wx.EVT_MENU, self.OnExit, id=wx.ID_EXIT) - self.Bind(wx.EVT_MENU, self.OnSaveTabAs, id=ID_SaveTab) + #self.Bind(wx.EVT_MENU, self.OnSaveTabAs, id=ID_SaveTab) self.Bind(wx.EVT_MENU, self.OnAbout, id=wx.ID_ABOUT) self.Bind(wx.EVT_MENU, self.OnHelp, id=wx.ID_HELP) self.Bind(wx.EVT_MENU, self.OnPref, id=wx.ID_PREFERENCES) @@ -577,11 +581,14 @@ vous devez signaler le chemin de l'éxecutable de R dans les préférences.""" def OnOpenData(self, event): inputname, self.input_path = OnOpen(self, "Data") if inputname: - self.filename = self.input_path[0] + #filename = self.input_path[0] self.tableau = Tableau(self,os.path.abspath(self.input_path[0])) - get_table_param(self, self.input_path[0]) - self.tableau.make_content() - self.tableau.show_tab() + val = get_table_param(self, self.input_path[0]) + if val == wx.ID_OK : + self.tableau.make_content() + OpenAnalyse(self, self.tableau.parametres) + self.tree.OnItemAppend(self.tableau.parametres) + #self.tableau.show_tab() def OnOpenAnalyse(self, event): self.AnalysePath = OnOpen(self, "Analyse") @@ -612,7 +619,18 @@ vous devez signaler le chemin de l'éxecutable de R dans les préférences.""" self.DataPop = False self.ShowAPane(u"Text") self._mgr.Update() - + + def OnSubText(self, corpus, parametres = None): + busy = wx.BusyInfo(_("Please wait...").decode('utf8'), self) + wx.SafeYield() + builder = SubBuilder(self, corpus, parametres) + del busy + if builder.res == wx.ID_OK : + corpus = builder.doanalyse() + self.history.add(corpus.parametres) + self.tree.OnItemAppend(corpus.parametres) + OpenAnalyse(self, corpus.parametres) + def OpenText(self): dlg = wx.ProgressDialog("Ouverture...", "Veuillez patienter...", @@ -695,6 +713,12 @@ Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, États-Unis.""" npage = self.nb.GetPage(new) if 'parametres' in dir(npage) : self.tree.GiveFocus(uuid=npage.parametres['uuid']) + if npage.parametres.get('matrix', False) : + self.ShowMenu(_(u"Text analysis").decode('utf8'), False) + self.ShowMenu(_(u"Matrix analysis").decode('utf8'), True) + elif npage.parametres.get('corpus', False) : + self.ShowMenu(_(u"Text analysis").decode('utf8')) + self.ShowMenu(_(u"Matrix analysis").decode('utf8'), False) def OnCloseTab(self, evt): #log.info('Closing tab %s' % str(evt.GetEventObject())) @@ -708,40 +732,40 @@ Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, États-Unis.""" self.history.rmtab(page.parametres) self.tree.CloseItem(uuid = page.parametres['uuid']) TabTitle = self.nb.GetPageText(self.nb.GetSelection()) - if self.DictTab != {} : - if TabTitle in self.DictTab : - ListFile=self.DictTab[TabTitle] - if False in ListFile: - msg = u""" -Certains résultats ne sont pas enregistrer. -Voulez-vous fermer quand même ?""" - dlg = wx.MessageDialog(self, msg, "Sauvegarde",wx.YES_NO | wx.NO_DEFAULT | wx.ICON_QUESTION) - - dlg.CenterOnParent() - if dlg.ShowModal() in [wx.ID_NO, wx.ID_CANCEL]: - remove = False - evt.Veto() - dlg.Destroy() - else : - for f in ListFile[1:] : - print 'remove', f - os.remove(f) - remove = True - dlg.Destroy() - elif True in ListFile : - remove = True - if remove: - del self.DictTab[TabTitle] - else : - self.LastTabClose() - else : - remove = True - if self.nb.GetPageCount() == 1 and remove and not notebook : +# if self.DictTab != {} : +# if TabTitle in self.DictTab : +# ListFile=self.DictTab[TabTitle] +# if False in ListFile: +# msg = u""" +# Certains résultats ne sont pas enregistrer. +# Voulez-vous fermer quand même ?""" +# dlg = wx.MessageDialog(self, msg, "Sauvegarde",wx.YES_NO | wx.NO_DEFAULT | wx.ICON_QUESTION) +# +# dlg.CenterOnParent() +# if dlg.ShowModal() in [wx.ID_NO, wx.ID_CANCEL]: +# remove = False +# evt.Veto() +# dlg.Destroy() +# else : +# for f in ListFile[1:] : +# print 'remove', f +# os.remove(f) +# remove = True +# dlg.Destroy() +# elif True in ListFile : +# remove = True +# if remove: +# del self.DictTab[TabTitle] +# else : +# self.LastTabClose() +# else : +# remove = True + if self.nb.GetPageCount() == 1 and not notebook : self.LastTabClose() def LastTabClose(self) : if self.nb.GetPageCount() == 1 : - self.DisEnSaveTabAs(False) + #self.DisEnSaveTabAs(False) if self.DataTxt : self.ShowAPane("Text") elif self.DataPop : @@ -749,39 +773,39 @@ Voulez-vous fermer quand même ?""" else : self.ShowAPane("Intro_Text") - def OnSaveTabAs(self, event): - SelectTab = self.nb.GetSelection() - TabTitle = self.nb.GetPageText(SelectTab) - FileToSave = self.DictTab[TabTitle] - NewListFile = [] - dlg = wx.FileDialog( - self, message="Enregistrer sous...", defaultDir=os.getcwd(), - defaultFile="resultat.html", wildcard="Tous les fichiers|*", style=wx.SAVE | wx.OVERWRITE_PROMPT - ) - dlg.SetFilterIndex(2) - dlg.CenterOnParent() - - if dlg.ShowModal() == wx.ID_OK: - Path = dlg.GetPath() - Dirname = os.path.dirname(Path) - Filename = dlg.GetFilename() - else : - Path = False - dlg.Destroy() - if Path: - shutil.copyfile(FileToSave[-1], Path) - os.remove(FileToSave[len(FileToSave) - 1]) - NewListFile.append(True) - NewListFile.append(Path) - for f in FileToSave[1:-1] : - Fileout = os.path.join(Dirname, os.path.basename(f)) - shutil.copyfile(f, Fileout) - NewListFile.append(Fileout) - os.remove(f) - TabText = Filename - self.DictTab[TabText] = NewListFile - del self.DictTab[TabTitle] - self.nb.SetPageText(SelectTab, TabText) +# def OnSaveTabAs(self, event): +# SelectTab = self.nb.GetSelection() +# TabTitle = self.nb.GetPageText(SelectTab) +# FileToSave = self.DictTab[TabTitle] +# NewListFile = [] +# dlg = wx.FileDialog( +# self, message="Enregistrer sous...", defaultDir=os.getcwd(), +# defaultFile="resultat.html", wildcard="Tous les fichiers|*", style=wx.SAVE | wx.OVERWRITE_PROMPT +# ) +# dlg.SetFilterIndex(2) +# dlg.CenterOnParent() +# +# if dlg.ShowModal() == wx.ID_OK: +# Path = dlg.GetPath() +# Dirname = os.path.dirname(Path) +# Filename = dlg.GetFilename() +# else : +# Path = False +# dlg.Destroy() +# if Path: +# shutil.copyfile(FileToSave[-1], Path) +# os.remove(FileToSave[len(FileToSave) - 1]) +# NewListFile.append(True) +# NewListFile.append(Path) +# for f in FileToSave[1:-1] : +# Fileout = os.path.join(Dirname, os.path.basename(f)) +# shutil.copyfile(f, Fileout) +# NewListFile.append(Fileout) +# os.remove(f) +# TabText = Filename +# self.DictTab[TabText] = NewListFile +# del self.DictTab[TabTitle] +# self.nb.SetPageText(SelectTab, TabText) def GetStartPosition(self): @@ -815,20 +839,42 @@ Voulez-vous fermer quand même ?""" ################################################################ #debut des analyses ################################################################ - - def OnFreq(self, event): + def analyse_matrix(self, evt, analyse, analyse_type = '', matrix = None, dlgnb = 1): + if matrix is None : + matrix = self.tree.getmatrix() + #try : + analyse(self, matrix, parametres = {'type' : analyse_type}, dlg = progressbar(self, dlgnb)) + #except: + # BugReport(self) + + def OnFreq(self, event, matrix = None): + self.analyse_matrix(event, Frequences, analyse_type = 'freq', matrix = matrix, dlgnb = 3) + #if matrix is None : + # matrix = self.tree.getmatrix() + #try: + # Frequences(self, matrix, parametres = {'type' : 'freq'}, dlg = progressbar(self, 3)) + #except: + # BugReport(self) + + def OnChi2(self, event, matrix = None): + #try: + self.analyse_matrix(event, ChiSquare, matrix = matrix, analyse_type = 'chi2', dlgnb = 3) + #except: + # BugReport(self) + + def OnSimiTab(self, event, matrix = None): + if matrix is None : + matrix = self.tree.getmatrix() try: - Frequences(self) + DoSimi(self, matrix, parametres = {'type' : 'simimatrix'}, dlg = progressbar(self, 3)) except: BugReport(self) + + def OnCHDReinert(self, event, matrix = None): + if matrix is None : + matrix = self.tree.getmatrix() + AnalyseQuest(self, matrix, parametres = {'type' : 'reinertmatrix'}, dlg = progressbar(self, 3)) - def OnChi2(self, event): - try: - # print('PAS DE DEBUG SUR CHI2') - chi = ChiSquare(self) - except: - BugReport(self) - def OnStudent(self, event): try: MakeStudent(self) @@ -850,14 +896,14 @@ Voulez-vous fermer quand même ?""" except: BugReport(self) - def OnCHDAlceste(self, event): - try: - # print('PLUS DE BUG SUR ALCESTE QUESTIONNAIRE') - self.quest = AnalyseQuest(self) - if self.quest.val == wx.ID_OK: - PlaySound(self) - except: - BugReport(self) +# def OnCHDReinert(self, event): +# try: +# # print('PLUS DE BUG SUR ALCESTE QUESTIONNAIRE') +# self.quest = AnalyseQuest(self) +# if self.quest.val == wx.ID_OK: +# PlaySound(self) +# except: +# BugReport(self) def OnProto(self, evt) : Prototypical(self, {'type' : 'proto'}) @@ -953,16 +999,18 @@ Voulez-vous fermer quand même ?""" ID = evt.GetId() if ID == self.ID_splitvar : Extract(self, 'splitvar') - else : + elif ID == self.ID_extractmod : Extract(self, 'mods') + elif ID == self.ID_extractthem : + Extract(self, 'them') - def OnTextAlceste(self, event, corpus = None): + def OnTextReinert(self, event, corpus = None): try: #print('ATTENTION : PLUS DE BUG SUR ALCESTE') #RunAnalyse(self, corpus, Alceste, OptAlceste) if corpus is None : corpus = self.tree.getcorpus() - self.Text = Alceste(self, corpus, parametres = {'type': 'alceste'}, dlg = progressbar(self,6)) + self.Text = Reinert(self, corpus, parametres = {'type': 'alceste'}, dlg = progressbar(self,6)) if self.Text.val == wx.ID_OK: PlaySound(self) except: @@ -979,18 +1027,18 @@ Voulez-vous fermer quand même ?""" BugReport(self) def SimiCluster(self, parametres = {}, fromprof = False, pathout = '', listactives = [], actives = [], tableau = None) : - DoSimi(self, param = parametres, fromprof = fromprof, pathout = pathout, listactives = listactives, actives = actives, tableau = tableau) + DoSimi(self, param = parametres, fromprof = fromprof, listactives = listactives, actives = actives, tableau = tableau) - def OnSimi(self,evt): - try : +# def OnSimi(self,evt): +# try : #print 'ATTENTION !!!! VERGES' #print 'PLUS DE BUG SUR SIMI' - self.res = DoSimi(self, param = None) +# self.res = DoSimi(self, param = None) #self.res = Verges(self) - if self.res.val == wx.ID_OK : - PlaySound(self) - except : - BugReport(self) +# if self.res.val == wx.ID_OK : +# PlaySound(self) +# except : +# BugReport(self) ################################################################# def OnHelp(self, event): diff --git a/layout.py b/layout.py index 96aaaf6..2c3f41c 100644 --- a/layout.py +++ b/layout.py @@ -23,6 +23,7 @@ from dialog import PrefGraph, PrefExport, PrefSimpleFile, PrefDendro, SimpleDial from guifunct import SelectColumn, PrepSimi, PrefSimi from webexport import WebExport from corpus import Corpus +from sheet import MySheet import datetime import sys import tempfile @@ -361,7 +362,9 @@ class OpenCHDS(): self.corpus.make_ucecl_from_R(self.pathout['uce']) corpname = self.corpus.parametres['corpus_name'] else : - corpname = self.corpus.parametres['name'] + corpname = self.corpus.parametres['matrix_name'] + if os.path.exists(self.pathout['analyse.db']) : + self.corpus.read_tableau(self.pathout['analyse.db']) clnb = parametres['clnb'] dlg = progressbar(self, maxi = 4 + clnb) @@ -629,7 +632,27 @@ class SashList(wx.Panel) : self.rightwin1 = rightwin1 winids.append(rightwin1.GetId()) - +class TgenLayout : + def __init__(self, page): + self.page = page + parametres = self.page.parametres + ira = wx.GetApp().GetTopWindow() + self.page.tgens, etoiles = ReadList(parametres['tgenspec'], ira.syscoding, sep="\t") + tgentab = False + for i in range(page.GetPageCount()) : + tab = page.GetPage(i) + if 'tgen' in dir(tab) : + if tab.tgen : + tgentab = tab + break + if tgentab : + self.page.tgentab.RefreshData(self.page.tgens) + self.page.SetSelection(i) + else : + self.page.tgentab = ListForSpec(ira, None, self.page.tgens, etoiles[1:]) + self.page.tgentab.tgen = True + self.page.AddPage(self.page.tgentab, u'Tgens Specificities') + self.page.SetSelection(self.page.GetPageCount() - 1) class dolexlayout : def __init__(self, ira, corpus, parametres): @@ -649,21 +672,21 @@ class dolexlayout : self.DictEffType, firstefft = ReadList(self.dictpathout['tabletypem'], self.corpus.parametres['syscoding']) self.DictEffRelForme, firsteffrelf = ReadList(self.dictpathout['eff_relatif_forme'], self.corpus.parametres['syscoding']) self.DictEffRelType, firsteffrelt = ReadList(self.dictpathout['eff_relatif_type'], self.corpus.parametres['syscoding']) - + self.etoiles = firsteff[1:] #sash = SashList(ira.nb) self.TabStat = aui.AuiNotebook(ira.nb, -1, wx.DefaultPosition) self.TabStat.parametres = parametres - self.ListPan = ListForSpec(ira, self, self.DictSpec, first) + self.ListPan = ListForSpec(ira, self, self.DictSpec, self.etoiles) if os.path.exists(self.pathout['banalites.csv']) : - self.listban = ListForSpec(ira, self, self.dictban, firstban) + self.listban = ListForSpec(ira, self, self.dictban, ['eff'] + self.etoiles) #self.ListPan2 = ListForSpec(sash.rightwin1, self, self.DictSpec, first) - self.ListPant = ListForSpec(ira, self, self.DictType, firstt) - self.ListPanEff = ListForSpec(ira, self, self.DictEff, firsteff) - self.ListPanEffType = ListForSpec(ira, self, self.DictEffType, firstefft) - self.ListPanEffRelForme = ListForSpec(ira, self, self.DictEffRelForme, firsteffrelf) - self.ListPanEffRelType = ListForSpec(ira, self.parent, self.DictEffRelType, firsteffrelt) + self.ListPant = ListForSpec(ira, self, self.DictType, self.etoiles) + self.ListPanEff = ListForSpec(ira, self, self.DictEff, self.etoiles) + self.ListPanEffType = ListForSpec(ira, self, self.DictEffType, self.etoiles) + self.ListPanEffRelForme = ListForSpec(ira, self, self.DictEffRelForme, self.etoiles) + self.ListPanEffRelType = ListForSpec(ira, self.parent, self.DictEffRelType, self.etoiles) self.TabStat.AddPage(self.ListPan, u'formes') if os.path.exists(self.pathout['banalites.csv']) : @@ -682,11 +705,20 @@ class dolexlayout : self.tabAFCTGraph = GraphPanelAfc(self.TabAFC, self.dictpathout, list_graph, self.parametres['clnb'], itempath ='liste_graph_afct', coding=self.encoding) self.TabAFC.AddPage(self.tabAFCTGraph, 'AFC type') self.TabStat.AddPage(self.TabAFC, 'AFC') + + + ira.nb.AddPage(self.TabStat, u'Spécificités') + self.ira = ira self.TabStat.corpus = self.corpus + self.TabStat.etoiles = self.etoiles + if os.path.exists(os.path.join(self.parametres['pathout'], 'tgenspec.csv')) : + self.parametres['tgenspec'] = os.path.join(self.parametres['pathout'], 'tgenspec.csv') + TgenLayout(self.TabStat) + self.TabStat.SetSelection(0) ira.nb.SetSelection(self.parent.nb.GetPageCount() - 1) ira.ShowAPane("Tab_content") @@ -798,9 +830,9 @@ class GraphPanelDendro(wx.Panel): self.param['type_tclasse'] = dial.m_radioBox2.GetSelection() def make_dendro(self, dendro = 'simple') : - while os.path.exists(os.path.join(self.dirout, 'dendrogamme_' + str(self.graphnb)+'.png')) : + while os.path.exists(os.path.join(self.dirout, 'dendrogramme_' + str(self.graphnb)+'.png')) : self.graphnb += 1 - fileout = ffr(os.path.join(self.dirout,'dendrogamme_' + str(self.graphnb)+'.png')) + fileout = ffr(os.path.join(self.dirout,'dendrogramme_' + str(self.graphnb)+'.png')) width = self.param['width'] height = self.param['height'] type_dendro = self.type_dendro[self.param['type_dendro']] @@ -892,6 +924,25 @@ class OpenCorpus : ira.nb.SetSelection(ira.nb.GetPageCount() - 1) ira.ShowAPane("Tab_content") +class MatLayout : + def __init__(self, ira, matrix): + #self.parent.content = self.csvtable + self.sheet = MySheet(ira.nb) + ira.nb.AddPage(self.sheet, matrix.parametres['matrix_name']) + self.sheet.Populate(matrix.csvtable) + self.sheet.parametres = matrix.parametres + #self.ira.ShowMenu(_(u"View").decode('utf8')) + #self.ira.ShowMenu(_(u"Matrix analysis").decode('utf8')) + #self.ira.ShowMenu(_(u"Text analysis").decode('utf8'), False) + #self.parent.type = "Data" + #self.parent.DataPop = False + ira.nb.SetSelection(ira.nb.GetPageCount() - 1) + ira.ShowAPane("Tab_content") + #self.ira.OnViewData('') + + + + class CopusPanel(wx.Panel) : def __init__(self, parent, parametres) : wx.Panel.__init__ ( self, parent, id = wx.ID_ANY, pos = wx.DefaultPosition, size = wx.Size( 500,300 ), style = wx.TAB_TRAVERSAL ) @@ -1119,11 +1170,37 @@ class DefaultMatLayout : self.parent = parent self.tableau = tableau self.parametres = parametres + if os.path.exists(self.pathout['analyse.db']) : + self.tableau.read_tableau(self.pathout['analyse.db']) self.dolayout() + self.ira.nb.SetSelection(self.ira.nb.GetPageCount() - 1) + self.ira.ShowAPane("Tab_content") def dolayout(self) : pass +class FreqLayout(DefaultMatLayout) : + def dolayout(self) : + self.tab = wx.html.HtmlWindow(self.ira.nb, -1) + if "gtk2" in wx.PlatformInfo: + self.tab.SetStandardFonts() + self.tab.LoadPage(self.pathout['resultats.html']) + self.tab.parametres = self.parametres + self.ira.nb.AddPage(self.tab, u"Fréquences") + + +class Chi2Layout(DefaultMatLayout) : + def dolayout(self): + self.tab = wx.html.HtmlWindow(self.ira.nb, -1) + if "gtk2" in wx.PlatformInfo: + self.tab.SetStandardFonts() + self.tab.LoadPage(self.pathout['resultats-chi2.html']) + self.tab.parametres = self.parametres + self.ira.nb.AddPage(self.tab, ' - '.join([u"Chi2", "%s" % self.parametres['name']])) + #self.ira.nb.SetSelection(self.ira.nb.GetPageCount() - 1) + #self.ira.ShowAPane("Tab_content") + + class ProtoLayout(DefaultMatLayout) : def dolayout(self) : list_graph = [['proto.png', 'Analyse prototypique']] @@ -1137,8 +1214,8 @@ class ProtoLayout(DefaultMatLayout) : #self.Tab.corpus = self.corpus self.TabProto.parametres = self.parametres self.ira.nb.AddPage(self.TabProto, 'Analyse Prototypique - %s' % self.parametres['name']) - self.ira.nb.SetSelection(self.ira.nb.GetPageCount() - 1) - self.ira.ShowAPane("Tab_content") + #self.ira.nb.SetSelection(self.ira.nb.GetPageCount() - 1) + #self.ira.ShowAPane("Tab_content") class SimiMatLayout(DefaultMatLayout) : @@ -1159,8 +1236,8 @@ class SimiMatLayout(DefaultMatLayout) : self.tabsimi.AddPage(self.graphpan, 'Graph') self.tabsimi.parametres = self.parametres self.parent.nb.AddPage(self.tabsimi, 'Analyse de graph') - self.parent.ShowTab(True) - self.parent.nb.SetSelection(self.parent.nb.GetPageCount() - 1) + #self.parent.ShowTab(True) + #self.parent.nb.SetSelection(self.parent.nb.GetPageCount() - 1) def redosimi(self,evt) : with open(self.pathout['selected.csv'],'r') as f : diff --git a/listlex.py b/listlex.py index 409c9fc..7664614 100644 --- a/listlex.py +++ b/listlex.py @@ -26,13 +26,23 @@ from operator import itemgetter #--------------------------------------------------------------------------- class ListForSpec(wx.ListCtrl, listmix.ListCtrlAutoWidthMixin, listmix.ColumnSorterMixin): - def __init__(self, parent,gparent, dlist, first, menu = True): + def __init__(self, parent,gparent, dlist = {}, first = [], menu = True): #def __init__(self, parent) : wx.ListCtrl.__init__( self, parent, -1, style=wx.LC_REPORT|wx.LC_VIRTUAL|wx.LC_HRULES|wx.LC_VRULES) self.parent=parent self.gparent=gparent self.dlist=dlist self.first = first + self.tgen = False + if 'etoiles' in dir(self.gparent) : + self.etoiles = self.gparent.etoiles + else : + self.etoiles = [] + for val in self.first : + if val.startswith(u'X.') : + val = val.replace(u'X.', u'*') + self.etoiles.append(val) + self.menu = menu #def start(self) : @@ -54,23 +64,26 @@ class ListForSpec(wx.ListCtrl, listmix.ListCtrlAutoWidthMixin, listmix.ColumnSor self.attr1.SetBackgroundColour((230, 230, 230)) self.attr2 = wx.ListItemAttr() self.attr2.SetBackgroundColour("light blue") + self.attrselected = wx.ListItemAttr() + self.attrselected.SetBackgroundColour("red") + self.selected = {} i=0 - for name in self.first : + for name in [u'formes'] + self.first : self.InsertColumn(i,name,wx.LIST_FORMAT_LEFT) i+=1 self.SetColumnWidth(0, 180) - for i in range(1,len(self.first)-1): - self.SetColumnWidth(i, self.checkcolumnwidth(len(self.first[i]) * 10)) + for i in range(0,len(self.first)): + self.SetColumnWidth(i + 1, self.checkcolumnwidth(len(self.first[i]) * 10)) self.itemDataMap = self.dlist self.itemIndexMap = self.dlist.keys() self.SetItemCount(len(self.dlist)) #listmix.ListCtrlAutoWidthMixin.__init__(self) - listmix.ColumnSorterMixin.__init__(self, len(self.first)) + listmix.ColumnSorterMixin.__init__(self, len(self.first) + 1) self.SortListItems(1, 0) #----------------------------------------------------------------------------------------- @@ -84,6 +97,11 @@ class ListForSpec(wx.ListCtrl, listmix.ListCtrlAutoWidthMixin, listmix.ColumnSor self.Bind(wx.EVT_RIGHT_UP, self.OnRightClick) #----------------------------------------------------------------------------------------- + def RefreshData(self, data): + self.itemDataMap = data + self.itemIndexMap = data.keys() + self.SetItemCount(len(data)) + self.Refresh() def checkcolumnwidth(self, width) : if width < 80 : @@ -97,11 +115,16 @@ class ListForSpec(wx.ListCtrl, listmix.ListCtrlAutoWidthMixin, listmix.ColumnSor return s def OnGetItemAttr(self, item): - if item % 2 : - return self.attr1 + if self.getColumnText(item, 0) not in self.selected : + if item % 2 : + return self.attr1 + else : + return self.attr2 else : - return self.attr2 - + return self.attrselected + + def GetItemByWord(self, word): + return [ val for val in self.dlist if self.dlist[val][0] == word ][0] # Used by the ColumnSorterMixin, see wx/lib/mixins/listctrl.py def GetListCtrl(self): @@ -120,6 +143,12 @@ class ListForSpec(wx.ListCtrl, listmix.ListCtrlAutoWidthMixin, listmix.ColumnSor self.Select(item) event.Skip() + + def GetString(self, evt): + return self.getselectedwords()[0] + + def GetSelections(self): + return self.getselectedwords() def getColumnText(self, index, col): item = self.GetItem(index, col) @@ -155,28 +184,31 @@ class ListForSpec(wx.ListCtrl, listmix.ListCtrlAutoWidthMixin, listmix.ColumnSor self.popupID2 = wx.NewId() self.popupID3 = wx.NewId() self.popup_Tgen_glob = wx.NewId() + self.onmaketgen = wx.NewId() self.ID_stcaract = wx.NewId() self.Bind(wx.EVT_MENU, self.OnPopupOne, id=self.popupID1) self.Bind(wx.EVT_MENU, self.OnPopupTwo, id=self.popupID2) self.Bind(wx.EVT_MENU, self.OnPopupThree, id=self.popupID3) self.Bind(wx.EVT_MENU, self.OnTgen_glob, id=self.popup_Tgen_glob) + self.Bind(wx.EVT_MENU, self.OnMakeTgen, id=self.onmaketgen) #self.Bind(wx.EVT_MENU, self.onstcaract, id = self.ID_stcaract) # make a menu menu = wx.Menu() # add some items menu.Append(self.popupID1, u"Formes associées") menu.Append(self.popupID2, u"Concordancier") - menu.Append(self.popupID3, "Graphique") + menu.Append(self.popupID3, u"Graphique") menu_stcaract = wx.Menu() self.menuid = {} - for i, et in enumerate(self.first[1:]) : + for i, et in enumerate(self.etoiles) : nid = wx.NewId() self.menuid[nid] = i menu_stcaract.Append(nid, et) self.Bind(wx.EVT_MENU, self.onstcaract, id = nid) menu.AppendMenu(-1, u"Segments de texte caractéristiques", menu_stcaract) #menu.Append(self.popup_Tgen_glob, "Tgen global") + menu.Append(self.onmaketgen, "Make Tgen") self.PopupMenu(menu) menu.Destroy() @@ -185,7 +217,7 @@ class ListForSpec(wx.ListCtrl, listmix.ListCtrlAutoWidthMixin, listmix.ColumnSor last = self.GetFirstSelected() while self.GetNextSelected(last) != -1: last = self.GetNextSelected(last) - words.append(self.getColumnText(last, 1)) + words.append(self.getColumnText(last, 0)) return words def OnPopupOne(self, event): @@ -214,9 +246,11 @@ class ListForSpec(wx.ListCtrl, listmix.ListCtrlAutoWidthMixin, listmix.ColumnSor parametres = page.parametres paneff = self.gparent.ListPanEff panchi = self.gparent.ListPan - et = self.first[ind+1] - if et.startswith(u'X.') : - et = et.replace(u'X.', u'*') + #etoiles = self.gparent.etoiles + et = self.etoiles[ind] + + #if et.startswith(u'X.') : + # et = et.replace(u'X.', u'*') uces = corpus.getucesfrometoile(et) self.la = [panchi.dlist[i][0] for i in range(0, len(panchi.dlist)) if panchi.dlist[i][ind+1] >= minind ] self.lchi = [panchi.dlist[i][ind+1] for i in range(0, len(panchi.dlist)) if panchi.dlist[i][ind+1] >= minind ] @@ -240,21 +274,28 @@ class ListForSpec(wx.ListCtrl, listmix.ListCtrlAutoWidthMixin, listmix.ColumnSor ntab2 = ntab2[:limite] nuces = [val[1] for val in ntab2] ucis_txt, ucestxt = doconcorde(corpus, nuces, self.la) - win = message(self, u"Segments de texte caractéristiques - %s" % self.first[ind], (750, 600)) - win.html = '\n' + '
'.join(['
'.join([ucis_txt[i], '
score : %.2f
' % ntab2[i][0], ucestxt[i]]) for i in range(0,len(ucestxt))]) + '\n' - win.HtmlPage.SetPage(win.html) + items = dict([[i, '
'.join([ucis_txt[i], '
score : %.2f

' % ntab2[i][0], ucestxt[i]])] for i in range(0,len(ucestxt))]) + win = message(self, items, u"Segments de texte caractéristiques - %s" % self.first[ind], (900, 600)) + #win.html = '\n' + '
'.join(['
'.join([ucis_txt[i], '
score : %.2f
' % ntab2[i][0], ucestxt[i]]) for i in range(0,len(ucestxt))]) + '\n' + #win.HtmlPage.SetPage(win.html) win.Show(True) def OnPopupTwo(self, event): - activenotebook = self.parent.nb.GetSelection() - page = self.parent.nb.GetPage(activenotebook) + if 'nb' in dir(self.parent) : + activenotebook = self.parent.nb.GetSelection() + page = self.parent.nb.GetPage(activenotebook) + corpus = page.corpus + else : + corpus = self.parent.parent.parent.corpus item=self.getColumnText(self.GetFirstSelected(), 0) - corpus = page.corpus uce_ok = corpus.getlemuces(item) - win = message(self, u"Concordancier", (750, 600)) + ira = wx.GetApp().GetTopWindow() ucis_txt, ucestxt = doconcorde(corpus, uce_ok, [item]) - win.html = ('\n

%s

' % item) + '
'.join(['
'.join([ucis_txt[i], ucestxt[i]]) for i in range(0,len(ucestxt))]) + '\n' - win.HtmlPage.SetPage(win.html) + items = dict([[i, '

'.join([ucis_txt[i], ucestxt[i]])] for i in range(0,len(ucestxt))]) + win = message(ira, items, u"Concordancier - %s" % item, (800, 500)) + #win = message(ira, u"Concordancier", (800, 500)) + #win.html = ('\n

%s

' % item) + '
'.join(['
'.join([ucis_txt[i], ucestxt[i]]) for i in range(0,len(ucestxt))]) + '\n' + #win.HtmlPage.SetPage(win.html) win.Show(True) def getinf(self, txt) : @@ -272,7 +313,7 @@ class ListForSpec(wx.ListCtrl, listmix.ListCtrlAutoWidthMixin, listmix.ColumnSor last = self.GetNextSelected(last) data = self.GetItemData(last) datas += [data] - colnames = self.first[1:] + colnames = self.etoiles table = [[self.getinf(val) for val in line[1:]] for line in datas] rownames = [val[0] for val in datas] tmpgraph = tempfile.mktemp(dir=self.parent.TEMPDIR) @@ -320,3 +361,6 @@ class ListForSpec(wx.ListCtrl, listmix.ListCtrlAutoWidthMixin, listmix.ColumnSor txt = "" % tmpgraph win.HtmlPage.SetPage(txt) win.Show(True) + + def OnMakeTgen(self, evt): + self.parent.tree.OnTgenEditor(self.getselectedwords()) diff --git a/openanalyse.py b/openanalyse.py index b02c1fe..f8c32bb 100644 --- a/openanalyse.py +++ b/openanalyse.py @@ -5,13 +5,13 @@ #License: GNU/GPL from chemins import ChdTxtPathOut, StatTxtPathOut, PathOut -from layout import OpenCHDS, dolexlayout, StatLayout, WordCloudLayout, OpenCorpus, SimiLayout, SimiMatLayout, ProtoLayout +from layout import OpenCHDS, dolexlayout, StatLayout, WordCloudLayout, OpenCorpus, SimiLayout, SimiMatLayout, ProtoLayout, MatLayout, FreqLayout, Chi2Layout from corpus import Corpus, copycorpus from tableau import Tableau import os #import shelve #from tabsimi import DoSimi -from functions import DoConf +from functions import DoConf, ReadDicoAsDico from tableau import Tableau import logging @@ -31,36 +31,46 @@ class OpenAnalyse(): if self.conf['type'] == 'corpus' : corpus = self.opencorpus() + elif self.conf['type'] == 'matrix' : + matrix = self.openmatrix() elif self.conf.get('corpus', False) in self.parent.history.corpus : if self.conf['uuid'] in self.parent.history.analyses : intree = True else : intree = False corpus = self.openanalyse() + if self.conf.get('lem',1) : - corpus.make_lems(True) + dolem = True else : - corpus.make_lems(False) + dolem = False + if self.conf.get('dictionary', False) : + dico = ReadDicoAsDico(self.conf['dictionary']) + corpus.make_lems_from_dict(dico, dolem = dolem) + else : + corpus.make_lems(lem = dolem) if not intree : self.parent.tree.AddAnalyse(self.conf, bold = True) else : self.parent.tree.GiveFocus(uuid = self.conf['uuid'], bold = True) self.doopen(corpus) - else : + elif self.conf.get('matrix', False) in self.parent.history.ordermatrix : corpus = None - if isinstance(parametres, dict) : - tableau = Tableau(parent, parametres['ira']) - else : - tableau = Tableau(parent, parametres) - tableau.parametres = self.conf - tableau.dictpathout = PathOut(filename = tableau.parametres['filename'], dirout = self.conf['pathout'], analyse_type = self.conf['type']) - tableau.dictpathout.basefiles(ChdTxtPathOut) - tableau.read_tableau(tableau.dictpathout['db']) - if self.parent.tree.IsInTree(uuid = self.conf['uuid']) : - self.parent.tree.GiveFocus(uuid = self.conf['uuid'], bold = True) - else : - self.parent.tree.AddAnalyse(self.conf, bold = True) - self.doopen(tableau) + matrix = Tableau(self.parent, parametres = self.parent.history.matrix[self.parent.history.ordermatrix[self.conf['matrix']]]) + matrix.open() + #if isinstance(parametres, dict) : + # tableau = Tableau(parent, parametres['ira']) + #else : + # tableau = Tableau(parent, parametres) + #tableau.parametres = self.conf + #tableau.dictpathout = PathOut(filename = tableau.parametres['filename'], dirout = self.conf['pathout'], analyse_type = self.conf['type']) + #tableau.dictpathout.basefiles(ChdTxtPathOut) + #tableau.read_tableau(tableau.dictpathout['db']) + #if self.parent.tree.IsInTree(uuid = self.conf['uuid']) : + self.parent.tree.GiveFocus(uuid = self.conf['uuid'], bold = True) + self.doopen(matrix) + else : + self.parent.tree.AddAnalyse(self.conf, bold = True) self.parent.history.addtab(self.conf) def redopath(self, conf, path) : @@ -87,6 +97,25 @@ class OpenAnalyse(): self.parent.history.openedcorpus[self.conf['uuid']] = corpus self.opencorpus_analyses() self.doopen(corpus) + + def openmatrix(self): + log.info('open matrix') + if self.conf['uuid'] not in self.parent.history.ordermatrix : + self.parent.history.addMatrix(self.conf) + log.info('add matrix to history') + self.parent.tree.OnItemAppend(self.conf) + if self.conf['uuid'] in self.parent.history.openedmatrix : + log.info('matrix is already opened') + self.doopen(self.parent.history.openedmatrix[self.conf['uuid']]) + else : + #dial = progressbar(2) + #dial.Update(1, 'Ouverture du corpus') + matrix = Tableau(self, parametres = self.conf) + matrix.open() + self.parent.history.openedmatrix[self.conf['uuid']] = matrix + self.openmatrix_analyses() + self.doopen(matrix) + self.parent.history.addtab(self.conf) def opencorpus_analyses(self) : log.info('open analysis') @@ -103,6 +132,9 @@ class OpenAnalyse(): self.parent.history.addmultiple(analyses) for analyse in analyses : self.parent.tree.AddAnalyse(analyse, bold = False) + + def openmatrix_analyses(self): + pass def openanalyse(self) : if self.conf['corpus'] in self.parent.history.openedcorpus : @@ -134,7 +166,7 @@ class OpenAnalyse(): elif self.conf['type'] == 'wordcloud' or self.conf['type'] == 'clustercloud': self.parent.ShowMenu(_("Text analysis")) WordCloudLayout(self.parent, corpus, self.conf) - elif self.conf['type'] == 'gnepamatrix' : + elif self.conf['type'] == 'reinertmatrix' : #self.parent.ShowMenu(_("Spreadsheet analysis")) OpenCHDS(self.parent, corpus, self.conf, Alceste = False) elif self.conf['type'] == 'simimatrix' : @@ -142,4 +174,10 @@ class OpenAnalyse(): SimiMatLayout(self.parent, corpus, self.conf) elif self.conf['type'] == 'proto' : ProtoLayout(self.parent, corpus, self.conf) + elif self.conf['type'] == 'matrix' : + MatLayout(self.parent, corpus) + elif self.conf['type'] == 'freq' : + FreqLayout(self.parent, corpus, self.conf) + elif self.conf['type'] == 'chi2' : + Chi2Layout(self.parent, corpus, self.conf) diff --git a/parse_factiva_txt.py b/parse_factiva_txt.py index 21f70aa..18461e0 100644 --- a/parse_factiva_txt.py +++ b/parse_factiva_txt.py @@ -14,6 +14,31 @@ import re #encodage_in = 'utf8' #encodage_out = 'utf8' +mois = {u'janvier' : '01', + u'février' : '02', + u'mars' : '03', + u'avril' : '04', + u'mai' : '05', + u'juin' : '06', + u'juillet' : '07', + u'août' : '08', + u'septembre' : '09', + u'octobre' : '10', + u'novembre' : '11', + u'décembre' : '12', + u'january' : '01', + u'february': '02', + u'march' : '03', + u'april': '04', + u'may': '05', + u'june' : '06', + u'july': '07', + u'august': '08', + u'september' : '09', + u'october': '10', + u'november': '11', + u'december': '12'} + def parsetxtpaste(txt): """ @@ -26,21 +51,27 @@ def parsetxtpaste(txt): keepline = False ucis = [] for line in txt : - if line.startswith('Article') : + if line.startswith(u'Article') : lp = line.split() if len(lp) > 2 : - if lp[2] == 'Article' : + if lp[2] == u'Article' or lp[2] == u'Next' or lp[2] == u'Previous': ucis.append([[u'****'],'']) keepline = False if line.startswith('SN ') : #source - jsource = re.sub('[^A-Za-z0-9]', '', line[4:]) + jsource = re.sub(u'[\'" !\.?;,:\+\-°&]', '', line[4:]) source = u'_'.join([u'*source', jsource]).lower() #source = '*source_' + line[4:].replace(' ','').replace('\'','').replace(u'´','').replace(u'’','').replace('-','').lower() ucis[-1][0].append(source) elif line.startswith('PD ') : #date - mois_annee = '*ma_' + line[4:].split(' ')[1] + line[4:].split(' ')[2] - ucis[-1][0].append(mois_annee) - annee = u'*annee_' + line[4:].split(' ')[2] + datemois = line[4:].split(' ')[1].lower() + datemois = mois.get(datemois, datemois) + dateannee = line[4:].split(' ')[2] + datejour = '%02d' % int(line[4:].split(' ')[0]) + am = '_'.join([u'*am', dateannee, datemois]) + amj = '_'.join([u'*amj', dateannee, datemois, datejour]) + ucis[-1][0].append(am) + ucis[-1][0].append(amj) + annee = '_'.join([u'*annee', dateannee]) ucis[-1][0].append(annee) elif line.strip() in no : #fin keepline = False @@ -59,16 +90,18 @@ def print_ucis(ucis, ofile, encodage) : #elimination des articles vides ucis = [uci for uci in ucis if uci[1].strip() != ''] toprint = '\n\n'.join(['\n'.join([' '.join(uci[0]),uci[1]]) for uci in ucis]) - ofile.write(toprint.encode(encodage) + '\n') + ofile.write(toprint.encode(encodage, errors='replace') + '\n') class ParseFactivaPaste : def __init__(self, txtdir, fileout, encodage_in, encodage_out) : files = os.listdir(txtdir) + files = [f for f in files if f.split('.')[-1] == 'txt'] tot = 0 with open(fileout,'w') as outf : for f in files : print f f = os.path.join(txtdir, f) + print f with codecs.open(f, 'rU', encodage_in) as infile : content = infile.read() ucis = parsetxtpaste(content) diff --git a/tabchdalc.py b/tabchdalc.py index 4f79e4b..2b101dc 100644 --- a/tabchdalc.py +++ b/tabchdalc.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- #Author: Pierre Ratinaud #Copyright (c) 2008-2009 Pierre Ratinaud -#Lisense: GNU/GPL +#License: GNU/GPL from chemins import ConstructPathOut, ChdTxtPathOut, ConstructAfcUciPath, ffr, PathOut from functions import sortedby, CreateIraFile, print_liste, exec_rcode, check_Rresult @@ -17,57 +17,62 @@ import tempfile import time -class AnalyseQuest(): - def __init__(self, parent): - dlg = PrefQuestAlc(parent) - dlg.CenterOnParent() - self.val = dlg.ShowModal() - parametres = parent.tableau.parametre - if self.val == wx.ID_OK : - parametres['nbcl_p1'] = dlg.spin_nbcl.GetValue() - parametres['mincl'] = dlg.spin_mincl.GetValue() - if dlg.m_radioBox1.GetSelection() == 1 : - parametres['listact'] = dlg.nactives - parametres['listsup'] = dlg.varsup +class AnalyseQuest(AnalyseMatrix): + def doparametres(self, dlg = None): + if dlg is not None : + dial = PrefQuestAlc(self.parent, self.tableau) + dial.CenterOnParent() + self.val = dial.ShowModal() + #parametres = self.tableau.parametre + if self.val == wx.ID_OK : + self.parametres['nbcl_p1'] = dial.spin_nbcl.GetValue() + self.parametres['mincl'] = dial.spin_mincl.GetValue() + if dial.m_radioBox1.GetSelection() == 1 : + self.parametres['listact'] = dial.nactives + self.parametres['listsup'] = dial.varsup + else : + self.parametres['formatted'] = 1 else : - parametres['formatted'] = 1 - DoQuestAlceste(parent, parametres) + self.parametres = None + dial.Destroy() + # DoQuestAlceste(parent, parametres) -class DoQuestAlceste(AnalyseMatrix): - def __init__(self, parent, parametres): - parametres['pathout'] = ConstructPathOut(parent.tableau.parametre['filename'], 'gnepaMatrix') - self.parametres = parametres - self.parametres['type'] = 'gnepamatrix' +#class DoQuestAlceste(AnalyseMatrix): + def doanalyse(self): + #parametres['pathout'] = ConstructPathOut(parent.tableau.parametre['filename'], 'ReinertMatrix') + #self.parametres = parametres + #self.parametres['type'] = 'reinertmatrix' self.DictForme = {} self.DictFormeSup = {} self.Min = 10 self.Linecontent = [] - self.parent = parent - self.RPath = self.parent.PathPath.get('PATHS', 'rpath') + #self.parent = parent + #self.RPath = self.parent.PathPath.get('PATHS', 'rpath') #self.dictpathout = PathOut(dirout = self.pathout) #self.dictpathout = self.pathout #self.dictpathout.basefiles(ChdTxtPathOut) #self.pathout = self.dictpathout self.clnb = '' - self.ListAct = parametres.get('listact', False) + self.ListAct = self.parametres.get('listact', False) self.ucecla = '' - dlg = wx.ProgressDialog("Traitements", - "Veuillez patienter...", - maximum=5, - parent=self.parent, - style=wx.PD_APP_MODAL | wx.PD_AUTO_HIDE | wx.PD_ELAPSED_TIME - ) + #dlg = wx.ProgressDialog("Traitements", + # "Veuillez patienter...", + # maximum=5, + # parent=self.parent, + # style=wx.PD_APP_MODAL | wx.PD_AUTO_HIDE | wx.PD_ELAPSED_TIME + # ) - AnalyseMatrix.__init__(self, parent, parent.tableau, self.parametres, dlg = dlg) + #AnalyseMatrix.__init__(self, parent, parent.tableau, self.parametres, dlg = dlg) #----------------------------------------------------------- - def doanalyse(self) : +# def doanalyse(self) : #------------------------------------------------------------------- - self.dictpathout = self.pathout - self.dictpathout.basefiles(ChdTxtPathOut) - self.parent.tableau.dictpathout = self.dictpathout + #self.dictpathout = self.pathout + self.pathout.basefiles(ChdTxtPathOut) + self.tableau.pathout.basefiles(ChdTxtPathOut) +# self.parent.tableau.dictpathout = self.dictpathout self.dlg.Center() count = 1 @@ -76,22 +81,23 @@ class DoQuestAlceste(AnalyseMatrix): count += 1 self.dlg.Update(count, u"passage en O/1") if 'formatted' in self.parametres: - self.parent.tableau.make_01_alc_format(self.dictpathout['mat01']) + self.tableau.make_01_alc_format(self.pathout['mat01.csv']) else: - self.parent.tableau.make_01_from_selection(self.parametres['listact'], self.parametres['listsup']) - file = open(self.dictpathout['listeuce1'], 'w') + print self.parametres['listsup'] + self.tableau.make_01_from_selection(self.parametres['listact'], self.parametres['listsup']) + file = open(self.pathout['listeuce1'], 'w') file.write('num uce;num uc\n') - for i in range(0, len(self.parent.tableau.linecontent)): + for i in range(0, len(self.tableau.linecontent)): file.write('%i;%i\n' % (i, i)) file.close() - self.nbind = len(self.parent.tableau.linecontent) + self.nbind = len(self.tableau.linecontent) #------------------------------------------------------------ - RchdQuest(self.dictpathout, self.parent.RscriptsPath, self.parametres['nbcl_p1'], self.parametres['mincl']) + RchdQuest(self.pathout, self.parent.RscriptsPath, self.parametres['nbcl_p1'], self.parametres['mincl']) #------------------------------------------------------------ count += 1 self.dlg.Update(count, u"Analyse (patientez...)") - pid = exec_rcode(self.RPath, self.dictpathout['Rchdquest'], wait = False) + pid = exec_rcode(self.parent.RPath, self.pathout['Rchdquest'], wait = False) while pid.poll() == None : self.dlg.Pulse(u"Analyse (patientez...)") time.sleep(0.2) @@ -99,24 +105,24 @@ class DoQuestAlceste(AnalyseMatrix): #------------------------------------------------------------ count += 1 self.dlg.Update(count, u"Ecriture des résultats") - self.parent.tableau.buildprofil() - self.clnb = self.parent.tableau.clnb + self.tableau.buildprofil() + self.clnb = self.tableau.clnb self.parametres['clnb'] = self.clnb - self.ucecla = self.parent.tableau.ucecla + self.ucecla = self.tableau.ucecla self.BuildProfile() temps = time.time() - self.t1 PrintRapport(self, self, {}, istxt = False) - self.parent.tableau.save_tableau(self.dictpathout['db']) + self.tableau.save_tableau(self.pathout['db']) #CreateIraFile(self.dictpathout, self.clnb, corpname = os.path.basename(self.parent.filename), section = 'questionnaire') - afc_graph_list = [[os.path.basename(self.dictpathout['AFC2DL_OUT']), u'Variables actives - coordonnées - facteurs 1 / 2'], - [os.path.basename(self.dictpathout['AFC2DSL_OUT']), u'variables illustratives - coordonnées - facteurs 1 / 2'], - [os.path.basename(self.dictpathout['AFC2DCL_OUT']), u'Classes - Coordonnées - facteur 1 / 2'],] - chd_graph_list = [[os.path.basename(self.dictpathout['dendro1']), u'dendrogramme à partir de chd1']] - chd_graph_list.append([os.path.basename(self.dictpathout['arbre1']), u'chd1']) - print_liste(self.dictpathout['liste_graph_afc'], afc_graph_list) - print_liste(self.dictpathout['liste_graph_chd'], chd_graph_list) + afc_graph_list = [[os.path.basename(self.pathout['AFC2DL_OUT']), u'Variables actives - coordonnées - facteurs 1 / 2'], + [os.path.basename(self.pathout['AFC2DSL_OUT']), u'variables illustratives - coordonnées - facteurs 1 / 2'], + [os.path.basename(self.pathout['AFC2DCL_OUT']), u'Classes - Coordonnées - facteur 1 / 2'],] + chd_graph_list = [[os.path.basename(self.pathout['dendro1']), u'dendrogramme à partir de chd1']] + chd_graph_list.append([os.path.basename(self.pathout['arbre1']), u'chd1']) + print_liste(self.pathout['liste_graph_afc'], afc_graph_list) + print_liste(self.pathout['liste_graph_chd'], chd_graph_list) - self.tableau = self.parent.tableau + #self.tableau = self.parent.tableau #OpenCHDS(self.parent, self, self.dictpathout['ira'], False) #------------------------------------------------------------ print 'fini', time.time() - self.t1 @@ -131,13 +137,13 @@ class DoQuestAlceste(AnalyseMatrix): """ % self.parent.RscriptsPath['chdfunct'] txt += """ load("%s") - """ % self.dictpathout['RData'] + """ % self.pathout['RData'] txt += """ dataact<-read.csv2("%s", header = FALSE, sep = ';',quote = '\"', row.names = 1, na.strings = 'NA') - """ % self.dictpathout['Contout'] + """ % self.pathout['Contout'] txt += """ dataet<-read.csv2("%s", header = FALSE, sep = ';',quote = '\"', row.names = 1, na.strings = 'NA') - """ % self.dictpathout['ContEtOut'] + """ % self.pathout['ContEtOut'] txt += """ clnb<-%i """ % self.clnb @@ -145,7 +151,7 @@ class DoQuestAlceste(AnalyseMatrix): tablesqrpact<-BuildProf(as.matrix(dataact),n1,clnb) tablesqrpet<-BuildProf(as.matrix(dataet),n1,clnb) PrintProfile(n1,tablesqrpact[4],tablesqrpet[4],tablesqrpact[5],tablesqrpet[5],%i,"%s","%s") - """ % (self.clnb, self.dictpathout['PROFILE_OUT'], self.dictpathout['ANTIPRO_OUT']) + """ % (self.clnb, self.pathout['PROFILE_OUT'], self.pathout['ANTIPRO_OUT']) txt += """ colnames(tablesqrpact[[2]])<-paste('classe',1:clnb,sep=' ') colnames(tablesqrpact[[1]])<-paste('classe',1:clnb,sep=' ') @@ -155,13 +161,13 @@ class DoQuestAlceste(AnalyseMatrix): ptabletot<-rbind(as.data.frame(tablesqrpact[1]),as.data.frame(tablesqrpet[1])) gbcluster<-n1 write.csv2(chistabletot,file="%s") - """ % self.dictpathout['chisqtable'] + """ % self.pathout['chisqtable'] txt += """ write.csv2(ptabletot,file="%s") - """ % self.dictpathout['ptable'] + """ % self.pathout['ptable'] txt += """ write.csv2(gbcluster,file="%s") - """ % self.dictpathout['SbyClasseOut'] + """ % self.pathout['SbyClasseOut'] if self.clnb > 2 : txt += """ library(ca) @@ -182,7 +188,7 @@ class DoQuestAlceste(AnalyseMatrix): write.csv2(afc_table$facteur, file = "%s") write.csv2(afc_table$colonne, file = "%s") write.csv2(afc_table$ligne, file = "%s") - """ % (self.dictpathout['afc_facteur'], self.dictpathout['afc_col'], self.dictpathout['afc_row']) + """ % (self.pathout['afc_facteur'], self.pathout['afc_col'], self.pathout['afc_row']) txt += """ xlab <- paste('facteur 1 - ', round(afc$facteur[1,2],2), sep = '') @@ -196,16 +202,16 @@ class DoQuestAlceste(AnalyseMatrix): """ % "0.9" txt += """ xyminmax <- PlotAfc2dCoul(afc, as.data.frame(chistabletot), "%s", what='coord', deb=1, fin=(debet-1), xlab = xlab, ylab = ylab) - """ % (self.dictpathout['AFC2DL_OUT']) + """ % (self.pathout['AFC2DL_OUT']) txt += """ PlotAfc2dCoul(afc, as.data.frame(chistabletot), "%s", what='coord', deb=debet, fin=fin, xlab = xlab, ylab = ylab, xmin = xyminmax$xminmax[1], xmax = xyminmax$xminmax[2], ymin = xyminmax$yminmax[1], ymax = xyminmax$yminmax[2]) - """ % (self.dictpathout['AFC2DSL_OUT']) + """ % (self.pathout['AFC2DSL_OUT']) txt += """ PlotAfc2dCoul(afc, as.data.frame(chistabletot), "%s", col = TRUE, what='coord', xlab = xlab, ylab = ylab, xmin = xyminmax$xminmax[1], xmax = xyminmax$xminmax[2], ymin = xyminmax$yminmax[1], ymax = xyminmax$yminmax[2]) - """ % (self.dictpathout['AFC2DCL_OUT']) + """ % (self.pathout['AFC2DCL_OUT']) txt += """ save.image(file="%s") - """ % self.dictpathout['RData'] + """ % self.pathout['RData'] tmpfile = tempfile.mktemp(dir=self.parent.TEMPDIR) tmpscript = open(tmpfile, 'w') tmpscript.write(txt) diff --git a/tabchddist.py b/tabchddist.py index 18864a1..39c50ac 100644 --- a/tabchddist.py +++ b/tabchddist.py @@ -1,17 +1,13 @@ # -*- coding: utf-8 -*- #Author: Pierre Ratinaud #Copyright (c) 2008 Pierre Ratinaud -#Lisense: GNU/GPL +#License: GNU/GPL import wx import os -#from rchdng import RchdFunct from chemins import ffr, ConstructPathOut,ChdTxtPathOut -#from layout import PrintRapport -#from openanalyse import OpenAnalyse -from ConfigParser import ConfigParser from functions import CreateIraFile, print_liste, exec_rcode, check_Rresult -from dialog import CHDDialog, PrefQuestAlc, ClusterNbDialog +from dialog import PrefQuestAlc, ClusterNbDialog import tempfile import time @@ -51,9 +47,9 @@ def RchdFunct(self,parent, rep_out, CLASSIF, encode, RscriptsPath): time.sleep(0.2) check_Rresult(parent, pid) - file=open(fileout,'rU') - lcl=file.readlines() - file.close() + f=open(fileout,'rU') + lcl=f.readlines() + f.close() ListClasseOk=[line.replace('\n','').replace('"','') for line in lcl] ListClasseOk.pop(0) @@ -171,9 +167,9 @@ def RchdFunct(self,parent, rep_out, CLASSIF, encode, RscriptsPath): txt += """ PlotAfc2dCoul(afc, as.data.frame(chistabletot), "%s", col = TRUE, what='crl') """ % (rep_out['AFC2DCoulCl']) - file=open(Rtmp,'w') - file.write(txt) - file.close() + f=open(Rtmp,'w') + f.write(txt) + f.close() pid = exec_rcode(parent.RPath, Rtmp, wait = False) while pid.poll() == None : time.sleep(0.2) diff --git a/tabchi2.py b/tabchi2.py index 6a8cf55..422b447 100755 --- a/tabchi2.py +++ b/tabchi2.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- #Author: Pierre Ratinaud #Copyright (c) 2010 Pierre Ratinaud -#Lisense: GNU/GPL +#License: GNU/GPL import HTML import os @@ -16,6 +16,7 @@ import wx.lib.sized_controls as sc from time import sleep from functions import exec_rcode, check_Rresult from dialog import ChiDialog, PrefChi +from analysematrix import AnalyseMatrix def make_res(line) : if float(line[5]) <= 0.05 and line[6] != 'warning': @@ -45,11 +46,21 @@ def make_title(res, text) : return ['
%s
retour
' % (i, val[-1], text[i], i) for i, val in enumerate(res)] - +chioption = { 'valobs' : True, + 'valtheo' : True, + 'resi' : False, + 'contrib' : True, + 'pourcent' : False, + 'pourcentl' : True, + 'pourcentc' : True, + 'graph' : True, + 'bw' : False, + } class MakeChi2(): - def __init__(self, parent, select1, select2, chioption): + def __init__(self, parent, select1, select2, chioption, tableau): + self.tableau = tableau self.OutFrame=tempfile.mktemp(dir=parent.TEMPDIR) print self.OutFrame self.parent=parent @@ -59,7 +70,7 @@ class MakeChi2(): self.TextCroise=[] for i in select1 : for j in select2 : - self.TextCroise.append(parent.tableau.colnames[i] + ' / ' + parent.tableau.colnames[j]) + self.TextCroise.append(self.tableau.colnames[i] + ' / ' + self.tableau.colnames[j]) rchioption = {} for val in chioption : if chioption[val]: @@ -88,11 +99,11 @@ class MakeChi2(): bw <- %s """ % (rchioption['valobs'], rchioption['valtheo'], rchioption['contrib'], rchioption['resi'], rchioption['pourcent'], rchioption['pourcentl'], rchioption['pourcentc'], rchioption['graph'], rchioption['bw']) txt+=""" - datadm <- ReadData("%s", encoding="%s", header = TRUE, sep = "%s",quote = '%s', na.strings = "%s",rownames= 1) + datadm <- read.csv2("%s", encoding="%s", header = TRUE, row.names = 1, sep='\\t', quote = '"', na.string = '') listres<-list() listcol<-list() cont<-1 - """%(ffr(parent.tableau.parametre['csvfile']),self.parent.encode, parent.tableau.parametre['colsep'], parent.tableau.parametre['txtsep'], self.parent.nastrings) + """%(ffr(self.tableau.parametres['csvfile']), self.tableau.parametres['syscoding']) if len(select1)==1: strsel1=str(select1).replace(',','') else: @@ -148,9 +159,7 @@ class MakeChi2(): chi$prl <- round((chi$observed/sr)*100,2) chi$prc <- t(round((t(chi$observed)/sc)*100,2)) } - fileout<-paste('histo',%i,sep='') - fileout<-paste(fileout,'_',sep='') - fileout<-paste(fileout,count,sep='') + fileout<-paste('histo_',count,sep='') fileout<-paste(fileout,'.png',sep='') count<-count+1 fileout<-file.path("%s",fileout) @@ -258,7 +267,7 @@ class MakeChi2(): li<-matrix('fin_analyse',1,maxcol) frameout<-rbind(frameout,li) write.csv2(frameout,file="%s") - """%(parent.FreqNum,ffr(parent.TEMPDIR),ffr(self.OutFrame)) + """ % (ffr(parent.TEMPDIR),ffr(self.OutFrame)) tmpfile=tempfile.mktemp(dir=self.TEMPDIR) print tmpfile tmpscript=open(tmpfile,'w') @@ -350,77 +359,381 @@ class MakeChi2(): txt = '


\n'.join(['

'.join([tab[i] for tab in allhtml]) for i,val in enumerate(res)]) txt = header + pretxt + txt + '\n' - fileout=os.path.join(self.TEMPDIR,'resultats-chi2_%s.html'%str(self.parent.FreqNum)) - file=open(fileout,'w') - file.write(txt) - file.close() + fileout=os.path.join(self.parametres['pathout'],'resultats-chi2.html') + with open(fileout, 'w') as f : + f.write(txt) ListFile.append(fileout) return ListFile -class ChiSquare(): - def __init__(self,parent): - chioption = { 'valobs' : True, - 'valtheo' : True, - 'resi' : False, - 'contrib' : True, - 'pourcent' : False, - 'pourcentl' : True, - 'pourcentc' : True, - 'graph' : True, - 'bw' : False, - } - dlg = ChiDialog(parent, -1, u"Chi2", chioption, size=(400, 350), +class ChiSquare(AnalyseMatrix): + def doparametres(self, dlg = None): + if dlg is None : + return + dial = ChiDialog(self.parent, -1, u"Chi2", chioption, self.tableau, size=(400, 350), style = wx.DEFAULT_DIALOG_STYLE ) - dlg.CenterOnParent() - val = dlg.ShowModal() - if val==wx.ID_OK : - self.dlg=wx.ProgressDialog("Traitements", - "Veuillez patienter...", - maximum = 4, - parent=parent, - style = wx.PD_APP_MODAL|wx.PD_AUTO_HIDE|wx.PD_ELAPSED_TIME - ) - self.dlg.Center() + dial.CenterOnParent() + val = dial.ShowModal() + if val==wx.ID_OK : + dlg.Center() self.count = 1 - keepGoing = self.dlg.Update(self.count) + keepGoing = dlg.Update(self.count) - ColSel1 = dlg.list_box_1.GetSelections() - ColSel2 = dlg.list_box_2.GetSelections() - if dlg.chiopt : - chioption['valobs'] = dlg.dial.check1.GetValue() - chioption['valtheo'] = dlg.dial.check2.GetValue() - chioption['resi'] = dlg.dial.check3.GetValue() - chioption['contrib'] = dlg.dial.check4.GetValue() - chioption['pourcent'] = dlg.dial.check5.GetValue() - chioption['pourcentl'] = dlg.dial.check6.GetValue() - chioption['pourcentc'] = dlg.dial.check7.GetValue() - chioption['graph'] = dlg.dial.check8.GetValue() - chioption['bw'] = dlg.dial.checkbw.GetValue() - dlg.dial.Destroy() + self.colsel1 = dial.list_box_1.GetSelections() + self.colsel2 = dial.list_box_2.GetSelections() + if dial.chiopt : + chioption['valobs'] = dial.dial.check1.GetValue() + chioption['valtheo'] = dial.dial.check2.GetValue() + chioption['resi'] = dial.dial.check3.GetValue() + chioption['contrib'] = dial.dial.check4.GetValue() + chioption['pourcent'] = dial.dial.check5.GetValue() + chioption['pourcentl'] = dial.dial.check6.GetValue() + chioption['pourcentc'] = dial.dial.check7.GetValue() + chioption['graph'] = dial.dial.check8.GetValue() + chioption['bw'] = dial.dial.checkbw.GetValue() + dial.dial.Destroy() + dial.Destroy() + self.parametres.update(chioption) + self.chioption = chioption + else : + if dial.chiopt : + dial.dial.Destroy() + dial.Destroy() + self.parametres = None - self.count += 1 - keepGoing = self.dlg.Update(self.count,u"Analyse dans R...") - analyse=MakeChi2(parent,ColSel1,ColSel2, chioption) + def doanalyse(self): + + + #self.dlg=wx.ProgressDialog("Traitements", + # "Veuillez patienter...", + # maximum = 4, + # parent=parent, + # style = wx.PD_APP_MODAL|wx.PD_AUTO_HIDE|wx.PD_ELAPSED_TIME + # ) + + - self.count += 1 - keepGoing = self.dlg.Update(self.count,u"Ecriture des résultats") + self.count += 1 + keepGoing = self.dlg.Update(self.count,u"Analyse dans R...") + #analyse=MakeChi2(self.parent, self.colsel1, self.colsel2, self.chioption, self.tableau) + #self.tableau = tableau + self.OutFrame=tempfile.mktemp(dir=self.parent.TEMPDIR) + print self.OutFrame + #self.parent=parent + self.encode=self.parent.encode + self.TEMPDIR=self.parent.TEMPDIR + self.RPath=self.parent.PathPath.get('PATHS','rpath') + self.TextCroise=[] + for i in self.colsel1 : + for j in self.colsel2 : + self.TextCroise.append(self.tableau.colnames[i] + ' / ' + self.tableau.colnames[j]) + rchioption = {} + for val in self.chioption : + if self.chioption[val]: + rchioption[val] = 'TRUE' + else : + rchioption[val] = 'FALSE' + txt=""" + source("%s") + """%self.parent.RscriptsPath['Rfunct'] +# if parent.tableau.: rownames=1 +# else : rownames='NULL' +# if parent.g_header : header = 'TRUE' +# else : header = 'FALSE' + txt += """ + source("%s") + """ % ffr(self.parent.RscriptsPath['Rgraph']) + txt += """ + doobs <- %s + doexp <- %s + docontrib <- %s + doresi <- %s + dopr <- %s + doprl <- %s + doprc <- %s + dograph <- %s + bw <- %s + """ % (rchioption['valobs'], rchioption['valtheo'], rchioption['contrib'], rchioption['resi'], rchioption['pourcent'], rchioption['pourcentl'], rchioption['pourcentc'], rchioption['graph'], rchioption['bw']) + txt+=""" + datadm <- read.csv2("%s", encoding="%s", header = TRUE, row.names = 1, sep='\\t', quote = '"', na.string = '') + listres<-list() + listcol<-list() + cont<-1 + """%(ffr(self.tableau.parametres['csvfile']), self.tableau.parametres['syscoding']) + if len(self.colsel1)==1: + strsel1=str(self.colsel1).replace(',','') + else: + strsel1=str(self.colsel1) + if len(self.colsel2)==1: + strsel2=str(self.colsel2).replace(',','') + else: + strsel2=str(self.colsel2) + txt+=""" + for (i in c%s) {""" % strsel1 + txt+=""" + for (j in c%s) {""" % strsel2 + txt+=""" + tab<-table(datadm[,i+1],datadm[,j+1]) + if (min(dim(tab)) != 1) { + chi<-chisq.test(tab) + CS<-colSums(tab) + RS<-rowSums(tab) + GT<-sum(tab) + chi$contrib<-(tab-chi$expected)/sqrt(chi$expected * ((1 - RS/GT) %%*%% t(1 - CS/GT))) + listres[[cont]]<-chi + listcol[[cont]]<-ncol(tab) + cont<-cont+1 + } else { + chi <- list(observed = tab, residuals = tab, contrib = tab, statistic = 0, p.value = 1, expected = tab, message = 'pas de calcul') + listres[[cont]] <- chi + listcol[[cont]]<-ncol(tab) + cont <- cont + 1 + } + } + } + maxcol<-max(unlist(listcol))+1 + if (maxcol<7) {maxcol<-7} + frameout<-matrix('*',1,maxcol) + count<-0 + for (chi in listres) { + if (min(chi$expected)<5) { + att<-"warning" + } else { + att<-"" + } + if ('message' %%in%% attributes(chi)$names) { + att <- "Ce chi2 n\'a pas été calculé" + nom_colresi<-colnames(chi$observed) + chi$prl <- chi$expected + chi$prc <- chi$expected + st <- sum(chi$observed) + } else { + nom_colresi<-colnames(chi$observed) + st <- sum(chi$observed) + sc <- colSums(chi$observed) + sr <- rowSums(chi$observed) + chi$prl <- round((chi$observed/sr)*100,2) + chi$prc <- t(round((t(chi$observed)/sc)*100,2)) + } + fileout<-paste('histo_',count,sep='') + fileout<-paste(fileout,'.png',sep='') + count<-count+1 + fileout<-file.path("%s",fileout) + if (max(nchar(colnames(chi$observed)))>15) { + leg <- 1:length(colnames(chi$observed)) + } else { + leg <- colnames(chi$observed) + } + if (dograph) { + width<-ncol(chi$observed)*100 + if (width < 350) {width <- 350} + open_file_graph(fileout,width = width, height = 300) + par(mar=c(0,0,0,0)) + layout(matrix(c(1,2),1,2, byrow=TRUE),widths=c(3,1)) + par(mar=c(2,2,1,0)) + par(cex=0.8) + if (!bw) colors <- rainbow(length(rownames(chi$observed))) + else colors <- gray.colors(length(rownames(chi$observed))) + barplot(chi$prl,names.arg = leg, beside=TRUE,border=NA, col=colors) + par(mar=c(0,0,0,0)) + par(cex=0.8) + plot(0, axes = FALSE, pch = '') + legend(x = 'center' , rownames(chi$observed), fill = colors) + dev.off() + } + chi$prl <- cbind(chi$prl, total = rowSums(chi$prl)) + chi$prc <- rbind(chi$prc, total = colSums(chi$prc)) + chi$observed<-rbind(chi$observed,total=colSums(chi$observed)) + chi$observed<-cbind(chi$observed,total=rowSums(chi$observed)) + chi$pr <- round((chi$observed/st)*100,2) + chi$expected<-rbind(chi$expected,total=colSums(chi$expected)) + chi$expected<-cbind(chi$expected,total=rowSums(chi$expected)) + chi$expected<-round(chi$expected,digits=2) + chi$residuals<-round(chi$residuals,digits=2) + chi$contrib<-round(chi$contrib, digits=2) + nom_col<-colnames(chi$observed) + + if (ncol(chi$observed)\n + \n + \n +

Test du Chi2

\n +
+
+ Légende :
+ p <= 0.05
+ p <= 0.05 mais il y a des valeurs théoriques < 5
+ p > 0.05 +


+ """%self.parent.SysEncoding + + + pretxt = '
\n'.join(links)+'


\n' + txt = '


\n'.join(['

'.join([tab[i] for tab in allhtml]) for i,val in enumerate(res)]) + txt = header + pretxt + txt + '\n' + + fileout=os.path.join(self.parametres['pathout'],'resultats-chi2.html') + with open(fileout, 'w') as f : + f.write(txt) + ListFile.append(fileout) + return ListFile \ No newline at end of file diff --git a/tabfrequence.py b/tabfrequence.py index 4eb42ee..563e5b4 100644 --- a/tabfrequence.py +++ b/tabfrequence.py @@ -2,154 +2,110 @@ # -*- coding: utf-8 -*- #Author: Pierre Ratinaud #Copyright (c) 2008 Pierre Ratinaud -#Lisense: GNU/GPL +#License: GNU/GPL #from __future__ import division import os -import sys import wx -import wx.html from chemins import ffr, FFF import tempfile from time import sleep +from analysematrix import AnalyseMatrix from functions import exec_rcode, check_Rresult from dialog import FreqDialog +from PrintRScript import PrintRScript - -class Frequences(): - def __init__(self, parent): - #self.Filename = parent.filename - self.fileforR = parent.tableau.parametre['csvfile'] - self.TEMPDIR = parent.TEMPDIR - self.num = parent.FreqNum - self.DICTFILE = {} - self.RPath = parent.PathPath.get('PATHS', 'rpath') - self.parent=parent - self.tableau = parent.tableau - dlg = FreqDialog(parent, -1, self.tableau.get_colnames(), u"Fréquences", size=(350, 200)) - dlg.CenterOnParent() - val = dlg.ShowModal() - if val == wx.ID_OK : - ColSel = dlg.list_box_1.GetSelections() - self.header=dlg.header - dlg.Destroy() - listfileout = self.ShowFreq(ColSel) - parent.FreqNum += 1 - parent.DictTab[u"Fréquences_%s*" % parent.FreqNum] = listfileout - parent.FileTabList.append(listfileout) - parent.newtab = wx.html.HtmlWindow(parent.nb, -1) - if "gtk2" in wx.PlatformInfo: - parent.newtab.SetStandardFonts() - parent.newtab.LoadPage(listfileout[len(listfileout) - 1]) - parent.nb.AddPage(parent.newtab, u"Fréquences_%s*" % parent.FreqNum) - parent.nb.SetSelection(parent.nb.GetPageCount() - 1) - parent.ShowAPane("Tab_content") - parent.DisEnSaveTabAs(True) +class Frequences(AnalyseMatrix) : + def doparametres(self, dlg=None) : + if dlg is None : + return else : - dlg.Destroy() - - def ShowFreq(self, select): - listfile = [] - listfile.append(False) - self.ListFileForR = [] - self.ListTitre = [] - self.OutFrame = tempfile.mktemp(dir=self.TEMPDIR) - if self.parent.g_id: rownames = '1' - else: rownames = 'NULL' - if self.parent.g_header : header = 'TRUE' - else : header = 'FALSE' - self.ListTitre = [self.header[i] for i in select] - self.ListFileForR = [ffr(os.path.join(self.TEMPDIR, 'freq%s_%s.jpeg' % (str(self.num), i))) for i in range(len(select))] - listfile = [os.path.join(self.TEMPDIR, 'freq%s_%s.jpeg' % (str(self.num), i)) for i in range(len(select))] + dial = FreqDialog(self.parent, -1, self.tableau.get_colnames(), u"Fréquences", size=(350, 200)) + dial.CenterOnParent() + val = dial.ShowModal() + if val == wx.ID_OK : + self.parametres['colsel'] = dial.list_box_1.GetSelections() + self.parametres['header'] = dial.header + else : + self.parametres = None + dial.Destroy() + + def doanalyse(self): + self.pathout.createdir(self.parametres['pathout']) + header = self.tableau.get_colnames() + select = self.parametres['colsel'] + self.listtitre = [header[i] for i in select] + b, self.outframe = tempfile.mkstemp() + self.fileforR = [ffr(os.path.join(self.pathout.dirout, 'freq_%i.png' % i)) for i in range(len(select))] + self.Rscript = PrintRScript(self) sel = 'c(' + ','.join([str(val + 1) for val in select]) + ')' - listfiles = 'c("' + '","'.join(self.ListFileForR) + '")' - titles = 'c("' + '","'.join(self.ListTitre) + '")' + listfiles = 'c("' + '","'.join(self.fileforR) + '")' + titles = 'c("' + '","'.join(self.listtitre) + '")' txt = """ - source("%s") - """ % self.parent.RscriptsPath['Rfunct'] - + filein <- "%s" + encoding <- '%s' + dm <- read.csv2(filein, encoding = encoding, header = TRUE, row.names = 1, sep='\\t', quote = '"', na.string = '') + """ %(ffr(self.tableau.parametres['csvfile']), self.tableau.parametres['syscoding']) txt += """ - datadm <- ReadData("%s", encoding="%s", header = TRUE, sep = ";",quote = "\\%s", na.strings = "%s",rownames=1) - """ % (ffr(self.fileforR), self.parent.encode, self.parent.tableau.parametre['txtsep'], self.parent.nastrings) - txt += """ - outframe<-data.frame(cbind('***','****')) - colnames(outframe)<-c('effectif','pourcentage') + outframe <- data.frame(cbind('***','****','****')) + colnames(outframe)<-c('effectif','pourcentage', 'labels') select <- %s listfiles <- %s titles <- %s compteur <- 1 """ % (sel, listfiles, titles) + txt += """ for (i in select) { - datasum<-as.matrix(summary(datadm[,i])) - if (rownames(datasum)[1]=='Min.' && rownames(datasum)[3]=='Median') { - dtype<-'num' - } else if (datasum[1] == "logical") { - dtype <- 'char' - datasum <- as.matrix(as.integer(datasum[2])) - rownames(datasum) <- 'NA' - } else { - dtype<-'char' - } - datasum<-as.data.frame(datasum) - if (dtype=='char') { - datasum[,2]<-round((datasum[,1]/sum(datasum[,1]))*100,digits=2) - } else { - datasum[,2]<-datasum[,1] - } - colnames(datasum)<-c('effectif','pourcentage') + freq <- table(dm[,i]) + sumfreq <- sum(freq) + pour <- prop.table(as.matrix(freq), 2) * 100 + sumpour <- sum(pour) + ntable <- cbind(as.matrix(freq), pour) graphout <- listfiles[compteur] if (Sys.info()["sysname"]=='Darwin') { - quartz(file=graphout,type='jpeg') + quartz(file=graphout,type='png') par(cex=1) } else { - jpeg(graphout,res=200) + png(graphout) par(cex=0.3) } - if (max(nchar(rownames(datasum))) > 15) { - lab.bar <- 1:nrow(datasum) + if (max(nchar(rownames(ntable))) > 15) { + lab.bar <- 1:nrow(ntable) } else { - lab.bar <- rownames(datasum) + lab.bar <- rownames(ntable) } - barplot(datasum[,2],border=NA,beside=TRUE,names.arg=lab.bar) + barplot(ntable[,2],border=NA,beside=TRUE,names.arg=lab.bar) + ntable <- cbind(ntable, rownames(as.matrix(freq))) + colnames(ntable) <- c('effectif','pourcentage', 'labels') title(main=titles[compteur]) dev.off() - datasum<-rbind(datasum,total=colSums(datasum)) - outframe<-rbind(outframe,c('***','****')) - datasum[,1]<-as.character(datasum[,1]) - datasum[,2]<-as.character(datasum[,2]) - outframe<-rbind(outframe,datasum) + ntable<-rbind(ntable,total=c(sumfreq,sumpour,'')) + outframe<-rbind(outframe,c('***','****','****')) + #datasum[,1]<-as.character(datasum[,1]) + #datasum[,2]<-as.character(datasum[,2]) + outframe<-rbind(outframe,ntable) compteur <- compteur + 1 - } - outframe<-rbind(outframe,c('***','****')) - write.csv2(outframe,file="%s") - """ % ffr(self.OutFrame) - tmpfile = tempfile.mktemp(dir=self.TEMPDIR) - tmpscript = open(tmpfile, 'w') - tmpscript.write(txt) - tmpscript.close() - pid = exec_rcode(self.RPath, tmpfile, wait = False) - while pid.poll() == None : - sleep(0.2) - check_Rresult(self.parent, pid) - fileout = self.DoLayout() - listfile.append(fileout) - self.DICTFILE[self.num] = listfile - return listfile - - - def DoLayout(self): + } + outframe<-rbind(outframe,c('***','****','****')) + write.table(outframe, file="%s", sep="\\t") + """ % ffr(self.outframe) + self.Rscript.add(txt) + self.Rscript.write() + self.doR(self.Rscript.scriptout) + self.dolayout() + + def dolayout(self): listtab = [] tab = [] - filein = open(self.OutFrame, 'rU') - content = filein.readlines() - filein.close() + with open(self.outframe) as f : + content = f.read().splitlines() content.pop(0) content.pop(0) texte = '' for ligne in content: - ligne = ligne.replace('"', '').replace('\n', '') - ligne = ligne.split(';') + ligne = ligne.replace('"', '') + ligne = ligne.split('\t') if ligne[1] == u'***' : if tab != []: listtab.append(tab) @@ -162,27 +118,25 @@ class Frequences():
''' % self.parent.SysEncoding for i in range(0, len(listtab)): - pretexte += '

%s

' % (str(i), self.ListTitre[i]) + pretexte += '

%s

' % (str(i), self.listtitre[i]) texte += '
\n' texte += '

Retour

\n' - texte += '

%s

\n' % (str(i), self.ListTitre[i]) + texte += '

%s

\n' % (str(i), self.listtitre[i]) texte += '\n' texte += """
\n' texte += '' for line in listtab[i] : texte += '' texte += """ - """ % (line[0], line[1], line[2]) + """ % (line[3], line[1], line[2]) texte += '' texte += '
Effectifspourcentage
%s%s%s %%
graph
\n - """ % os.path.basename(self.ListFileForR[i]) + """ % os.path.basename(self.fileforR[i]) texte += '\n' - fileout = os.path.join(self.TEMPDIR, 'resultats%s-freq.html' % str(self.num)) - FILE = open(fileout, 'w') - FILE.write(pretexte + texte) - FILE.close() - return fileout - - + fileout = os.path.join(self.pathout.dirout, 'resultats.html') + with open(fileout, 'w') as f : + f.write(pretexte + texte) + #return fileout + \ No newline at end of file diff --git a/tableau.py b/tableau.py index 514c8cf..6fa6abc 100644 --- a/tableau.py +++ b/tableau.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- #Author: Pierre Ratinaud #Copyright (c) 2010 Pierre Ratinaud -#Lisense: GNU/GPL +#License: GNU/GPL import codecs import sys @@ -12,7 +12,9 @@ import tempfile import re import htmlentitydefs import shelve +from functions import DoConf from uuid import uuid4 +from chemins import PathOut import logging log = logging.getLogger('iramuteq.tableau') @@ -51,18 +53,41 @@ def UpdateDico(Dico, word, line): Dico[word][1].append(line) else: Dico[word] = [1, [line]] + +def copymatrix(tableau): + log.info('copy matrix') + copymat = Tableau(tableau.parent, parametres = tableau.parametres) + copymat.linecontent = tableau.linecontent + copymat.csvtable = tableau.csvtable + copymat.pathout = tableau.pathout + copymat.colnames = tableau.colnames + copymat.rownb = tableau.rownb + copymat.colnb = tableau.colnb + if copymat.csvtable is None : + copymat.open() + return copymat class Tableau() : - def __init__(self, parent, filename = '', filetype = 'csv', encodage = 'utf-8') : + def __init__(self, parent, filename = '', filetype = 'csv', encodage = 'utf-8', parametres = None) : self.parent = parent - self.parametre = {'filename' : filename} - self.parametre['filetype'] = filetype - self.parametre['encodage'] = encodage - self.parametre['pathout'] = os.path.dirname(os.path.abspath(filename)) - self.parametre['mineff'] = 3 - self.parametre['syscoding'] = sys.getdefaultencoding() - self.parametre['type'] = 'matrix' - self.parametre['name'] = 'unNOm' + if parametres is None : + self.parametres = DoConf(os.path.join(self.parent.UserConfigPath,'matrix.cfg')).getoptions('matrix') + self.parametres['pathout'] = PathOut(filename, 'matrix').mkdirout() + self.parametres['originalpath'] = filename + self.parametres['filetype'] = filetype + self.parametres['encodage'] = encodage + #self.parametre['pathout'] = os.path.dirname(os.path.abspath(filename)) + self.parametres['mineff'] = 3 + self.parametres['syscoding'] = sys.getdefaultencoding() + self.parametres['type'] = 'matrix' + self.parametres['matrix_name'] = os.path.basename(filename) + self.parametres['uuid'] = str(uuid4()) + self.parametres['shelves'] = os.path.join(self.parametres['pathout'], 'shelve.db') + self.parametres['ira'] = os.path.join(self.parametres['pathout'], 'Matrix.ira') + else : + self.parametres = parametres + self.pathout = PathOut(filename = filename, dirout = self.parametres['pathout']) + self.csvtable = None self.sups = {} self.actives = {} self.listactives = None @@ -77,13 +102,13 @@ class Tableau() : self.colnb = 0 self.rownb = 0 self.classes = [] - self.parametres = self.parametre + #self.parametres = self.parametre def read_tableau(self, fileout) : d=shelve.open(fileout) - self.parametre = d['parametre'] - if 'syscoding' not in self.parametre : - self.parametre['syscoding'] = sys.getdefaultencoding() + #self.parametres = d['parametres'] + #if 'syscoding' not in self.parametres : + # self.parametres['syscoding'] = sys.getdefaultencoding() self.actives = d['actives'] self.sups = d['sups'] self.classes = d['classes'] @@ -96,11 +121,20 @@ class Tableau() : self.datas = d['datas'] if 'lchi' in d : self.lchi = d['lchi'] + if 'content' in d : + self.content = d['content'] d.close() + + def open(self): + print 'open matrix' + self.read_csvfile() + self.colnames = self.csvtable[0][1:] + self.rownb = len(self.linecontent) + self.colnb = len(self.linecontent[0]) def save_tableau(self, fileout) : d=shelve.open(fileout) - d['parametre'] = self.parametre + d['parametres'] = self.parametres d['actives'] = self.actives d['sups'] = self.sups d['classes'] = self.classes @@ -113,28 +147,32 @@ class Tableau() : d['datas'] = self.datas if 'lchi' in dir(self) : d['lchi'] = self.lchi + d['content'] = self.content d.close() def make_content(self) : - if self.parametre['filetype'] == 'csv' : + self.pathout.createdir(self.parametres['pathout']) + if self.parametres['filetype'] == 'csv' : self.read_csv() - elif self.parametre['filetype'] == 'xls' : + elif self.parametres['filetype'] == 'xls' : self.read_xls() - elif self.parametre['filetype'] == 'ods' : + elif self.parametres['filetype'] == 'ods' : self.read_ods() - self.parametre['csvfile'] = tempfile.mktemp(dir=self.parent.TEMPDIR) + self.parametres['csvfile'] = os.path.join(self.parametres['pathout'], 'csvfile.csv') self.make_tmpfile() + DoConf().makeoptions(['matrix'],[self.parametres], self.parametres['ira']) + self.parent.history.addMatrix(self.parametres) def read_xls(self) : #FIXME : encodage #print '############## ENCODING IN EXCEL #######################' #datafile = xlrd.open_workbook(self.parametre['filename'], encoding_override="azerazerazer") - datafile = xlrd.open_workbook(self.parametre['filename']) - datatable = datafile.sheet_by_index(self.parametre['sheetnb']-1) - self.linecontent = [[str(datatable.cell_value(rowx = i, colx = j)) for j in range(datatable.ncols)] for i in range(datatable.nrows)] + datafile = xlrd.open_workbook(self.parametres['originalpath']) + datatable = datafile.sheet_by_index(self.parametres['sheetnb']-1) + self.linecontent = [[str(datatable.cell_value(rowx = i, colx = j)).replace(u'"','').replace(u';','').replace(u'\n',' ').strip() for j in range(datatable.ncols)] for i in range(datatable.nrows)] def read_ods(self) : - doc = ooolib.Calc(opendoc=self.parametre['filename']) + doc = ooolib.Calc(opendoc=self.parametres['originalpath']) doc.set_sheet_index(0) (cols, rows) = doc.get_sheet_dimensions() for row in range(1, rows + 1): @@ -142,19 +180,20 @@ class Tableau() : for col in range(1, cols + 1): data = doc.get_cell_value(col, row) if data is not None : - ligne.append(unescape(data[1])) + ligne.append(unescape(data[1].replace(u'"','').replace(u';','').replace(u'\n', ' ').strip())) else : ligne.append('') self.linecontent.append(ligne) def read_csv(self) : - with codecs.open(self.parametre['filename'], 'r', self.parametre['encodage']) as f : + with codecs.open(self.parametres['originalpath'], 'r', self.parametres['encodage']) as f : content = f.read() - self.linecontent = [line.replace('"','').split(self.parametre['colsep']) for line in content.splitlines()] + self.linecontent = [line.split(self.parametres['colsep']) for line in content.splitlines()] + self.linecontent = [[val.replace(u'"','').strip() for val in line] for line in self.linecontent] def write_csvfile(self) : - with open(self.parametre['csvfile'], 'w') as f : - f.write('\n'.join([';'.join(line) for line in self.csvtable])) + with open(self.parametres['csvfile'], 'w') as f : + f.write('\n'.join(['\t'.join(line) for line in self.csvtable])) def make_tmpfile(self) : self.rownb = len(self.linecontent) @@ -178,14 +217,11 @@ class Tableau() : self.csvtable = [[self.idname] + self.colnames] + [[self.rownames[i]] + self.linecontent[i] for i in range(len(self.rownames))] self.write_csvfile() - def show_tab(self) : - self.parent.content = self.csvtable - self.parent.ShowMenu(_("View")) - self.parent.ShowMenu(_("Spreadsheet analysis")) - self.parent.ShowMenu(_("Text analysis"), False) - self.parent.type = "Data" - self.parent.DataPop = False - self.parent.OnViewData('') + def read_csvfile(self): + with codecs.open(self.parametres['csvfile'], 'r', self.parametres['syscoding']) as f: + self.csvtable = [line.split('\t') for line in f.read().splitlines()] + self.linecontent = [line[1:] for line in self.csvtable] + self.linecontent.pop(0) def check_rownames(self) : if len(self.rownames) == len(list(set(self.rownames))) : @@ -197,9 +233,9 @@ class Tableau() : def make_unique_list(self) : return list(set([val for line in self.linecontent for val in line if val.strip() != ''])) - def make_dico(self, linecontent) : + def make_dico(self, selcol) : dico = {} - for i, line in enumerate(linecontent) : + for i, line in enumerate(selcol) : for forme in line: if forme.strip() != '' : UpdateDico(dico, forme, i) @@ -216,11 +252,11 @@ class Tableau() : return [[val, self.actives[val][0]] for val in self.actives] def make_listactives(self) : - self.listactives = [val for val in self.actives if val != 'NA' and self.actives[val] >= self.parametre['mineff']] + self.listactives = [val for val in self.actives if val != 'NA' and self.actives[val] >= self.parametres['mineff']] def write01(self, fileout, dico, linecontent) : if self.listactives is None : - self.listactives = [val for val in dico if val != 'NA' and dico[val] >= self.parametre['mineff']] + self.listactives = [val for val in dico if val != 'NA' and dico[val] >= self.parametres['mineff']] out = [['0' for forme in self.listactives] for line in linecontent] for i, forme in enumerate(self.listactives) : for line in dico[forme][1] : @@ -234,7 +270,7 @@ class Tableau() : def make_01_from_selection(self, listact, listsup = None, dowrite = True) : selcol = self.select_col(listact) self.actives = self.make_dico(selcol) - self.write01(self.dictpathout['mat01'], self.actives, selcol) + self.write01(self.pathout['mat01.csv'], self.actives, selcol) if listsup is not None : selcol = self.select_col(listsup) self.sups = self.make_dico(selcol) @@ -247,7 +283,7 @@ class Tableau() : UpdateDico(self.sups, forme, i) else: UpdateDico(self.actives, forme, i) - self.listactives = [val for val in self.actives if self.actives[val][0] >= self.parametre['mineff']] + self.listactives = [val for val in self.actives if self.actives[val][0] >= self.parametres['mineff']] table = [['0' for i in range(len(self.listactives))] for j in range(self.rownb)] for i, val in enumerate(self.listactives) : for j, line in enumerate(self.linecontent) : @@ -259,12 +295,12 @@ class Tableau() : with open(fileout, 'w') as f: f.write('\n'.join([';'.join(line) for line in table])) - def printtable(self, filename, Table): + def printtable(self, filename, Table, sep = ';'): with open(filename, 'w') as f : - f.write('\n'.join([';'.join(line) for line in Table])) + f.write('\n'.join([sep.join(line) for line in Table])) def buildprofil(self) : - with open(self.dictpathout['uce'], 'rU') as filein : + with open(self.pathout['uce'], 'rU') as filein : content = filein.readlines() content.pop(0) lsucecl = [] @@ -291,7 +327,7 @@ class Tableau() : if cl == i + 1 : if active in self.linecontent[uce]: line[i + 1] += 1 - if sum(line[1:]) > self.parametre['mineff']: + if sum(line[1:]) > self.parametres['mineff']: tablecont.append([line[0]] + [`don` for don in line if type(don) == type(1)]) tablecontet = [] @@ -306,8 +342,8 @@ class Tableau() : line[i + 1] += 1 tablecontet.append([line[0]] + [`don` for don in line if type(don) == type(1)]) - self.printtable(self.dictpathout['ContEtOut'], tablecontet) - self.printtable(self.dictpathout['Contout'], tablecont) + self.printtable(self.pathout['ContEtOut'], tablecontet) + self.printtable(self.pathout['Contout'], tablecont) def get_colnames(self) : return self.colnames[:] diff --git a/tabrsimple.py b/tabrsimple.py index dd81048..2db7c5a 100644 --- a/tabrsimple.py +++ b/tabrsimple.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- #Author: Pierre Ratinaud #Copyright (c) 2011 Pierre Ratinaud -#Lisense: GNU/GPL +#License: GNU/GPL from chemins import ffr, FFF from functions import exec_rcode, check_Rresult diff --git a/tabsimi.py b/tabsimi.py index b34a107..4d64840 100644 --- a/tabsimi.py +++ b/tabsimi.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- #Author: Pierre Ratinaud #Copyright (c) 2009-2010 Pierre Ratinaud -#Lisense: GNU/GPL +#License: GNU/GPL from chemins import ConstructPathOut, simipath, ffr, PathOut from functions import print_liste, exec_rcode, read_list_file, check_Rresult, indices_simi, treat_var_mod @@ -12,10 +12,6 @@ from analysematrix import AnalyseMatrix from PrintRScript import PrintSimiScript from listlex import * import wx -#if wx.__version__ >= '2.11' : -# import wx.lib.agw.aui as aui -#else : -# import aui import os import tempfile import datetime @@ -25,17 +21,26 @@ from uuid import uuid4 class DoSimi(AnalyseMatrix): - def __init__(self, parent, param = None, isopen = False, fromprof = False, pathout = False, filename ='', gparent = False, wordgraph = False, listactives = False, actives = False, cmd = False, openfromprof=False, tableau = None): + def doanalyse(self) : + self.fromprof = self.parametres.get('fromprof', False) + self.wordgraph = self.parametres.get('wordgraph', False) + self.listactives = self.parametres.get('listactives', False) + self.actives = self.parametres.get('actives', False) + self.openfromprof = self.parametres.get('openfromprof', False) + self.cmd = self.parametres.get('cmd', False) + self.dirout = self.parametres.get('pathout', False) + #parent, matrix = None, parametres = None, isopen = False, fromprof = False, pathout = False, filename ='', gparent = False, wordgraph = False, listactives = False, actives = False, cmd = False, openfromprof=False, tableau = None): #------------------------------------------------------------------- - self.fromprof = fromprof - self.wordgraph = wordgraph - self.listactives = listactives - self.actives = actives - self.openfromprof = openfromprof - self.cmd = cmd - self.dirout = pathout - if param is not None and fromprof: - self.paramsimi = param + # self.fromprof = fromprof + # self.wordgraph = wordgraph + # self.listactives = listactives + # self.actives = actives + # self.openfromprof = openfromprof + # self.cmd = cmd + # self.dirout = pathout + # if parametres is not None and fromprof: + if self.fromprof: + self.paramsimi = parametres else : self.paramsimi = {'coeff' : 0, 'layout' : 2, @@ -75,24 +80,21 @@ class DoSimi(AnalyseMatrix): # self.parent = parent.parent # self.Source = parent #else : - self.parent = parent self.Source = None - if pathout : - self.pathout = PathOut(dirout = pathout) + if self.dirout : + self.pathout = PathOut(dirout = self.dirout) - self.RPath = self.parent.PathPath.get('PATHS', 'rpath') - if not isopen : + #self.RPath = self.parent.PathPath.get('PATHS', 'rpath') + if not self.parametres.get('isopen', False) : #if not fromprof : # self.tableau = self.parent.tableau #else : # self.tableau = parent.tableau - if tableau is not None : - self.tableau = tableau - else : + if self.tableau is None : self.tableau = parent.tableau - self.tableau.parametre['mineff'] = 0 - if not fromprof : - dialcol = FreqDialog(self.parent, -1, self.tableau.get_colnames(), u"Sélectionnez les colonnes", size=(600, 250)) + self.tableau.parametres['mineff'] = 0 + if not self.fromprof : + dialcol = FreqDialog(self.parent, -1, self.tableau.get_colnames(), _(u"Select columns").decode('utf8'), size=(600, 250)) dialcol.CenterOnParent() res = dialcol.ShowModal() else : @@ -108,7 +110,7 @@ class DoSimi(AnalyseMatrix): self.tableau.actives = dict(actives) self.tableau.make_listactives() actives = dict([[i, val] for i, val in enumerate(actives)]) - self.dial = PrefSimi(parent, -1, self.paramsimi, self.indices, wordlist = actives) + self.dial = PrefSimi(self.parent, -1, self.paramsimi, self.indices, wordlist = actives) self.dial.CenterOnParent() self.val = self.dial.ShowModal() if self.val == wx.ID_OK : @@ -122,31 +124,39 @@ class DoSimi(AnalyseMatrix): self.column = [self.tableau.listactives.index(val) for val in indexes] self.column.sort() self.paramsimi = self.make_param() - self.parametres = self.paramsimi - self.parametres['type'] = 'simimatrix' - if not pathout : - self.parametres['pathout'] = ConstructPathOut(self.tableau.parametre['filename'], 'SimiMatrix') + self.parametres.update(self.paramsimi) + #self.parametres['type'] = 'simimatrix' + if not self.pathout : + self.parametres['pathout'] = ConstructPathOut(self.parametres['pathout'], 'SimiMatrix') + print self.parametres['pathout'] else : self.parametres['pathout'] = self.dirout - self.parametres['filename'] = self.tableau.parametres['filename'] + self.pathout.createdir(self.parametres['pathout']) + self.pathout.dirout = self.parametres['pathout'] + #self.parametres['filename'] = self.tableau.parametres['filename'] self.dial.Destroy() - dlg = wx.ProgressDialog("Traitements", - "Veuillez patienter...", - maximum=4, - parent=self.parent, - style=wx.PD_APP_MODAL | wx.PD_AUTO_HIDE | wx.PD_ELAPSED_TIME - ) - dlg.Center() - AnalyseMatrix.__init__(self, parent, self.tableau, self.paramsimi, dlg = dlg) + self.doanalyse2() + #dlg = wx.ProgressDialog("Traitements", + # "Veuillez patienter...", + # maximum=4, + # parent=self.parent, + # style=wx.PD_APP_MODAL | wx.PD_AUTO_HIDE | wx.PD_ELAPSED_TIME + # ) + #dlg.Center() + #AnalyseMatrix.__init__(self, parent, self.tableau, self.paramsimi, dlg = dlg) else : self.dial.Destroy() + self.parametres = None + return False else : dialcol.Destroy() + self.parametres = None + return False - def doanalyse(self) : + def doanalyse2(self) : self.pathout.basefiles(simipath) with open(self.pathout['selected.csv'], 'w') as f : - f.write('\n'.join([`val` for val in self.column])) + f.write('\n'.join([`val` for val in self.column])) count = 1 keepGoing = self.dlg.Update(count) @@ -157,7 +167,7 @@ class DoSimi(AnalyseMatrix): #-------------------------------------------------------- count += 1 #if not self.fromprof : - #self.pathout = ConstructPathOut(self.tableau.parametre['filename'], 'Simi') + #self.pathout = ConstructPathOut(self.tableau.parametres['filename'], 'Simi') #self.DictPathOut = construct_simipath(self.pathout) self.tableau.dictpathout = self.pathout self.dlg.Update(count, u"passage en O/1") @@ -190,35 +200,6 @@ class DoSimi(AnalyseMatrix): fromprof = True else: fromprof = False - #OpenAnalyse(self.parent, self.DictPathOut['ira'], False, simifromprof=fromprof) -# else : -# self.tableau = gparent.tableau -# if 'corpus' in dir(gparent) : -# self.Source = gparent -# self.tableau.parametre['mineff'] = 0 -# self.DictPathOut = construct_simipath(os.path.abspath(os.path.dirname(filename))) -# self.dolayout() -# self.paramsimi['first'] = False -# self.paramsimi['coeff'] = int(param.get('simi', 'indice')) -# self.paramsimi['layout'] = int(param.get('simi', 'layout')) -# self.paramsimi['seuil_ok'] = param.getboolean('simi', 'seuil_ok') -# self.paramsimi['seuil'] = int(param.get('simi', 'seuil')) -# if param.get('simi', 'wordgraph') == 'False' : -# self.wordgraph = False -# else : -# self.wordgraph = param.get('simi', 'wordgraph') -# if 'listet' in dir(self.tableau) : -# self.paramsimi['stars'] = self.tableau.listet -# self.paramsimi['bystar'] = False -# self.paramsimi['cexfromchi'] = True -# self.paramsimi['tvprop'] = False -# self.paramsimi['sfromchi'] = False -# self.paramsimi['coeff_te'] = True -# self.paramsimi['coeff_tv'] = True -# self.paramsimi['coeff_tv_nb'] = 0 -# self.paramsimi['label_e'] = False -# self.paramsimi['width'] = 1000 -# self.paramsimi['height'] = 1000 def make_param(self) : @@ -266,31 +247,14 @@ class DoSimi(AnalyseMatrix): if 'sfromchi' in self.paramsimi : paramsimi['sfromchi'] = self.dial.checki.GetValue() if 'vlabcolor' in self.paramsimi : - paramsimi['vlabcolor'] = self.paramsimi['vlabcolor'] + paramsimi['vlabcolor'] = self.paramsimi['vlabcolor'] if 'check_bystar' in dir(self.dial) : paramsimi['bystar'] = self.dial.check_bystar.GetValue() paramsimi['stars'] = self.paramsimi['stars'] + if 'tmpchi' in self.paramsimi : + paramsimi['tmpchi'] = self.paramsimi['tmpchi'] return paramsimi -# def make_ira(self): -# self.tableau.save_tableau(self.DictPathOut['db']) -# conf = RawConfigParser() -# conf.read(self.DictPathOut['ira']) -# if not 'simi' in conf.sections() : -# conf.add_section('simi') -# date = datetime.datetime.now().ctime() -# if self.fromprof : -# conf.set('simi', 'corpus', self.Source.corpus.parametres['uuid']) -# conf.set('simi', 'uuid', str(uuid4())) -# conf.set('simi', 'date', str(date)) -# conf.set('simi', 'indice', self.paramsimi['coeff']) -# conf.set('simi','layout', self.paramsimi['layout']) -# conf.set('simi', 'seuil_ok', self.paramsimi['seuil_ok']) -# conf.set('simi', 'seuil', str(self.paramsimi['seuil'])) -# conf.set('simi', 'wordgraph', self.wordgraph) -# fileout = open(self.DictPathOut['ira'], 'w') -# conf.write(fileout) -# fileout.close() # def addgraph(self) : if self.parametres['type_graph'] == 1: @@ -307,14 +271,14 @@ class DoSimi(AnalyseMatrix): print_liste(self.DictPathOut['liste_graph'], graph_simi) def DoR(self, dlg): - if self.paramsimi['type'] == 1 : + if self.paramsimi['type_graph'] == 1 : graph = False wait = False else : graph = True wait = True - pid = exec_rcode(self.RPath, self.tmpfile, wait = wait, graph = graph) - if self.paramsimi['type'] == 1 : + pid = exec_rcode(self.ira.RPath, self.tmpfile, wait = wait, graph = graph) + if self.paramsimi['type_graph'] == 1 : while pid.poll() == None : if not self.cmd : dlg.Pulse(u'R ...') diff --git a/tabstudent.py b/tabstudent.py index fd34232..26e464f 100644 --- a/tabstudent.py +++ b/tabstudent.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- #Author: Pierre Ratinaud #Copyright (c) 2008 Pierre Ratinaud -#Lisense: GNU/GPL +#License: GNU/GPL from chemins import ffr diff --git a/tabverges.py b/tabverges.py index 12f4a61..8ba6619 100644 --- a/tabverges.py +++ b/tabverges.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- #Author: Pierre Ratinaud #Copyright (c) 2012 Pierre Ratinaud -#Lisense: GNU GPL +#License: GNU GPL import os import string @@ -60,7 +60,7 @@ class Prototypical(AnalyseMatrix) : self.makedatas(table_assoc, table_rank) self.DoR() else : - return 'stop' + return 'stop' def dotable(self) : table_assoc = self.tableau.select_col(self.ColSel1) @@ -71,15 +71,16 @@ class Prototypical(AnalyseMatrix) : words = {} for i in range(0, len(table_assoc)) : for j, word in enumerate(table_assoc[i]) : - if word in words : - words[word][0] += 1 - if table_rank[i][j] != '' : - words[word][1].append(int(table_rank[i][j])) - else : - if table_rank[i][j] != '' : - words[word] = [1, [int(table_rank[i][j])]] + if word.strip() != "" : + if word in words : + words[word][0] += 1 + if table_rank[i][j] != '' : + words[word][1].append(float(table_rank[i][j])) else : - words[word] = [1, []] + if table_rank[i][j] != '' : + words[word] = [1, [float(table_rank[i][j])]] + else : + words[word] = [1, []] res = [[word, words[word][0], float(sum(words[word][1])) / len(words[word][1])] for word in words if len(words[word][1]) != 0 and words[word][0] >= self.parametres['freqmin']] with open(self.pathout['table.csv'], 'w') as f : f.write('\n'.join(['\t'.join(['"' + val[0] +'"', `val[1]`, `val[2]`]) for val in res])) diff --git a/textafcuci.py b/textafcuci.py index 33315a6..d030413 100644 --- a/textafcuci.py +++ b/textafcuci.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- #Author: Pierre Ratinaud #Copyright (c) 2008-2009 Pierre Ratinaud -#Lisense: GNU/GPL +#License: GNU/GPL from chemins import ConstructPathOut, ConstructAfcUciPath from layout import GraphPanel diff --git a/textaslexico.py b/textaslexico.py index 1f3e32f..000e8d0 100644 --- a/textaslexico.py +++ b/textaslexico.py @@ -3,19 +3,20 @@ #Copyright (c) 2008-2011 Pierre Ratinaud #License: GNU/GPL -from chemins import ConstructPathOut, StatTxtPathOut +from chemins import ConstructPathOut, StatTxtPathOut, PathOut #from corpus import Corpus from analysetxt import AnalyseText import wx import os -import sys -from listlex import * -from functions import exec_rcode, progressbar, check_Rresult, CreateIraFile, print_liste, treat_var_mod, write_tab, DoConf -from dialog import OptLexi, StatDialog -from openanalyse import OpenAnalyse +#import sys +#from listlex import * +from functions import exec_rcode, progressbar, check_Rresult, CreateIraFile, print_liste, treat_var_mod, write_tab, DoConf, TGen +from dialog import OptLexi#, StatDialog +#from openanalyse import OpenAnalyse import tempfile -from ConfigParser import RawConfigParser -from guifunct import getPage, getCorpus +#from ConfigParser import RawConfigParser +#from guifunct import getPage, getCorpus +from PrintRScript import TgenSpecScript from time import sleep import logging @@ -206,3 +207,25 @@ class Lexico(AnalyseText) : print_liste(self.dictpathout['liste_graph_afcf'],afcf_graph_list) print_liste(self.dictpathout['liste_graph_afct'],afct_graph_list) #DoConf().makeoptions(['spec'],[self.parametres], self.dictpathout['ira']) + +class TgenSpec(AnalyseText): + def __init__(self, ira, corpus, parametres): + self.ira = ira + self.corpus = corpus + self.parametres = parametres + self.pathout = PathOut(dirout = self.parametres['pathout']) + self.doanalyse() + + def doanalyse(self): + self.tgen = TGen(path = self.parametres['tgenpath']) + self.tgen.read(self.tgen.path) + tgenocc, totocc = self.corpus.make_tgen_table(self.tgen, self.parametres['etoiles']) + self.parametres['tgeneff'] = os.path.join(self.parametres['pathout'], 'tgeneff.csv') + self.tgen.writetable(self.parametres['tgeneff'], tgenocc, totocc) + self.parametres['tgenspec'] = os.path.join(self.parametres['pathout'], 'tgenspec.csv') + self.Rscript = TgenSpecScript(self) + self.Rscript.make_script() + self.Rscript.write() + self.doR(self.Rscript.scriptout, dlg = False, message = 'R...') + + \ No newline at end of file -- 2.7.4