chronological analysis of clusters
authorPierre Ratinaud <ratinaud@univ-tlse2.fr>
Mon, 27 Mar 2017 07:39:55 +0000 (09:39 +0200)
committerPierre Ratinaud <ratinaud@univ-tlse2.fr>
Mon, 27 Mar 2017 07:39:55 +0000 (09:39 +0200)
PrintRScript.py
ProfList.py
Rscripts/date_ok.R [new file with mode: 0644]

index be81a14..a2dde7f 100644 (file)
@@ -940,7 +940,7 @@ class PrintSimiScript(PrintRScript) :
         if self.parametres['film'] : 
             txt += """
             film <- "%s"
-            """ % self.pathout['film']
+            """ % ffr(self.pathout['film'])
         else : 
             txt += """
             film <- NULL
@@ -1207,11 +1207,12 @@ class ExportAfc(PrintRScript) :
         """
 
 class MergeGraphes(PrintRScript) :
-    def __init__(self, parametres):
+    def __init__(self, analyse):
         self.script = u"#Script genere par IRaMuTeQ - %s\n" % datetime.now().ctime()
         self.pathout = PathOut()
-        self.parametres = parametres
+        self.parametres = analyse.parametres
         self.scriptout = self.pathout['temp']
+        self.analyse = analyse 
 
     def make_script(self) :
         #FIXME
@@ -1227,7 +1228,7 @@ class MergeGraphes(PrintRScript) :
         V(g)$weight <- (graph.simi$mat.eff/nrow(dm))*100
         graphs[['%s']] <- g
         """
-        for i, graph in enumerate(self.parametres['lgraphes']) :
+        for i, graph in enumerate(self.parametres['graphs']) :
             path = os.path.dirname(graph)
             gname = ''.join(['g', `i`])
             RData = os.path.join(path,'RData.RData')
@@ -1331,4 +1332,142 @@ class LabbeScript(PrintRScript) :
         self.add(txt)
         self.write()
 
+class ChronoChi2Script(PrintRScript) :
+    def make_script(self) :
+        self.sources([self.analyse.parent.RscriptsPath['Rgraph']])
+        print self.parametres
+        txt = """
+        inRData <- "%s"
+        dendrof <- "%s"
+        load(inRData)
+        load(dendrof)
+        """ % (self.pathout['RData.RData'], self.pathout['dendrogramme.RData'])
+        txt += """
+        svg <- %s
+        """ % self.parametres['svg']
+        txt += """
+        tc <- which(grepl("%s",rownames(chistabletot)))
+        rn <- rownames(chistabletot)[tc]
+        tc <- tc[order(rn)]
+        dpt <- chistabletot[tc,]
+        tot <- afctable[tc,]
+        tcp <- rowSums(tot)
+        ptc <- tcp/sum(tcp)
+        dpt <- t(dpt)
+        dd <- dpt
+        """ % self.parametres['var']
+        txt += """
+        classes <- n1[,ncol(n1)]
+        tcl <- table(classes)
+        if ('0' %in% names(tcl)) {
+            to.vire <- which(names(tcl) == '0')
+            tcl <- tcl[-to.vire]
+        }
+        tclp <- tcl/sum(tcl)
+
+        #chi2 colors
+        library(ape)
+        k <- 1e-02
+        lcol <- NULL
+        lk <- k
+        for (i in 1:5) {
+            lcol <- c(lcol, qchisq(1-k,1))
+            k <- k/10
+            lk <- c(lk,k)
+        }
+        lcol <- c(3.84, lcol)
+        lcol <- c(-Inf,lcol)
+        lcol <- c(lcol, Inf)
+        lk <- c(0.05,lk)
+        breaks <- lcol
+        alphas <- seq(0,1, length.out=length(breaks))
+        clod <- rev(as.numeric(tree.cut1$tree.cl$tip.label))
+        #end
+        """
+        txt += """
+        open_file_graph("%s", w=%i, h=%i, svg=svg)
+        """ % (ffr(self.parametres['tmpgraph']), self.parametres['width'], self.parametres['height'])
+        txt += """
+        par(mar=c(3,3,3,3))
+        mat.graphic <- matrix(c(rep(1,nrow(dd)),c(2:(nrow(dd)+1))), ncol=2)
+        mat.graphic <- rbind(mat.graphic, c(max(mat.graphic) + 1 , max(mat.graphic) + 2))
+        hauteur <- tclp[clod] * 0.9
+        heights.graphic <- append(hauteur, 0.1)
+        layout(mat.graphic, heights=heights.graphic, widths=c(0.15,0.85))
+        par(mar=c(0,0,0,0))
+        tree.toplot <- tree.cut1$tree.cl
+        tree.toplot$tip.label <- paste('classe ', tree.toplot$tip.label)
+        plot.phylo(tree.toplot,label.offset=0.1, cex=1.1, no.margin=T)
+        for (i in clod) {
+            print(i)
+            par(mar=c(0,0,0,0))
+            lcol <- cut(dd[i,], breaks, include.lowest=TRUE)
+            ulcol <- names(table(lcol))
+            lcol <- as.character(lcol)
+            for (j in 1:length(ulcol)) {
+                lcol[which(lcol==ulcol[j])] <- j
+            }
+            lcol <- as.numeric(lcol)
+            mcol <- rainbow(nrow(dd))[i]
+            last.col <- NULL
+            for (k in alphas) {
+                last.col <- c(last.col, rgb(r=col2rgb(mcol)[1]/255, g=col2rgb(mcol)[2]/255, b=col2rgb(mcol)[3]/255, a=k))
+            }
+            #print(last.col)
+
+            barplot(rep(1,ncol(dd)), width=ptc, names.arg=FALSE, axes=FALSE, col=last.col[lcol], border=rgb(r=0, g=0, b=0, a=0.3))
+        }
+        plot(0,type='n',axes=FALSE,ann=FALSE)
+        label.coords <- barplot(rep(1, ncol(dd)), width=ptc, names.arg = F, las=2, axes=F, ylim=c(0,1), plot=T, col='white')
+        text(x=label.coords, y=0.5, labels=rn[order(rn)], srt=90)
+        dev.off()
+        """
+        self.add(txt)
+        self.write()
+
+class ChronoPropScript(PrintRScript) :
+    def make_script(self) :
+        self.sources([self.analyse.parent.RscriptsPath['Rgraph']])
+        print self.parametres
+        txt = """
+        inRData <- "%s"
+        dendrof <- "%s"
+        load(inRData)
+        load(dendrof)
+        """ % (self.pathout['RData.RData'], self.pathout['dendrogramme.RData'])
+        txt += """
+        svg <- %s
+        """ % self.parametres['svg']
+        txt += """
+        tc <- which(grepl("%s",rownames(chistabletot)))
+        rn <- rownames(chistabletot)[tc]
+        tc <- tc[order(rn)]
+        dpt <- chistabletot[tc,]
+        tot <- afctable[tc,]
+        tcp <- rowSums(tot)
+        ptc <- tcp/sum(tcp)
+        dpt <- t(dpt)
+        dd <- dpt
+        """ % self.parametres['var']
+        txt += """
+        classes <- n1[,ncol(n1)]
+        tcl <- table(classes)
+        if ('0' %in% names(tcl)) {
+            to.vire <- which(names(tcl) == '0')
+            tcl <- tcl[-to.vire]
+        }
+        tclp <- tcl/sum(tcl)
+        """
+        txt += """
+        open_file_graph("%s", w=%i, h=%i, svg=svg)
+        """ % (ffr(self.parametres['tmpgraph']), self.parametres['width'], self.parametres['height'])
+        txt+= """
+        ptt <- prop.table(as.matrix(tot), 1)
+        par(mar=c(10,2,2,2))
+        barplot(t(ptt)[as.numeric(tree.cut1$tree.cl$tip.label),], col=rainbow(ncol(ptt))[as.numeric(tree.cut1$tree.cl$tip.label)], width=ptc, las=3, space=0.05, cex.axis=0.7, border=NA)
+        dev.off()
+        """
+        self.add(txt)
+        self.write()
+
 
index 7bf1290..da8d316 100644 (file)
@@ -18,7 +18,7 @@ import  wx
 import  wx.lib.mixins.listctrl  as  listmix
 from listlex import ListForSpec
 from chemins import ConstructPathOut, ffr
-from dialog import PrefUCECarac, SearchDial, message, BarFrame
+from dialog import PrefUCECarac, SearchDial, message, BarFrame, ChronoFrame
 from tableau import copymatrix
 from search_tools import SearchFrame
 import webbrowser
@@ -91,7 +91,7 @@ class ProfListctrlPanel(wx.ListCtrl, listmix.ListCtrlAutoWidthMixin, listmix.Col
         self.attr2 = wx.ListItemAttr()
         self.attr2.SetBackgroundColour((190, 249, 236))
         self.attr2s = wx.ListItemAttr()
-        self.attr2s.SetBackgroundColour((211, 252, 244))        
+        self.attr2s.SetBackgroundColour((211, 252, 244))
         self.attr3 = wx.ListItemAttr()
         self.attr3.SetBackgroundColour((245, 180, 180))
         self.attr3s = wx.ListItemAttr()
@@ -106,7 +106,7 @@ class ProfListctrlPanel(wx.ListCtrl, listmix.ListCtrlAutoWidthMixin, listmix.Col
         self.InsertColumn(5, "Type", wx.LIST_FORMAT_RIGHT)
         self.InsertColumn(6, "forme", wx.LIST_FORMAT_RIGHT)
         self.InsertColumn(7, "p", wx.LIST_FORMAT_RIGHT)
-        
+
 
         self.SetColumnWidth(0, 60)
         self.SetColumnWidth(1, 70)
@@ -122,7 +122,7 @@ class ProfListctrlPanel(wx.ListCtrl, listmix.ListCtrlAutoWidthMixin, listmix.Col
         self.itemDataMap = dictdata
         self.itemIndexMap = dictdata.keys()
         self.SetItemCount(len(dictdata))
-        
+
         #mixins
         listmix.ListCtrlAutoWidthMixin.__init__(self)
         listmix.ColumnSorterMixin.__init__(self, len(classen[0]))
@@ -221,7 +221,7 @@ class ProfListctrlPanel(wx.ListCtrl, listmix.ListCtrlAutoWidthMixin, listmix.Col
         items = list(self.itemDataMap.keys())
         items.sort(sorter)
         self.itemIndexMap = items
-        
+
         # redraw the list
         self.Refresh()
 
@@ -275,6 +275,8 @@ class ProfListctrlPanel(wx.ListCtrl, listmix.ListCtrlAutoWidthMixin, listmix.Col
                 self.idexporttropes = wx.NewId()
                 self.idexportowledge = wx.NewId()
                 self.onmaketgen = wx.NewId()
+                self.onchronochi2 = wx.NewId()
+                self.onchronoprop = wx.NewId()
             #    self.export_classes = wx.NewId()
    
                 self.Bind(wx.EVT_MENU, self.OnPopupOne, id=self.popupID1)
@@ -300,6 +302,8 @@ class ProfListctrlPanel(wx.ListCtrl, listmix.ListCtrlAutoWidthMixin, listmix.Col
                 self.Bind(wx.EVT_MENU, self.onexporttropes, id = self.idexporttropes)
                 self.Bind(wx.EVT_MENU, self.onexportowledge, id = self.idexportowledge)
                 self.Bind(wx.EVT_MENU, self.OnMakeTgen, id=self.onmaketgen)
+                self.Bind(wx.EVT_MENU, self.OnChronoChi2, id=self.onchronochi2)
+                self.Bind(wx.EVT_MENU, self.OnChronoProp, id=self.onchronoprop)
              #  self.Bind(wx.EVT_MENU, self.on_export_classes, id = self.export_classes)
    #            self.Bind(wx.EVT_MENU, self.OnPopupThree, id=self.popupID3)
     
@@ -309,9 +313,13 @@ class ProfListctrlPanel(wx.ListCtrl, listmix.ListCtrlAutoWidthMixin, listmix.Col
             menu.Append(self.idtablex, _(u"Chi2 by cluster").decode('utf8'))
             menu.Append(self.idlexdendro, _(u"Chi2 by cluster on dendrogram").decode('utf8'))
             menu.Append(self.idchimod, _(u"Chi2 modalities of variable").decode('utf8'))
+            menu_chrono = wx.Menu()
+            menu_chrono.Append(self.onchronochi2, _(u'Chi2').decode('utf8'))
+            menu_chrono.Append(self.onchronoprop, _(u'Proportion').decode('utf8'))
+            menu.AppendMenu(-1, _(u"Chronological view").decode('utf8'), menu_chrono)
             menu.Append(self.idwordgraph, _(u"Word graph").decode('utf8'))
             #menu.Append(self.export_classes, u"Exporter le corpus...") 
-            
+
             #menu.Append(self.popupID10, u"Spécificités")
 
             menu_conc = wx.Menu()
@@ -320,7 +328,7 @@ class ProfListctrlPanel(wx.ListCtrl, listmix.ListCtrlAutoWidthMixin, listmix.Col
             menu_conc.Append(self.popupID4, _(u"In all segments").decode('utf8'))
             menu.AppendMenu(-1, _(u"Concordance").decode('utf8'), menu_conc)
             menu.Append(self.onmaketgen, _(u"Make Tgen").decode('utf8'))
-            menu_cnrtl = wx.Menu()      
+            menu_cnrtl = wx.Menu()
             menu_cnrtl.Append(self.popupID5, _(u"Definition").decode('utf8'))
             menu_cnrtl.Append(self.popupID6, _(u"Etymology").decode('utf8'))
             menu_cnrtl.Append(self.popupID7, _(u"Synonymous").decode('utf8'))
@@ -338,7 +346,7 @@ class ProfListctrlPanel(wx.ListCtrl, listmix.ListCtrlAutoWidthMixin, listmix.Col
             menu.Append(self.idexportowledge, _('Exporter for Owledge').decode('utf8'))
             #menu.Append(self.popupID2, u"Concordancier")
     #        menu.Append(self.popupID3, "recharger")
-    
+
             self.PopupMenu(menu)
             menu.Destroy()
         elif 'tableau' in dir(self.Source) :
@@ -380,7 +388,7 @@ class ProfListctrlPanel(wx.ListCtrl, listmix.ListCtrlAutoWidthMixin, listmix.Col
         dial = wx.MessageDialog(self, self.Source.pathout['classe_%i_export.txt' % self.cl], u"Export", wx.OK|wx.ICON_INFORMATION)
         dial.ShowModal()
         dial.Destroy()
-    
+
     def onexporttropes(self, evt) :
         if 'corpus' in dir(self.Source):
             corpus = self.Source.corpus
@@ -390,7 +398,7 @@ class ProfListctrlPanel(wx.ListCtrl, listmix.ListCtrlAutoWidthMixin, listmix.Col
             uci = True
         fileout = self.Source.pathout['export_tropes_classe_%i.txt' % self.cl]
         corpus.export_tropes(fileout, self.cl, uci = uci)
-    
+
     def onexportowledge(self, evt):
         if 'corpus' in dir(self.Source):
             corpus = self.Source.corpus
@@ -401,7 +409,7 @@ class ProfListctrlPanel(wx.ListCtrl, listmix.ListCtrlAutoWidthMixin, listmix.Col
         repout = self.Source.pathout['export_owledge_classe_%i' % self.cl]
         if not os.path.exists(repout) :
             os.mkdir(repout)
-        corpus.export_owledge(repout, self.cl, uci = uci)        
+        corpus.export_owledge(repout, self.cl, uci = uci)
 
     def getselectedwords(self) :
         words = [self.getColumnText(self.GetFirstSelected(), 6)]
@@ -411,7 +419,7 @@ class ProfListctrlPanel(wx.ListCtrl, listmix.ListCtrlAutoWidthMixin, listmix.Col
             words.append(self.getColumnText(last, 6))
         return words
 
-    def quest_var_mod(self, evt) :  
+    def quest_var_mod(self, evt) :
         word = self.getselectedwords()[0]
         if len(word.split('_')) <= 1 :
             dial = wx.MessageDialog(self, _(u"This is not a variable_modality form").decode('utf8'), _(u"Problem").decode('utf8'), wx.OK | wx.ICON_WARNING)
@@ -419,7 +427,7 @@ class ProfListctrlPanel(wx.ListCtrl, listmix.ListCtrlAutoWidthMixin, listmix.Col
             dial.ShowModal()
             dial.Destroy()
             return
-            
+
         if 'corpus' in dir(self.Source):
             corpus = self.Source.corpus
             if word.startswith(u'-*') :
@@ -452,7 +460,7 @@ class ProfListctrlPanel(wx.ListCtrl, listmix.ListCtrlAutoWidthMixin, listmix.Col
             dial.CenterOnParent()
             dial.ShowModal()
             dial.Destroy()
-            return            
+            return
         words.sort()
         tableout = []
         kwords = []
@@ -462,6 +470,85 @@ class ProfListctrlPanel(wx.ListCtrl, listmix.ListCtrlAutoWidthMixin, listmix.Col
                 kwords.append(word)
         BarFrame(self.Source.parent, tableout, title, kwords)
 
+    def OnChronoChi2(self, evt) :
+        word = self.getselectedwords()[0]
+        if len(word.split('_')) <= 1 :
+            dial = wx.MessageDialog(self, _(u"This is not a variable_modality form").decode('utf8'), _(u"Problem").decode('utf8'), wx.OK | wx.ICON_WARNING)
+            dial.CenterOnParent()
+            dial.ShowModal()
+            dial.Destroy()
+            return
+
+        if 'corpus' in dir(self.Source):
+            corpus = self.Source.corpus
+            if word.startswith(u'-*') :
+                if self.them_mod == {} :
+                    self.them_mod = self.Source.corpus.make_theme_dict()
+                var_mod = self.them_mod
+            else :
+                if self.var_mod == {} :
+                    self.var_mod = self.Source.corpus.make_etoiles_dict()
+                var_mod = self.var_mod
+        else :
+            corpus = self.Source.tableau
+            if self.var_mod == {} :
+                self.var_mod = treat_var_mod([val for val in corpus.actives] + [val for val in corpus.sups])
+            var_mod = self.var_mod
+        var = word.split('_')
+        #words = ['_'.join([var[0],word]) for word in self.var_mod[var[0]]]
+        try :
+            words = [word for word in var_mod[var[0]]]
+        except KeyError:
+            dial = wx.MessageDialog(self, _(u"This is not a meta-data").decode('utf8'), _(u"Problem").decode('utf8'), wx.OK | wx.ICON_WARNING)
+            dial.CenterOnParent()
+            dial.ShowModal()
+            dial.Destroy()
+            return
+        words.sort()
+        vartoplot = var[0] + '_'
+        parametres = {'var' : vartoplot}
+        ChronoFrame(self.Source.parent, parametres, self.Source.pathout, which = 'chi2')
+
+    def OnChronoProp(self, evt) :
+        word = self.getselectedwords()[0]
+        if len(word.split('_')) <= 1 :
+            dial = wx.MessageDialog(self, _(u"This is not a variable_modality form").decode('utf8'), _(u"Problem").decode('utf8'), wx.OK | wx.ICON_WARNING)
+            dial.CenterOnParent()
+            dial.ShowModal()
+            dial.Destroy()
+            return
+
+        if 'corpus' in dir(self.Source):
+            corpus = self.Source.corpus
+            if word.startswith(u'-*') :
+                if self.them_mod == {} :
+                    self.them_mod = self.Source.corpus.make_theme_dict()
+                var_mod = self.them_mod
+            else :
+                if self.var_mod == {} :
+                    self.var_mod = self.Source.corpus.make_etoiles_dict()
+                var_mod = self.var_mod
+        else :
+            corpus = self.Source.tableau
+            if self.var_mod == {} :
+                self.var_mod = treat_var_mod([val for val in corpus.actives] + [val for val in corpus.sups])
+            var_mod = self.var_mod
+        var = word.split('_')
+        #words = ['_'.join([var[0],word]) for word in self.var_mod[var[0]]]
+        try :
+            words = [word for word in var_mod[var[0]]]
+        except KeyError:
+            dial = wx.MessageDialog(self, _(u"This is not a meta-data").decode('utf8'), _(u"Problem").decode('utf8'), wx.OK | wx.ICON_WARNING)
+            dial.CenterOnParent()
+            dial.ShowModal()
+            dial.Destroy()
+            return
+        words.sort()
+        vartoplot = var[0] + '_'
+        parametres = {'var' : vartoplot}
+        ChronoFrame(self.Source.parent, parametres, self.Source.pathout, which = 'prop')
+
+
     def quest_simi(self, evt) :
         tableau = self.Source.tableau
         tab = tableau.make_table_from_classe(self.cl, self.la)
@@ -530,7 +617,7 @@ class ProfListctrlPanel(wx.ListCtrl, listmix.ListCtrlAutoWidthMixin, listmix.Col
             with open(self.tmpchi, 'w') as f:
                 f.write('\n'.join([str(val) for val in self.lchi]))
         index = self.la.index(word)
-        parametres = {'type' : 'clustersimitxt', 
+        parametres = {'type' : 'clustersimitxt',
                         'pathout' : self.Source.parametres['pathout'],
                         'word' : index ,
                         'lem' : self.Source.parametres['lem'],
@@ -545,7 +632,7 @@ class ProfListctrlPanel(wx.ListCtrl, listmix.ListCtrlAutoWidthMixin, listmix.Col
             self.tmpchi = os.path.join(self.Source.parametres['pathout'],'chi_%i.csv' % self.cl)
             with open(self.tmpchi, 'w') as f:
                 f.write('\n'.join([str(val) for val in self.lchi]))
-        parametres = {'type' : 'clustersimitxt', 
+        parametres = {'type' : 'clustersimitxt',
                         'pathout' : self.Source.parametres['pathout'],
                         'lem' : self.Source.parametres['lem'],
                         'tmpchi' : self.tmpchi}
@@ -613,7 +700,7 @@ class ProfListctrlPanel(wx.ListCtrl, listmix.ListCtrlAutoWidthMixin, listmix.Col
             #win.html = '<html>\n' + '<br>'.join(['<br>'.join([ucis_txt[i], '<table bgcolor = #1BF0F7 border=0><tr><td><b>score : %.2f</b></td></tr></table>' % ntab2[i][0], ucestxt[i]]) for i in range(0,len(ucestxt))]) + '\n</html>'
             #win.HtmlPage.SetPage(win.html)
             win.Show(True)
-    
+
     def on_tablex(self, evt):
         if 'corpus' in dir(self.Source):
             corpus = self.Source.corpus
@@ -658,12 +745,12 @@ class ProfListctrlPanel(wx.ListCtrl, listmix.ListCtrlAutoWidthMixin, listmix.Col
             ListWord.append(self.getColumnText(last, 6))
         ucef = []
         if self.Source.parametres['classif_mode'] != 2 :
-            for word in ListWord : 
+            for word in ListWord :
                 uci = False
                 ucef += list(set(corpus.getlemuces(word)).intersection(uces))
         else :
-            for word in ListWord : 
-                ucef += list(set(corpus.getlemucis(word)).intersection(uces))            
+            for word in ListWord :
+                ucef += list(set(corpus.getlemucis(word)).intersection(uces))
                 uci = True
         ucis_txt, ucestxt = doconcorde(corpus, ucef, ListWord, uci = uci)
         items = dict([[i, '<br><br>'.join([ucis_txt[i], ucestxt[i]])] for i in range(0,len(ucestxt))])
@@ -676,13 +763,13 @@ class ProfListctrlPanel(wx.ListCtrl, listmix.ListCtrlAutoWidthMixin, listmix.Col
             uces = corpus.lc[self.cl-1]
             win = self.make_concord(uces, ' - '.join([_(u"Concordance").decode('utf8'), "Classe %i" % self.cl]))
             win.Show(True)
-    
+
     def OnPopupThree(self, event):
         corpus = self.Source.corpus
         uces = [classe[i] for classe in corpus.lc for i in range(0,len(classe))]
         win = self.make_concord(uces, ' - '.join([_(u"Concordance").decode('utf8'), _(u"Segments of this clustering").decode('utf8')]))
         win.Show(True)
-        
+
     def OnPopupFour(self, event):
         corpus = self.Source.corpus
         uces = [classe[i] for classe in corpus.lc for i in range(0,len(classe))] + corpus.lc0
@@ -694,22 +781,22 @@ class ProfListctrlPanel(wx.ListCtrl, listmix.ListCtrlAutoWidthMixin, listmix.Col
         lk = "http://www.cnrtl.fr/definition/" + word
         webbrowser.open(lk)
 
-    def OnPopupSix(self, event):  
+    def OnPopupSix(self, event):
         word = self.getColumnText(self.GetFirstSelected(), 6)
         lk = "http://www.cnrtl.fr/etymologie/" + word
         webbrowser.open(lk)
-        
-    def OnPopupSeven(self, event):        
+
+    def OnPopupSeven(self, event):
         word = self.getColumnText(self.GetFirstSelected(), 6)
         lk = "http://www.cnrtl.fr/synonymie/" + word
         webbrowser.open(lk)
-        
-    def OnPopupHeight(self, event):  
+
+    def OnPopupHeight(self, event):
         word = self.getColumnText(self.GetFirstSelected(), 6)
         lk = "http://www.cnrtl.fr/antonymie/" + word
         webbrowser.open(lk)
-        
-    def OnPopupNine(self, event):            
+
+    def OnPopupNine(self, event):
         word = self.getColumnText(self.GetFirstSelected(), 6)
         lk = "http://www.cnrtl.fr/morphologie/" + word
         webbrowser.open(lk)
@@ -744,9 +831,9 @@ class ProfListctrlPanel(wx.ListCtrl, listmix.ListCtrlAutoWidthMixin, listmix.Col
         #win.html = '<html>\n' + '<br>'.join([' : '.join([str(val) for val in forme]) for forme in rep]) + '\n</html>'
         #win.HtmlPage.SetPage(win.html)
         win.Show(True)
-    
+
     def OnMakeTgen(self, evt):
-        self.parent.tree.OnTgenEditor(self.getselectedwords())    
+        self.parent.tree.OnTgenEditor(self.getselectedwords())
 
 
 class wliste(wx.Frame):
@@ -767,7 +854,7 @@ class wliste(wx.Frame):
         self.SetAutoLayout(True)
         self.SetSizer(sizer_1)
         self.Layout()
-        
+
     def OnCloseMe(self, event):
         self.Close(True)
 
diff --git a/Rscripts/date_ok.R b/Rscripts/date_ok.R
new file mode 100644 (file)
index 0000000..05bac64
--- /dev/null
@@ -0,0 +1,337 @@
+
+
+load('mariagepourtousnov-jui_corpus_corpus_1/mariagepourtousnov-jui_corpus_alceste_2/RData.RData')
+load('mariagegaynov-jui_corpus_corpus_1/mariagegaynov-jui_corpus_alceste_3/RData.RData')
+load('mariagehomonov-jui_corpus_corpus_1/mariagehomonov-jui_corpus_alceste_2/RData.RData')
+
+dpt <- chistabletot[tc,]
+#dpt <- chistabletot[debet:nrow(chistabletot),]
+dd <-rownames(dpt)
+dd <- strptime(dd, "*date_%Y-%m-%d")
+dd <- strptime(dd, "%Y-%m-%d")
+dd <- cbind(as.character(dd), dpt)
+dd <- dd[order(dd[,1]),]
+dd <- add.missing.date(dd,c.dates = 1, datedeb=c(07,11,2012), datefin=c(31,07,2013))
+
+#tot <- afctable[debet:nrow(afctable),]
+tot <- afctable[tc,]
+tt <- rownames(tot)
+tt <- strptime(tt, "*date_%Y-%m-%d")
+tt <- strptime(tt, "%Y-%m-%d")
+tt <- cbind(as.character(tt), tot)
+tt <- tt[order(tt[,1]),]
+tt <- add.missing.date(tt, c.dates = 1, datedeb=c(07,11,2012), datefin=c(31,07,2013))
+
+rn <- tt[,1]
+tt <- tt[,-1]
+tt <- apply(tt, 2, as.numeric)
+rownames(tt) <- rn
+tcp <- rowSums(tt)
+ptc <- tcp/sum(tcp)
+
+ptt <- prop.table(as.matrix(tt), 1)
+
+tcl <- table(classes)
+z <- which(names(tcl)=="0")
+if (length(z) != 0) {tcl <- tcl[-z]}
+tclp <- tcl/sum(tcl)
+
+
+
+rn <- dd[,1]
+dd <- dd[,-1]
+dd <- apply(dd,2, as.numeric)
+rownames(dd) <- rn
+
+
+
+library(ape)
+
+
+tree1 <- tree.cut1$tree.cl
+tree1 <- compute.brlen(tree1)
+tree1 <- as.hclust(tree1)
+
+
+
+dd <- t(dd)
+
+cc <- dd
+cc[which(dd <= (-3.84))] <- 1
+cc[which((dd > (-3.84)) & (dd < 3.84))] <- 2
+cc[which(dd >= 3.84)] <- 3
+library(RColorBrewer)
+#col <- brewer.pal(3, 'Reds')
+#col <- c('red', 'green', 'blue')
+col <- c('black', 'black', 'red')
+col <- c('white', 'white', 'blue')
+col <- col[cc]
+
+
+clod <- rev(as.numeric(tree.cut1$tree.cl$tip.label))
+
+heatmap(as.matrix(dd[as.numeric(tree.cut1$tree.cl$tip.label),]), Colv=NA, Rowv=as.dendrogram(tree1), col=col)
+
+png('cl_dates_homo.png', h=1000, w=2500)
+alphas <- seq(0,1, length.out=length(breaks))
+#par(mfrow=c(nrow(dd),1))
+par(mar=c(3,3,3,3))
+layout(matrix(c(rep(1,nrow(dd)),c(2:(nrow(dd)+1)),c(rep(nrow(dd)+2, nrow(dd)))),ncol=3), heights=tclp[clod], widths=c(0.05,0.92,0.03))
+par(mar=c(0,0,0,0))
+plot.phylo(tree.cut1$tree.cl,label.offset=0.1)
+for (i in clod) {
+    print(i)
+    par(mar=c(0,0,0,0))
+    lcol <- cut(dd[i,], breaks, include.lowest=TRUE)
+    ulcol <- names(table(lcol))
+    lcol <- as.character(lcol)
+    for (j in 1:length(ulcol)) {
+        lcol[which(lcol==ulcol[j])] <- j
+    }
+    lcol <- as.numeric(lcol)
+
+    #lcol[which(lcol <= 9)] <- 1
+
+    mcol <- rainbow(nrow(dd))[i]
+    last.col <- NULL
+    for (k in alphas) {
+        last.col <- c(last.col, rgb(r=col2rgb(mcol)[1]/255, g=col2rgb(mcol)[2]/255, b=col2rgb(mcol)[3]/255, a=k))
+    }
+    print(last.col)
+
+    barplot(rep(1,ncol(dd)), width=ptc, names.arg=FALSE, axes=FALSE, col=last.col[lcol], border=rgb(r=0, g=0, b=0, a=0.3))
+#val2col(dd[i,], col=heat.colors(30)), border=NA)
+}
+plot.new()
+legend('right', as.character(lk), fill=last.col)
+
+dev.off()
+
+
+layout(matrix(c(rep(1,nrow(dd)),c(2:(nrow(dd)+1)),c(rep(nrow(dd)+2, nrow(dd)))),ncol=3), heights=tclp[clod], widths=c(0.05,0.92,0.03))
+par(mar=c(0,0,0,0))
+plot.phylo(tree.cut1$tree.cl,label.offset=0.1)
+ncol <- rainbow(nrow(dd))
+for (i in clod) {
+    print(i)
+    par(mar=c(0,0,0,0))
+    barplot(dd[i,], width=ptc, names.arg=FALSE, axes=FALSE, col=ncol[i])
+}
+
+
+
+
+vcol <- rainbow(nrow(dd))
+ncoli <- dd
+for (i in 1:nrow(dd)) {
+    lcol <- cut(dd[1,], breaks, include.lowest=TRUE)
+    ulcol <- names(table(lcol))
+    lcol <- as.character(lcol)
+    rlcol <- rank()
+    for (i in 1:length(ulcol)) {
+        lcol[which(lcol==ulcol[i])] <- i
+    }
+    lcol <- as.numeric(lcol)
+    for (j in 1:ncol(dd)) {
+        if (dd[i,j] < 3.84) {
+            ncoli[i,j] <- rgb(r=col2rgb(vcol[i])[1]/255, g=col2rgb(vcol[i])[2]/255, b=col2rgb(vcol[i])[3]/255, a=0.2)
+        } else {
+            ncoli[i,j] <- rgb(r=col2rgb(vcol[i])[1]/255, g=col2rgb(vcol[i])[2]/255, b=col2rgb(vcol[i])[3]/255, a=1)
+        }
+    }
+}
+
+barplot(t(ptt)[as.numeric(tree.cut1$tree.cl$tip.label),], col=rainbow(ncol(ptt))[as.numeric(tree.cut1$tree.cl$tip.label)], width=ptc, las=3, space=0.05, cex.axis=0.7, border=NA)
+
+layout(matrix(c(1:nrow(ptt)), nrow=1),  widths=ptc)
+od <- as.numeric(tree.cut1$tree.cl$tip.label)
+colod = rainbow(ncol(ptt))[od]
+for (i in 1:ncol(ptt)) {
+    par(mar=c(0,0,0,0))
+    barplot(as.matrix(ptt[i,od], ncol=1), col=colod, axes=FALSE)
+}
+
+k <- 1e-02
+lcol <- NULL
+lk <- k
+for (i in 1:5) {
+    lcol <- c(lcol, qchisq(1-k,1))
+    k <- k/10
+    lk <- c(lk,k)
+}
+lcol <- c(3.84, lcol)
+lcol <- c(-Inf,lcol)
+lcol <- c(lcol, Inf)
+lk <- c(0.05,lk)
+#lcol <- c(-rev(lcol), lcol)
+#lk <- c(-rev(lk), lk)
+#lcol <- c(min(dd), lcol)
+#lk <- c(1, lk)
+#breaks <- c(lcol, max(dd))
+breaks <- lcol
+
+lcol <- cut(dd[1,], breaks)
+ulcol <- names(table(lcol))
+lcol <- as.character(lcol)
+for (i in 1:length(ulcol)) {
+    lcol[which(lcol==ulcol[i])] <- i
+}
+lcol <- as.numeric(lcol)
+
+
+make.chi <- function(x) {
+    rs <- rowSums(x)
+    cs <- colSums(x)
+    n <- sum(x)
+    
+}
+
+
+
+select.chi.classe <- function(tablechi, nb, active = TRUE) {
+    rowkeep <- NULL
+    if (active & !is.null(debsup)) {
+        print(debsup)
+        print('###############################################################@')
+        tablechi <- tablechi[1:(debsup-1),]
+    } else if (!active & !is.null(debsup)) {
+        tablechi <- tablechi[debsup:(debet-1),]
+    }
+    if (nb > nrow(tablechi)) {
+        nb <- nrow(tablechi)
+    }
+    for (i in 1:ncol(tablechi)) {
+        rowkeep <- append(rowkeep,order(tablechi[,i], decreasing = TRUE)[1:nb])
+    }
+    rowkeep <- unique(rowkeep)
+    rowkeep
+}
+
+
+plot.dendro.cloud <- function(tree, classes, chisqtable, nbbycl = 60, type.dendro = "phylogram", max.cex=2, min.cex=0.3, from.cmd = FALSE, bw = FALSE, lab = NULL, do.cloud=FALSE) {
+    library(wordcloud)
+    library(ape)
+    classes<-classes[classes!=0]
+       classes<-as.factor(classes)
+       sum.cl<-as.matrix(summary(classes, maxsum=1000000))
+       sum.cl<-(sum.cl/colSums(sum.cl)*100)
+       sum.cl<-round(sum.cl,2)
+       sum.cl<-cbind(sum.cl,as.matrix(100-sum.cl[,1]))
+    sum.cl <- sum.cl[,1]
+    tree.order<- as.numeric(tree$tip.label)
+       vec.mat<-NULL
+
+       for (i in 1:length(sum.cl)) vec.mat<-append(vec.mat,1)
+       v<-2
+       for (i in 1:length(sum.cl)) {
+               vec.mat<-append(vec.mat,v)
+               v<-v+1
+       }
+       if (!do.cloud) {
+       layout(matrix(vec.mat,length(sum.cl),2), heights=tclp[clod], widths=c(0.15,0.85))
+       } else {
+               row.keep <- select.chi.classe(chisqtable, nbbycl)
+               toplot <- chisqtable[row.keep,]
+               lclasses <- list()
+               for (classe in 1:length(sum.cl)) {
+                       ntoplot <- toplot[,classe]
+                       ntoplot <- ntoplot[order(ntoplot, decreasing = TRUE)]
+                       ntoplot <- round(ntoplot, 0)
+                       ntoplot <- ntoplot[1:nbbycl]
+                       ntoplot <- ntoplot[order(ntoplot)]
+                       #ntoplot <- ifelse(length(ntoplot) > nbbycl, ntoplot[1:nbbycl], ntoplot)
+                       lclasses[[classe]] <- ntoplot
+               }
+               sup.keep <- select.chi.classe(chisqtable, nbbycl, active = FALSE)
+               toplot.sup <- chisqtable[debsup:(debet+1),]
+               toplot.sup <- toplot.sup[sup.keep, ]
+               lsup <- list()
+               for (classe in 1:length(sum.cl)) {
+                       ntoplot <- toplot.sup[,classe]
+                       ntoplot <- ntoplot[order(ntoplot, decreasing = TRUE)]
+                       ntoplot <- round(ntoplot, 0)
+                       ntoplot <- ntoplot[1:nbbycl]
+                       ntoplot <- ntoplot[order(ntoplot)]
+                       #ntoplot <- ifelse(length(ntoplot) > nbbycl, ntoplot[1:nbbycl], ntoplot)
+                       lsup[[classe]] <- ntoplot
+               }
+       layout(matrix(c(rep(1,nrow(dd)),c(2:(nrow(dd)+1)),c((nrow(dd)+2):(2*nrow(dd)+1)), c((2*nrow(dd)+2):(3*nrow(dd)+1))),ncol=4), heights=tclp[clod], widths=c(0.05,0.05,0.05, 0.85))
+       }
+       
+    if (! bw) {
+        col <- rainbow(length(sum.cl))[as.numeric(tree$tip.label)]
+        colcloud <- rainbow(length(sum.cl))
+    }
+    par(mar=c(0,0,0,0))
+    label.ori<-tree[[2]]
+    if (!is.null(lab)) {
+        tree$tip.label <- lab
+    } else {
+           tree[[2]]<-paste('classe ',tree[[2]])
+    }
+       plot.phylo(tree,label.offset=0.1,tip.col=col, type=type.dendro)
+       if (do.cloud) {
+           for (i in rev(tree.order)) {
+               par(mar=c(0,0,1,0),cex=0.9)
+               wordcloud(names(lclasses[[i]]), lclasses[[i]], scale = c(max.cex, min.cex), random.order=FALSE, colors = colcloud[i])
+           }
+           for (i in rev(tree.order)) {
+               par(mar=c(0,0,1,0),cex=0.9)
+               wordcloud(names(lsup[[i]]), lsup[[i]], scale = c(max.cex, min.cex), random.order=FALSE, colors = colcloud[i])
+           }
+       }
+
+    for (i in rev(tree.order)) {    
+        par(mar=c(0,0,0,0))
+        lcol <- cut(dd[i,], breaks, include.lowest=TRUE)
+        ulcol <- names(table(lcol))
+        lcol <- as.character(lcol)
+        for (j in 1:length(ulcol)) {
+            lcol[which(lcol==ulcol[j])] <- j
+        }
+        lcol <- as.numeric(lcol)
+    
+        #lcol[which(lcol <= 9)] <- 1
+    
+        mcol <- rainbow(nrow(dd))[i]
+        last.col <- NULL
+        for (k in alphas) {
+            last.col <- c(last.col, rgb(r=col2rgb(mcol)[1]/255, g=col2rgb(mcol)[2]/255, b=col2rgb(mcol)[3]/255, a=k))
+        }
+        print(last.col)
+    
+        barplot(rep(1,ncol(dd)), width=ptc, names.arg=FALSE, axes=FALSE, col=last.col[lcol], border=rgb(r=0, g=0, b=0, a=0.3))
+
+    }
+}
+
+
+filename.to.svg <- function(filename) {
+       filename <- gsub('.png', '.svg', filename)
+       return(filename)
+}
+
+open_file_graph <- function (filename, width=800, height = 800, quality = 100, svg = FALSE) {
+       if (Sys.info()["sysname"] == 'Darwin') {
+        width <- width/74.97
+        height <- height/74.97
+        if (!svg) {
+                   quartz(file = filename, type = 'png', width = width, height = height)
+        } else {
+            svg(filename.to.svg(filename), width=width, height=height)
+        }
+       } else {
+        if (svg) {
+            svg(filename.to.svg(filename), width=width/74.97, height=height/74.97)
+        } else {
+                   png(filename, width=width, height=height)#, quality = quality)
+        }
+       }
+}
+
+
+
+open_file_graph('cl_cloud_dates_gay.png', height=900, width=2500, svg=TRUE)
+plot.dendro.cloud(tree.cut1$tree.cl, classes, chistabletot, from.cmd=TRUE)
+
+dev.off()