correction labbé

author pierre <ratinaud@univ-tlse2.fr>

Mon, 20 Jun 2022 16:29:29 +0000 (18:29 +0200)

committer pierre <ratinaud@univ-tlse2.fr>

Mon, 20 Jun 2022 16:29:29 +0000 (18:29 +0200)
author pierre <ratinaud@univ-tlse2.fr>
Mon, 20 Jun 2022 16:29:29 +0000 (18:29 +0200)
committer pierre <ratinaud@univ-tlse2.fr>
Mon, 20 Jun 2022 16:29:29 +0000 (18:29 +0200)
diff --git a/Rscripts/Rgraph.R b/Rscripts/Rgraph.R

index af0b351..8407e31 100644 (file)
--- a/Rscripts/Rgraph.R
+++ b/Rscripts/Rgraph.R
@@ -14,7 +14,7 @@
  #   dev.off()
  #}
  
-PlotDendroCut <- function(chd,filename,reso,clusternb) {   
+PlotDendroCut <- function(chd,filename,reso,clusternb) {
     h.chd <- as.hclust(chd)
     memb <- cutree(h.chd, k = clusternb)
     cent <- NULL
@@ -951,7 +951,7 @@ plot.spec <- function(spec, nb.word = 20) {
             vcex <- norm.vec(word.size[,i], 2, 3)
                 text(-0.9, -0.5, cn[i], cex = 1, adj=0, srt=90, col='black')
             for (j in 1:length(word.size[,i])) {
-               yval <- yval-(strheight(word.to.plot[j,i],cex=vcex[j])+0.02)
+               yval <- yval-(strheight(word.to.plot[j,i],cex=vcex[j])+0.01)
                 text(-0.9, yval, word.to.plot[j,i], cex = vcex[j], col = col, adj=0)
             }
         }
@@ -996,6 +996,7 @@ plot.alceste.graph <- function(rdata,nd=3,layout='fruke', chilim = 2) {
  
  make.simi.afc <- function(x,chitable,lim=0, alpha = 0.1, movie = NULL) {
      library(igraph)
+    library(rgl)
      chimax<-as.matrix(apply(chitable,1,max))
      chimax<-as.matrix(chimax[,1][1:nrow(x)])
      chimax<-cbind(chimax,1:nrow(x))
@@ -1192,19 +1193,20 @@ graphml.to.file <- function(graph.path) {
  graph.to.file <- function(graph.simi, nodesfile = NULL, edgesfile = NULL, community = FALSE, color = NULL, sweight = NULL) {
         require(igraph)
         g <- graph.simi$graph
-    print(graph.simi$eff)
+    #print(graph.simi$eff)
      if (!is.null(graph.simi$eff)) {
             V(g)$weight <- graph.simi$eff
      } else {
          V(g)$weight <- graph.simi$label.cex
      }
-       layout <- layout.norm(graph.simi$layout,-5,5,-5,5,-5,5)
-       print(layout)
+       layout <- layout.norm(graph.simi$layout,-10,10,-10,10,-10,10)
+       #print(layout)
         V(g)$x <- layout[,1]
         V(g)$y <- layout[,2]
         if (ncol(layout) == 3) {
                 V(g)$z <- layout[,3]
         }
+    E(g)$weight <- graph.simi$we.width
         if (community) {
                 member <- graph.simi$communities$membership
                 col <- rainbow(max(member))
@@ -1218,7 +1220,7 @@ graph.to.file <- function(graph.simi, nodesfile = NULL, edgesfile = NULL, commun
                 v.colors <- col2rgb(color)
                 V(g)$r <- v.colors[1,]
                 V(g)$g <- v.colors[2,]
-               V(g)$b <- v.colors[3,]          
+               V(g)$b <- v.colors[3,]
         }
         if (!is.null(sweight)) {
                 V(g)$sweight <- sweight
@@ -1238,6 +1240,7 @@ graph.to.file <- function(graph.simi, nodesfile = NULL, edgesfile = NULL, commun
  graph.to.file2 <- function(graph, layout, nodesfile = NULL, edgesfile = NULL, community = FALSE, color = NULL, sweight = NULL) {
         require(igraph)
         g <- graph
+    layout <- layout.norm(layout,-5,5,-5,5,-5,5)
         V(g)$x <- layout[,1]
         V(g)$y <- layout[,2]
         if (ncol(layout) == 3) {
diff --git a/Rscripts/distance-labbe.R b/Rscripts/distance-labbe.R

index eccb3ae..747332b 100644 (file)
--- a/Rscripts/distance-labbe.R
+++ b/Rscripts/distance-labbe.R
@@ -29,8 +29,10 @@ compute.labbe <- function(x, y, tab) {
          U <- N1/N2
          mini.tab[,2] <- mini.tab[,2] * U
                 col.plusgrand <- mini.tab[,2]
-               cs.plus.grand <- sum(col.plusgrand[col.plusgrand>1])
+               cs.plus.grand <- sum(col.plusgrand[col.plusgrand>=1])
      }
+    #print(U)
+    #print(cs.plus.grand)
      commun <- which((mini.tab[,1] > 0) & (mini.tab[,2] > 0))
      deA <- which((mini.tab[,plus.petit] > 0) & (mini.tab[,plus.grand] == 0))
      deB <- which((mini.tab[,plus.petit] == 0)  & (mini.tab[,plus.grand] >= 1))
@@ -39,9 +41,12 @@ compute.labbe <- function(x, y, tab) {
      dist.deA <- abs(mini.tab[deA, plus.petit] - mini.tab[deA, plus.grand])
      dist.deB <- abs(mini.tab[deB, plus.petit] - mini.tab[deB, plus.grand])
      dist.labbe <- sum(dist.commun) + sum(dist.deA) + sum(dist.deB)
+    #print(cs[plus.petit])
+    #print(dist.labbe)
  
      indice.labbe <- dist.labbe/(cs[plus.petit] + cs.plus.grand)
-    indice.labbe
+    res = list(indice.labbe = indice.labbe, commun=commun, deA=deA, deB=deB, dist.commun=dist.commun, dist.deA=dist.deA, dist.deB=dist.deB)
+       res
  }
  
  #calcul pour distance texte 1 et 2
@@ -54,9 +59,59 @@ dist.labbe <- function(tab) {
         for (i in 1:(ncol(tab)-1)) {
                 for (j in (1+i):ncol(tab)) {
                         #lab <- compute.labbe(i,j,tab)
-                       mat[j,i] <- compute.labbe(i,j,tab)
+                       mat[j,i] <- compute.labbe(i,j,tab)$indice.labbe
                 }
         }
      mat
  }
  
+dist.labbe2 <- function(tab) {  # Pairwise compute.labbe() index between all columns of `tab`, returned as an object of class "dist".
+    distance_from_idxs <- function (idxs) {  # idxs: one column of the combn() matrix, i.e. a pair of column indices of `tab`
+        i1 <- idxs[1]
+        i2 <- idxs[2]
+        compute.labbe(i1, i2, tab)$indice.labbe  # keep only the `indice.labbe` element of the list returned by compute.labbe()
+    }
+
+    size <- ncol(tab)  # number of columns being compared
+    d <- apply(utils::combn(size, 2), 2, distance_from_idxs)  # one value per index pair, in combn() order; NOTE(review): combn(size, 2) presumably fails when `tab` has fewer than 2 columns - confirm
+    attr(d, "Size") <- size  # Size/Labels/Diag/Upper are set by hand before tagging the vector as "dist"
+    xnames <- colnames(tab)
+    if (!is.null(xnames)) {
+        attr(d, "Labels") <- xnames  # labels are only attached when `tab` has column names
+    }
+    attr(d, "Diag") <- FALSE
+    attr(d, "Upper") <- FALSE
+    class(d) <- "dist"
+    d  # return value; the commented-out lines below are a disabled matrix-filling loop like the one in dist.labbe
+       #mat <- matrix(NA, ncol=ncol(tab), nrow=ncol(tab))
+       #rownames(mat) <- colnames(tab)
+       #colnames(mat) <- colnames(tab)
+       #for (i in 1:(ncol(tab)-1)) {
+       #       for (j in (1+i):ncol(tab)) {
+                       #lab <- compute.labbe(i,j,tab)
+       #               mat[j,i] <- compute.labbe(i,j,tab)$indice.labbe
+       #       }
+       #}
+    #mat
+}
+
+dist.labbe3 <- function(tab) {  # Same "dist" construction as dist.labbe2, but each pair of columns is passed to labbe() instead of compute.labbe().
+    distance_from_idxs <- function (idxs) {  # idxs: one column of the combn() matrix, i.e. a pair of column indices of `tab`
+        i1 <- tab[,idxs[1]]  # here i1/i2 are the column contents themselves, not the indices
+        i2 <- tab[,idxs[2]]
+        labbe(i1, i2)  # NOTE(review): labbe() is not defined in the visible code - confirm where it comes from and that it returns a single number
+    }
+
+    size <- ncol(tab)  # number of columns being compared
+    d <- apply(utils::combn(size, 2), 2, distance_from_idxs)  # one value per index pair, in combn() order
+    attr(d, "Size") <- size  # Size/Labels/Diag/Upper are set by hand before tagging the vector as "dist"
+    xnames <- colnames(tab)
+    if (!is.null(xnames)) {
+        attr(d, "Labels") <- xnames  # labels are only attached when `tab` has column names
+    }
+    attr(d, "Diag") <- FALSE
+    attr(d, "Upper") <- FALSE
+    class(d) <- "dist"
+    d  # return value
+
+}
diff --git a/checkinstall.py b/checkinstall.py

index e7dca8a..17d32fd 100644 (file)
--- a/checkinstall.py
+++ b/checkinstall.py
@@ -123,7 +123,7 @@ def FindRPAthWin32():
          for progpath in progpaths :
              rpath = os.path.join(progpath, "R")
              if os.path.exists(rpath) :
-                for maj in range(2,4) :
+                for maj in range(2,6) :
                      for i in range(0,30):
                          for j in range(0,20):
                              for poss in ['', 'i386', 'x64'] :
@@ -138,6 +138,8 @@ def FindRPathNix():
          BestPath='/usr/bin/R'
      elif os.path.exists('/usr/local/bin/R'):
          BestPath='/usr/local/bin/R'
+    elif os.path.exists('/Library/Frameworks/R.framework/Resources/bin/R') :
+        BestPath='/Library/Frameworks/R.framework/Resources/bin/R'
      return BestPath
  
  def RLibsAreInstalled(self) :
@@ -148,7 +150,8 @@ def RLibsAreInstalled(self) :
          return True
  
  def CheckRPackages(self):
-    listdep = ['ca', 'rgl', 'gee', 'ape', 'igraph','proxy', 'wordcloud', 'irlba', 'textometry']
+    listdep = ['ca', 'rgl', 'gee', 'ape', 'igraph','proxy', 'wordcloud',
+               'irlba', 'textometry', 'intergraph', 'sna', 'network']
      nolib = []
      i=0
      dlg = wx.ProgressDialog("Test des librairies de R", "test en cours...", maximum = len(listdep), parent=self, style=wx.PD_APP_MODAL | wx.PD_AUTO_HIDE | wx.PD_ELAPSED_TIME | wx.PD_CAN_ABORT)
@@ -174,12 +177,12 @@ def CheckRPackages(self):
  %s
  
  Sans ces bibliothèques, IRamuteq ne fonctionnera pas.
-    
+
  - Vous pouvez installer ces bibliothèques manuellement :
          Cliquez sur Annuler
          Lancez R
          Tapez install.packages('nom de la bibiothèque')
-        
+
  - ou laisser IRamuteq les installer automatiquement en cliquant sur VALIDER .
          Les bibliothèques seront téléchargées depuis le site miroir de R %s.
          """ % (txt, self.pref.get('iramuteq','rmirror'))
diff --git a/functions.py b/functions.py

index 8c0c66c..6a3d968 100755 (executable)
--- a/functions.py
+++ b/functions.py
@@ -114,6 +114,11 @@ class History :
          self.matrixanalyse = dict([[mat['uuid'], mat] for mat in self.matrix])
          self.ordermatrix = dict([[matrix['uuid'], i] for i, matrix in enumerate(self.matrix)])
          d.close()
+        d = {}
+        d['history'] = self.history
+        d['matrix'] = self.matrix
+#        with open('/home/pierre/hystory.json', 'w') as f :
+#            f.write(json.dumps(d, indent=4, default=str))
  
      def write(self) :
          d = shelve.open(self.filein)
@@ -805,7 +810,7 @@ def treat_var_mod(variables) :
  #                     var_mod[var].append(variable)
      return var_mod
  
-def doconcorde(corpus, uces, mots, uci = False) :
+def doconcorde(corpus, uces, mots, uci = False, et = False) :
      if not uci :
          ucestxt1 = [row for row in corpus.getconcorde(uces)]
      else :
@@ -813,8 +818,11 @@ def doconcorde(corpus, uces, mots, uci = False) :
      ucestxt1 = dict(ucestxt1)
      ucestxt = []
      ucis_txt = []
-    listmot = [corpus.getlems()[lem].formes for lem in mots]
-    listmot = [corpus.getforme(fid).forme for lem in listmot for fid in lem]
+    if not et :
+        listmot = [corpus.getlems()[lem].formes for lem in mots]
+        listmot = [corpus.getforme(fid).forme for lem in listmot for fid in lem]
+    else :
+        listmot = mots
      mothtml = ['<font color=red><b>%s</b></font>' % mot for mot in listmot]
      dmots = dict(zip(listmot, mothtml))
      for uce in uces :
@@ -877,13 +885,12 @@ def gettranslation(words, lf, lt) :
      .NET CLR 3.0.04506.30\
      )"}
      base_link = "https://translate.googleapis.com/translate_a/single?client=gtx&sl=%s&tl=%s&dt=t&q=%s"
-    print len(words)
      totrans = urllib2.quote('\n'.join(words).encode('utf8'))
      link = base_link % (lf, lt, totrans)
      request = urllib2.Request(link, headers=agent)
      raw_data = urllib2.urlopen(request).read()
      data = json.loads(raw_data)
-    return [line[0].decode('utf8').replace(u"'", u'_').replace(u' | ', u'|').replace(u' ', u'_').replace(u'-',u'_').replace(u'\n','') for line in data[0]]
+    return [line[0].decode('utf8', errors='replace').replace(u"'", u'_').replace(u' | ', u'|').replace(u' ', u'_').replace(u'-',u'_').replace(u'\n','') for line in data[0]]
  
  def makenprof(prof, trans, deb=0) :
      nprof=[]
@@ -901,7 +908,7 @@ def treatempty(val) :
      else :
          return val
  
-def translateprofile(corpus, dictprofile, lf='it', lt='fr', maxword = 50) :
+def translateprofile(corpus, dictprofile, lf='it', lt='fr', maxword = 20) :
      nprof = {}
      lems = {}
      for i in range(len(dictprofile)) :
@@ -943,7 +950,7 @@ def translateprofile(corpus, dictprofile, lf='it', lt='fr', maxword = 50) :
                  nlensup = maxword
              else :
                  nlensup = lensup
-            supori = [line[6] for line in prof[(1+lenact):(lenact+nlensup)]]
+            supori = [line[6] for line in prof[(1+lenact):(lenact+nlensup+1)]]
              sup = [val.replace(u'_', u' ') for val in supori]
              sup = [treatempty(val) for val in sup]
              sup = gettranslation(sup, lf, lt)
diff --git a/iramuteq.py b/iramuteq.py

index 501b0f6..8531535 100755 (executable)
--- a/iramuteq.py
+++ b/iramuteq.py
@@ -1,8 +1,9 @@
  #!/bin/env python
  # -*- coding: utf-8 -*-
  #Author: Pierre Ratinaud
-#Copyright (c) 2008-2016, Pierre Ratinaud
+#Copyright (c) 2008-2020, Pierre Ratinaud
  #License: GNU GPL
+#test
  
  from optparse import OptionParser
  
@@ -13,6 +14,7 @@ parser.add_option("-f", "--file", dest="filename",
  
  import sys
  reload(sys)
+#sys.path.insert(0, '/usr/lib/python2.7/dist-packages/wxPython-4.0.1-py2.7-linux-amd64.egg')
  import locale
  import tempfile
  import codecs
@@ -28,6 +30,7 @@ import wx.lib.agw.aui as aui
  import wx.html
  import wx.grid
  import wx.lib.hyperlink as hl
+#import wx.lib.agw.hyperlink as hl
  #------------------------------------
  from functions import BugReport, PlaySound, History, progressbar
  from checkversion import NewVersion
@@ -51,7 +54,7 @@ from textaslexico import Lexico
  from textlabbe import DistLabbe
  from textsimi import SimiTxt, SimiFromCluster
  from textwordcloud import WordCloud, ClusterCloud
-from textreinert import Reinert
+from textreinert import Reinert#, ReDoReinert
  #from textcheckcorpus import checkcorpus
  from openanalyse import OpenAnalyse
  from corpus import Builder, SubBuilder, MergeClusters
@@ -1031,6 +1034,9 @@ Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, États-Unis."""
          #FIXME
          AnalyseMerge(self, {'type': 'merge', 'fileout' : '/tmp/test.txt'}, dlg = 5)
  
+    def OnEliminate(self, corpus, parametres) :  # Stub: currently only prints `parametres`; `corpus` is not used.
+        print parametres  # Python 2 print statement, debug output only
+
      def OnMergeClusters(self, evt) :
          builder = MergeClusters(self, {})
          if builder.res == wx.ID_OK :
diff --git a/layout.py b/layout.py

index 4ea02da..80ca8b5 100644 (file)
--- a/layout.py
+++ b/layout.py
@@ -767,14 +767,20 @@ class dolexlayout :
          self.TabStat = aui.AuiNotebook(ira.nb, -1, wx.DefaultPosition)
          self.TabStat.parametres = parametres
          self.ListPan = ListForSpec(ira, self, self.DictSpec, self.etoiles)
+        self.ListPan.pathout = self.pathout
          if os.path.exists(self.pathout['banalites.csv']) :
              self.listban = ListForSpec(ira, self, self.dictban, ['eff'] + self.etoiles, usefirst = True)
          #self.ListPan2 = ListForSpec(sash.rightwin1, self, self.DictSpec, first)
          self.ListPant = ListForSpec(ira, self, self.DictType, self.etoiles)
+        self.ListPant.pathout = self.pathout
          self.ListPanEff = ListForSpec(ira, self, self.DictEff, self.etoiles)
+        self.ListPanEff.pathout = self.pathout
          self.ListPanEffType = ListForSpec(ira, self, self.DictEffType, self.etoiles)
+        self.ListPanEffType.pathout = self.pathout
          self.ListPanEffRelForme = ListForSpec(ira, self, self.DictEffRelForme, self.etoiles)
+        self.ListPanEffRelForme.pathout = self.pathout
          self.ListPanEffRelType = ListForSpec(ira, self, self.DictEffRelType, self.etoiles)
+        self.ListPanEffRelType.pathout = self.pathout
  
          self.TabStat.AddPage(self.ListPan, _(u'Forms').decode('utf8'))
          if os.path.exists(self.pathout['banalites.csv']) :
@@ -799,6 +805,7 @@ class dolexlayout :
  
          self.TabStat.corpus = self.corpus
          self.TabStat.etoiles = self.etoiles
+        self.TabStat.pathout = self.pathout
          if os.path.exists(os.path.join(self.parametres['pathout'], 'tgenspec.csv')) :
              self.parametres['tgenspec'] = os.path.join(self.parametres['pathout'], 'tgenspec.csv')
              TgenLayout(self.TabStat)
@@ -822,6 +829,8 @@ class StatLayout:
  
          #self.TabStatTot = wx.TextCtrl(self.TabStat, -1, style=wx.NO_BORDER | wx.TE_MULTILINE | wx.TE_RICH2)
          list_graph = [['zipf.png', 'zipf']]
+        if os.path.exists(self.pathout['segments_size.png']) :
+            list_graph.append(['segments_size.png', _(u'Taille des segments')])
          self.TabStatTot = GraphPanel(ira.nb, self.pathout, list_graph, self.result['glob'])
          self.TabStat.AddPage(self.TabStatTot, _(u'Abstract').decode('utf8'))
  
@@ -856,6 +865,7 @@ class GraphPanelDendro(wx.Panel):
          self.graphnb = 1
          self.dictpathout = dico
          self.dirout = os.path.dirname(self.dictpathout['ira'])
+        self.pathout = self.dictpathout
          self.list_graph = list_graph
          self.parent = self.GetParent()#parent
          self.SetFont(wx.Font(10, wx.DEFAULT, wx.NORMAL, wx.NORMAL, 0, "Arial"))
diff --git a/textstat.py b/textstat.py

index 7fdf5bd..f0a92fd 100755 (executable)
--- a/textstat.py
+++ b/textstat.py
@@ -84,7 +84,7 @@ class Stat(AnalyseText) :
          open_file_graph("%s", width = 400, height = 400)
          barplot(table(stsize[,1]))
          dev.off()
-        """ % (self.pathout['stsize.csv'], self.pathout['segments_size.png'])
+        """ % (ffr(self.pathout['stsize.csv']), ffr(self.pathout['segments_size.png']))
          tmpscript = tempfile.mktemp(dir=self.parent.TEMPDIR)
          with open(tmpscript, 'w') as f :
              f.write(txt)
author	pierre <ratinaud@univ-tlse2.fr>
	Mon, 20 Jun 2022 16:29:29 +0000 (18:29 +0200)
committer	pierre <ratinaud@univ-tlse2.fr>
	Mon, 20 Jun 2022 16:29:29 +0000 (18:29 +0200)
Rscripts/Rgraph.R		patch \| blob \| history
Rscripts/distance-labbe.R		patch \| blob \| history
checkinstall.py		patch \| blob \| history
functions.py		patch \| blob \| history
iramuteq.py		patch \| blob \| history
layout.py		patch \| blob \| history
textstat.py		patch \| blob \| history