modif chdtxt, a tester, problemes sur double sur rst

author Pierre <ratinaud@univ-tlse2.fr>

Fri, 25 Jan 2013 10:53:41 +0000 (11:53 +0100)

committer Pierre <ratinaud@univ-tlse2.fr>

Fri, 25 Jan 2013 10:53:41 +0000 (11:53 +0100)
author Pierre <ratinaud@univ-tlse2.fr>
Fri, 25 Jan 2013 10:53:41 +0000 (11:53 +0100)
committer Pierre <ratinaud@univ-tlse2.fr>
Fri, 25 Jan 2013 10:53:41 +0000 (11:53 +0100)
diff --git a/PrintRScript.py b/PrintRScript.py

index 4987b21..d0dc4f0 100644 (file)
--- a/PrintRScript.py
+++ b/PrintRScript.py
@@ -193,11 +193,18 @@ def RchdTxt(DicoPath, RscriptPath, mincl, classif_mode, nbt = 9, svdmethod = 'sv
          rm(data2)
          """
      txt += """
-    chd.result <- Rchdtxt("%s",mincl=%i,classif_mode=%i, nbt = nbt)
+    classif_mode <- %i
+    mincl <- %i
+    uceout <- "%s"
+    if (classif_mode == 0) {
+        chd.result <- Rchdtxt(uceout, chd1, chd2 = chd2, mincl = mincl,classif_mode = classif_mode, nbt = nbt)
+    } else {
+        chd.result <- Rchdtxt(uceout, chd1, chd2 = chd1, mincl = mincl,classif_mode = classif_mode, nbt = nbt)
+    }
      n1 <- chd.result$n1
      classeuce1 <- chd.result$cuce1
      classeuce2 <- chd.result$cuce2
-    """ % (DicoPath['uce'], mincl, classif_mode)
+    """ % (classif_mode, mincl, DicoPath['uce'])
      
      txt += """
      tree.tot1 <- make_tree_tot(chd1)
@@ -704,13 +711,33 @@ class PrintSimiScript(PrintRScript) :
              cn.path <- "%s"
              selected.col <- "%s"
              """ % (self.pathout['mat01.csv'], self.pathout['actives.csv'], self.pathout['selected.csv'])
+            if 'word' in self.parametres :
+                txt += """
+                word <- TRUE
+                index <- %i + 1
+                """ % self.parametres['word']
+            else :
+                txt += """
+                word <- FALSE
+                """
              txt += """
              dm <-readMM(dm.path)
              cn <- read.table(cn.path, sep='\t', quote='"')
              colnames(dm) <- cn[,1]
-            sel.col <- read.csv2(selected.col)
-            dm <- dm[, sel.col[,1] + 1]
+            sel.col <- read.csv2(selected.col, header = FALSE)
+            sel.col <- sel.col[,1] + 1
+            if (!word) {
+                dm <- dm[, sel.col]
+            } else {
+                forme <- colnames(dm)[index]
+                if (!index %in% sel.col) {
+                    sel.col <- append(sel.col, index)
+                }
+                dm <- dm[, sel.col]
+                index <- which(colnames(dm) == forme)
+            }
              """
+
          else :
              txt += """
              load("%s")
@@ -754,6 +781,16 @@ class PrintSimiScript(PrintRScript) :
              mat[is.na(mat)] <- 0
              mat[is.infinite(mat)] <- 0
              """
+        if 'word' in self.parametres and not self.parametres['keep_coord'] :
+            txt += """
+            mat <- graph.word(mat, index)
+            cs <- colSums(mat)
+            if (length(cs)) mat <- mat[,-which(cs==0)]
+            rs <- rowSums(mat)
+            if (length(rs)) mat <- mat[-which(rs==0),]
+            if (length(cs)) dm <- dm[, -which(cs==0)]
+            """
+
          if self.parametres['layout'] == 0 : layout = 'random'
          if self.parametres['layout'] == 1 : layout = 'circle'
          if self.parametres['layout'] == 2 : layout = 'frutch'
@@ -943,18 +980,15 @@ class PrintSimiScript(PrintRScript) :
                  vertex.size <- NULL
                  """
          else :
-            #FIXME
-            tmpchi = False
-            if tmpchi :
+            if self.parametres['type'] == 'clustersimitxt' : 
                  txt += """
                  lchi <- read.table("%s")
                  lchi <- lchi[,1]
-                """ % ffr(tmpchi)
-                if 'selected_col' in dir(self.tableau) :
-                    txt += """
-                    lchi <- lchi[c%s+1]
-                    """ % datas
-            if tmpchi and self.parametres.get('cexfromchi', False) :
+                """ % ffr(self.parametres['tmpchi'])
+                txt += """
+                    lchi <- lchi[sel.col]
+                    """
+            if self.parametres['type'] == 'clustersimitxt' and self.parametres.get('cexfromchi', False) :
                  txt += """ 
                  label.cex <- norm.vec(lchi, vcexminmax[1], vcexminmax[2])
                  """
@@ -966,7 +1000,7 @@ class PrintSimiScript(PrintRScript) :
                  label.cex <- graph.simi$label.cex
              }
              """
-            if tmpchi and self.parametres.get('sfromchi', False) :
+            if self.parametres['type'] == 'clustersimitxt' and self.parametres.get('sfromchi', False) :
                  txt += """ 
                  vertex.size <- norm.vec(lchi, minmaxeff[1], minmaxeff[2])
                  """
diff --git a/ProfList.py b/ProfList.py

index 600dbcf..e2a7c84 100644 (file)
--- a/ProfList.py
+++ b/ProfList.py
@@ -74,6 +74,7 @@ class ProfListctrlPanel(wx.ListCtrl, listmix.ListCtrlAutoWidthMixin, listmix.Col
              self.la = []
              self.lchi = []
              self.lfreq = []
+        self.tmpchi = None
              
          #adding some art
          self.il = wx.ImageList(16, 16)
@@ -406,7 +407,7 @@ class ProfListctrlPanel(wx.ListCtrl, listmix.ListCtrlAutoWidthMixin, listmix.Col
      def quest_simi(self, evt) :
          tableau = self.Source.tableau
          tab = tableau.make_table_from_classe(self.cl, self.la)
-        pathout = ConstructPathOut(self.Source.pathout+'/', 'simi_classe_%i' %self.cl)
+        pathout = ConstructPathOut(os.path.join(self.Source.pathout, 'simi_classe_%i' %self.cl))
          self.filename = os.path.join(pathout,'mat01.csv')
          tableau.printtable(self.filename, tab)
          del tab
@@ -459,56 +460,32 @@ class ProfListctrlPanel(wx.ListCtrl, listmix.ListCtrlAutoWidthMixin, listmix.Col
  
      def onwordgraph(self, evt):
          word = self.getColumnText(self.GetFirstSelected(), 6)
-        dlg = progressbar(self, 2)
-        corpus = self.Source.corpus
-        uces = corpus.lc[self.cl-1]
-        dlg.Update(1, u'Tableau...')
-        #tab = corpus.make_table_with_classe(uces, self.la)
-        pathout = ConstructPathOut(self.Source.pathout.dirout + '/' , 'simi_%s' % word)
-        self.filename = os.path.join(pathout,'mat01.csv')
-        dlg.Update(2, u'Ecriture...')
-        #corpus.write_tab(tab, self.filename)
-        #del tab
-        corpus.make_and_write_sparse_matrix_from_classe(self.la, uces, self.filename)
-        dlg.Destroy()
-        paramsimi = {'coeff' : 0,
-                          'layout' : 2,
-                          'type' : 1,
-                          'arbremax' : 0,
-                          'coeff_tv' : 1,
-                          'coeff_tv_nb' : 0,
-                          'tvprop' : 0,
-                          'tvmin' : 5,
-                          'tvmax' : 30,
-                          'coeff_te' : 1,
-                          'coeff_temin' : 1,
-                          'coeff_temax' : 10,
-                          'label_v': 1,
-                          'label_e': 0,
-                          'vcex' : 1,
-                          'vcexmin' : 10,
-                          'vcexmax' : 25, 
-                          'cex' : 10,
-                          'seuil_ok' : 1,
-                          'seuil' : 1,
-                          'cols' : (255,0,0),
-                          'cola' : (200,200,200),
-                          'width' : 600,
-                          'height' : 600,
-                          'first' : True,
-                          'keep_coord' : True,
-                          'alpha' : 20,
-                          'film': False,
-                          }
-        self.tableau = Tableau(self.parent, '')
-        self.tableau.listactives = self.la
-        self.tableau.actives = {}
-        for i, val in enumerate(self.la) :
-            self.tableau.actives[val] = [self.lfreq[i]]
-        DoSimi(self, param = paramsimi, fromprof = ffr(self.filename), pathout = pathout, wordgraph = word)
+        if self.tmpchi is None :
+            self.tmpchi = tempfile.mktemp(dir=self.Source.parent.TEMPDIR)
+            with open(self.tmpchi, 'w') as f:
+                f.write('\n'.join([str(val) for val in self.lchi]))
+        index = self.la.index(word)
+        parametres = {'type' : 'clustersimitxt', 
+                        'pathout' : self.Source.parametres['pathout'],
+                        'word' : index ,
+                        'lem' : self.Source.parametres['lem'],
+                        'tmpchi' : self.tmpchi}
+        #try :
+        self.parent.SimiFromCluster(self.parent, self.Source.corpus, self.la, self.cl - 1, parametres = parametres, dlg = progressbar(self, 4))
+        #except :
+        #    print 'not acitve'
  
      def on_graph(self, evt):
-        self.parent.SimiFromCluster(self.parent, self.Source.corpus, self.la, self.cl - 1, parametres = {'type' : 'clustersimitxt', 'pathout' : self.Source.parametres['pathout']}, dlg = progressbar(self, 4))
+        if self.tmpchi is None :
+            self.tmpchi = tempfile.mktemp(dir=self.Source.parent.TEMPDIR)
+            with open(self.tmpchi, 'w') as f:
+                f.write('\n'.join([str(val) for val in self.lchi]))
+        parametres = {'type' : 'clustersimitxt', 
+                        'pathout' : self.Source.parametres['pathout'],
+                        'lem' : self.Source.parametres['lem'],
+                        'tmpchi' : self.tmpchi}
+
+        self.parent.SimiFromCluster(self.parent, self.Source.corpus, self.la, self.cl - 1, parametres = parametres, dlg = progressbar(self, 4))
          #dlg = progressbar(self, 2)
          #corpus = self.Source.corpus
          #uces = corpus.lc[self.cl-1]
diff --git a/Rscripts/CHD.R b/Rscripts/CHD.R

index 53fb813..974e901 100644 (file)
--- a/Rscripts/CHD.R
+++ b/Rscripts/CHD.R
@@ -41,6 +41,10 @@ find.max <- function(dtable, chitable, compte, rmax, maxinter, sc, TT) {
      res
  }  
  
+
+
+
+
  CHD<-function(data.in, x=9, mode.patate = FALSE, svd.method, libsvdc.path=NULL){
  #      sink('/home/pierre/workspace/iramuteq/dev/findchi2.txt')
         dataori <- data.in
@@ -103,7 +107,7 @@ CHD<-function(data.in, x=9, mode.patate = FALSE, svd.method, libsvdc.path=NULL){
          rmax <- NULL
  
          inert <- find.max(dtable, chitable, compte, rmax, maxinter, sc, TT)
-               print('@@@@@@@@@@@@@@@@@@@@@@@@@@@@')
+        print('@@@@@@@@@@@@@@@@@@@@@@@@@@@@')
                 pp('max inter phase 1', inert$maxinter/TT)#max(listinter))
                 print('@@@@@@@@@@@@@@@@@@@@@@@@@@@@')
          ordert <- ordert[order(ordert[,3]),]
diff --git a/Rscripts/afc_graph.R b/Rscripts/afc_graph.R

index d067c16..8a62d8a 100644 (file)
--- a/Rscripts/afc_graph.R
+++ b/Rscripts/afc_graph.R
@@ -132,7 +132,11 @@ if ( qui == 3 ) {
      infp <-  which(is.infinite(maxchi) & maxchi > 0)
      if (length(infp)) {
          maxchi[infp] <- NA
-        valmax <- max(maxchi, na.rm = TRUE)
+        if (!length(infp) == length(maxchi)) {
+            valmax <- max(maxchi, na.rm = TRUE)
+        } else {
+            valmax <- 8
+        }
          maxchi[infp] <- valmax + 2
      } 
      if (cex.txt) {
diff --git a/Rscripts/chdtxt.R b/Rscripts/chdtxt.R

index a0a9cdd..436d0ba 100644 (file)
--- a/Rscripts/chdtxt.R
+++ b/Rscripts/chdtxt.R
@@ -6,39 +6,9 @@
  #fonction pour la double classification
  #cette fonction doit etre splitter en 4 ou 5 fonctions
  
-#Rchdtxt<-function(tableuc1,tableuc2,listeuce1,listeuce2,arbre1,arbre2,uceout) {
-       #source('/home/pierre/workspace/iramuteq/Rscripts/CHD.R')
-
-       #lecture des tableaux
-#      data1<-read.csv2(tableuc1)
-#      data2<-read.csv2(tableuc2)
-
-       #analyse des tableaux avec la fonction CHD qui doit etre sourcee avant
-#      chd1<-CHD(data1)
-#      chd2<-CHD(data2)
-
-       #lecture des uce
-#      listuce1<-read.csv2(listeuce1)
-#      listuce2<-read.csv2(listeuce2)
-
-       #Une fonction pour assigner une classe a chaque UCE
-#AssignClasseToUce<-function(listuce,chd) {
-#    out<-matrix(nrow=nrow(listuce),ncol=ncol(chd))
-#    for (i in 1:nrow(listuce)) {
-#              for (j in 1:ncol(chd)) {
-#                  out[i,j]<-chd[(listuce[i,2]+1),j]
-#              }
-#    }
-#    out
-#}
-
  AssignClasseToUce <- function(listuce, chd) {
      print('assigne classe -> uce')
-    out<-matrix(nrow=nrow(listuce),ncol=ncol(chd))
-    for (j in 1:ncol(chd)) {
-        out[listuce[,1]+1, j] <- chd[listuce[,2]+1, j]
-    }
-    out
+    chd[listuce[,2]+1,]
  }
  
  fille<-function(classe,classeuce) {
@@ -47,8 +17,77 @@ fille<-function(classe,classeuce) {
         listf<-unique(listf)
         listf
  }
+
+
+croiseeff <- function(croise, classeuce1, classeuce2) {
+    cl1 <- 0
+    cl2 <- 1
+    for (i in 1:ncol(classeuce1)) {
+        cl1 <- cl1 + 2
+        cl2 <- cl2 + 2
+        clj1 <- 0
+        clj2 <- 1
+        for (j in 1:ncol(classeuce2)) {
+            clj1 <- clj1 + 2
+            clj2 <- clj2 + 2
+            croise[cl1 - 1, clj1 -1] <- length(which(classeuce1[,i] == cl1 & classeuce2[,j] == clj1))
+            croise[cl1 - 1, clj2 -1] <- length(which(classeuce1[,i] == cl1 & classeuce2[,j] == clj2))
+            croise[cl2 - 1, clj1 -1] <- length(which(classeuce1[,i] == cl2 & classeuce2[,j] == clj1))
+            croise[cl2 - 1, clj2 -1] <- length(which(classeuce1[,i] == cl2 & classeuce2[,j] == clj2))
+        }
+    }
+    croise
+}
+
+addallfille <- function(lf) {
+    nlf <- list()
+    for (i in 1:length(lf)) {
+        if (! is.null(lf[[i]])) {
+            nlf[[i]] <- lf[[i]]
+            filles <- lf[[i]]
+            f1 <- filles[1]
+            f2 <- filles[2]
+            if (f1 > length(lf)) {
+                for (j in (length(lf) + 1):f2) {
+                    nlf[[j]] <- 0
+                }
+            }
+        } else {
+            nlf[[i]] <- 0
+        }
+    }
+nlf
+}
+
+getfille <- function(nlf, classe, pf) {
+    if (length(nlf[[classe]]) == 1) {
+        return(pf)
+    } else {
+        pf <- c(pf, nlf[[classe]])
+        c1 <- nlf[[classe]][1]
+        c2 <- nlf[[classe]][2]
+        pf <- getfille(nlf, c1, pf)
+        pf <- getfille(nlf, c2, pf)
+    }
+    return(pf)
+}
+
+getmere <- function(list_mere, classe) {
+    i <- classe
+    pf <- NULL
+    while (i != 1 ) {
+        pf <- c(pf, list_mere[[i]])
+        i <- list_mere[[i]]
+    }
+    pf
+}
+
+getfillemere <- function(list_fille, list_mere, classe) {
+    return(c(getfille(list_fille, classe, NULL), getmere(list_mere, classe)))
+}
+
  #nbt nbcl = nbt+1 tcl=((nbt+1) *2) - 2  n1[,ncol(n1)], nchd1[,ncol(nchd1)]
-Rchdtxt<-function(uceout,mincl=0,classif_mode=0, nbt = 9) {
+Rchdtxt<-function(uceout, chd1, chd2 = NULL, mincl=0, classif_mode=0, nbt = 9) {
         #FIXME: le nombre de classe peut etre inferieur
      nbcl <- nbt + 1
      tcl <- ((nbt+1) * 2) - 2
@@ -62,16 +101,29 @@ Rchdtxt<-function(uceout,mincl=0,classif_mode=0, nbt = 9) {
  
         #calcul des poids (effectifs)
  
-       makepoids<-function(classeuce,poids) {
-           for (classes in 2:(tcl + 1)){
-               for (i in 1:ncol(classeuce)) {
-                   if (poids[(classes-1)]<length(classeuce[,i][classeuce[,i]==classes])) {
-                       poids[(classes-1)]<-length(classeuce[,i][classeuce[,i]==classes])
-                   }
-               }
-           }
-           poids
-       }
+    makepoids <- function(classeuce, poids) {
+        cl1 <- 0
+        cl2 <- 1
+        for (i in 1:nbt) {
+            cl1 <- cl1 + 2
+            cl2 <- cl2 + 2
+            poids[cl1 - 1] <- length(which(classeuce[,i] == cl1))
+            poids[cl2 - 1] <- length(which(classeuce[,i] == cl2))
+        }
+        poids
+    }
+
+#      makepoids<-function(classeuce,poids) {
+#          for (classes in 2:(tcl + 1)){
+#                  for (i in 1:ncol(classeuce)) {
+#                      if (poids[(classes-1)]<length(classeuce[,i][classeuce[,i]==classes])) {
+#                          poids[(classes-1)]<-length(classeuce[,i][classeuce[,i]==classes])
+#                      }
+#                  }
+#          }
+#          poids
+#      }
+    print('make poids')
         poids1<-vector(mode='integer',length = tcl)
         poids1<-makepoids(classeuce1,poids1)
         if (classif_mode==0) {
@@ -82,24 +134,29 @@ Rchdtxt<-function(uceout,mincl=0,classif_mode=0, nbt = 9) {
         }
      
      print('croisement classif')
-       croise=matrix(ncol=tcl,nrow=tcl)
-       #production du tableau de contingence
-       for (i in 1:ncol(classeuce1)) {
-           #poids[i]<-length(classeuce1[,i][x==classes])
-           for (j in 1:ncol(classeuce2)) {
-               tablecroise<-table(classeuce1[,i],classeuce2[,j])
-               tabcolnames<-as.numeric(colnames(tablecroise))
-               #tabcolnames<-c(tabcolnames[(length(tabcolnames)-1)],tabcolnames[length(tabcolnames)])
-               tabrownames<-as.numeric(rownames(tablecroise))
-               #tabrownames<-c(tabrownames[(length(tabrownames)-1)],tabrownames[length(tabrownames)])
-                   for (k in (ncol(tablecroise)-1):ncol(tablecroise)) {
-                       for (l in (nrow(tablecroise)-1):nrow(tablecroise)) {
-                           croise[(tabrownames[l]-1),(tabcolnames[k]-1)]<-tablecroise[l,k]
-                       }
-                   }
-           }
-           tablecroise
-       }
+
+#    croise=matrix(ncol=tcl,nrow=tcl)
+#
+#    docroise <- function(croise, classeuce1, classeuce2) {
+#      #production du tableau de contingence
+#      for (i in 1:ncol(classeuce1)) {
+#          #poids[i]<-length(classeuce1[,i][x==classes])
+#          for (j in 1:ncol(classeuce2)) {
+#                  tablecroise<-table(classeuce1[,i],classeuce2[,j])
+#                  tabcolnames<-as.numeric(colnames(tablecroise))
+#                  #tabcolnames<-c(tabcolnames[(length(tabcolnames)-1)],tabcolnames[length(tabcolnames)])
+#                  tabrownames<-as.numeric(rownames(tablecroise))
+#                  #tabrownames<-c(tabrownames[(length(tabrownames)-1)],tabrownames[length(tabrownames)])
+#                  for (k in (ncol(tablecroise)-1):ncol(tablecroise)) {
+#                      for (l in (nrow(tablecroise)-1):nrow(tablecroise)) {
+#                          croise[(tabrownames[l]-1),(tabcolnames[k]-1)]<-tablecroise[l,k]
+#                      }
+#                  }
+#          }
+#      }
+#        croise
+#    }
+    croise <- croiseeff( matrix(ncol=tcl,nrow=tcl), classeuce1, classeuce2)
      if (classif_mode == 0) {ind <- (nbcl * 2)} else {ind <- nbcl}
         if (mincl==0){
                 mincl<-round(nrow(classeuce1)/ind)
@@ -111,73 +168,129 @@ Rchdtxt<-function(uceout,mincl=0,classif_mode=0, nbt = 9) {
         #print('table1')
         #print(croise)
         #tableau des chi2 signes
-       chicroise<-croise
-       for (i in 1:nrow(croise)) {
-           for (j in 1:ncol(croise)) {
-                   if (croise[i,j]==0) {
-                       chicroise[i,j]<-0
-                   } else if (croise[i,j]<mincl) { 
-                       chicroise[i,j]<-0
-                   } else {
-                       chitable<-matrix(ncol=2,nrow=2)
-                       chitable[1,1]<-croise[i,j]
-                       chitable[1,2]<-poids1[i]-chitable[1,1]
-                       chitable[2,1]<-poids2[j]-chitable[1,1]
-                       chitable[2,2]<-nrow(classeuce1)-poids2[j]-chitable[1,2]
-                       chitest<-chisq.test(chitable,correct=FALSE)
-                       if ((chitable[1,1]-chitest$expected)<0) {
-                           chicroise[i,j]<--round(chitest$statistic,digits=7)
-                       } else {
-                           chicroise[i,j]<-round(chitest$statistic,digits=7)
-               #print(chitest)
-                       }
-                   }
-           }   
-       }
+    print('croise chi2')
+       #chicroise<-croise
+
+#    nr <- nrow(classeuce1)
+#    newchicroise <- function(croise, mincl, nr, poids1, poids2) {
+#        chicroise <- croise
+#        chicroise[which(croise < mincl)] <- 0
+#        tocompute <- which(chicroise > 0, arr.ind = TRUE)
+#        for (i in 1:nrow(tocompute)) {
+#            chitable <- matrix(ncol=2,nrow=2)
+#            chitable[1,1] <- croise[tocompute[i,1],  tocompute[i,2]]
+#            chitable[1,2] <- poids1[tocompute[i,1]] - chitable[1,1]
+#            chitable[2,1] <- poids2[tocompute[i,2]] - chitable[1,1]
+#            chitable[2,2] <- nr - poids2[tocompute[i,2]] - chitable[1,2]
+#            chitest<-chisq.test(chitable,correct=FALSE)
+#            chicroise[tocompute[i,1],  tocompute[i,2]] <- ifelse(chitable[1,1] > chitest$expected[1,1], round(chitest$statistic,digits=7), -round(chitest$statistic,digits=7))
+#        }
+#        chicroise
+#    }
+#
+        
+
+       dochicroise <- function(croise, mincl) {
+        chicroise <- croise
+        for (i in 1:nrow(croise)) {
+               for (j in 1:ncol(croise)) {
+                   if (croise[i,j]==0) {
+                       chicroise[i,j]<-0
+                   } else if (croise[i,j]<mincl) { 
+                       chicroise[i,j]<-0
+                   } else {
+                       chitable<-matrix(ncol=2,nrow=2)
+                       chitable[1,1]<-croise[i,j]
+                       chitable[1,2]<-poids1[i]-chitable[1,1]
+                       chitable[2,1]<-poids2[j]-chitable[1,1]
+                       chitable[2,2]<-nrow(classeuce1)-poids2[j]-chitable[1,2]
+                       chitest<-chisq.test(chitable,correct=FALSE)
+                       if ((chitable[1,1]-chitest$expected[1,1])<0) {
+                           chicroise[i,j]<--round(chitest$statistic,digits=7)
+                       } else {
+                           chicroise[i,j]<-round(chitest$statistic,digits=7)
+               #print(chitest)
+                       }
+                   }
+               }   
+           }
+        chicroise
+    }
+    chicroise <- dochicroise(croise, mincl)
+    print('fin croise')
         #print(chicroise)
         #determination des chi2 les plus fort
         chicroiseori<-chicroise
-       maxi<-vector()
-       chimax<-vector()
-       for (i in 1:tcl) {
-           maxi[i]<-which.max(chicroise)
-           chimax[i]<-chicroise[maxi[i]]
-           chicroise[maxi[i]]<-0
-       }
-       testpres<-function(x,listcoord) {
-           for (i in 1:length(listcoord)) {
-                   if (x==listcoord[i]) {
-                       return(-1)
-                   } else {
-                       a<-1
-                   }
-           }
-           a
-       }
-       c.len=nrow(chicroise)
-       r.len=ncol(chicroise)
-       listx<-c(0)
-       listy<-c(0)
-       rang<-0
-       cons<-list()
-       #on garde une valeur par ligne / colonne
-       for (i in 1:length(maxi)) {
-       #coordonnées de chi2 max
-           x.co<-ceiling(maxi[i]/c.len)
-           y.co<-maxi[i]-(x.co-1)*c.len
-           a<-testpres(x.co,listx)
-           b<-testpres(y.co,listy)
-           
-           if (a==1) {
-                       if (b==1) {
-                           rang<-rang+1
-                           listx[rang]<-x.co
-                           listy[rang]<-y.co
-                       }
-           }
-           cons[[1]]<-listx
-           cons[[2]]<-listy
-       }
+
+    doxy <- function(chicroise) {
+        listx <- NULL
+        listy <- NULL
+        listxy <- which(chicroise > 3.84, arr.ind = TRUE)
+        #print(listxy)
+        val <- chicroise[which(chicroise > 3.84)]
+        ord <- order(val, decreasing = TRUE)
+        listxy <- listxy[ord,]
+        #for (i in 1:nrow(listxy)) {
+        #    if ((!listxy[,2][i] %in% listx) & (!listxy[,1][i] %in% listy)) {
+        #        listx <- c(listx, listxy[,2][i])
+        #        listy <- c(listy, listxy[,1][i])
+        #    }
+        #}
+        xy <- list(x = listxy[,2], y = listxy[,1])
+        xy
+    }
+    xy <- doxy(chicroise)
+    print(xy)
+    listx <- xy$x
+    listy <- xy$y
+
+#      maxi<-vector()
+#      chimax<-vector()
+#      for (i in 1:tcl) {
+#          maxi[i]<-which.max(chicroise)
+#          chimax[i]<-chicroise[maxi[i]]
+#          chicroise[maxi[i]]<-0
+#      }
+#      testpres<-function(x,listcoord) {
+#          for (i in 1:length(listcoord)) {
+#                  if (x==listcoord[i]) {
+#                      return(-1)
+#                  } else {
+#                      a<-1
+#                  }
+#          }
+#          a
+#      }
+#      c.len=nrow(chicroise)
+#      r.len=ncol(chicroise)
+#      listx<-c(0)
+#      listy<-c(0)
+#      rang<-0
+#      cons<-list()
+#      #on garde une valeur par ligne / colonne
+#      for (i in 1:length(maxi)) {
+#      #coordonnées de chi2 max
+#        #coord <- arrayInd(maxi[i], dim(chicroise))
+#        #x.co <- coord[1,2]
+#        #y.co <- coord[1,1]
+#          x.co<-ceiling(maxi[i]/c.len)
+#          y.co<-maxi[i]-(x.co-1)*c.len
+#        #print(x.co)
+#        #print(y.co)
+#        #print(arrayInd(maxi[i], dim(chicroise)))
+#          a<-testpres(x.co,listx)
+#          b<-testpres(y.co,listy)
+#          
+#          if (a==1) {
+#                      if (b==1) {
+#                          rang<-rang+1
+#                          listx[rang]<-x.co
+#                          listy[rang]<-y.co
+#                      }
+#          }
+#          cons[[1]]<-listx
+#          cons[[2]]<-listy
+#      }
         #pour ecrire les resultats
         for (i in 1:length(listx)) {
             txt<-paste(listx[i]+1,listy[i]+1,sep=' ')
@@ -192,73 +305,172 @@ Rchdtxt<-function(uceout,mincl=0,classif_mode=0, nbt = 9) {
             unique(unlist(chd[chd[,classe%/%2]==classe,]))
         }
  
+
+#----------------------------------------------------------------------
+    findbestcoord <- function(classeuce1, classeuce2) {
+        #fillemere1 <- NULL
+        #fillemere2 <- NULL
+
+        #fillemere1 <- unique(classeuce1)
+        #if (classif_mode == 0) {
+        #    fillemere2 <- unique(classeuce2)
+        #} else {
+        #    fillemere2 <- fillemere1
+        #}
+
+        #
+        listcoordok <- list()
+        maxcl <- 0
+        nb <- 0
+        lf1 <- addallfille(chd1$list_fille) 
+        if (classif_mode == 0) {
+            lf2 <- addallfille(chd2$list_fille)
+        } else {
+            lf2 <- lf1
+        }
+        lme1 <- chd1$list_mere
+        if (classif_mode == 0) {
+            lme2 <- chd2$list_mere
+        } else {
+            lme2 <- lme1
+        }
+        for (first in 1:length(listx)) {
+            coordok <- NULL
+            f1 <- NULL
+            f2 <- NULL
+            listxp<-listx
+           listyp<-listy
+            
+           #listxp<-listx[first:length(listx)]
+           #listxp<-c(listxp,listx[1:(first-1)])
+           #listyp<-listy[first:length(listy)]
+           #listyp<-c(listyp,listy[1:(first-1)])
+            listxp <- listxp[order(listx, decreasing = TRUE)]
+            listyp <- listyp[order(listx, decreasing = TRUE)]
+            #listxp<-c(listxp[first:length(listx)], listx[1:(first-1)])
+            #listyp<-c(listyp[first:length(listy)], listy[1:(first-1)])
+            for (i in 1:length(listx)) {
+                if( (!(listxp[i]+1) %in% f1) & (!(listyp[i]+1) %in% f2) ) {
+                    #print(listyp[i]+1)
+                    #print('not in')
+                    #print(f2)
+                    coordok <- rbind(coordok, c(listyp[i] + 1,listxp[i] + 1))
+                    #print(c(listyp[i] + 1,listxp[i] + 1))
+                    un1 <- getfillemere(lf2, chd2$list_mere, listxp[i] + 1)
+                    f1 <- c(f1, un1)
+                    f1 <- c(f1, listxp[i] + 1)
+                    un2 <- getfillemere(lf1, chd1$list_mere, listyp[i] + 1)
+                    f2 <- c(f2, un2)
+                    f2 <- c(f2, listyp[i] + 1)
+                }
+                #print(coordok)
+            }
+            #if (nrow(coordok) > maxcl) {
+                nb <- 1
+            #    listcoordok <- list()
+                listcoordok[[nb]] <- coordok
+            #    maxcl <- nrow(coordok)
+            #} else if (nrow(coordok) == maxcl) {
+                nb <- nb + 1
+            #    listcoordok[[nb]] <- coordok
+            #}
+        }
+        listcoordok <- unique(listcoordok)
+        print(listcoordok)
+        best <- 1
+        if (length(listcoordok) > 1) {
+            maxchi <- 0
+            for (i in 1:length(listcoordok)) {
+                chi <- NULL
+                uce <- NULL
+                for (j in 1:nrow(listcoordok[[i]])) {
+                    chi<-c(chi,croise[(listcoordok[[i]][j,1]-1),(listcoordok[[i]][j,2]-1)])
+                    uce<-c(uce,chicroiseori[(listcoordok[[i]][j,1]-1),(listcoordok[[i]][j,2]-1)])
+                }
+               if (maxchi < sum(chi)) {
+                   maxchi <- sum(chi)
+                   suce <- sum(uce)
+                   best <- i
+               }
+            }
+        print(suce/nrow(classeuce1))
+        }
+        listcoordok[[best]]
+    }
+#---------------------------------------------------------------------------------   
         #pour trouver une valeur dans une liste
         #is.element(elem, list)
         #== elem%in%list
-
-       coordok<-NULL
-       trouvecoordok<-function(first) {
-           fillemere1<-NULL
-           fillemere2<-NULL
-           listxp<-listx
-           listyp<-listy
-           listxp<-listx[first:length(listx)]
-           listxp<-c(listxp,listx[1:(first-1)])
-           listyp<-listy[first:length(listy)]
-           listyp<-c(listyp,listy[1:(first-1)])
-           for (i in 1:length(listxp)) {
-               if (!(listxp[i]+1)%in%fillemere1) {
-                       if (!(listyp[i]+1)%in%fillemere2) {
-                           coordok<-rbind(coordok,c(listyp[i]+1,listxp[i]+1))
-                           fillemere1<-c(fillemere1,trouvefillemere(listxp[i]+1,chd2$n1))
-                           fillemere2<-c(fillemere2,trouvefillemere(listyp[i]+1,chd1$n1))
-                       }
-              }
-           }
+    oldfindbestcoord <- function(listx, listy) {
+       coordok<-NULL
+       trouvecoordok<-function(first) {
+           fillemere1<-NULL
+           fillemere2<-NULL
+           listxp<-listx
+           listyp<-listy
+           listxp<-listx[first:length(listx)]
+           listxp<-c(listxp,listx[1:(first-1)])
+           listyp<-listy[first:length(listy)]
+           listyp<-c(listyp,listy[1:(first-1)])
+           for (i in 1:length(listxp)) {
+               if (!(listxp[i]+1)%in%fillemere1) {
+                       if (!(listyp[i]+1)%in%fillemere2) {
+                           coordok<-rbind(coordok,c(listyp[i]+1,listxp[i]+1))
+                           fillemere1<-c(fillemere1,trouvefillemere(listxp[i]+1,chd2$n1))
+                           fillemere2<-c(fillemere2,trouvefillemere(listyp[i]+1,chd1$n1))
+                       }
+              }
+           }
+           coordok
+       }
+    #fonction pour trouver le nombre maximum de classes
+       findmaxclasse<-function(listx,listy) {
+           listcoordok<-list()
+           maxcl<-0
+           nb<-1
+           for (i in 1:length(listy)) {
+                       coordok<-trouvecoordok(i)
+                       if (maxcl <= nrow(coordok)) {
+                           maxcl<-nrow(coordok)
+                           listcoordok[[nb]]<-coordok
+                           nb<-nb+1
+                       }
+           }
+           listcoordok<-unique(listcoordok)
+            print(listcoordok)
+               #si plusieurs ensemble avec le meme nombre de classe, on conserve
+               #la liste avec le plus fort chi2
+           if (length(listcoordok)>1) {
+                   maxchi<-0
+                   best<-NULL
+                   for (i in 1:length(listcoordok)) {
+                       chi<-NULL
+                       uce<-NULL
+                       if (nrow(listcoordok[[i]])==maxcl) {
+                           for (j in 1:nrow(listcoordok[[i]])) {
+                               chi<-c(chi,croise[(listcoordok[[i]][j,1]-1),(listcoordok[[i]][j,2]-1)])
+                               uce<-c(uce,chicroiseori[(listcoordok[[i]][j,1]-1),(listcoordok[[i]][j,2]-1)])
+                           }
+                           if (maxchi < sum(chi)) {
+                               maxchi <- sum(chi)
+                               suce <- sum(uce)
+                               best <- i
+                           } 
+                       }
+                   }
+           }
+           print((maxchi/nrow(classeuce1)*100))
+           listcoordok[[best]]
+       }
+        print('cherche max')
+           coordok<-findmaxclasse(listx,listy)
             coordok
-       }
-#fonction pour trouver le nombre maximum de classes
-       findmaxclasse<-function(listx,listy) {
-           listcoordok<-list()
-           maxcl<-0
-           nb<-1
-           for (i in 1:length(listy)) {
-                       coordok<-trouvecoordok(i)
-                       if (maxcl <= nrow(coordok)) {
-                           maxcl<-nrow(coordok)
-                           listcoordok[[nb]]<-coordok
-                           nb<-nb+1
-                       }
-           }
-           listcoordok<-unique(listcoordok)
-               #si plusieurs ensemble avec le meme nombre de classe, on conserve
-               #la liste avec le plus fort chi2
-           if (length(listcoordok)>1) {
-                   maxchi<-0
-                   best<-NULL
-                   for (i in 1:length(listcoordok)) {
-                       chi<-NULL
-                       uce<-NULL
-                       if (nrow(listcoordok[[i]])==maxcl) {
-                           for (j in 1:nrow(listcoordok[[i]])) {
-                               chi<-c(chi,croise[(listcoordok[[i]][j,1]-1),(listcoordok[[i]][j,2]-1)])
-                               uce<-c(uce,chicroiseori[(listcoordok[[i]][j,1]-1),(listcoordok[[i]][j,2]-1)])
-                           }
-                           if (maxchi < sum(chi)) {
-                               maxchi <- sum(chi)
-                               suce <- sum(uce)
-                               best <- i
-                           } 
-                       }
-                   }
-           }
-           print((suce/nrow(classeuce1)*100))
-           listcoordok[[best]]
-       }
+    }
         #findmaxclasse(listx,listy)
         #coordok<-trouvecoordok(1)
-       coordok<-findmaxclasse(listx,listy)
-       print(coordok)
+    #coordok <- oldfindbestcoord(listx, listy)
+    coordok <- findbestcoord(listx, listy)
+
  
         lfilletot<-function(classeuce,x) {
             listfille<-NULL
@@ -267,7 +479,7 @@ Rchdtxt<-function(uceout,mincl=0,classif_mode=0, nbt = 9) {
                         listfille
             }
         }
-
+    print('listfille')
         listfille1<-lfilletot(classeuce1,1)
         listfille2<-lfilletot(classeuce2,2)
  
@@ -283,10 +495,11 @@ Rchdtxt<-function(uceout,mincl=0,classif_mode=0, nbt = 9) {
         }
         print('commence assigne new classe')
         nchd1<-Assignclasse(classeuce1,1)
-       if (classif_mode==0)
+       if (classif_mode==0) {
                 nchd2<-Assignclasse(classeuce2,2)
-       else
+       } else {
                 nchd2<-nchd1
+    }
         print('fini assign new classe')
         #croisep<-matrix(ncol=nrow(coordok),nrow=nrow(coordok))
      nchd2[which(nchd1[,ncol(nchd1)]==0),] <- 0
diff --git a/Rscripts/simi.R b/Rscripts/simi.R

index 98bacf7..067eaec 100644 (file)
--- a/Rscripts/simi.R
+++ b/Rscripts/simi.R
@@ -297,4 +297,9 @@ graph.word <- function(mat.simi, index) {
      nm[,index] <- mat.simi[,index]
      nm[index,] <- mat.simi[index,]
      nm
+#    cs <- colSums(nm)
+#    if (cs) nm <- nm[,-which(cs==0)]
+#    rs <- rowSums(nm)
+#    if (rs) nm <- nm[-which(rs==0),]
+#    nm
  }
diff --git a/analysetxt.py b/analysetxt.py

index ff37c98..2811a7e 100644 (file)
--- a/analysetxt.py
+++ b/analysetxt.py
@@ -19,18 +19,19 @@ from time import time
  log = logging.getLogger('iramuteq.analyse')
  
  class AnalyseText :
-    def __init__(self, ira, corpus, parametres = None, dlg = False) :
+    def __init__(self, ira, corpus, parametres = None, dlg = False, lemdial = True) :
          self.corpus = corpus
          self.ira = ira
          self.parent = ira
          self.dlg = dlg
          self.dialok = True
          self.parametres = parametres
+        self.lemdial = lemdial
          self.val = False
          if not 'pathout' in self.parametres :
              self.pathout = PathOut(corpus.parametres['originalpath'], analyse_type = parametres['type'], dirout = corpus.parametres['pathout'])
          else :
-            self.pathout = PathOut(filename = corpus.parametres['originalpath'], dirout = self.parametres['pathout'], analyse_type = self.parametres['name'])
+            self.pathout = PathOut(filename = corpus.parametres['originalpath'], dirout = self.parametres['pathout'], analyse_type = self.parametres['type'])
          self.parametres = self.lemparam()
          if self.parametres is not None :
              self.parametres = self.make_config(parametres)
@@ -77,7 +78,7 @@ class AnalyseText :
          pass
  
      def lemparam(self) :
-        if self.dlg :
+        if self.dlg and self.lemdial:
              dial = StatDialog(self, self.parent)
              dial.CenterOnParent()
              val = dial.ShowModal()
@@ -117,7 +118,7 @@ class AnalyseText :
          log.info('R code...')
          pid = exec_rcode(self.ira.RPath, Rscript, wait = wait)
          while pid.poll() is None :
-            if dlg is not None :
+            if dlg :
                  self.dlg.Pulse(message)
                  sleep(0.2)
              else :
@@ -143,6 +144,7 @@ class Alceste(AnalyseText) :
              self.corpus.make_and_write_sparse_matrix_from_uci(self.actives, self.pathout['TableUc1'], self.pathout['listeuce1'])
          Rscript = self.printRscript()
          self.doR(Rscript, dlg = self.dlg, message = 'CHD...')
+
          self.corpus.make_ucecl_from_R(self.pathout['uce'])
          self.corpus.make_and_write_profile(self.actives, self.corpus.lc, self.pathout['Contout'])
          self.sup, lim = self.corpus.make_actives_nb(self.parametres['max_actives'], 2)
diff --git a/corpus.py b/corpus.py

index d4357cf..90332c1 100644 (file)
--- a/corpus.py
+++ b/corpus.py
@@ -213,13 +213,23 @@ class Corpus :
      def getetoileuces(self) :
          log.info('get uces etoiles')
          etoileuces = {}
+        idpara = 0
          for uci in self.ucis :
-            etoiles = uci.etoiles[1:] + uci.paras
+            etoiles = uci.etoiles[1:]
              for et in etoiles :
                  if et in etoileuces :
                      etoileuces[et] += [uce.ident for uce in uci.uces]
                  else :
                      etoileuces[et] = [uce.ident for uce in uci.uces]
+            if uci.paras != [] :
+                for et in uci.paras :
+                    if et in etoileuces :
+                        etoileuces[et] += [uce.ident for uce in uci.uces if uce.para == idpara]
+                    else :
+                        etoileuces[et] = [uce.ident for uce in uci.uces if uce.para == idpara]
+                    idpara += 1
+            else :
+                idpara += 1
          return etoileuces
  
      def getucefromid(self, uceid) :
@@ -542,7 +552,7 @@ class Corpus :
      def make_etoiles(self) :
          etoiles = set([])
          for uci in self.ucis :
-            etoiles.update(uci.etoiles[1:] + uci.paras)
+            etoiles.update(uci.etoiles[1:])
          return list(etoiles)
  
      def make_etoiles_dict(self) :
diff --git a/iracmd.py b/iracmd.py

index cec02e9..8c000cb 100644 (file)
--- a/iracmd.py
+++ b/iracmd.py
@@ -12,7 +12,7 @@ import locale
  import codecs
  sys.setdefaultencoding(locale.getpreferredencoding())
  from chemins import ConstructConfigPath, ConstructDicoPath, ConstructRscriptsPath
-from functions import ReadLexique, DoConf, History
+from functions import ReadLexique, DoConf, History, ReadDicoAsDico
  from ConfigParser import *
  #######################################
  #from textchdalc import AnalyseAlceste
diff --git a/layout.py b/layout.py

index 2a30f5c..8cfcd96 100644 (file)
--- a/layout.py
+++ b/layout.py
@@ -250,7 +250,7 @@ class GraphPanel(wx.ScrolledWindow):
          self.Dict = dico
          self.txt = txt
          self.parent = parent
-        self.SetFont(wx.Font(10, wx.DEFAULT, wx.NORMAL, wx.NORMAL, 0, "courier"))
+        self.SetFont(wx.Font(10, wx.DEFAULT, wx.NORMAL, wx.FONTWEIGHT_BOLD, 0, "courier"))
          self.labels = []
          self.listimg = []
          self.dirout = os.path.dirname(self.Dict['ira'])
@@ -561,11 +561,10 @@ def PrintRapport(self, corpus, parametres, istxt = True):
  
  
  """ % datetime.datetime.now().ctime()
-    print istxt
      if istxt :
          totocc = corpus.gettotocc()
-        txt += u'nombre d\'uci: %i%s' % (corpus.getucinb(), sep)
-        txt += u'nombre d\'uce: %i%s' % (corpus.getucenb(), sep)
+        txt += u'nombre de textes: %i%s' % (corpus.getucinb(), sep)
+        txt += u'nombre de segments de textes: %i%s' % (corpus.getucenb(), sep)
          txt += u'nombre de formes: %i%s' % (len(corpus.formes), sep)
          txt += u'nombre d\'occurrences: %i%s' % (totocc, sep)
          txt += u'moyenne d\'occurrences par forme: %f%s' % (float(totocc) / float(len(self.corpus.formes)), sep)
@@ -575,10 +574,8 @@ def PrintRapport(self, corpus, parametres, istxt = True):
          txt += u'nombre de formes actives de fréquence >= %i: %i%s' % (parametres['eff_min_forme'], parametres['nbactives'], sep)
          txt += u'moyenne d\'occurrences par uce :%f%s' % (float(totocc) / float(corpus.getucenb()), sep)
          if 'tailleuc1' in parametres :
-            if parametres['classif_mode'] != 0 :
-                txt += u'taille uc1 : %i\n' % parametres['tailleuc1']
-            else:
-                txt += u'taille uc1 / uc2: %i / %i - %i / %i%s' % (parametres['tailleuc1'], parametres['tailleuc2'], parametres['lenuc1'], parametres['lenuc2'], sep)
+            if parametres['classif_mode'] == 0 :
+                txt += u'taille rst1 / rst2: %i / %i - %i / %i%s' % (parametres['tailleuc1'], parametres['tailleuc2'], parametres['lenuc1'], parametres['lenuc2'], sep)
      else :
          self.Ucenb = self.nbind
          txt += u'nombre d\'individus : %i%s' % (self.nbind, sep)
@@ -586,9 +583,9 @@ def PrintRapport(self, corpus, parametres, istxt = True):
      if istxt :
          txt += u'nombre de classes : %i%s' % (parametres['clnb'], sep)
          if parametres['classif_mode'] == 0 or parametres['classif_mode'] == 1 :
-            txt += u'%i uce classées sur %i (%.2f%%)%s' % (sum([len(cl) for cl in corpus.lc]), corpus.getucenb(), (float(sum([len(cl) for cl in corpus.lc])) / float(corpus.getucenb())) * 100, sep)
+            txt += u'%i segments classés sur %i (%.2f%%)%s' % (sum([len(cl) for cl in corpus.lc]), corpus.getucenb(), (float(sum([len(cl) for cl in corpus.lc])) / float(corpus.getucenb())) * 100, sep)
          elif self.parametres['classif_mode'] == 2 :
-            txt += u'%i uci classées sur %i (%.2f%%)%s' % (sum([len(cl) for cl in corpus.lc]), corpus.getucinb(), (float(sum([len(cl) for cl in corpus.lc]))) / float(corpus.getucinb()) * 100, sep)
+            txt += u'%i textes classés sur %i (%.2f%%)%s' % (sum([len(cl) for cl in corpus.lc]), corpus.getucinb(), (float(sum([len(cl) for cl in corpus.lc]))) / float(corpus.getucinb()) * 100, sep)
      else :
          txt += u'%i uce classées sur %i (%.2f%%)%s' % (self.ucecla, self.Ucenb, (float(self.ucecla) / float(self.Ucenb)) * 100, sep)
   
@@ -692,12 +689,13 @@ class GraphPanelDendro(wx.Panel):
          self.dirout = os.path.dirname(self.dictpathout['ira'])
          self.list_graph = list_graph
          self.parent = self.GetParent()#parent
-        self.SetFont(wx.Font(10, wx.DEFAULT, wx.NORMAL, wx.NORMAL, 0, "courier"))
+        self.SetFont(wx.Font(10, wx.DEFAULT, wx.NORMAL, wx.NORMAL, 0, "Arial"))
          self.labels = []
          self.listimg = []
          self.tabchd = self.parent.GetParent()
          self.ira = self.tabchd.GetParent()
          self.panel_1 = wx.ScrolledWindow(self, -1, style=wx.TAB_TRAVERSAL)
+        self.panel_1.SetBackgroundColour('white')
          self.deb = wx.StaticText(self.panel_1, -1, txt)
          dendro_img = wx.Image(os.path.join(self.ira.images_path,'but_dendro.png'), wx.BITMAP_TYPE_ANY).ConvertToBitmap()
          self.butdendro = wx.BitmapButton(self, -1, dendro_img)
diff --git a/textsimi.py b/textsimi.py

index 8650be1..0bebb95 100644 (file)
--- a/textsimi.py
+++ b/textsimi.py
@@ -121,14 +121,12 @@ class SimiTxt(AnalyseText):
          with open(self.pathout['actives.csv'], 'w') as f :
              f.write('\n'.join(self.actives).encode(self.ira.syscoding))
  
-
-
  class SimiFromCluster(SimiTxt) :
      def __init__(self, ira, corpus, actives, numcluster, parametres = None, dlg = False) :
          self.actives = actives
          self.numcluster = numcluster
          parametres['name'] = 'simi_classe_%i' % (numcluster + 1)
-        SimiTxt.__init__(self, ira, corpus, parametres, dlg)
+        SimiTxt.__init__(self, ira, corpus, parametres, dlg, lemdial = False)
      
      def preferences(self) :
          return self.parametres
diff --git a/textstat.py b/textstat.py

index afc9ff1..2618c51 100644 (file)
--- a/textstat.py
+++ b/textstat.py
@@ -81,12 +81,17 @@ class Stat(AnalyseText) :
          txt = """
          source("%s")
          tot <- read.csv2("%s", header = FALSE, row.names = 1)
-        hapax <- read.csv2("%s", header = FALSE, row.names = 1)
-        tot <- rbind(tot, hapax)
+        """ % (self.parent.RscriptsPath['Rgraph'], self.pathout['total.csv'])
+        if len(hapax) :
+            txt += """
+            hapax <- read.csv2("%s", header = FALSE, row.names = 1)
+            tot <- rbind(tot, hapax)
+            """ % self.pathout['hapax.csv']
+        txt += """
          open_file_graph("%s", width = 400, height = 400)
          plot(log(tot[,1]), log = 'x', xlab='log(rangs)', ylab = 'log(frequences)', col = 'red', pch=16)
          dev.off()
-        """ % (self.parent.RscriptsPath['Rgraph'], self.pathout['total.csv'], self.pathout['hapax.csv'], self.pathout['zipf.png'])
+        """ % (self.pathout['zipf.png'])
          tmpscript = tempfile.mktemp(dir=self.parent.TEMPDIR)
          with open(tmpscript, 'w') as f :
              f.write(txt)
author	Pierre <ratinaud@univ-tlse2.fr>
	Fri, 25 Jan 2013 10:53:41 +0000 (11:53 +0100)
committer	Pierre <ratinaud@univ-tlse2.fr>
	Fri, 25 Jan 2013 10:53:41 +0000 (11:53 +0100)
PrintRScript.py		patch \| blob \| history
ProfList.py		patch \| blob \| history
Rscripts/CHD.R		patch \| blob \| history
Rscripts/afc_graph.R		patch \| blob \| history
Rscripts/chdtxt.R		patch \| blob \| history
Rscripts/simi.R		patch \| blob \| history
analysetxt.py		patch \| blob \| history
corpus.py		patch \| blob \| history
iracmd.py		patch \| blob \| history
layout.py		patch \| blob \| history
textsimi.py		patch \| blob \| history
textstat.py		patch \| blob \| history