projects
/
iramuteq
/ commitdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
| commitdiff |
tree
raw
|
patch
|
inline
| side by side (parent:
14ef9ad
)
...
author
pierre
<ratinaud@univ-tlse2.fr>
Sun, 4 Feb 2024 21:58:12 +0000
(22:58 +0100)
committer
pierre
<ratinaud@univ-tlse2.fr>
Sun, 4 Feb 2024 21:58:12 +0000
(22:58 +0100)
PrintRScript.py
patch
|
blob
|
history
corpus.py
patch
|
blob
|
history
diff --git
a/PrintRScript.py
b/PrintRScript.py
index
27c4d27
..
0699b3f
100755
(executable)
--- a/
PrintRScript.py
+++ b/
PrintRScript.py
@@
-1,7
+1,6
@@
# -*- coding: utf-8 -*-
#Author: Pierre Ratinaud
# -*- coding: utf-8 -*-
#Author: Pierre Ratinaud
-#Copyright (c) 2008-2020 Pierre Ratinaud
-#modification pour python 3 : Laurent Mérat, 6x7 - mai 2020
+#Copyright (c) 2008-2011 Pierre Ratinaud
#License: GNU/GPL
#------------------------------------
#License: GNU/GPL
#------------------------------------
@@
-1385,6
+1384,17
@@
class LabbeScript(PrintRScript):
tab <- read.csv2("%s", header=TRUE, sep=';', row.names=1)
""" % (ffr(self.pathout['tableafcm.csv']))
txt += """
tab <- read.csv2("%s", header=TRUE, sep=';', row.names=1)
""" % (ffr(self.pathout['tableafcm.csv']))
txt += """
+ cs <- colSums(tab)
+ if (min(cs) == 0) {
+ print('empty columns !!')
+ vide <- which(cs==0)
+ print(vide)
+ tab <- tab[,-vide]
+ }
+ #print('#### RcppIramuteq for C++ Labbe ####')
+ #library(RcppIramuteq)
+ #dist.mat <- labbe(as.matrix(tab))
+ #rownames(dist.mat) <- colnames(tab)
dist.mat <- dist.labbe(tab)
dist.mat <- as.dist(dist.mat, upper=F, diag=F)
write.table(as.matrix(dist.mat), "%s", sep='\t')
dist.mat <- dist.labbe(tab)
dist.mat <- as.dist(dist.mat, upper=F, diag=F)
write.table(as.matrix(dist.mat), "%s", sep='\t')
diff --git
a/corpus.py
b/corpus.py
index
7ab0ebb
..
e6b0bf2
100644
(file)
--- a/
corpus.py
+++ b/
corpus.py
@@
-20,6
+20,9
@@
from operator import itemgetter
from uuid import uuid4
import datetime
from copy import copy
from uuid import uuid4
import datetime
from copy import copy
+#------test spacy------------
+#import spacy
+#nlp = spacy.load("fr_core_news_lg")
#------------------------------------
# import des fichiers du projet
#------------------------------------
# import des fichiers du projet
@@
-1546,6
+1549,7
@@
class BuildSubCorpus(BuildCorpus):
class BuildFromAlceste(BuildCorpus) :
def read_corpus(self, infile) :
class BuildFromAlceste(BuildCorpus) :
def read_corpus(self, infile) :
+
if self.dlg is not None :
self.dlg.Pulse('textes : 0 - segments : 0')
self.limitshow = 0
if self.dlg is not None :
self.dlg.Pulse('textes : 0 - segments : 0')
self.limitshow = 0
@@
-1565,6
+1569,8
@@
class BuildFromAlceste(BuildCorpus) :
if self.testuci(line) :
iduci += 1
if txt != [] :
if self.testuci(line) :
iduci += 1
if txt != [] :
+ #doc = nlp(' '.join(txt))
+ #print([[word, word.pos_, word.lemma_] for word in doc])
iduce, idpara = self.treattxt(txt, iduce, idpara, iduci - 1)
txt = []
self.corpus.ucis.append(Uci(iduci, line))
iduce, idpara = self.treattxt(txt, iduce, idpara, iduci - 1)
txt = []
self.corpus.ucis.append(Uci(iduci, line))