fileout.write(txt)
fileout.close()
-def AlcesteTxtProf(DictChdTxtOut, RscriptsPath, clnb, taillecar):
+def ReinertTxtProf(DictChdTxtOut, RscriptsPath, clnb, taillecar):
txt = "clnb<-%i\n" % clnb
txt += """
source("%s")
""" % ffr(self.pathout['tgenspec.csv'])
self.add(txt)
+class TgenProfScript(PrintRScript):
+ def make_script(self):
+ self.sources([self.analyse.ira.RscriptsPath['chdfunct']])
+ txt = """
+ tgen <- read.csv2("%s", row.names = 1, sep = '\\t')
+ """ % ffr(self.parametres['tgeneff'])
+ txt += """
+ res <- build.prof.tgen(tgen)
+ write.table(res$chi2, file = "%s", sep='\\t', col.names = NA)
+ write.table(res$pchi2, file = "%s", sep='\\t', col.names = NA)
+ """ % (ffr(self.pathout['tgenchi2.csv']), ffr(self.pathout['tgenpchi2.csv']))
+ self.add(txt)
+
class FreqMultiScript(PrintRScript):
def make_script(self):
self.sources([self.analyse.parent.RscriptsPath['Rgraph']])
self.idexport = wx.NewId()
self.idexporttropes = wx.NewId()
self.idexportowledge = wx.NewId()
+ self.onmaketgen = wx.NewId()
# self.export_classes = wx.NewId()
self.Bind(wx.EVT_MENU, self.OnPopupOne, id=self.popupID1)
self.Bind(wx.EVT_MENU, self.onexport, id = self.idexport)
self.Bind(wx.EVT_MENU, self.onexporttropes, id = self.idexporttropes)
self.Bind(wx.EVT_MENU, self.onexportowledge, id = self.idexportowledge)
+ self.Bind(wx.EVT_MENU, self.OnMakeTgen, id=self.onmaketgen)
# self.Bind(wx.EVT_MENU, self.on_export_classes, id = self.export_classes)
# self.Bind(wx.EVT_MENU, self.OnPopupThree, id=self.popupID3)
menu_conc.Append(self.popupID2, u"dans les segments de texte de la classe")
menu_conc.Append(self.popupID3, u"dans les segments de texte classés")
menu_conc.Append(self.popupID4, u"dans tous les segments de texte")
- menu.AppendMenu(-1, u"Concordancier", menu_conc)
+ menu.AppendMenu(-1, u"Concordancier", menu_conc)
+ menu.Append(self.onmaketgen, _(u"Make Tgen").decode('utf8'))
menu_cnrtl = wx.Menu()
menu_cnrtl.Append(self.popupID5, u"Définition")
menu_cnrtl.Append(self.popupID6, u"Etymologie")
menu.Append(self.pop2, u"Chi2 par classe")
menu.Append(self.pop3, u"Chi2 modalités de la variable")
menu.AppendSeparator()
- menu.Append(self.pop1, u"Graph de la classe")
+ menu.Append(self.pop1, u"Graphe de la classe")
self.PopupMenu(menu)
menu.Destroy()
#win.html = '<html>\n' + '<br>'.join([' : '.join([str(val) for val in forme]) for forme in rep]) + '\n</html>'
#win.HtmlPage.SetPage(win.html)
win.Show(True)
+
+ def OnMakeTgen(self, evt):
+ self.parent.tree.OnTgenEditor(self.getselectedwords())
class wliste(wx.Frame):
mat
}
+# Signed chi-square profile of every tgen (row) against every class (column).
+#
+# x : contingency table, one row per tgen and one column per class. The LAST
+#     row is used as the class totals (see nbst / totcl below).
+#     NOTE(review): presumably this is the table written by the Python side
+#     (tgeneff.csv) -- confirm against the caller.
+# Returns list(chi2 = , pchi2 = ): two tables shaped like x without its last
+# row. chi2 holds the chi-square statistic, negated when the observed count is
+# below the expected one; pchi2 holds the matching p-values.
+build.prof.tgen <- function(x) {
+    last <- nrow(x)
+    nbst <- sum(x[last,])
+    totcl <- x[last,]
+    tottgen <- rowSums(x)
+    nbtgen <- last - 1
+    # seq_len() instead of 1:n: with no tgen row, 1:0 would count backwards
+    # and index row 0. drop = FALSE keeps a 2-d table when there is a single
+    # class column.
+    chi2 <- x[seq_len(nbtgen), , drop = FALSE]
+    pchi2 <- chi2
+    for (classe in seq_len(ncol(x))) {
+        for (tg in seq_len(nbtgen)) {
+            obs <- x[tg, classe]
+            # 2x2 table: tgen present/absent crossed with in/out of the class
+            cont <- c(obs, tottgen[tg] - obs, totcl[classe] - obs, (nbst - totcl[classe]) - (tottgen[tg] - obs))
+            cont <- matrix(unlist(cont), nrow=2)
+            chiresult <- chisq.test(cont, correct=FALSE)
+            # Undefined test (e.g. an empty margin): report "no association"
+            if (is.na(chiresult$p.value)) {
+                chiresult$p.value <- 1
+                chiresult$statistic <- 0
+            }
+            # Negative sign marks under-representation in the class
+            if (chiresult$expected[1,1] > cont[1,1]) {
+                chiresult$statistic <- chiresult$statistic * -1
+            }
+            chi2[tg,classe] <- chiresult$statistic
+            pchi2[tg,classe] <- chiresult$p.value
+        }
+    }
+    res <- list(chi2 = chi2, pchi2 = pchi2)
+    res
+}
+
BuildProf<- function(x,dataclasse,clusternb,lim=2) {
####
#r.names<-rownames(x)
author = Pierre Ratinaud
gpl-fr = gpl-2.0-fr.txt
dev = Pierre Ratinaud (Université de Toulouse - Laboratoire LERASS - ratinaud@univ-tlse2.fr);Sébastien Déjean (Université de Toulouse);David Skalinder (Mash Strategy - davids@mash.uk.com);
-version = 0.6 alpha 10
+version = 0.6 alpha 11
licence = GNU GPL (v2)
-version_nb = 0.6.a10
\ No newline at end of file
+version_nb = 0.6.a11
\ No newline at end of file
guilanguage=french
R_mem = false
R_max_mem = 1535
-version_nb = 0.6.a10
+version_nb = 0.6.a11
rlibs = false
libsvdc = false
libsvdc_path = /usr/bin/svd
-rmirror = http://cran.rstudio.com/
+rmirror = http://cran.rstudio.com/
\ No newline at end of file
query = 'SELECT uces FROM uces where id IN (%s) ORDER BY id' % formesid
res = self.cformes.execute(query)
return list(set(list(itertools.chain(*[[int(val) for val in row[0].split()] if not isinstance(row[0], int) else [row[0]] for row in res]))))
+
+ def gettgentxt(self, tgen):
+ sts = self.gettgenst(tgen)
+ return list(set([self.getucefromid(val).uci for val in sts]))
def getlemucis(self, lem) :
uces = self.getlemuces(lem)
tgenoccurrences[t][etoiles[i]] += sum([lemuceeff[uce] for uce in concern])
return tgenoccurrences, totoccurrences
+ def make_tgen_profile(self, tgen, ucecl, uci = False) :
+ log.info('tgen/classes')
+ if uci :
+ tab = [[lem] + [len(set(self.gettgentxt(tgen[lem])).intersection(classe)) for classe in ucecl] for lem in tgen]
+ else :
+ tab = [[lem] + [len(set(self.gettgenst(tgen[lem])).intersection(classe)) for classe in ucecl] for lem in tgen]
+ tab = [[line[0]] + [val for val in line[1:]] for line in tab if sum(line[1:]) >= 3]
+ return tab
+ #i = 0
+ #nam = 'total'
+ #while nam + `i` in tgen :
+ # i += 1
+ #nam = nam + `i`
+ #last = [nam] + [`len(classe)` for classe in ucecl]
+ #tab += [last]
+ #line0 = ['tgen'] + ['_'.join(['cluster', `i+1`]) for i in range(len(ucecl))]
+ #tab = [line0] + tab
+ #with open(fileout, 'w') as f :
+ # f.write('\n'.join(['\t'.join(line) for line in tab]).encode(self.parametres['syscoding']))
+
def make_efftype_from_etoiles(self, etoiles) :
dtype = {}
etuces = [[] for et in etoiles]
class SearchDial ( wx.Frame ):
def __init__( self, parent, listctrl, col, shown):
- wx.Frame.__init__ ( self, parent, id = wx.ID_ANY, title = wx.EmptyString, pos = wx.DefaultPosition, size = wx.DefaultSize, style = wx.DEFAULT_FRAME_STYLE )
+ wx.Frame.__init__ ( self, parent, id = wx.ID_ANY, title = wx.EmptyString, pos = wx.DefaultPosition, size = wx.DefaultSize, style = wx.DEFAULT_FRAME_STYLE | wx.STAY_ON_TOP )
self.parent = parent
self.listctrl = listctrl
self.col = col
return path
while '\\\\' in path :
path = path.replace('\\\\', '\\')
- if sys.platform == 'win32' and path.startswith('\\') and not path.startswith('\\\\') :
+ if path.startswith('\\') and not path.startswith('\\\\') :
path = '\\' + path
return path
from tabrsimple import InputText
from tabverges import Prototypical
from tabsplitvar import SplitMatrixFromVar
-from textdist import AnalysePam
+#from textdist import AnalysePam
from textstat import Stat
from textaslexico import Lexico
from textsimi import SimiTxt, SimiFromCluster
# f.write('')
self.history = History(os.path.join(UserConfigPath, 'history.db'))
self.tree = LeftTree(self)
- self._mgr.AddPane(self.tree, aui.AuiPaneInfo().Name("lefttree").Caption(_(u"Navigator").decode('utf8')).
+ self._mgr.AddPane(self.tree, aui.AuiPaneInfo().Name("lefttree").Caption(_(u"Historic").decode('utf8')).
Left().MinSize(wx.Size(200,500)).Layer(1).Position(1).CloseButton(False).MaximizeButton(True).
MinimizeButton(True))
panel.TabChdSim.AddPage(self.prof_seg_nb, _(u"Repeated segments profiles").decode('utf8'))
# panel.Bind(wx.EVT_BUTTON, self.ongetrapport, id = self.ID_rapport)
+ if os.path.exists(os.path.join(self.parametres['pathout'], 'tgenchi2.csv')) :
+ self.parametres['tgenspec'] = os.path.join(self.parametres['pathout'], 'tgenchi2.csv')
+ TgenLayout(panel)
+ panel.TabChdSim.SetSelection(0)
self.parent.nb.AddPage(panel, _(u"Clustering").decode('utf8') + ' - %s' % corpname)
self.parent.ShowTab(True)
self.parent.nb.SetSelection(self.parent.nb.GetPageCount() - 1)
tgen.read()
tgentab = False
gparent = None
+ if 'TabChdSim' in dir(page) :
+ page = page.TabChdSim
for i in range(page.GetPageCount()) :
tab = page.GetPage(i)
if 'gparent' in dir(tab) :
if tab.tgen :
tgentab = tab
break
+
if tgentab :
self.page.tgentab.RefreshData(self.page.tgens)
self.page.tgentab.tgens = tgen.tgen
- self.page.SetSelection(i)
+ page.SetSelection(i)
else :
self.page.tgentab = ListForSpec(ira, gparent, self.page.tgens, etoiles[1:])
self.page.tgentab.tgen = True
self.page.tgentab.tgens = tgen.tgen
- self.page.AddPage(self.page.tgentab, u'Tgens Specificities')
- self.page.SetSelection(self.page.GetPageCount() - 1)
+ page.AddPage(self.page.tgentab, u'Tgens Specificities')
+ page.SetSelection(page.GetPageCount() - 1)
class dolexlayout :
def __init__(self, ira, corpus, parametres):
menu.Append(self.popupID3, u"Graphique")
menu_stcaract = wx.Menu()
self.menuid = {}
- for i, et in enumerate(self.etoiles) :
- nid = wx.NewId()
- self.menuid[nid] = i
- menu_stcaract.Append(nid, et)
- self.Bind(wx.EVT_MENU, self.onstcaract, id = nid)
- menu.AppendMenu(-1, u"Segments de texte caractéristiques", menu_stcaract)
- #menu.Append(self.popup_Tgen_glob, "Tgen global")
if not self.tgen :
- menu.Append(self.onmaketgen, "Make Tgen")
+ for i, et in enumerate(self.etoiles) :
+ nid = wx.NewId()
+ self.menuid[nid] = i
+ menu_stcaract.Append(nid, et)
+ self.Bind(wx.EVT_MENU, self.onstcaract, id = nid)
+ menu.AppendMenu(-1, u"Segments de texte caractéristiques", menu_stcaract)
+ menu.Append(self.onmaketgen, _(u"Make Tgen").decode('utf8'))
self.PopupMenu(menu)
menu.Destroy()
import sys
from functions import print_liste, exec_rcode, CreateIraFile, progressbar, check_Rresult, BugDialog
from layout import PrintRapport
-from PrintRScript import AlcesteTxtProf, RPamTxt
+from PrintRScript import ReinertTxtProf, RPamTxt
from openanalyse import OpenAnalyse
from time import time, sleep
from time import time
from analysetxt import AnalyseText
from OptionAlceste import OptionAlc
-from PrintRScript import RchdTxt, AlcesteTxtProf
+from PrintRScript import RchdTxt, ReinertTxtProf, TgenProfScript
from layout import PrintRapport
-from chemins import ChdTxtPathOut
-from functions import DoConf, print_liste
+from chemins import ChdTxtPathOut, PathOut
+from functions import DoConf, print_liste, TGen
class Reinert(AnalyseText) :
return self.pathout['Rchdtxt']
def printRscript2(self) :
- AlcesteTxtProf(self.pathout, self.parent.RscriptsPath, self.clnb, 0.9)
+ ReinertTxtProf(self.pathout, self.parent.RscriptsPath, self.clnb, 0.9)
return self.pathout['RTxtProfGraph']
def print_graph_files(self) :
chd_graph_list.append([os.path.basename(self.pathout['arbre2']), u'chd2'])
print_liste(self.pathout['liste_graph_afc'], afc_graph_list)
print_liste(self.pathout['liste_graph_chd'], chd_graph_list)
- PrintRapport(self, self.corpus, self.parametres)
\ No newline at end of file
+ PrintRapport(self, self.corpus, self.parametres)
+
+class TgenProf(AnalyseText):
+ def __init__(self, ira, corpus, parametres, cluster_size):
+ self.ira = ira
+ self.corpus = corpus
+ self.parametres = parametres
+ self.pathout = PathOut(dirout = self.parametres['pathout'])
+ self.cluster_size = [len(classe) for classe in corpus.lc]
+ print cluster_size
+ self.doanalyse()
+
+ def doanalyse(self):
+ self.tgen = TGen(path = self.parametres['tgenpath'], encoding = self.ira.syscoding)
+ self.tgen.read(self.tgen.path)
+ #self.parametres['etoiles'].sort()
+ self.parametres['tgeneff'] = os.path.join(self.parametres['pathout'], 'tgeneff.csv')
+ tgenst = self.corpus.make_tgen_profile(self.tgen.tgen, self.corpus.lc)
+ clnames = ['cluster_%03d' % i for i in range(1, len(self.cluster_size) + 1)]
+ et = dict(zip(clnames, self.cluster_size))
+ tgenst = dict([[line[0], dict(zip(clnames, line[1:]))] for line in tgenst])
+ self.tgen.writetable(self.parametres['tgeneff'], tgenst, et)
+ self.parametres['tgenspec'] = os.path.join(self.parametres['pathout'], 'tgenchi2.csv')
+ self.Rscript = TgenProfScript(self)
+ self.Rscript.make_script()
+ self.Rscript.write()
+ self.doR(self.Rscript.scriptout, dlg = False, message = 'R...')
+
+
+
+
+
+
\ No newline at end of file