class SubBuilder :
    """Collect the settings of a sub-corpus through a modal dialog.

    The constructor fills ``parametres['meta']`` with the entries offered
    to the user, shows a ``SubTextFromMetaDial`` and, when the dialog
    returns ``wx.ID_OK``, stores the chosen corpus name, a not yet existing
    output path and the selection in ``self.parametres``.

    Attributes set by the constructor:
      parent, ori, dlg -- the constructor arguments (``ori`` is ``corpus``).
      res              -- return code of the dialog's ``ShowModal()``.
      parametres       -- only set when ``res == wx.ID_OK``.
    """

    def __init__(self, parent, corpus, parametres = None, dlg = None):
        """Show the selection dialog and record the user's choices.

        parent     -- passed to the dialog (and to the busy indicator).
        corpus     -- source corpus; its ``parametres['corpus_name']`` and
                      ``parametres['pathout']`` are read, and
                      ``make_etoiles()`` / ``make_themes()`` may be called.
        parametres -- dict of options, updated in place.  The flags
                      ``'frommeta'``, ``'fromtheme'`` and ``'fromclusters'``
                      select where the ``'meta'`` list comes from;
                      ``'clnb'`` is read in the clusters case.
        dlg        -- when not None a busy indicator is shown while the
                      list is prepared.
        """
        self.parent = parent
        self.ori = corpus
        self.dlg = dlg
        # The signature advertises None as default, so it must be usable.
        if parametres is None :
            parametres = {}
        default_name = 'Sub' + corpus.parametres['corpus_name']
        if dlg is not None :
            # BusyInfo needs a window as parent; this object is not one.
            busy = wx.BusyInfo(_("Please wait...").decode('utf8'), parent)
            wx.SafeYield()
        parametres['corpus_name'] = default_name
        # One single test for the clusters mode, used everywhere below, so a
        # present-but-false flag behaves like an absent one.
        from_clusters = parametres.get('fromclusters', False)
        if parametres.get('frommeta', False) :
            parametres['meta'] = corpus.make_etoiles()
        elif parametres.get('fromtheme', False) :
            parametres['meta'] = corpus.make_themes()
        elif from_clusters :
            parametres['meta'] = ['classe %i' % i for i in range(1, parametres['clnb'] + 1)]
        else :
            parametres['meta'] = []
        if not from_clusters :
            parametres['meta'].sort()
        if dlg is not None :
            # Dropping the last reference removes the busy indicator.
            del busy
        dial = SubTextFromMetaDial(parent, parametres)
        try :
            self.res = dial.ShowModal()
            # wx.ID_OK has the value 5100 that was previously hard-coded here.
            if self.res == wx.ID_OK :
                parametres['corpus_name'] = self._clean_name(dial.subcorpusname.GetValue(), default_name)
                pathout = os.path.join(corpus.parametres['pathout'], parametres['corpus_name'])
                parametres['pathout'] = self._next_free_path(pathout)
                selection = dial.m_listBox1.GetSelections()
                if from_clusters :
                    # Clusters mode keeps the selected list indices.
                    parametres['meta'] = selection
                else :
                    # Otherwise translate indices back to the listed values.
                    parametres['meta'] = [parametres['meta'][val] for val in selection]
                self.parametres = parametres
        finally :
            # Destroy the dialog on every path, including exceptions.
            dial.Destroy()

    @staticmethod
    def _clean_name(raw, default) :
        """Keep only alphanumerics and '_' of raw; fall back to default if nothing is left."""
        cleaned = ''.join([l for l in raw if l.isalnum() or l == '_'])
        return cleaned if cleaned != '' else default

    @staticmethod
    def _next_free_path(base) :
        """Return base + '_<n>' for the smallest n >= 1 that does not exist yet."""
        i = 1
        while os.path.exists(base + '_%i' % i) :
            i += 1
        return base + '_%i' % i

    def doanalyse(self):
        """Build the sub-corpus from the recorded settings and return it.

        Requires that the dialog was confirmed (``self.parametres`` is only
        set in that case).
        """
        return BuildSubCorpus(self.ori, parametres = self.parametres, dlg = self.dlg).corpus
+
class BuildMergeFromClusters(BuildCorpus):
    """Build one corpus out of clusters picked from several analyses.

    For every analysis the source corpus is loaded from its configuration
    file, its cluster membership is read from the analysis' ``uce.csv`` and
    the segments of each requested cluster are copied into ``self.corpus``.
    The constructor then calls ``connect()`` and ``dobuild()``.
    """

    def __init__(self, analyses, parametres, dlg = None) :
        """Copy the selected clusters and start the build.

        analyses   -- sequence of analysis option dicts; ``'pathout'`` is
                      read from each of them.
        parametres -- dict with ``'corpus_name'``, ``'pathout'``,
                      ``'corpusira'``, ``'clusters'`` and ``'newet'`` (the
                      last three indexed like ``analyses``) and optionally
                      ``'meta'``.  ``'uceids'`` is written as a scratch
                      value for ``fromuceids``.
        dlg        -- stored as ``self.dlg``.
        """
        log.info('begin subcorpus...')
        self.dlg = dlg
        self.infile = None
        self.corpus = Corpus(self, {'type' : 'corpus', 'originalpath' : 'MergeFromClusters', 'encoding' : 'merge'})
        self.last = 0
        self.analyses = analyses
        # lcl[i] collects the segment ids taken from analysis i.
        self.lcl = []
        self.parametres = parametres
        self.corpus.parametres['corpus_name'] = parametres['corpus_name']
        self.corpus.pathout = PathOut(filename = 'MFC', dirout = parametres['pathout'])
        self.corpus.pathout.createdir(parametres['pathout'])
        self.corpus.parametres['pathout'] = parametres['pathout']
        self.corpus.parametres['meta'] = parametres.get('meta', False)
        self.corpus.parametres['uuid'] = str(uuid4())
        for i, analyse in enumerate(analyses) :
            self.lcl.append([])
            self.analyseid = i
            irapath = parametres['corpusira'][i]
            corpus = Corpus(self, parametres = DoConf(irapath).getoptions('corpus'), read = True)
            ucepath = os.path.join(analyse['pathout'], 'uce.csv')
            corpus.make_ucecl_from_R(ucepath)
            self.ori = corpus
            for j, cl in enumerate(parametres['clusters'][i]) :
                # Cluster values are 1-based indices into corpus.lc.
                self.parametres['uceids'] = self.ori.lc[cl-1]
                self.lcl[i] += self.ori.lc[cl-1]
                # Label appended to the etoiles of every text copied for this cluster.
                self.et = parametres['newet'][i][j]
                self.fromuceids()
        #create database
        self.connect()
        self.dobuild()

    def fromuceids(self):
        """Copy the texts of ``self.ori`` that own a segment listed in
        ``self.parametres['uceids']`` into ``self.corpus``.

        Each copied text keeps only the listed segments, gets ``self.et``
        appended to its ``etoiles`` and is tagged with ``self.analyseid``.
        """
        log.info('fromuceids')
        keep = set(self.parametres['uceids'])
        for uci in self.ori.ucis :
            # Built once per text: doing it once per paragraph (as before)
            # repeated every kept segment len(uci.paras) times.
            keepuces = [CopyUce(uce) for uce in uci.uces if uce.ident in keep]
            if not keepuces :
                continue
            nuci = CopyUci(uci)
            nuci.uces = keepuces
            if uci.paras != [] :
                # As before, all paragraph labels are kept as soon as one
                # segment of the text is kept.
                nuci.paras = list(uci.paras)
            nuci.etoiles.append(self.et)
            nuci.analyseid = self.analyseid
            self.corpus.ucis.append(nuci)

    def read_corpus(self, infile = None):
        """Renumber the copied texts/paragraphs/segments and store their text.

        First pass: every text and segment of ``self.corpus`` gets a new
        consecutive ident, paragraph ids are renumbered whenever the
        original paragraph id changes.  Second pass: for each analysis the
        source corpus is reloaded, the text of the copied segments is
        inserted into the ``uces`` table under the new ident and every word
        is registered through ``add_word_from_forme``.  ``infile`` is
        unused.
        """
        ident_uce = 0
        ident_para = -1
        lastpara = -1
        # '<analyseid>-<old segment id>' -> new segment id
        newuceident = {}
        log.info('redo text, para and st ident')
        for ident_uci, uci in enumerate(self.corpus.ucis) :
            uci.ident = ident_uci
            for uce in uci.uces :
                uce.uci = uci.ident
                # NOTE(review): lastpara is carried across texts and
                # analyses, so two neighbouring segments from different
                # sources with the same original paragraph id share a new
                # paragraph id -- confirm this is intended.
                if uce.para != lastpara :
                    ident_para += 1
                    lastpara = uce.para
                uce.para = ident_para
                newuceident['%i-%i' % (uci.analyseid, uce.ident)] = ident_uce
                uce.ident = ident_uce
                ident_uce += 1
        log.info('backup st text and forms')
        for i, _analyse in enumerate(self.analyses) :
            irapath = self.parametres['corpusira'][i]
            old = Corpus(self, parametres = DoConf(irapath).getoptions('corpus'), read = True)
            for row in old.getconcorde(self.lcl[i]) :
                # row[0] is the old segment id, row[1] its text.
                newid = newuceident['%i-%i' % (i, row[0])]
                self.c.execute('INSERT INTO uces VALUES(?,?);', (newid, row[1]))
                for word in row[1].split() :
                    self.corpus.add_word_from_forme(old.formes[word], newid)
        self.backup_uce()
        log.info('done')
+
+
class MergeClusters :
    """Collect, through a modal dialog, the clusters to merge into a corpus.

    The constructor shows a ``MergeClusterFrame``.  When it returns
    ``wx.ID_OK`` with a non-empty selection, the selected clusters are
    grouped per analysis and stored, together with the corpus name and a
    not yet existing output path, in ``self.parametres``.

    Attributes set by the constructor:
      parent, dlg -- the constructor arguments.
      res         -- dialog return code; forced to ``wx.ID_CANCEL`` when
                     the dialog was confirmed without any selection.
      analyses, newet, clusters, corpusira, parametres
                  -- only set when a selection was confirmed.
    """

    def __init__(self, parent, parametres = None, dlg = None):
        """Show the dialog and record the user's choices.

        parent     -- passed to the dialog; ``parent.history.corpus`` is
                      read to find the configuration file of each corpus.
        parametres -- dict of options, updated in place.
        dlg        -- when not None a busy indicator is shown briefly.
        """
        self.parent = parent
        self.dlg = dlg
        # The signature advertises None as default, so it must be usable.
        if parametres is None :
            parametres = {}
        default_name = 'MergeFromClusters'
        if dlg is not None :
            # BusyInfo needs a window as parent; this object is not one.
            busy = wx.BusyInfo(_("Please wait...").decode('utf8'), parent)
            wx.SafeYield()
        parametres['corpus_name'] = default_name
        if dlg is not None :
            del busy
        dial = MergeClusterFrame(parent)
        dial.m_textCtrl4.SetValue(default_name)
        try :
            self.res = dial.ShowModal()
            # wx.ID_OK has the value 5100 that was previously hard-coded here.
            if self.res == wx.ID_OK :
                if not dial.selected :
                    # Nothing to merge: report it as a cancelled dialog
                    # instead of failing on an empty list below.
                    self.res = wx.ID_CANCEL
                else :
                    parametres['corpus_name'] = self._clean_name(dial.m_textCtrl4.GetValue(), default_name)
                    self._collect(dial, parametres)
        finally :
            # Destroy the dialog on every path, including exceptions.
            dial.Destroy()

    def _collect(self, dial, parametres) :
        """Group dial.selected per analysis and fill self.parametres."""
        self.analyses = {}
        self.clusters = {}
        self.newet = {}
        self.corpusira = {}
        # Analysis keys in order of first appearance, so the four parallel
        # lists below share one well-defined order.
        order = []
        for cl in dial.selected :
            # cl is (analysis key, corpus uuid, cluster number); the value
            # stored under it in dial.selected is the label for that cluster.
            key, corpus_uuid, cluster = cl[0], cl[1], cl[2]
            if key not in self.analyses :
                order.append(key)
                self.analyses[key] = DoConf(dial.irapath[key]).getoptions()
                self.clusters[key] = []
                self.newet[key] = []
                self.corpusira[key] = self.parent.history.corpus[corpus_uuid]['ira']
            self.clusters[key].append(cluster)
            self.newet[key].append(dial.selected[cl])

        clusters = [self.clusters[key] for key in order]
        corpusira = [self.corpusira[key] for key in order]
        self.newet = [self.newet[key] for key in order]
        self.analyses = [self.analyses[key] for key in order]

        # The new corpus goes two directory levels above the output
        # directory of the first analysis.
        pathout = os.path.dirname(os.path.dirname(self.analyses[0]['pathout']))
        pathout = os.path.join(pathout, parametres['corpus_name'])
        parametres['pathout'] = self._next_free_path(pathout)
        self.parametres = parametres
        self.parametres['clusters'] = clusters
        self.parametres['newet'] = self.newet
        self.parametres['corpusira'] = corpusira

    @staticmethod
    def _clean_name(raw, default) :
        """Keep only alphanumerics and '_' of raw; fall back to default if nothing is left."""
        cleaned = ''.join([l for l in raw if l.isalnum() or l == '_'])
        return cleaned if cleaned != '' else default

    @staticmethod
    def _next_free_path(base) :
        """Return base + '_<n>' for the smallest n >= 1 that does not exist yet."""
        i = 1
        while os.path.exists(base + '_%i' % i) :
            i += 1
        return base + '_%i' % i

    def doanalyse(self):
        """Build the merged corpus from the recorded settings and return it.

        Requires that a selection was confirmed in the dialog
        (``self.analyses`` and ``self.parametres`` are only set then).
        """
        return BuildMergeFromClusters(self.analyses, parametres = self.parametres, dlg = self.dlg).corpus