# -*- coding: utf-8 -*-
#Author: Pierre Ratinaud
#Copyright (c) 2013 Pierre Ratinaud
-#Lisense: GNU/GPL
+#License: GNU/GPL
import os
-#infiledir = '/home/pierre/TXM/corpora/voeux-bin/txm/VOEUX/'
+#infiledir = '/home/pierre/TXM/corpus/voeux-bin/txm/VOEUX/'
#fileout = 'VOEUXExportfromTXM.txt'
pass
def endElement(self, name) :
- if name == 's' :
+ if name == 's' or name == 'w' :
self.printsent()
if name == 'p' :
self.printsent()
#self.fileout.write(content.encode('utf8'))
def text2stars(self, attrs) :
- stars = ['_'.join(val).replace(' ', '_').replace("'", '_') for val in attrs.items()]
+ stars = ['_'.join(val).replace(' ', '_').replace("'", '_').replace('/','').replace('.','').replace(';', '').replace(':', '').replace(u'·','') for val in attrs.items()]
stars = [''.join([u'*', val]) for val in stars]
stars = u'**** ' + ' '.join(stars)
self.fileout.write(stars.encode(self.encodage_out))
for f in files :
parser.parse(open(f, 'r'))
fout.write('\n\n')
+ print 'done'
#TXM2IRA(infiledir, fileout)