From 278fceaa7db7b84d7c6f3bbd3f86e5ddb0ebda09 Mon Sep 17 00:00:00 2001 From: pierre Date: Sun, 17 Feb 2013 13:56:12 +0100 Subject: [PATCH 1/1] unicode... --- corpus.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/corpus.py b/corpus.py index cd6c364..5a18f62 100644 --- a/corpus.py +++ b/corpus.py @@ -1081,7 +1081,7 @@ class BuildCorpus : def firstclean(self, txt) : txt = txt.replace(u'’',"'") txt = txt.replace(u'œ', u'oe') - return txt.replace('...',u' £$£ ').replace('?',' ? ').replace('.',' . ').replace('!', ' ! ').replace(',',' , ').replace(';', ' ; ').replace(':',' : ').replace(u'…', ' £$£ ') + return txt.replace('...',u' £$£ ').replace('?',' ? ').replace('.',' . ').replace('!', ' ! ').replace(',',' , ').replace(';', ' ; ').replace(':',' : ').replace(u'…', u' £$£ ') def make_cleans(self, txt) : for clean in self.cleans : -- 2.7.4