From: pierre Date: Sun, 17 Feb 2013 12:56:12 +0000 (+0100) Subject: unicode... X-Git-Url: http://iramuteq.org/git?p=iramuteq;a=commitdiff_plain;h=278fceaa7db7b84d7c6f3bbd3f86e5ddb0ebda09 unicode... --- diff --git a/corpus.py b/corpus.py index cd6c364..5a18f62 100644 --- a/corpus.py +++ b/corpus.py @@ -1081,7 +1081,7 @@ class BuildCorpus : def firstclean(self, txt) : txt = txt.replace(u'’',"'") txt = txt.replace(u'œ', u'oe') - return txt.replace('...',u' £$£ ').replace('?',' ? ').replace('.',' . ').replace('!', ' ! ').replace(',',' , ').replace(';', ' ; ').replace(':',' : ').replace(u'…', ' £$£ ') + return txt.replace('...',u' £$£ ').replace('?',' ? ').replace('.',' . ').replace('!', ' ! ').replace(',',' , ').replace(';', ' ; ').replace(':',' : ').replace(u'…', u' £$£ ') def make_cleans(self, txt) : for clean in self.cleans :