projects
/
iramuteq
/ commitdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
| commitdiff |
tree
raw
|
patch
|
inline
| side by side (parent:
72f6969
)
unicode...
author
pierre
<ratinaud@univ-tlse2.fr>
Sun, 17 Feb 2013 12:56:12 +0000
(13:56 +0100)
committer
pierre
<ratinaud@univ-tlse2.fr>
Sun, 17 Feb 2013 12:56:12 +0000
(13:56 +0100)
corpus.py
patch
|
blob
|
history
diff --git
a/corpus.py
b/corpus.py
index
cd6c364
..
5a18f62
100644
(file)
--- a/
corpus.py
+++ b/
corpus.py
@@
-1081,7
+1081,7
@@
class BuildCorpus :
def firstclean(self, txt) :
txt = txt.replace(u'’',"'")
txt = txt.replace(u'œ', u'oe')
def firstclean(self, txt) :
txt = txt.replace(u'’',"'")
txt = txt.replace(u'œ', u'oe')
- return txt.replace('...',u' £$£ ').replace('?',' ? ').replace('.',' . ').replace('!', ' ! ').replace(',',' , ').replace(';', ' ; ').replace(':',' : ').replace(u'…', ' £$£ ')
+ return txt.replace('...',u' £$£ ').replace('?',' ? ').replace('.',' . ').replace('!', ' ! ').replace(',',' , ').replace(';', ' ; ').replace(':',' : ').replace(u'…', u' £$£ ')
def make_cleans(self, txt) :
for clean in self.cleans :
def make_cleans(self, txt) :
for clean in self.cleans :