projects
/
iramuteq
/ commitdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
| commitdiff |
tree
raw
|
patch
|
inline
| side by side (from parent 1:
d66f8fb
)
search in sub directories
author
Pierre Ratinaud
<ratinaud@univ-tlse2.fr>
Mon, 3 Nov 2014 08:39:04 +0000
(09:39 +0100)
committer
Pierre Ratinaud
<ratinaud@univ-tlse2.fr>
Mon, 3 Nov 2014 08:39:04 +0000
(09:39 +0100)
parse_factiva_txt.py
patch
|
blob
|
history
diff --git
a/parse_factiva_txt.py
b/parse_factiva_txt.py
index
18461e0
..
fd856f4
100644
(file)
--- a/
parse_factiva_txt.py
+++ b/
parse_factiva_txt.py
@@
-82,7
+82,7
@@
def parsetxtpaste(txt):
else :
pass
if keepline and line.strip() not in ['LP', 'TD', ''] :
else :
pass
if keepline and line.strip() not in ['LP', 'TD', ''] :
- ucis[-1][1] = '\n'.join([ucis[-1][1],line])
+ ucis[-1][1] = '\n'.join([ucis[-1][1],line
.replace(u'*', ' ')
])
return ucis
return ucis
@@
-94,14
+94,15
@@
def print_ucis(ucis, ofile, encodage) :
class ParseFactivaPaste :
def __init__(self, txtdir, fileout, encodage_in, encodage_out) :
class ParseFactivaPaste :
def __init__(self, txtdir, fileout, encodage_in, encodage_out) :
- files = os.listdir(txtdir)
- files = [f for f in files if f.split('.')[-1] == 'txt']
+ files = []
+ for root, subfolders, subfiles in os.walk(txtdir) :
+ nf = [os.path.join(root, f) for f in subfiles if f.split('.')[-1] == 'txt']
+ nf.sort()
+ files += nf
tot = 0
with open(fileout,'w') as outf :
for f in files :
print f
tot = 0
with open(fileout,'w') as outf :
for f in files :
print f
- f = os.path.join(txtdir, f)
- print f
with codecs.open(f, 'rU', encodage_in) as infile :
content = infile.read()
ucis = parsetxtpaste(content)
with codecs.open(f, 'rU', encodage_in) as infile :
content = infile.read()
ucis = parsetxtpaste(content)