projects
/
iramuteq
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
test
[iramuteq]
/
parse_factiva_txt.py
diff --git a/parse_factiva_txt.py b/parse_factiva_txt.py
index 18461e0..fd856f4 100644 (file)
--- a/parse_factiva_txt.py
+++ b/parse_factiva_txt.py
@@ -82,7 +82,7 @@ def parsetxtpaste(txt):
else :
pass
if keepline and line.strip() not in ['LP', 'TD', ''] :
else :
pass
if keepline and line.strip() not in ['LP', 'TD', ''] :
- ucis[-1][1] = '\n'.join([ucis[-1][1],line])
+ ucis[-1][1] = '\n'.join([ucis[-1][1],line.replace(u'*', ' ')])
return ucis
return ucis
@@ -94,14 +94,15 @@ def print_ucis(ucis, ofile, encodage) :
class ParseFactivaPaste :
def __init__(self, txtdir, fileout, encodage_in, encodage_out) :
class ParseFactivaPaste :
def __init__(self, txtdir, fileout, encodage_in, encodage_out) :
- files = os.listdir(txtdir)
- files = [f for f in files if f.split('.')[-1] == 'txt']
+ files = []
+ for root, subfolders, subfiles in os.walk(txtdir) :
+ nf = [os.path.join(root, f) for f in subfiles if f.split('.')[-1] == 'txt']
+ nf.sort()
+ files += nf
tot = 0
with open(fileout,'w') as outf :
for f in files :
print f
tot = 0
with open(fileout,'w') as outf :
for f in files :
print f
- f = os.path.join(txtdir, f)
- print f
with codecs.open(f, 'rU', encodage_in) as infile :
content = infile.read()
ucis = parsetxtpaste(content)
with codecs.open(f, 'rU', encodage_in) as infile :
content = infile.read()
ucis = parsetxtpaste(content)