From 295f132ac094d89920f7ff8643f7004514b18b7a Mon Sep 17 00:00:00 2001 From: Pierre Ratinaud Date: Mon, 3 Nov 2014 09:39:04 +0100 Subject: [PATCH] search in sub directories --- parse_factiva_txt.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/parse_factiva_txt.py b/parse_factiva_txt.py index 18461e0..fd856f4 100644 --- a/parse_factiva_txt.py +++ b/parse_factiva_txt.py @@ -82,7 +82,7 @@ def parsetxtpaste(txt): else : pass if keepline and line.strip() not in ['LP', 'TD', ''] : - ucis[-1][1] = '\n'.join([ucis[-1][1],line]) + ucis[-1][1] = '\n'.join([ucis[-1][1],line.replace(u'*', ' ')]) return ucis @@ -94,14 +94,15 @@ def print_ucis(ucis, ofile, encodage) : class ParseFactivaPaste : def __init__(self, txtdir, fileout, encodage_in, encodage_out) : - files = os.listdir(txtdir) - files = [f for f in files if f.split('.')[-1] == 'txt'] + files = [] + for root, subfolders, subfiles in os.walk(txtdir) : + nf = [os.path.join(root, f) for f in subfiles if f.split('.')[-1] == 'txt'] + nf.sort() + files += nf tot = 0 with open(fileout,'w') as outf : for f in files : print f - f = os.path.join(txtdir, f) - print f with codecs.open(f, 'rU', encodage_in) as infile : content = infile.read() ucis = parsetxtpaste(content) -- 2.7.4