# Reconstructed from the definitions added to tools.py by the iramuteq patch
# e35ea4b..669337b. Only the definitions that are complete in that patch are
# reproduced; `istext`, `os` and `codecs` come from the rest of tools.py.


def isthem(line):
    """Return True when *line* starts with the thematic marker '-*'."""
    return line.startswith(u'-*')


class SplitFromThem :
    """Copy the parts of a corpus that belong to the selected thematics.

    `parametres` is a dict read for these keys:
      - 'filein'    : path of the corpus to read
      - 'them'      : collection of thematic markers to keep (e.g. u'-*foo')
      - 'encodein'  : encoding used to read 'filein'
      - 'encodeout' : encoding used to write the result

    The result is written next to 'filein', in a file named after the
    selected thematics (markers stripped of '-*' and joined with '_').
    All the work is done by the constructor.
    """

    def __init__(self, parametres) :
        self.filein = parametres['filein']
        self.them = parametres['them']
        self.encodein = parametres['encodein']
        self.encodeout = parametres['encodeout']
        self.basepath = os.path.dirname(self.filein)
        self.pathout = os.path.join(self.basepath, '_'.join([them.replace(u'-*','') for them in self.them]))
        # codecs.open encodes at the file boundary: no newline translation of
        # already-encoded bytes, and a single BOM for encodings that use one.
        self.fileout = codecs.open(self.pathout, 'w', self.encodeout)
        try :
            self.doparse()
        finally :
            # Close the output even when parsing fails.
            self.fileout.close()

    def doparse(self):
        """Read the corpus line by line and write the kept parts.

        Lines are kept from a selected thematic marker (included) up to the
        next marker or the next text header. `istext` is defined elsewhere in
        tools.py and is assumed to recognise a text header line.
        """
        kept = []
        keepline = False
        lastet = ''
        with codecs.open(self.filein, 'r', self.encodein) as fin :
            for line in fin :
                if istext(line) :
                    # Flush the previous text before starting a new one.
                    self.writetext(self.fileout, lastet, u''.join(kept))
                    kept = []
                    lastet = line
                    # A selection must not leak into the next text, otherwise
                    # its header would be copied into the body and written
                    # twice.
                    keepline = False
                if isthem(line) :
                    keepline = line.strip() in self.them
                if keepline :
                    kept.append(line)
        self.writetext(self.fileout, lastet, u''.join(kept))

    def writetext(self, fileout, lastet, text):
        """Write header `lastet` followed by `text` to `fileout`.

        Nothing is written when `text` is empty, so texts without any
        selected thematic leave no trace in the output. `fileout` must accept
        text (it does the encoding itself).
        """
        if text != '' :
            fileout.write(lastet + text)