Created
August 9, 2014 11:48
-
-
Save meoow/45923f6206c42485ad21 to your computer and use it in GitHub Desktop.
Filtering One File by Another File
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python2.7
import sys
from argparse import ArgumentParser, FileType
def parseArg(argv=None):
    '''Parse command line arguments.

    Args:
        argv: Optional list of argument strings to parse. When None
            (the default), argparse reads the process command line.

    Returns:
        The argparse namespace with these attributes:
          input  -- open file object to be filtered (default: sys.stdin)
          filter -- open file object holding the keys to match
          col1   -- column number in the input file, as typed by the user
          col2   -- column number in the filter file, as typed by the user
          sep    -- field separator string, or None for whitespace

    Raises:
        SystemExit: raised by argparse on invalid arguments, a missing
            --filter option, or a file that cannot be opened.
    '''
    parser = ArgumentParser()
    # FileType replaces the Python 2-only builtin `file`: it is available in
    # Python 2.7 and 3 and turns an unreadable path into a usage error
    # instead of a traceback.
    parser.add_argument('-i', '--input', dest='input', metavar='INPUT',
                        type=FileType('r'), default=sys.stdin,
                        help='Full input file')
    # Required: without a filter file there is nothing to match against.
    parser.add_argument('-f', '--filter', dest='filter', metavar='FILTER',
                        type=FileType('r'), required=True,
                        help='File for filtering out sub lines of input file')
    parser.add_argument('-1', dest='col1', type=int, default=1,
                        metavar='NUM',
                        help='Column number for input file to match filter file')
    parser.add_argument('-2', dest='col2', type=int, default=1,
                        metavar='NUM',
                        help='Column number of filter file')
    parser.add_argument('-s', '--sep', dest='sep', type=str, default=None,
                        metavar='SEP',
                        help='Field separator, default is whitespace character')
    return parser.parse_args(argv)
def _getField(line, col, sep=None):
    '''Return field `col` (0-based) of `line`, or None if the line lacks it.'''
    # Drop the line terminator before splitting: with an explicit separator,
    # str.split leaves it attached to the last field, so a key taken from a
    # last column would never equal the same value taken from another column.
    fields = line.rstrip('\r\n').split(sep)
    try:
        return fields[col]
    except IndexError:
        # Blank line or too few columns: there is no key on this line.
        return None


def cacheFilter(filer, col=0, sep=None):
    '''Read the filter file and collect the keys to match against.

    Args:
        filer: Iterable of text lines (e.g. an open file).
        col: 0-based index of the column holding the key.
        sep: Field separator passed to str.split; None splits on runs
            of whitespace.

    Returns:
        A dict mapping every key found to 1. Lines that do not have
        column `col` are skipped.
    '''
    filters = {}
    for line in filer:
        key = _getField(line, col, sep)
        if key is not None:
            filters[key] = 1
    return filters


def main():
    '''Write to stdout every input line whose key column is in the filter.

    Raises:
        SystemExit: if a column number is smaller than 1 or no filter
            file was supplied.
    '''
    opts = parseArg()
    # Column numbers are 1-based on the command line, 0-based internally.
    opts.col1 -= 1
    opts.col2 -= 1
    if opts.col1 < 0 or opts.col2 < 0:
        raise SystemExit("Col must be greater than 0")
    if opts.filter is None:
        raise SystemExit("A filter file is required (-f/--filter)")
    filters = cacheFilter(opts.filter, opts.col2, opts.sep)
    for line in opts.input:
        key = _getField(line, opts.col1, opts.sep)
        # Input lines without the requested column cannot match; skip them.
        if key is not None and key in filters:
            sys.stdout.write(line)
    sys.stdout.flush()
# Run the filter only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment