Skip to content

Instantly share code, notes, and snippets.

@meoow
Created August 9, 2014 11:48
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save meoow/45923f6206c42485ad21 to your computer and use it in GitHub Desktop.
Save meoow/45923f6206c42485ad21 to your computer and use it in GitHub Desktop.
Filtering One File by Another File
#!/usr/bin/env python2.7
from argparse import ArgumentParser
import sys
def parseArg():
    '''Parse the command line arguments.

    Options:
        -i/--input   file to be filtered (defaults to standard input)
        -f/--filter  file whose key column selects the lines to keep
                     (required)
        -1           1-based key column in the input file (default 1)
        -2           1-based key column in the filter file (default 1)
        -s/--sep     field separator (default: split on whitespace)

    Returns the argparse namespace with the attributes ``input`` and
    ``filter`` (open, readable file objects), ``col1``, ``col2`` and
    ``sep``.
    '''
    parser = ArgumentParser()
    # ``open`` is used instead of the ``file`` builtin: ``file`` only
    # exists on Python 2, while ``open`` behaves the same there and also
    # works on Python 3.
    parser.add_argument('-i', '--input', dest='input', metavar='INPUT',
                        type=open, default=sys.stdin,
                        help='Full input file')
    # The filter file is mandatory; without it there is nothing to match
    # against, so let argparse report the usage error up front.
    parser.add_argument('-f', '--filter', dest='filter', metavar='FILTER',
                        type=open, required=True,
                        help='File for filtering out sub lines of input file')
    parser.add_argument('-1', dest='col1', type=int, default=1,
                        metavar='NUM',
                        help='Column number for input file to match '
                             'filter file')
    parser.add_argument('-2', dest='col2', type=int, default=1,
                        metavar='NUM',
                        help='Column number of filter file')
    parser.add_argument('-s', '--sep', dest='sep', type=str, default=None,
                        metavar='SEP',
                        help='Field separator, default is whitespace '
                             'character')
    return parser.parse_args()
def cacheFilter(filer, col=0, sep=None):
    '''Collect the key column of every line of the filter file.

    filer -- iterable of text lines (an open file works)
    col   -- 0-based index of the column that holds the key
    sep   -- separator handed to ``str.split``; ``None`` splits on runs
             of whitespace

    Returns a dict mapping each key to 1; only key membership is
    meaningful. Lines that do not have column ``col`` (for example blank
    lines) are skipped instead of raising IndexError.
    '''
    filters = {}
    for line in filer:
        try:
            key = line.split(sep)[col]
        except IndexError:
            # Blank or short line: it has no key column to cache.
            continue
        filters[key] = 1
    return filters
def main():
    '''Print the input lines whose key column appears in the filter file.

    Reads the options from the command line, caches the key column of
    the filter file, then writes every input line whose key column is
    one of the cached keys to standard output, unchanged.

    Raises SystemExit when no filter file was given or when a column
    number is smaller than 1.
    '''
    opts = parseArg()
    if opts.filter is None:
        # Without this guard a missing -f surfaces as an opaque TypeError
        # when the filter file is iterated.
        raise SystemExit("A filter file is required (-f/--filter)")
    # Column numbers are 1-based on the command line, 0-based internally.
    opts.col1 -= 1
    opts.col2 -= 1
    if opts.col1 < 0 or opts.col2 < 0:
        raise SystemExit("Col must be greater than 0")
    try:
        filters = cacheFilter(opts.filter, opts.col2, opts.sep)
    finally:
        # The filter file is fully consumed at this point; release it.
        opts.filter.close()
    for line in opts.input:
        try:
            key = line.split(opts.sep)[opts.col1]
        except IndexError:
            # Blank or short input line: it has no key column, so it
            # cannot match any filter key.
            continue
        if key in filters:
            sys.stdout.write(line)
            # Flushed per match as in the original; presumably so that
            # matches appear immediately when input is streamed.
            sys.stdout.flush()
# Run the filter only when this file is executed as a script.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment