Skip to content

Instantly share code, notes, and snippets.

@jonchang
Created December 10, 2014 00:07
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jonchang/72c993dbf6063349ef92 to your computer and use it in GitHub Desktop.
Save jonchang/72c993dbf6063349ef92 to your computer and use it in GitHub Desktop.
Select only certain columns from a tab-delimited file (optionally gzipped).
#!/usr/bin/env python
from __future__ import print_function
import gzip
import argparse
from operator import itemgetter
def get_args():
    """Parse the command line of the column-selection script.

    Returns:
        The ``argparse.Namespace`` produced by ``parse_args()``, carrying
        ``input`` (input filename), ``output`` (output filename) and
        ``columns`` (a list of one or more column names to keep).
    """
    cli = argparse.ArgumentParser(
        description="selects fields from a tab-delimited file",
    )
    # Positional arguments, declared in the order they appear on the
    # command line.
    positionals = (
        ("input", {"help": "filename of a tab-delimited input (.gz OK)"}),
        ("output", {"help": "filename of the output"}),
        ("columns", {"nargs": "+", "help": "names of columns to keep"}),
    )
    for name, options in positionals:
        cli.add_argument(name, **options)
    return cli.parse_args()
def reader(fn, cols):
    """Yield the selected columns of each line of a tab-delimited file.

    The first line of the file is treated as the header: the positions of
    the requested names are looked up in it, and the header itself is
    yielded (restricted to those positions) like any other row.

    Args:
        fn: Path of the input file. A name ending in ``.gz`` is opened
            with ``gzip``; anything else with the built-in ``open``.
        cols: Column names to keep. Names that do not occur in the header
            are silently ignored. Output columns follow the order of
            ``cols``, not the order in the file.

    Yields:
        A tuple of field strings per input line. A tuple is produced even
        when a single column is selected, so callers can always unpack it.

    Raises:
        ValueError: If none of the requested names occur in the header.
        IndexError: If a line has fewer fields than a selected position.
    """
    # Choose the opener without rebinding the name ``open``: importing
    # ``gzip.open`` as ``open`` inside the function would make ``open`` a
    # local name and leave it unbound for non-gzip input.
    opener = gzip.open if fn.endswith(".gz") else open
    # "rt" makes gzip return text lines, so splitting on "\t" works the
    # same way for both kinds of file.
    with opener(fn, "rt") as rfile:
        wanted = None
        for line in rfile:
            # Remove only the line terminator; stripping all whitespace
            # would also drop empty leading/trailing fields and shift the
            # remaining columns.
            fields = line.rstrip("\r\n").split("\t")
            if wanted is None:
                wanted = [fields.index(name) for name in cols if name in fields]
                if not wanted:
                    raise ValueError(
                        "none of the requested columns were found in the "
                        "header: %s" % ", ".join(cols)
                    )
            yield tuple(fields[i] for i in wanted)
def main():
    """Copy the requested columns from the input file to the output file.

    Parses the command line with ``get_args``, streams rows from
    ``reader`` and writes each row to the output file as one
    tab-separated line.
    """
    args = get_args()
    with open(args.output, "w") as wfile:
        for cols in reader(args.input, args.columns):
            # If a row arrives as a single bare string rather than a
            # sequence of fields, treat it as one field; unpacking a
            # string would print its characters separated by tabs.
            if isinstance(cols, str):
                cols = (cols,)
            print(*cols, sep="\t", file=wfile)
# Run the command-line tool only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment