Skip to content

Instantly share code, notes, and snippets.

@ShaiberAlon
Created October 18, 2019 20:52
Show Gist options
  • Save ShaiberAlon/2a8c1b12a372c77a7569dec7c317d37b to your computer and use it in GitHub Desktop.
Save ShaiberAlon/2a8c1b12a372c77a7569dec7c317d37b to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
import argparse
import pandas as pd
def _parse_bool_flag(value):
    """Interpret the optional value given to --get-only-single-copy.

    Explicit "false" spellings (and the empty string) disable the option;
    any other value enables it, which is what every non-empty value did
    before this option could also be used as a bare switch.
    """
    return value.strip().lower() not in ('', '0', 'false', 'f', 'no', 'n', 'off')


# Command-line interface. Input and output are required: without an input
# path read_csv fails, and without an output path to_csv returns the text
# instead of writing it, so the result would be silently discarded.
parser = argparse.ArgumentParser(description='Find the items that occur in all rows of a table (i.e. rows in which all values are greater than 0) and save to output file.')
parser.add_argument('-i', '--input', help='Input file.', required=True)
parser.add_argument('-o', '--output', help='Output file.', required=True)
parser.add_argument('--items-label', help='Header for the items column. If none is provided then there will be no header row.', default=None)
parser.add_argument('--index-label', help='Header for the index column. If none is provided then there will be no header row.', default=None)
# nargs='?' + const=True lets the option be used as a bare switch while
# still accepting an explicit value ("--get-only-single-copy True").
parser.add_argument('--get-only-single-copy', help='Only return items the occur only once in all rows.', nargs='?', const=True, default=False, type=_parse_bool_flag)
args = parser.parse_args()
# Tab-separated table; the first column becomes the row index.
d = pd.read_csv(args.input, sep='\t', index_col=0)
def is_item_single_or_multi_copy_core(x, only_single_copy=None):
    """Classify one table row as core, single-copy core, or neither.

    A row counts as "core" when ``x.all()`` is true, i.e. every value in the
    row is truthy (non-zero).

    Args:
        x: One row of the table, as passed by ``DataFrame.apply(..., axis=1)``.
        only_single_copy: When true, report only core rows in which no value
            is greater than 1. When None (the default), the setting is taken
            from the module-level ``args.get_only_single_copy``.

    Returns:
        'Single copy core function' for a core row with no value above 1 when
        single-copy mode is on, 'Core' for a core row when single-copy mode
        is off, and None in every other case.
    """
    if only_single_copy is None:
        # Fall back to the command-line option parsed at module level.
        only_single_copy = args.get_only_single_copy

    if not x.all():
        return None
    if not only_single_copy:
        return 'Core'
    # Single-copy mode: any value above 1 disqualifies the row.
    if (x > 1).any():
        return None
    return 'Single copy core function'
# Classify every row of the table; rows that do not qualify come back as None.
o = d.apply(is_item_single_or_multi_copy_core, axis=1)
# to_csv only treats `header` as a column alias when it is list-like; a bare
# string is merely truthy, so the default column name would be written instead
# of the requested label. No (or an empty) label still means no header row.
items_header = [args.items_label] if args.items_label else False
o.loc[o.notnull()].to_csv(args.output, sep='\t', header=items_header, index_label=args.index_label)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment