Skip to content

Instantly share code, notes, and snippets.

@hmartiniano
Forked from johnbowes/king.py
Created March 17, 2024 20:04
Show Gist options
  • Save hmartiniano/8d2f2bc53e77b0c829915d3d2e901afd to your computer and use it in GitHub Desktop.
Save hmartiniano/8d2f2bc53e77b0c829915d3d2e901afd to your computer and use it in GitHub Desktop.
Select related individuals for exclusion based on output from KING
#!/usr/bin/python
# Run KING to generate sample QC and IBD summary stats
# ./king_1.9 -b <data>.bed --bysample --prefix <prefix_for_output>
# ./king_1.9 -b <data>.bed --kinship --related --degree 2 --prefix <prefix_for_output>
#
# Run this script to create a list of exclusions (member of pair with least data will be excluded)
# python king.py --prefix <prefix_for_output> --out <output_file_name>
# Add error if no samples in kinship file.
import sys
import os
import pandas as pd
import numpy as np
import argparse
def get_options():
parser = argparse.ArgumentParser()
parser.add_argument('--prefix', action='store', dest='prefix', required=True,
help='File prefix to KING files')
parser.add_argument('--out', action='store', dest='out_path', required=True,
help='Directory path for output file')
args = parser.parse_args()
return args
def main():
# define input/output
options = get_options()
# read kin0 file
related_file = options.prefix + '.kin0'
related = pd.read_table(related_file)
# read bysample file
qc_file = options.prefix + 'bySample.txt'
qc = pd.read_table(qc_file, sep=' ')
qc_dict = dict(zip(qc.IID, qc.Missing))
# compare missing rates
results = []
for fid1,id1,fid2,id2 in zip(related.FID1, related.ID1, related.FID2, related.ID2):
results.append((fid1,id1) if qc_dict[id1] >= qc_dict[id2] else (fid2,id2))
# write exclusion file
out_file = options.out_path + '/fail_ibd.list'
pd.DataFrame(results).to_csv(out_file, sep=' ', header=False, index=False)
if __name__=='__main__':
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment