Skip to content

Instantly share code, notes, and snippets.

@geocarvalho
Last active October 24, 2017 04:21
Show Gist options
  • Save geocarvalho/f2be445ed2f1e560b84d98265849e226 to your computer and use it in GitHub Desktop.
Save geocarvalho/f2be445ed2f1e560b84d98265849e226 to your computer and use it in GitHub Desktop.
Annotate exons from bed file present in CNVs from exomedepth
#!/usr/bin/env python3
'''
Using exomedepth result and a bed file, organize a txt file with the exons in
the CNV region
usage:
python cnv_regions.py <path/to/exome_depth_output.txt> <path/to/bed_file.bed> [padding]
(padding is optional and defaults to 5)
'''
__author__ = 'George Carvalho'
import sys
import time
import os
import pandas as pd
def main():
    """Annotate every CNV call with the BED exons that lie inside it.

    Command-line arguments (read from ``sys.argv``):
        1. path to the CNV table (tab-separated, with a header providing
           at least the ``chromosome``, ``start`` and ``end`` columns);
        2. path to a headerless, tab-separated BED file whose columns are
           read as ``chr, start, end, exon, value, strand``;
        3. optional integer padding added on both sides of each CNV
           before looking for exons (defaults to 5).

    An exon is reported for a CNV when both files name the same
    chromosome and the exon lies entirely inside the padded CNV interval.
    The matching exon names are joined with ``', '`` into a new ``exons``
    column, and the table is written next to the input file as
    ``exons_<YYYY-mm-dd>_<HH:MM>_<original name>``.

    Exits with a usage message when the mandatory arguments are missing
    or the padding is not an integer.
    """
    usage = ('usage: python cnv_regions.py <path/to/exome_depth_output.txt> '
             '<path/to/bed_file.bed> [padding]')
    if len(sys.argv) < 3:
        sys.exit(usage)

    cnv = os.path.abspath(sys.argv[1])
    bed = os.path.abspath(sys.argv[2])

    # argv values are strings: convert the padding so it can be used in
    # arithmetic with the CNV coordinates.
    p = 5
    if len(sys.argv) >= 4:
        try:
            p = int(sys.argv[3])
        except ValueError:
            sys.exit('padding must be an integer, got {!r}\n{}'.format(
                sys.argv[3], usage))

    df_cnv = pd.read_table(cnv)
    bed_header = ['chr', 'start', 'end', 'exon', 'value', 'strand']
    df_bed = pd.read_table(bed, names=bed_header)

    # Normalise the BED columns once, outside the loop. Chromosomes are
    # compared as text so that "1" parsed as an int in one file still
    # matches "1" parsed as a string in the other.
    bed_chr = df_bed['chr'].astype(str)
    bed_start = df_bed['start'].astype(int)
    bed_end = df_bed['end'].astype(int)

    exons_per_cnv = []
    cnv_rows = zip(df_cnv['chromosome'].astype(str),
                   df_cnv['start'], df_cnv['end'])
    for chrom, start, end in cnv_rows:
        start_cnv = int(start - p)
        end_cnv = int(end + p)
        # Same test as the row-by-row version, evaluated on whole columns:
        # start_cnv <= exon start < end_cnv and start_cnv < exon end <= end_cnv.
        inside = (
            (bed_chr == chrom)
            & (bed_start >= start_cnv) & (bed_start < end_cnv)
            & (bed_end > start_cnv) & (bed_end <= end_cnv)
        )
        exons_per_cnv.append(
            ', '.join(str(name) for name in df_bed.loc[inside, 'exon']))

    # Assigning the whole column also creates it when the CNV table is empty.
    df_cnv['exons'] = exons_per_cnv

    cnv_path, cnv_file = os.path.split(cnv)
    # NOTE: the timestamp contains ':' which is not a valid file-name
    # character on Windows; the format is kept for compatibility.
    time_str = time.strftime("_%Y-%m-%d_%H:%M_")
    new_cnv = os.path.join(cnv_path, 'exons' + time_str + cnv_file)
    df_cnv.to_csv(new_cnv, index=False, sep='\t', mode='a')
# Run the annotation only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment