geocarvalho/cnv_regions.py

## cnv_regions.py
#!/usr/bin/env python3

'''
Using an ExomeDepth result and a BED file, write a tab-separated text file that
lists the exons found in each CNV region
usage:
python cnv_regions.py <path/to/exome_depth_output.txt> <path/to/bed_file.bed> [padding]
(padding is optional and defaults to 5)
'''

__author__ = 'George Carvalho'


import sys
import time
import os
import pandas as pd


def main():
    '''
    Annotate every CNV call with the BED exons that lie inside it.

    Arguments are taken from the command line:
      sys.argv[1] -- tab-separated CNV table; the columns 'chromosome',
                     'start' and 'end' are used
      sys.argv[2] -- headerless tab-separated BED file, read with the column
                     names chr, start, end, exon, value, strand
      sys.argv[3] -- optional integer padding subtracted from each CNV start
                     and added to each CNV end (default: 5)

    An exon is reported for a CNV when the chromosomes are equal and both the
    exon start and the exon end fall inside the padded CNV interval. The
    matching exon names are joined with ', ' into a new 'exons' column and the
    table is written beside the input as 'exons_<timestamp>_<input name>'.

    Raises:
        IndexError: if fewer than two paths are given on the command line.
        ValueError: if the padding argument is not an integer.
    '''
    cnv = os.path.abspath(sys.argv[1])
    bed = os.path.abspath(sys.argv[2])
    # Command-line values are strings; convert so the padding can be used in
    # the coordinate arithmetic below.
    p = int(sys.argv[3]) if len(sys.argv) > 3 else 5

    df_cnv = pd.read_table(cnv)
    bed_header = ['chr', 'start', 'end', 'exon', 'value', 'strand']
    df_bed = pd.read_table(bed, names=bed_header)

    exons_per_cnv = []
    for chrom, start, end in zip(
            df_cnv['chromosome'], df_cnv['start'], df_cnv['end']):
        # The padded bounds depend only on the CNV, so compute them once per
        # CNV instead of once per exon.
        start_cnv = int(start - p)
        end_cnv = int(end + p)

        # One vectorised test over the whole BED table replaces a row-by-row
        # scan; the conditions are the same as start_cnv <= start < end_cnv
        # and start_cnv < end <= end_cnv.
        inside = (
            (df_bed['chr'] == chrom)
            & (df_bed['start'] >= start_cnv)
            & (df_bed['start'] < end_cnv)
            & (df_bed['end'] > start_cnv)
            & (df_bed['end'] <= end_cnv)
        )
        exons_per_cnv.append(', '.join(df_bed.loc[inside, 'exon']))

    df_cnv['exons'] = exons_per_cnv

    cnv_path, cnv_file = os.path.split(cnv)
    time_str = time.strftime("_%Y-%m-%d_%H:%M_")
    new_cnv = os.path.join(cnv_path, 'exons' + time_str + cnv_file)
    # NOTE(review): append mode means a second run within the same minute adds
    # another table (with its header) to the same file -- confirm intended.
    df_cnv.to_csv(new_cnv, index=False, sep='\t', mode='a')

# Run the annotation only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
	#/usr/bin/env python3

	'''
	Using exomedepth result and a bed file, organize a txt file with the exons in
	the CNV region
	usage:
	python cnv_regions.py <path/to/exome_depth_output.txt> <path/to/bed_file.bed> <padding>
	'''

	__author__ = 'George Carvalho'


	import sys
	import time
	import os
	import pandas as pd


	def main():
	    '''
	    Annotate every CNV call with the BED exons that lie inside it.

	    Arguments are taken from the command line:
	      sys.argv[1] -- tab-separated CNV table; the columns 'chromosome',
	                     'start' and 'end' are used
	      sys.argv[2] -- headerless tab-separated BED file, read with the
	                     column names chr, start, end, exon, value, strand
	      sys.argv[3] -- optional integer padding subtracted from each CNV
	                     start and added to each CNV end (default: 5)

	    An exon is reported for a CNV when the chromosomes are equal and both
	    the exon start and the exon end fall inside the padded CNV interval.
	    The matching exon names are joined with ', ' into a new 'exons' column
	    and the table is written beside the input as
	    'exons_<timestamp>_<input name>'.

	    Raises:
	        IndexError: if fewer than two paths are given on the command line.
	        ValueError: if the padding argument is not an integer.
	    '''
	    cnv = os.path.abspath(sys.argv[1])
	    bed = os.path.abspath(sys.argv[2])
	    # Command-line values are strings; convert so the padding can be used
	    # in the coordinate arithmetic below.
	    p = int(sys.argv[3]) if len(sys.argv) > 3 else 5

	    df_cnv = pd.read_table(cnv)
	    bed_header = ['chr', 'start', 'end', 'exon', 'value', 'strand']
	    df_bed = pd.read_table(bed, names=bed_header)

	    exons_per_cnv = []
	    for chrom, start, end in zip(
	            df_cnv['chromosome'], df_cnv['start'], df_cnv['end']):
	        # The padded bounds depend only on the CNV, so compute them once
	        # per CNV instead of once per exon.
	        start_cnv = int(start - p)
	        end_cnv = int(end + p)

	        # One vectorised test over the whole BED table replaces a
	        # row-by-row scan; the conditions are the same as
	        # start_cnv <= start < end_cnv and start_cnv < end <= end_cnv.
	        inside = (
	            (df_bed['chr'] == chrom)
	            & (df_bed['start'] >= start_cnv)
	            & (df_bed['start'] < end_cnv)
	            & (df_bed['end'] > start_cnv)
	            & (df_bed['end'] <= end_cnv)
	        )
	        exons_per_cnv.append(', '.join(df_bed.loc[inside, 'exon']))

	    df_cnv['exons'] = exons_per_cnv

	    cnv_path, cnv_file = os.path.split(cnv)
	    time_str = time.strftime("_%Y-%m-%d_%H:%M_")
	    new_cnv = os.path.join(cnv_path, 'exons' + time_str + cnv_file)
	    # NOTE(review): append mode means a second run within the same minute
	    # adds another table (with its header) to the same file -- confirm
	    # intended.
	    df_cnv.to_csv(new_cnv, index=False, sep='\t', mode='a')

	# Run the annotation only when this file is executed as a script, not when
	# it is imported as a module.
	if __name__ == "__main__":
	    main()