Skip to content

Instantly share code, notes, and snippets.

@kclem
Created June 25, 2024 16:26
Show Gist options
  • Save kclem/504da721cca4c9ff2c157a4a09f9e660 to your computer and use it in GitHub Desktop.
Save kclem/504da721cca4c9ff2c157a4a09f9e660 to your computer and use it in GitHub Desktop.
import argparse
import subprocess
import os
if __name__ == "__main__":
parser = argparse.ArgumentParser(description='Print annotated genome location for hand analysis')
parser.add_argument('chr', help='chr position', type=str)
parser.add_argument('start', help='start position', type=int)
parser.add_argument('end', help='end position', type=int)
parser.add_argument('-g','--genome', help='genome', default="hg38")
args = parser.parse_args()
genome_loc = os.path.expanduser("~/clement/genomes/Homo_sapiens/UCSC/" + args.genome + "/Sequence/WholeGenomeFasta/genome.fa")
if not os.path.exists(genome_loc):
raise Exception('Cannot find genome at ' + str(genome_loc))
faidx_command = f"samtools faidx {genome_loc} {args.chr}:{args.start}-{args.end}"
faidx_output = subprocess.check_output(faidx_command, shell=True)
faidx_output_els = faidx_output.decode().split('\n')
if len(faidx_output_els) < 2:
raise Exception(f'Faidx command "{faidx_command}" failed: ' + "\n".join(faidx_output_els))
genome_seq = ''.join(faidx_output_els[1:])
curr_val = args.start
row_10000 = str(int(curr_val/10000))[-1]
row_1000 = str(int(curr_val/1000))[-1]
row_100 = str(int(curr_val/100))[-1]
row_10 = str(int(curr_val/10))[-1]
row_1 = str(int(curr_val/1))[-1] # I always forget what /1 does...
curr_val += 1
while curr_val < args.end:
if curr_val % 10 == 0:
row_10000 += str(int(curr_val/10000))[-1]
row_1000 += str(int(curr_val/1000))[-1]
row_100 += str(int(curr_val/100))[-1]
row_10 += str(int(curr_val/10))[-1]
row_1 += str(int(curr_val/1))[-1] # I always forget what /1 does...
else:
row_10000 += " "
row_1000 += " "
row_100 += " "
row_10 += " "
row_1 += str(curr_val)[-1]
curr_val += 1
row_10000 += str(int(curr_val/10000))[-1]
row_1000 += str(int(curr_val/1000))[-1]
row_100 += str(int(curr_val/100))[-1]
row_10 += str(int(curr_val/10))[-1]
row_1 += str(int(curr_val/1))[-1] # I always forget what /1 does...
print(row_10000+"\n"+row_1000+"\n"+row_100+"\n"+row_10+"\n"+row_1+"\n" + genome_seq)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment