Created
July 24, 2015 02:28
-
-
Save bruab/5a7bef9f205fa5735245 to your computer and use it in GitHub Desktop.
For making Hi-C Box and GRAAL play nice together
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
## Read two GRAALy files and produce a new, improved
## hetero_contacts table using different indices.
## It's complicated.
import sys

INFO_CONTIGS_FILE = "info_contigs.txt"
FRAGMENTS_HETERO_FILE = "fragments_hetero_contacts.txt"

# Contig name -> offset (fourth column of INFO_CONTIGS_FILE); filled by main().
contig_to_offset = {}


def load_contig_offsets(lines):
    """Build a {contig name: offset} dict from the lines of INFO_CONTIGS_FILE.

    Each data line is whitespace-separated; the contig name is the first
    column and its integer offset is the fourth column.

    lines -- any iterable of text lines (an open file works).

    Returns the dict.  Blank lines are skipped.  A line whose first column
    starts with "contig" is treated as a header only when its fourth column
    is missing or not an integer, so real contigs named e.g. "contig_1" are
    kept rather than silently dropped.  If a contig appears more than once,
    a warning goes to stderr and the first offset wins.

    Raises IndexError / ValueError for other lines that have fewer than four
    columns or a non-integer fourth column.
    """
    offsets = {}
    for line in lines:
        fields = line.split()
        if not fields:
            # blank line (e.g. trailing newline at end of file)
            continue
        if fields[0].startswith("contig"):
            try:
                int(fields[3])
            except (IndexError, ValueError):
                # header line
                continue
        contig = fields[0]
        offset = int(fields[3])
        if contig in offsets:
            sys.stderr.write("weird, contig %s is already in dict\n" % contig)
            continue
        offsets[contig] = offset
    return offsets


def translate_contacts(lines, offsets):
    """Yield re-indexed output rows for the lines of FRAGMENTS_HETERO_FILE.

    For each input line, find the id corresponding to the offset/contig in
    columns 1 and 2, and the id corresponding to the offset/contig in
    columns 3 and 4.  An id is the contig's offset (from `offsets`) plus the
    per-line offset.  Each yielded string is "id1<TAB>id2<TAB>count<NEWLINE>",
    where count is column 5 passed through unchanged.

    lines   -- any iterable of text lines (an open file works).
    offsets -- dict as returned by load_contig_offsets().

    Blank lines are skipped.  Raises KeyError when a contig is not present
    in `offsets`, IndexError when a line has fewer than five columns, and
    ValueError when column 1 or 3 is not an integer.
    """
    for line in lines:
        fields = line.split()
        if not fields:
            continue
        count = fields[4]
        id1 = offsets[fields[1]] + int(fields[0])
        id2 = offsets[fields[3]] + int(fields[2])
        yield "%d\t%d\t%s\n" % (id1, id2, count)


def main():
    """Read both input files from the current directory; write rows to stdout."""
    # Open both files up front so a missing input fails before any output.
    with open(INFO_CONTIGS_FILE, 'r') as contigs, \
            open(FRAGMENTS_HETERO_FILE, 'r') as contacts:
        contig_to_offset.update(load_contig_offsets(contigs))
        for row in translate_contacts(contacts, contig_to_offset):
            sys.stdout.write(row)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment