Skip to content

Instantly share code, notes, and snippets.

@bruab
Created July 24, 2015 02:28
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save bruab/5a7bef9f205fa5735245 to your computer and use it in GitHub Desktop.
Save bruab/5a7bef9f205fa5735245 to your computer and use it in GitHub Desktop.
For making Hi-C Box and GRAAL play nice together
#!/usr/bin/env python
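## Read two GRAAL files (info_contigs.txt and fragments_hetero_contacts.txt)
## and write a new hetero_contacts table to stdout in which each
## (offset, contig) pair is replaced by a single index: the value from the
## fourth column of that contig's info_contigs.txt row plus the offset
## given in the contacts row. The count column is passed through unchanged.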
import sys
INFO_CONTIGS_FILE = "info_contigs.txt"
FRAGMENTS_HETERO_FILE = "fragments_hetero_contacts.txt"


def load_contig_offsets(lines):
    """Build a contig-name -> offset dict from the rows of INFO_CONTIGS_FILE.

    Each row is split on whitespace; the first field is used as the contig
    name and the fourth field is parsed as its integer offset.

    Args:
        lines: iterable of text lines (an open file works).

    Returns:
        dict mapping contig name to its integer offset.

    Raises:
        IndexError: a non-blank data row has fewer than four fields.
        ValueError: the fourth field of a data row is not an integer.
    """
    contig_to_offset = {}
    for line in lines:
        fields = line.split()
        if not fields:
            # Blank line (e.g. a trailing newline at end of file).
            continue
        contig = fields[0]
        # Match the header by exact column name rather than by prefix, so
        # that real contigs named e.g. "contig_12" are not discarded.
        if contig == "contig":
            continue
        offset = int(fields[3])
        if contig in contig_to_offset:
            # Keep the first offset seen and report the duplicate.
            sys.stderr.write("weird, contig %s is already in dict\n" % contig)
            continue
        contig_to_offset[contig] = offset
    return contig_to_offset


def translate_contacts(lines, contig_to_offset):
    """Yield re-indexed output rows for the rows of FRAGMENTS_HETERO_FILE.

    Each input row is read as: offset1, contig1, offset2, contig2, count.
    For both (offset, contig) pairs the new id is the contig's entry in
    ``contig_to_offset`` plus the row's own offset. The count is copied
    through verbatim as text.

    Args:
        lines: iterable of text lines (an open file works).
        contig_to_offset: dict as returned by ``load_contig_offsets``.

    Yields:
        Tab-separated, newline-terminated strings "id1<TAB>id2<TAB>count".

    Raises:
        KeyError: a contig in the row is missing from ``contig_to_offset``.
        IndexError: a non-blank row has fewer than five fields.
        ValueError: an offset field is not an integer.
    """
    for line in lines:
        fields = line.split()
        if not fields:
            # Blank line; nothing to translate.
            continue
        count = fields[4]
        id1 = contig_to_offset[fields[1]] + int(fields[0])
        id2 = contig_to_offset[fields[3]] + int(fields[2])
        yield "%s\t%s\t%s\n" % (id1, id2, count)


def main():
    """Read both input files and write the re-indexed table to stdout."""
    # Open both files up front so a missing input is reported before any
    # output has been written.
    with open(INFO_CONTIGS_FILE, 'r') as contigs, \
            open(FRAGMENTS_HETERO_FILE, 'r') as contacts:
        contig_to_offset = load_contig_offsets(contigs)
        for record in translate_contacts(contacts, contig_to_offset):
            sys.stdout.write(record)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment