Skip to content

Instantly share code, notes, and snippets.

@pansapiens
Last active September 7, 2018 06:59
Show Gist options
  • Save pansapiens/64c9c5bfaa02934284dbf32ad5a59703 to your computer and use it in GitHub Desktop.
Save pansapiens/64c9c5bfaa02934284dbf32ad5a59703 to your computer and use it in GitHub Desktop.
Join CSVs for DART BLAST hits
#!/usr/bin/env python
from __future__ import print_function
import sys
from collections import OrderedDict
# Usage:
# python csv_join_rm_mm.py my_dart_ref.csv my_blast_hits.csv >joined_hits.csv 2>warnings.txt
#
# Joins a DART reference CSV to a CSV of BLAST hits on the DART ID (the first
# column of each file). Only DARTs with exactly one BLAST hit are kept. The
# joined rows are printed to stdout without a header row; BLAST hit IDs that
# are missing from the reference are reported as warnings on stderr.

# Prepended to every reference ID so that it matches the IDs in the BLAST CSV.
REF_DART_ID_PREFIX = "SNP_"

# The BLAST columns are spliced in after this many DART columns (ID excluded).
BLAST_INSERT_INDEX = 4


def _read_rows(path):
    """Yield every data row of the CSV file at *path* as a list of fields.

    The first line is treated as a header and skipped; blank lines are
    skipped too, so a trailing newline cannot produce an empty record.
    Fields are split on plain commas: quoted fields are not supported.
    """
    with open(path) as csvfile:
        next(csvfile, None)  # discard the header line (no-op on an empty file)
        for line in csvfile:
            # Strip '\r' as well so CRLF files do not leave it in the last field.
            line = line.rstrip('\r\n')
            if line:
                yield line.split(',')


def _read_darts(path):
    """Return an OrderedDict of prefixed DART ID -> remaining columns.

    File order is preserved. If an ID occurs more than once, the last row
    wins but keeps the position of the first occurrence.
    """
    darts = OrderedDict()
    for fields in _read_rows(path):
        dart_id = "%s%s" % (REF_DART_ID_PREFIX, fields[0])
        darts[dart_id] = fields[1:]
    return darts


def _read_unique_blast_hits(path):
    """Return an OrderedDict of DART ID -> hit columns for unique hits only.

    Any ID that appears on more than one row (a multimapper) is dropped
    entirely, no matter how many times it occurs.
    """
    hits = OrderedDict()
    multimappers = set()
    for fields in _read_rows(path):
        dart_id = fields[0]
        if dart_id in multimappers:
            continue
        if dart_id in hits:
            # Second sighting: retract the hit stored earlier and remember the
            # ID so that later occurrences are not re-added.
            del hits[dart_id]
            multimappers.add(dart_id)
            continue
        hits[dart_id] = fields[1:]
    return hits


def _join(darts, blasts, insert_at=BLAST_INSERT_INDEX):
    """Return an OrderedDict of the DARTs that have a BLAST hit, joined.

    Each value is the DART row with the BLAST hit columns inserted after its
    first *insert_at* columns. The order of *darts* is preserved; DARTs
    without an entry in *blasts* are omitted.
    """
    joined = OrderedDict()
    for dart_id, row in darts.items():
        hit = blasts.get(dart_id)
        if hit is not None:
            joined[dart_id] = row[:insert_at] + hit + row[insert_at:]
    return joined


def main(argv=None):
    """Run the join as a command-line program.

    argv -- full argument list including the program name; defaults to
            sys.argv.

    Returns the process exit status: 0 on success, 1 when one of the two
    required file arguments is missing.
    """
    if argv is None:
        argv = sys.argv

    try:
        darts_ref_fn = argv[1]
    except IndexError:
        sys.stderr.write("Please specify a DART reference csv !!\n")
        return 1

    try:
        blast_mapping_fn = argv[2]
    except IndexError:
        sys.stderr.write("Please specify a BLAST hit csv !!\n")
        return 1

    darts = _read_darts(darts_ref_fn)
    blasts = _read_unique_blast_hits(blast_mapping_fn)

    # A BLAST hit whose ID is unknown to the reference cannot be joined;
    # report it rather than dropping it silently.
    for dart_id in blasts:
        if dart_id not in darts:
            sys.stderr.write('Warning: DART ID %s not found in %s\n' % (dart_id, darts_ref_fn))

    for dart_id, row in _join(darts, blasts).items():
        print("%s,%s" % (dart_id, ','.join(row)))

    return 0


if __name__ == "__main__":
    sys.exit(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment