Skip to content

Instantly share code, notes, and snippets.

@ggonnella
Created August 29, 2018 12:55
Show Gist options
  • Save ggonnella/055862bc86cb4325b52550cab1ad6ed0 to your computer and use it in GitHub Desktop.
Save ggonnella/055862bc86cb4325b52550cab1ad6ed0 to your computer and use it in GitHub Desktop.
Add sequences from a Fasta file to a GFA file.
#!/usr/bin/env python3
"""
Add sequences from a Fasta file to a GFA file.
"""
import argparse
import sys
import gfapy
op = argparse.ArgumentParser(description=__doc__)
op.add_argument("inputgfa")
op.add_argument("inputfasta")
op.add_argument("outputgfa")
op.add_argument("-v", "--verbose", action="store_true", help="verbose output")
op.add_argument("-V", '--version', action='version', version='%(prog)s 0.1')
opts = op.parse_args()
# note: to apply to Canu 1.6 (did not check for newer versions)
# the following fix to the GFA VN tag was necessary:
# sed -i s'/VN:Z:bogart\/edges/VN:Z:1.0/' canu.contigs.gfa
g = gfapy.Gfa.from_file(opts.inputgfa)
segment = None
slines = []
with open(opts.inputfasta) as f:
for line in f:
line = line.strip()
if line.startswith(">"):
if segment:
segment.sequence = "".join(slines)
sname = line[1:].split(" ")[0]
if opts.verbose:
sys.stderr.write("Processing segment {}...\n".format(sname))
segment = g.segment(sname)
if not segment:
sys.stderr.write("ERROR: Segment with ID {}".format(sname)+
"found in Fasta but not in GFA file\n")
exit(1)
slines = []
else:
slines.append(line)
if segment:
segment.sequence = "".join(slines)
g.to_file(opts.outputgfa)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment