Skip to content

Instantly share code, notes, and snippets.

@michaelenger
Created December 21, 2013 21:29
Show Gist options
  • Save michaelenger/8075359 to your computer and use it in GitHub Desktop.
Save michaelenger/8075359 to your computer and use it in GitHub Desktop.
Example of how to use regular expressions to modify FASTA headers. See: https://en.wikipedia.org/wiki/FASTA_format
#!/usr/bin/env python
# We're using the sys and re modules
import sys
import re
# This is the function which will do our replacing
def modify_fasta_headers(inpath, outpath):
    """Copy a FASTA file, rewriting pipe-delimited header lines.

    A header line with at least five pipe-separated fields, for example
    ``>gi|123|ref|NP_000001.1| some protein [Homo sapiens]``, is rewritten
    as ``>NP_000001_Homo_sapiens``:

    * the fourth field (the reference number) loses everything from its
      last ``.`` onwards (the version suffix);
    * the remainder is reduced to the text inside its first ``[...]`` pair
      when one is present, and spaces are replaced by underscores.

    Every other line (sequence data, headers in another layout) is copied
    through unchanged.

    Args:
        inpath: Path of the FASTA file to read.
        outpath: Path of the file to write; it is created or overwritten.
    """
    # Header layout: >field1|field2|field3|<refnum>|<name>
    pattern = re.compile(r"^>[^|]+\|[^|]+\|[^|]+\|(?P<refnum>[^|]+)\|(?P<name>.+)$")

    # The context manager closes both files even if reading or writing fails.
    with open(inpath, "r") as infile, open(outpath, "w") as outfile:
        # Iterate lazily so large FASTA files are never loaded in one go.
        for line in infile:
            match = pattern.match(line)
            if match is None:
                # Not a recognised header: copy the line verbatim.
                outfile.write(line)
                continue

            # Drop the version suffix: "NP_000001.1" -> "NP_000001".
            refnum = re.sub(r"^(.+)\..+$", r"\1", match.group("refnum"))
            # Keep only the bracketed part: "... [Homo sapiens]" -> "Homo sapiens".
            name = re.sub(r"^.*?\[(.+?)\].*$", r"\1", match.group("name"))
            name = name.replace(" ", "_")

            outfile.write(">" + refnum + "_" + name + "\n")
# Check if this is the main script
if __name__ == "__main__":
    # Command-line entry point: modifyFastaHeaders.py infile [outfile]
    # With no arguments sys.argv holds only the script name, so show usage.
    if len(sys.argv) < 2:
        print("Usage: python modifyFastaHeaders.py infile [outfile]")
        sys.exit(1)

    # The first argument is the input file.
    infile = sys.argv[1]

    if len(sys.argv) > 2:
        # An explicit output path was given as the second argument.
        outfile = sys.argv[2]
    else:
        # Insert "_converted" before the extension, for example:
        # example.fasta => example_converted.fasta
        outfile = re.sub(r"(\.\w+)$", r"_converted\1", infile)
        if outfile == infile:
            # No extension to anchor on: without this the output path would
            # equal the input path and opening it for writing would truncate
            # the input before it is read.
            outfile = infile + "_converted"

    # Run the conversion function.
    modify_fasta_headers(infile, outfile)

    # Let the user know what has happened.
    print(infile + " converted to: " + outfile)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment