Skip to content

Instantly share code, notes, and snippets.

@RobColeman
Last active October 27, 2015 18:39
Show Gist options
  • Save RobColeman/823362a58ea3f635d6a7 to your computer and use it in GitHub Desktop.
Save RobColeman/823362a58ea3f635d6a7 to your computer and use it in GitHub Desktop.
Counting nucleotides from a sequence
def count_nucleotides_by_type(sequence):
    """Count how often each nucleotide (A, C, G, T) occurs in *sequence*.

    Args:
        sequence: an iterable of one-symbol strings, typically a DNA string
            such as "GATTACA". Symbols are matched case-insensitively, so
            "gattaca" is counted the same way.

    Returns:
        A dictionary with the keys "A", "C", "G" and "T" holding the
        per-nucleotide counts, plus the key "length" holding the total
        number of nucleotides counted.

    Raises:
        KeyError: if the sequence contains a symbol other than A, C, G or T.
    """
    # one counter per nucleotide type; the total is added after the loop so
    # that no input symbol can ever collide with the "length" bookkeeping key
    counts = {
        "A": 0,
        "C": 0,
        "G": 0,
        "T": 0,
    }
    # we iterate through the sequence exactly once
    for nucleotide in sequence:
        base = nucleotide.upper()
        if base not in counts:
            # report the symbol exactly as it appeared in the input
            raise KeyError(nucleotide)
        # for each type, we increment that count by one
        counts[base] += 1
    # every symbol landed in exactly one counter, so the total length is
    # simply the sum of the four counters
    counts["length"] = sum(counts.values())
    return counts
def normalize_nucleotide_count(counts):
    """Convert raw nucleotide counts into fractions of the whole sequence.

    Args:
        counts: a dictionary with the integer counts under the keys "A",
            "C", "G" and "T", and the total number of nucleotides under
            the key "length".

    Returns:
        A dictionary with the keys "A", "C", "G" and "T". Each value is the
        count divided by the total, i.e. a fraction between 0 and 1 (multiply
        by 100 to obtain a percentage). When the total is zero every
        fraction is 0.0.
    """
    bases = ("A", "C", "G", "T")
    # convert once to float so the division is never integer division
    total = float(counts["length"])
    if total == 0:
        # an empty sequence has no composition; avoid dividing by zero
        return dict((base, 0.0) for base in bases)
    # divide each individual nucleotide count by the total count
    return dict((base, counts[base] / total) for base in bases)
sequence = "GATGTTCAGCTGAAGTGGAGTAAAGAGTTGCAGCA"
# count the nucleotides
counts = count_nucleotides_by_type(sequence)
# turn the counts into fractions (0..1) of the whole sequence
percentages = normalize_nucleotide_count(counts)
# to get the share of both G and C, we add up their fractions; the fractions
# are in the range 0..1, so scale by 100 to report an actual percentage
G_and_C_percent = 100.0 * (percentages["G"] + percentages["C"])
# the call form of print works on both Python 2 and Python 3
print("G and C make up %f percent of the sequence" % G_and_C_percent)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment