Create a gist now

Instantly share code, notes, and snippets.

Gerdus Van Zyl's PyPy optimized version, with optimized number of lines. See http://saml.rilspace.org/node/248
def main(path="Homo_sapiens.GRCh37.67.dna_rm.chromosome.Y.fa"):
    """Print the GC content of a FASTA file as a percentage.

    Every line that does not start with ``>`` (a FASTA header) is scanned
    for the upper-case letters A, T, G and C.  Any other character
    (for example ``N`` or lower-case letters) is ignored, so it counts
    towards neither the numerator nor the denominator.

    Args:
        path: FASTA file to read.  Defaults to the chromosome Y file the
            script was originally written for.

    Raises:
        ZeroDivisionError: if the file contains no A, T, G or C at all,
            so no percentage can be computed.
        IOError / OSError: if the file cannot be opened.
    """
    gc_count = 0
    at_count = 0

    # Binary mode avoids any text decoding; bytes literals below keep the
    # comparisons valid on both Python 2 and Python 3.
    # The "with" block closes the file even if counting raises.
    with open(path, "rb") as fasta:
        while True:
            # Read in batches of roughly 1000 bytes worth of lines; this
            # size hint is the tuning the original script settled on.
            lines = fasta.readlines(1000)
            if not lines:
                break
            for line in lines:
                if line.startswith(b">"):
                    continue  # header line, not sequence data
                at_count += line.count(b"A") + line.count(b"T")
                gc_count += line.count(b"G") + line.count(b"C")

    total_base_count = gc_count + at_count
    if total_base_count == 0:
        # Same exception type the bare division would raise, but with a
        # message that says what is actually wrong with the input.
        raise ZeroDivisionError(
            "no A, C, G or T bases found in %r" % (path,)
        )

    gc_fraction = float(gc_count) / float(total_base_count)
    print(gc_fraction * 100)
# Run the GC-content report only when executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment