Skip to content

Instantly share code, notes, and snippets.

@ipurusho
Created May 7, 2015 00:58
Show Gist options
  • Save ipurusho/33fd95a331d3e6ea47a9 to your computer and use it in GitHub Desktop.
Save ipurusho/33fd95a331d3e6ea47a9 to your computer and use it in GitHub Desktop.
Serial version of GC_calc.py for speedup testing purposes
def FASTA(filename):
    """Parse a FASTA file into a dict mapping record names to sequences.

    A line starting with '>' begins a new record; the rest of that line,
    with underscores replaced by spaces, is the record name. Every
    following line up to the next header is appended to that record's
    sequence after stripping the line ending and any trailing '*'.
    If a name occurs more than once, the last record with that name wins.

    Args:
        filename: Path of the FASTA file to read.

    Returns:
        A dict of ``{name: sequence}``, or None if the file could not be
        opened (a message is printed in that case).
    """
    try:
        f = open(filename)
    except IOError:
        # Parenthesised single-argument form prints identically on
        # Python 2 and Python 3.
        print("The file, %s, does not exist" % filename)
        return None

    # Collect the pieces of each sequence in a list and join once at the
    # end; repeated string += is quadratic for long multi-line records.
    pieces = {}
    name = None
    with f:  # guarantees the handle is closed, even on error
        for line in f:
            # Strip both LF and CRLF endings so Windows-formatted files do
            # not leave a stray '\r' in names or sequences.
            line = line.rstrip('\r\n')
            if line.startswith('>'):
                name = line[1:].replace('_', ' ')
                pieces[name] = []
            elif name is not None:
                pieces[name].append(line.rstrip('*'))
            # else: sequence data before the first header has no record to
            # belong to, so it is ignored instead of raising.
    return {key: ''.join(parts) for key, parts in pieces.items()}
def getGC(sequence):
    """Return the GC content of a sequence as a percentage.

    Only uppercase 'G' and 'C' characters are counted; every character in
    the string contributes to the length used as the denominator.

    Args:
        sequence: The sequence string to analyse.

    Returns:
        A float between 0.0 and 100.0. An empty sequence yields 0.0
        rather than raising ZeroDivisionError.
    """
    seqLen = len(sequence)
    if seqLen == 0:
        # A record with a header but no sequence lines produces ''.
        return 0.0
    GC_counts = sequence.count('G') + sequence.count('C')
    # float() keeps true division on Python 2 as well as Python 3.
    return float(GC_counts) / seqLen * 100
import sys  # the driver reads sys.argv; no import is present elsewhere in the file


def _main(argv):
    """Print the GC content of every record in the FASTA file argv[1].

    Args:
        argv: Command-line argument list, program name first.

    Returns:
        Process exit status: 0 on success, 1 if the argument is missing
        or the file could not be read.
    """
    if len(argv) < 2:
        sys.stderr.write("usage: %s <fasta_file>\n" % argv[0])
        return 1
    sequences = FASTA(argv[1])
    if sequences is None:
        # FASTA() has already printed why the file could not be read.
        return 1
    # .items() exists on both Python 2 and 3, unlike .iteritems().
    for key, value in sequences.items():
        # Printing an explicit tuple keeps the "(name, gc)" output format
        # regardless of whether print is a statement or a function.
        print((key, getGC(value)))
    return 0


if __name__ == "__main__":
    sys.exit(_main(sys.argv))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment