# alexholehouse/analyse_sequence_data.py

## analyse_sequence_data.py
from localcider.sequenceParameters import SequenceParameters
from pyfasta import Fasta

#
# This assumes you have previously installed localCIDER and pyfasta (both
# are available via pip)
#

# open the FASTA file through pyfasta; the resulting object is indexed
# by header line
F = Fasta('swissprot_human_proteome.fasta')

# collect every header line in the file (a header line is the line
# that starts with a ">" and generally contains identifying
# information about the protein). These headers are the dictionary
# keys we are going to use
all_fasta_keys = F.keys()

# map each header to its full amino acid sequence, converted to a
# plain string
header_to_sequence = {
    header: str(F[header][:]) for header in all_fasta_keys
}


# now header_to_sequence is a dictionary where each key-value pair
# is a FASTA file header line and the associated amino acid sequence
	from localcider.sequenceParameters import SequenceParameters
	from pyfasta import Fasta

	#
	# This assumes you have previously installed localCIDER and pyfasta (both
	# are available via pip)
	#

	# read in the FASTA file using pyfasta
	F = Fasta('swissprot_human_proteome.fasta')

	# get all the header lines associated with each sequence
	# (a header line is the line that starts with a ">" and
	# generally contains identifying information about the
	# protein). These are the dictionary keys we're going
	# to use
	all_fasta_keys = F.keys()

	# for each header we use this to extract the full amino acid
	# sequence
	header_to_sequence = {}
	for k in all_fasta_keys:

		# the loop body must be indented one level deeper than the
		# `for` statement; this assigns the sequence (as a plain
		# string) to the header's entry in the dictionary
		header_to_sequence[k] = str(F[k][:])


	# now header_to_sequence is a dictionary where each key-value pair
	# is a FASTA file header line and the associated amino acid sequence