Skip to content

Instantly share code, notes, and snippets.

@mduvall
Created April 2, 2014 04:20
Show Gist options
  • Save mduvall/9927873 to your computer and use it in GitHub Desktop.
Save mduvall/9927873 to your computer and use it in GitHub Desktop.
Get possible protein strings from reading frames in DNA
package main
import (
"fasta"
"fmt"
"strings"
)
// main loads the FASTA file "orf.txt" and prints every distinct candidate
// protein string found in the open reading frames of its DNA sequences,
// one per line.
//
// A candidate begins at each "M" entry of a frame's amino acid sequence and
// runs up to (not including) the next "Stop" entry. Candidates are collected
// in a map used as a set, so duplicates are printed once and the output order
// is unspecified.
func main() {
	records := fasta.NewFastaFileWithPath("orf.txt")
	seen := make(map[string]bool)

	for _, dna := range records.DnaSeqs {
		for _, frame := range dna.GetOpenReadingFrames() {
			aminos := frame.GetAminoAcidSequence()
			for start, amino := range aminos {
				if amino != "M" {
					continue
				}
				protein := strings.Join(getAminoSequenceFromIndex(aminos[start:]), "")
				// An empty string means no "Stop" followed this "M";
				// such candidates are never printed, so skip them here.
				if len(protein) == 0 {
					continue
				}
				seen[protein] = true
			}
		}
	}

	for protein := range seen {
		fmt.Println(protein)
	}
}
// getAminoSequenceFromIndex returns the leading portion of sequence that
// precedes its first "Stop" entry. The result is a sub-slice of sequence,
// not a copy. If sequence contains no "Stop" entry, nil is returned.
func getAminoSequenceFromIndex(sequence []string) []string {
	for end := 0; end < len(sequence); end++ {
		if sequence[end] != "Stop" {
			continue
		}
		return sequence[:end]
	}
	return nil
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment