Created
September 18, 2014 02:28
-
-
Save quwubin/fdf9a9b40f4c4fbbeb02 to your computer and use it in GitHub Desktop.
A FASTA parser written in Go
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main

import (
	"bufio"
	"bytes"
	"fmt"
	"io"
	"log"
	"os"
	"strings"
)
// fasta is a single FASTA record: the pieces of its header line plus the
// sequence that follows it.
type fasta struct {
	id   string // record identifier: the leading word of the header line
	desc string // free-text description that follows the id; may be empty
	seq  string // sequence data of the record as one contiguous string
}
func build_fasta(header string, seq bytes.Buffer) (record fasta) { | |
fields := strings.SplitN(header, " ", 2) | |
if len(fields) > 1 { | |
record.id = fields[0] | |
record.desc = fields[1] | |
}else{ | |
record.id = fields[0] | |
record.desc = "" | |
} | |
record.seq = seq.String() | |
return record | |
} | |
func parse(fastaFh io.Reader) chan fasta { | |
outputChannel := make(chan fasta) | |
scanner := bufio.NewScanner(fastaFh) | |
// scanner.Split(bufio.ScanLines) | |
header := "" | |
var seq bytes.Buffer | |
go func() { | |
// Loop over the letters in inputString | |
for scanner.Scan() { | |
line := strings.TrimSpace(scanner.Text()) | |
if len(line) == 0 { | |
continue | |
} | |
// line := scanner.Text() | |
if line[0] == '>' { | |
// If we stored a previous identifier, get the DNA string and map to the | |
// identifier and clear the string | |
if header != "" { | |
// outputChannel <- build_fasta(header, seq.String()) | |
outputChannel <- build_fasta(header, seq) | |
// fmt.Println(record.id, len(record.seq)) | |
header = "" | |
seq.Reset() | |
} | |
// Standard FASTA identifiers look like: ">id desc" | |
header = line[1:] | |
} else { | |
// Append here since multi-line DNA strings are possible | |
seq.WriteString(line) | |
} | |
} | |
outputChannel <- build_fasta(header, seq) | |
// Close the output channel, so anything that loops over it | |
// will know that it is finished. | |
close(outputChannel) | |
}() | |
return outputChannel | |
} | |
func main() { | |
fastaFh, err := os.Open(os.Args[1]) | |
if err != nil { | |
log.Fatal(err) | |
} | |
defer fastaFh.Close() | |
for record := range parse(fastaFh) { | |
fmt.Println(record.id, len(record.seq), record.seq[:100]) | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment