Skip to content

Instantly share code, notes, and snippets.

@cpl
Created December 21, 2019 19:13
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save cpl/8d1d53e81657ee655f27717c858ef576 to your computer and use it in GitHub Desktop.
Save cpl/8d1d53e81657ee655f27717c858ef576 to your computer and use it in GitHub Desktop.
Parsing 23andMe genome data
package main
import (
"bufio"
"bytes"
"fmt"
"io"
"log"
"os"
"strconv"
)
// SNP is one single-nucleotide polymorphism record.
//
// tsvLineToSNP builds an SNP from a line of four tab-separated fields; the
// struct fields below are listed in the same order as those input fields.
type SNP struct {
	// RSID is the first field of the line, stored verbatim.
	RSID string
	// Chromosome is the second field of the line, kept as a string.
	Chromosome string
	// Position is the third field of the line, converted with strconv.Atoi.
	Position int
	// Genotype is the fourth field of the line, stored verbatim.
	Genotype string
}
// Genome is an ordered collection of SNP records. readGenome builds one by
// appending each successfully parsed input line in the order it was read.
type Genome []SNP
// checkErr panics with err when err is non-nil and does nothing otherwise.
// The panic value is err itself, unwrapped.
func checkErr(err error) {
	if err == nil {
		return
	}
	panic(err)
}
// tsvLineToSNP decodes a single tab-separated record into an SNP.
//
// The line must split on '\t' into exactly four fields, in the order RSID,
// chromosome, position, genotype. The position field must be an integer
// accepted by strconv.Atoi; the other three fields are copied as-is.
//
// On failure the zero SNP is returned together with an error: either a field
// count error, or an error wrapping the strconv failure for the position.
func tsvLineToSNP(line []byte) (snp SNP, err error) {
	fields := bytes.Split(line, []byte{'\t'})
	if len(fields) != 4 {
		return SNP{}, fmt.Errorf("invalid len for SNP TSV, %d", len(fields))
	}

	position, convErr := strconv.Atoi(string(fields[2]))
	if convErr != nil {
		return SNP{}, fmt.Errorf("invalid position for SNP, %w", convErr)
	}

	return SNP{
		RSID:       string(fields[0]),
		Chromosome: string(fields[1]),
		Position:   position,
		Genotype:   string(fields[3]),
	}, nil
}
// readGenome parses a 23andMe-style raw data export from reader.
//
// The input is consumed line by line. Lines that are blank, or whose first
// non-space character is '#', are skipped: raw exports start with a
// '#'-prefixed comment header (including the column-title row), which is not
// SNP data. Every other line is decoded with tsvLineToSNP and appended to the
// result in input order.
//
// It returns the parsed Genome, or a nil Genome and a non-nil error if a data
// line is malformed (the error wraps the tsvLineToSNP error) or if reading
// from reader fails (the scanner's error is returned unchanged). Empty input
// yields a nil Genome and a nil error.
func readGenome(reader io.Reader) (genome Genome, err error) {
	// bufio.Scanner splits on lines by default and strips a trailing "\r".
	scanner := bufio.NewScanner(reader)
	for scanner.Scan() {
		line := scanner.Bytes()

		// Classify using a trimmed view only. The untrimmed line is what gets
		// parsed, so a trailing tab (an empty last field) is not lost.
		if trimmed := bytes.TrimSpace(line); len(trimmed) == 0 || trimmed[0] == '#' {
			continue
		}

		snp, parseErr := tsvLineToSNP(line)
		if parseErr != nil {
			return nil, fmt.Errorf("failed parsing tsv line, %w", parseErr)
		}
		genome = append(genome, snp)
	}

	// Do not hand back a partially read genome alongside a read error.
	if scanErr := scanner.Err(); scanErr != nil {
		return nil, scanErr
	}
	return genome, nil
}
func main() {
fp, err := os.Open("genome.txt")
checkErr(err)
defer fp.Close()
genome, err := readGenome(fp)
if err != nil {
checkErr(err)
}
fmt.Println(len(genome))
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment