|
package main |
|
|
|
/* |
|
#cgo CFLAGS: -I ./ |
|
#cgo LDFLAGS: libtabix.a -lm -lz |
|
#include <stdlib.h> |
|
#include <stdio.h> |
|
#include <tabix.h> |
|
char *getStringAt(char **strlist, int row) { |
|
return strlist[row]; |
|
}; |
|
|
|
*/ |
|
import "C" |
|
import (
	"encoding/csv"
	"fmt"
	"io"
	"os"
	"strconv"
	"strings"
	"unsafe"

	"github.com/codegangsta/cli"
)
|
|
|
/* |
|
 * Query a tabix-indexed file, streaming the input records over a channel
 * and converting the C strings returned by tabix with C.GoString.
|
*/ |
|
// reverseStrand flips a strand symbol: "+" becomes "-" and "-" becomes "+".
// Every other input, including the empty string, yields ".".
func reverseStrand(strand string) string {
	if strand == "+" {
		return "-"
	}
	if strand == "-" {
		return "+"
	}
	return "."
}
|
func checkchr_factory(fn string) func(string) bool { |
|
idx := C.ti_index_load(C.CString(fn)) |
|
seqnum := C.int(0) |
|
seqnames := C.ti_seqname(idx, &seqnum) |
|
m := make(map[string]int) |
|
for i := 0; i < int(seqnum); i++ { |
|
m[C.GoString(C.getStringAt(seqnames, C.int(i)))] = i |
|
} |
|
return func(chr string) bool { |
|
if _, ok := m[chr]; ok { |
|
return true |
|
} else { |
|
return false |
|
} |
|
} |
|
|
|
} |
|
func main() { |
|
if len(os.Args) == 1 { |
|
println("query_tabix help") |
|
os.Exit(0) |
|
} |
|
app := cli.NewApp() |
|
app.Name = "query_tabix_bed6" |
|
app.Usage = "query_tabix_bed6 input.bed data.bed6.gz" |
|
app.Action = func(c *cli.Context) { |
|
f := C.ti_open(C.CString(os.Args[2]), C.CString(os.Args[2]+".tbi")) |
|
defer C.ti_close(f) |
|
line := make(chan []string) |
|
defer close(line) |
|
checkchr := checkchr_factory(os.Args[2]) |
|
go read(os.Args[1], f, line) |
|
process(f, line, checkchr) |
|
|
|
} |
|
app.Version = "0.0.3" |
|
|
|
app.Run(os.Args) |
|
|
|
} |
|
func query(f *C.tabix_t, chr string, start int64, end int64, strand string, parse func(i string) string) int { |
|
iter := C.ti_query(f, C.CString(chr), C.int(start), C.int(end)) |
|
defer C.ti_iter_destroy(iter) |
|
len := C.int(0) |
|
s := C.CString("") |
|
r := []string{} |
|
l := 0 |
|
for { |
|
s = C.ti_read(f, iter, &len) |
|
if s == nil { |
|
break |
|
} |
|
r = append(r, parse(C.GoString(s))) |
|
l++ |
|
} |
|
if strand == "-" { |
|
for i := l - 1; i >= 0; i-- { |
|
fmt.Printf(r[i]) |
|
} |
|
} else { |
|
for _, v := range r { |
|
fmt.Printf(v) |
|
} |
|
} |
|
return 0 |
|
} |
|
func read(fn string, f *C.tabix_t, line chan []string) { |
|
file, err := os.Open(fn) |
|
if err != nil { |
|
fmt.Println("Error:", err) |
|
return |
|
} |
|
defer file.Close() |
|
reader := csv.NewReader(file) |
|
reader.Comma = '\t' |
|
for { |
|
record, err := reader.Read() |
|
if err == io.EOF { |
|
break |
|
} else if err != nil { |
|
fmt.Println("Error:", err) |
|
return |
|
} |
|
line <- record |
|
} |
|
line <- nil |
|
} |
|
func process(f *C.tabix_t, line chan []string, checkchr func(chr string) bool) { |
|
start, end := int64(0), int64(0) |
|
strand := "." |
|
name := "noname" |
|
parse := func(a string) string { |
|
r := strings.Split(a, "\t") |
|
rstrand := r[5] |
|
rstart, _ := strconv.ParseInt(r[1], 10, 64) |
|
rend, _ := strconv.ParseInt(r[2], 10, 64) |
|
if strand == "+" || strand == "." { |
|
return fmt.Sprintf("HT\t%s\t%d\t%d\t%s\t%s\t%s\n", name, rstart-start, rend-start, r[3], r[4], rstrand) |
|
|
|
} else { |
|
return fmt.Sprintf("HT\t%s\t%d\t%d\t%s\t%s\t%s\n", name, end-rend, end-rstart, r[3], r[4], reverseStrand(rstrand)) |
|
} |
|
} |
|
|
|
for { |
|
record := <-line |
|
if record == nil { |
|
break |
|
} |
|
start, _ = strconv.ParseInt(record[1], 10, 64) |
|
end, _ = strconv.ParseInt(record[2], 10, 64) |
|
strand = record[5] |
|
name = record[3] |
|
fmt.Printf("QR\t%s\n", format(record)) |
|
if checkchr(record[0]) { |
|
query(f, record[0], start, end, strand, parse) |
|
} else { |
|
fmt.Print("NOCHR_IN_TABIX_FILE\n") |
|
} |
|
} |
|
} |
|
// format joins the fields of s with tab characters. An empty or nil slice
// yields the empty string.
func format(s []string) string {
	return strings.Join(s, "\t")
}