Skip to content

Instantly share code, notes, and snippets.

@knmkr
Forked from nimezhu/README.md
Created September 28, 2020 08:06
Show Gist options
  • Save knmkr/96eadfbd17435d154330e37225201ef0 to your computer and use it in GitHub Desktop.
Save knmkr/96eadfbd17435d154330e37225201ef0 to your computer and use it in GitHub Desktop.
A simple example of using the tabix C library from Go.

A simple example of linking the tabix C library into a Go program.

  1. Download the tabix C source code.
  2. Build the dynamic library libtabix.so.1 [or run make to get the static library libtabix.a].
  3. Copy libtabix.so.1 to a directory on your LD_LIBRARY_PATH [or use the static version, libtabix.a].
  4. Move query_tabix_example.go, query_tabix.go and query_tabix_example_static.go into the tabix directory, which contains example.gtf.gz, example.gtf.gz.tbi and libtabix.a.
  5. Run: go run query_tabix_example.go [or: go run query_tabix_example_static.go]
  6. Run: go run query_tabix.go [file.bed] [file.gz]
  7. query_tabix_bed6.go is designed for querying motifs; it translates the coordinates of each hit so that they are relative to the query region.

References

package main
/*
#cgo CFLAGS: -I ./
#cgo LDFLAGS: libtabix.a -lm -lz
#include <stdlib.h>
#include <stdio.h>
#include <tabix.h>
*/
import "C"
import (
"encoding/csv"
"fmt"
"github.com/codegangsta/cli"
"io"
"os"
"strconv"
)
// main sets up the query_tabix command line. The first positional argument is
// the regions file handed to read, the second is the tabix data file; its
// index is looked up at the same path with ".tbi" appended.
func main() {
	if len(os.Args) == 1 {
		println("query_tabix help")
		os.Exit(0)
	}
	app := cli.NewApp()
	app.Name = "query_tabix"
	app.Usage = "query_tabix input.bed data.gz"
	app.Action = func(c *cli.Context) {
		// Both positional arguments are taken straight from os.Args, so make
		// sure they exist instead of panicking on an out-of-range index.
		if len(os.Args) < 3 {
			fmt.Fprintln(os.Stderr, "usage:", app.Usage)
			os.Exit(1)
		}
		// NOTE(review): the two C.CString allocations below are never freed.
		// Releasing them needs C.free(unsafe.Pointer(...)), and this file does
		// not import "unsafe"; it is a one-off allocation per run.
		f := C.ti_open(C.CString(os.Args[2]), C.CString(os.Args[2]+".tbi"))
		if f == nil {
			fmt.Fprintln(os.Stderr, "Error: cannot open", os.Args[2])
			os.Exit(1)
		}
		defer C.ti_close(f)

		// The channel is not closed here: read is the sender and marks the
		// end of input with a nil record, which is what process waits for.
		line := make(chan []string)
		go read(os.Args[1], f, line)
		process(f, line)
	}
	app.Version = "0.0.2"
	app.Run(os.Args)
}
// query asks tabix for the lines of f that overlap chr:start-end and calls
// parse once per matching line, converted to a Go string. It always returns 0.
//
// start and end are narrowed to C.int before being passed to tabix.
func query(f *C.tabix_t, chr *C.char, start int64, end int64, parse func(i string)) int {
	iter := C.ti_query(f, chr, C.int(start), C.int(end))
	if iter == nil {
		// No iterator was produced (presumably the sequence name is not in
		// the index — verify against tabix.h). There is nothing to read, and
		// a NULL iterator must not be handed to ti_read.
		return 0
	}
	// Release the iterator on every exit path.
	defer C.ti_iter_destroy(iter)

	var n C.int // filled in by ti_read for each line; not used further
	for {
		s := C.ti_read(f, iter, &n)
		if s == nil {
			break
		}
		parse(C.GoString(s))
	}
	return 0
}
// read streams the tab-separated records of the file fn into line, one
// []string per record, and finishes by sending a nil record as the
// end-of-input marker.
//
// The marker is sent on every exit path, including when the file cannot be
// opened or a record cannot be parsed, so the receiver never waits forever.
// Diagnostics go to standard error. The f parameter is not used.
func read(fn string, f *C.tabix_t, line chan []string) {
	// Deferred first so that it covers the early error returns below.
	defer func() { line <- nil }()

	file, err := os.Open(fn)
	if err != nil {
		fmt.Fprintln(os.Stderr, "Error:", err)
		return
	}
	defer file.Close()

	reader := csv.NewReader(file)
	reader.Comma = '\t'
	for {
		record, err := reader.Read()
		if err == io.EOF {
			return
		}
		if err != nil {
			fmt.Fprintln(os.Stderr, "Error:", err)
			return
		}
		line <- record
	}
}
// process receives region records from line until it gets a nil record. For
// each region it prints a "QR" line with the region itself, runs the tabix
// query on f, and prints one "HT" line per hit.
//
// A region needs at least three fields (sequence name, start, end) with
// integer coordinates; anything else is reported on standard error and
// skipped instead of panicking or silently querying position 0.
// Results are written to standard output with fmt rather than the builtin
// println, which writes to standard error.
func process(f *C.tabix_t, line chan []string) {
	parse := func(a string) {
		fmt.Println("HT\t" + a)
	}
	for {
		record := <-line
		if record == nil {
			break
		}
		if len(record) < 3 {
			fmt.Fprintf(os.Stderr, "Error: skipping record with %d field(s), need at least 3: %q\n", len(record), record)
			continue
		}
		start, err := strconv.ParseInt(record[1], 10, 64)
		if err != nil {
			fmt.Fprintf(os.Stderr, "Error: skipping record with bad start %q: %v\n", record[1], err)
			continue
		}
		end, err := strconv.ParseInt(record[2], 10, 64)
		if err != nil {
			fmt.Fprintf(os.Stderr, "Error: skipping record with bad end %q: %v\n", record[2], err)
			continue
		}
		fmt.Println("QR\t" + format(record))
		// NOTE(review): this C.CString is allocated per record and never
		// freed; releasing it needs C.free(unsafe.Pointer(...)) and an
		// "unsafe" import, which this file does not have.
		query(f, C.CString(record[0]), start, end, parse)
	}
}
// format joins the fields of s with tab characters. An empty or nil slice
// yields the empty string.
func format(s []string) string {
	if len(s) == 0 {
		return ""
	}
	// Size the buffer once: all field bytes plus one tab between fields.
	size := len(s) - 1
	for _, field := range s {
		size += len(field)
	}
	buf := make([]byte, 0, size)
	buf = append(buf, s[0]...)
	for _, field := range s[1:] {
		buf = append(buf, '\t')
		buf = append(buf, field...)
	}
	return string(buf)
}
package main
/*
#cgo CFLAGS: -I ./
#cgo LDFLAGS: libtabix.a -lm -lz
#include <stdlib.h>
#include <stdio.h>
#include <tabix.h>
// getStringAt returns element `row` of the C string array `strlist`.
// No bounds checking is done; the caller must pass a valid index.
char *getStringAt(char **strlist, int row) {
return strlist[row];
};
*/
import "C"
import (
"encoding/csv"
"fmt"
"github.com/codegangsta/cli"
"io"
"os"
"strconv"
"strings"
)
/*
* Query a tabix file with BED6 regions: a goroutine feeds the regions
* through a channel, and each hit is converted with C.GoString.
*/
// reverseStrand flips a strand symbol: "+" becomes "-" and "-" becomes "+".
// Every other value, "." included, maps to ".".
func reverseStrand(strand string) string {
	if strand == "+" {
		return "-"
	}
	if strand == "-" {
		return "+"
	}
	return "."
}
// checkchr_factory loads the tabix index belonging to the data file fn,
// collects the sequence names it lists, and returns a function that reports
// whether a given name is one of them.
//
// If the index cannot be loaded, an error is written to standard error and
// the returned function reports false for every name.
func checkchr_factory(fn string) func(string) bool {
	// NOTE(review): this C.CString is never freed, and neither is the name
	// array returned by ti_seqname; both need C.free(unsafe.Pointer(...)) and
	// an "unsafe" import, which this file does not have. Both are one-off
	// allocations per call.
	idx := C.ti_index_load(C.CString(fn))
	if idx == nil {
		fmt.Fprintln(os.Stderr, "Error: cannot load tabix index for", fn)
		return func(string) bool { return false }
	}
	// The names are copied into Go strings below, so the index can be
	// released as soon as this function returns.
	defer C.ti_index_destroy(idx)

	seqnum := C.int(0)
	seqnames := C.ti_seqname(idx, &seqnum)
	known := make(map[string]struct{}, int(seqnum))
	for i := 0; i < int(seqnum); i++ {
		known[C.GoString(C.getStringAt(seqnames, C.int(i)))] = struct{}{}
	}
	return func(chr string) bool {
		_, ok := known[chr]
		return ok
	}
}
// main sets up the query_tabix_bed6 command line. The first positional
// argument is the regions file handed to read, the second is the tabix data
// file; its index is looked up at the same path with ".tbi" appended.
func main() {
	if len(os.Args) == 1 {
		// The hint names this program; the original text named query_tabix.
		println("query_tabix_bed6 help")
		os.Exit(0)
	}
	app := cli.NewApp()
	app.Name = "query_tabix_bed6"
	app.Usage = "query_tabix_bed6 input.bed data.bed6.gz"
	app.Action = func(c *cli.Context) {
		// Both positional arguments are taken straight from os.Args, so make
		// sure they exist instead of panicking on an out-of-range index.
		if len(os.Args) < 3 {
			fmt.Fprintln(os.Stderr, "usage:", app.Usage)
			os.Exit(1)
		}
		// NOTE(review): the two C.CString allocations below are never freed.
		// Releasing them needs C.free(unsafe.Pointer(...)), and this file does
		// not import "unsafe"; it is a one-off allocation per run.
		f := C.ti_open(C.CString(os.Args[2]), C.CString(os.Args[2]+".tbi"))
		if f == nil {
			fmt.Fprintln(os.Stderr, "Error: cannot open", os.Args[2])
			os.Exit(1)
		}
		defer C.ti_close(f)

		// The channel is not closed here: read is the sender and marks the
		// end of input with a nil record, which is what process waits for.
		line := make(chan []string)
		checkchr := checkchr_factory(os.Args[2])
		go read(os.Args[1], f, line)
		process(f, line, checkchr)
	}
	app.Version = "0.0.3"
	app.Run(os.Args)
}
// query asks tabix for the lines of f that overlap chr:start-end, converts
// each one with parse, and prints the results to standard output. When strand
// is "-" the results are printed in reverse order; otherwise in the order
// tabix returned them. It always returns 0.
//
// The strings returned by parse are printed verbatim: they are data, not
// format strings, so a '%' inside a hit is left untouched.
func query(f *C.tabix_t, chr string, start int64, end int64, strand string, parse func(i string) string) int {
	// NOTE(review): this C.CString is allocated per query and never freed;
	// releasing it needs C.free(unsafe.Pointer(...)) and an "unsafe" import,
	// which this file does not have.
	iter := C.ti_query(f, C.CString(chr), C.int(start), C.int(end))
	if iter == nil {
		// No iterator was produced; there is nothing to read, and a NULL
		// iterator must not be handed to ti_read.
		return 0
	}
	defer C.ti_iter_destroy(iter)

	var n C.int // filled in by ti_read for each line; not used further
	var hits []string
	for {
		s := C.ti_read(f, iter, &n)
		if s == nil {
			break
		}
		hits = append(hits, parse(C.GoString(s)))
	}

	if strand == "-" {
		for i := len(hits) - 1; i >= 0; i-- {
			fmt.Print(hits[i])
		}
		return 0
	}
	for _, hit := range hits {
		fmt.Print(hit)
	}
	return 0
}
// read streams the tab-separated records of the file fn into line, one
// []string per record, and finishes by sending a nil record as the
// end-of-input marker.
//
// The marker is sent on every exit path, including when the file cannot be
// opened or a record cannot be parsed, so the receiver never waits forever.
// Diagnostics go to standard error, keeping them out of the QR/HT result
// stream on standard output. The f parameter is not used.
func read(fn string, f *C.tabix_t, line chan []string) {
	// Deferred first so that it covers the early error returns below.
	defer func() { line <- nil }()

	file, err := os.Open(fn)
	if err != nil {
		fmt.Fprintln(os.Stderr, "Error:", err)
		return
	}
	defer file.Close()

	reader := csv.NewReader(file)
	reader.Comma = '\t'
	for {
		record, err := reader.Read()
		if err == io.EOF {
			return
		}
		if err != nil {
			fmt.Fprintln(os.Stderr, "Error:", err)
			return
		}
		line <- record
	}
}
// process receives BED6 region records from line until it gets a nil record.
// For each region it prints a "QR" line, then either the translated hits
// found by query or "NOCHR_IN_TABIX_FILE" when checkchr rejects the
// region's sequence name.
//
// Hit coordinates are rewritten relative to the query region. For strand "+"
// or "." they are offset from the region start; for any other strand they
// are measured back from the region end and the hit's strand is flipped.
//
// Regions and hits need at least six fields with integer coordinates in
// fields 1 and 2; malformed ones are reported on standard error and skipped
// instead of panicking or silently using position 0.
func process(f *C.tabix_t, line chan []string, checkchr func(chr string) bool) {
	// State of the region currently being queried; parse reads these.
	start, end := int64(0), int64(0)
	strand := "."
	name := "noname"

	parse := func(a string) string {
		r := strings.Split(a, "\t")
		if len(r) < 6 {
			fmt.Fprintf(os.Stderr, "Error: skipping hit with %d field(s), need at least 6: %q\n", len(r), a)
			return ""
		}
		rstart, err := strconv.ParseInt(r[1], 10, 64)
		if err != nil {
			fmt.Fprintf(os.Stderr, "Error: skipping hit with bad start %q: %v\n", r[1], err)
			return ""
		}
		rend, err := strconv.ParseInt(r[2], 10, 64)
		if err != nil {
			fmt.Fprintf(os.Stderr, "Error: skipping hit with bad end %q: %v\n", r[2], err)
			return ""
		}
		rstrand := r[5]
		if strand == "+" || strand == "." {
			return fmt.Sprintf("HT\t%s\t%d\t%d\t%s\t%s\t%s\n", name, rstart-start, rend-start, r[3], r[4], rstrand)
		}
		return fmt.Sprintf("HT\t%s\t%d\t%d\t%s\t%s\t%s\n", name, end-rend, end-rstart, r[3], r[4], reverseStrand(rstrand))
	}

	for {
		record := <-line
		if record == nil {
			break
		}
		if len(record) < 6 {
			fmt.Fprintf(os.Stderr, "Error: skipping record with %d field(s), need at least 6: %q\n", len(record), record)
			continue
		}
		// Parse into temporaries so the shared state is only updated once
		// the whole record is known to be valid.
		qstart, err := strconv.ParseInt(record[1], 10, 64)
		if err != nil {
			fmt.Fprintf(os.Stderr, "Error: skipping record with bad start %q: %v\n", record[1], err)
			continue
		}
		qend, err := strconv.ParseInt(record[2], 10, 64)
		if err != nil {
			fmt.Fprintf(os.Stderr, "Error: skipping record with bad end %q: %v\n", record[2], err)
			continue
		}
		start, end = qstart, qend
		strand = record[5]
		name = record[3]

		fmt.Printf("QR\t%s\n", format(record))
		if checkchr(record[0]) {
			query(f, record[0], start, end, strand, parse)
		} else {
			fmt.Print("NOCHR_IN_TABIX_FILE\n")
		}
	}
}
// format joins the fields of s with tab characters. An empty or nil slice
// yields the empty string.
func format(s []string) string {
	return strings.Join(s, "\t")
}
package main
/*
#cgo CFLAGS: -I ./
#cgo LDFLAGS: -L . -ltabix
#include <stdlib.h>
#include <stdio.h>
#include <bgzf.h>
#include <tabix.h>
// prints writes str followed by a newline to C's stdout via printf.
void prints(char* str){
printf("%s\n", str);
}
*/
import "C"
import "unsafe"
// Prints copies str into a C string, prints it on its own line through the
// C helper prints, and frees the copy before returning.
func Prints(str string) {
	buf := C.CString(str)
	// Registered right after the allocation; runs when Prints returns.
	defer C.free(unsafe.Pointer(buf))
	C.prints(buf)
}
// main opens example.gtf.gz with its index example.gtf.gz.tbi, queries
// positions 60000-80000 on the sequence with index 0, and prints every
// matching line. All C resources are released before returning.
func main() {
	dataPath := C.CString("example.gtf.gz")
	defer C.free(unsafe.Pointer(dataPath))
	indexPath := C.CString("example.gtf.gz.tbi")
	defer C.free(unsafe.Pointer(indexPath))

	f := C.ti_open(dataPath, indexPath)
	if f == nil {
		println("cannot open example.gtf.gz")
		return
	}
	defer C.ti_close(f)

	iter := C.ti_queryi(f, 0, 60000, 80000)
	if iter == nil {
		// A NULL iterator must not be handed to ti_read.
		println("cannot query example.gtf.gz")
		return
	}
	defer C.ti_iter_destroy(iter)

	var n C.int // filled in by ti_read for each line; not used further
	for {
		s := C.ti_read(f, iter, &n)
		if s == nil {
			break
		}
		C.prints(s)
	}
}
package main
/*
#cgo CFLAGS: -I ./
#cgo LDFLAGS: libtabix.a -lm -lz
#include <stdlib.h>
#include <stdio.h>
#include <bgzf.h>
#include <tabix.h>
// prints writes str followed by a newline to C's stdout via printf.
void prints(char* str){
printf("%s\n", str);
}
*/
import "C"
import "unsafe"
// Prints makes a C copy of str, prints it on its own line through the C
// helper prints, and then releases the copy.
func Prints(str string) {
	text := C.CString(str)
	C.prints(text)
	C.free(unsafe.Pointer(text))
}
// main opens example.gtf.gz with its index example.gtf.gz.tbi, queries
// positions 60000-80000 on the sequence with index 0, and prints every
// matching line. All C resources are released before returning.
func main() {
	dataPath := C.CString("example.gtf.gz")
	defer C.free(unsafe.Pointer(dataPath))
	indexPath := C.CString("example.gtf.gz.tbi")
	defer C.free(unsafe.Pointer(indexPath))

	f := C.ti_open(dataPath, indexPath)
	if f == nil {
		println("cannot open example.gtf.gz")
		return
	}
	defer C.ti_close(f)

	iter := C.ti_queryi(f, 0, 60000, 80000)
	if iter == nil {
		// A NULL iterator must not be handed to ti_read.
		println("cannot query example.gtf.gz")
		return
	}
	defer C.ti_iter_destroy(iter)

	var n C.int // filled in by ti_read for each line; not used further
	for {
		s := C.ti_read(f, iter, &n)
		if s == nil {
			break
		}
		C.prints(s)
	}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment