Skip to content

Instantly share code, notes, and snippets.

@unixpickle
Created December 9, 2017 05:40
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save unixpickle/7304c78032c9f433e28a87409f4d5aca to your computer and use it in GitHub Desktop.
Save unixpickle/7304c78032c9f433e28a87409f4d5aca to your computer and use it in GitHub Desktop.
Index imagenet tar
package main
import (
"fmt"
"io"
"io/ioutil"
"log"
"net/http"
"strconv"
"strings"
)
// main streams the ILSVRC2012 training tar over HTTP and prints one index
// line per file found inside each nested ".tar" entry of that archive:
//
//	wnid/imageName,start-end
//
// wnid is the nested archive's name without its ".tar" suffix; start and end
// are byte offsets into the outer tar stream (end is exclusive). Progress and
// errors are written through the log package.
func main() {
	resp, err := http.Get("http://www.image-net.org/challenges/LSVRC/2012/nnoupb/ILSVRC2012_img_train.tar")
	if err != nil {
		log.Println(err)
		return
	}
	// Close the body so the underlying connection is released.
	defer resp.Body.Close()

	// Without this check an HTTP error page would be handed to the tar
	// parser as if it were archive data.
	if resp.StatusCode != http.StatusOK {
		log.Println("unexpected HTTP status:", resp.Status)
		return
	}

	err = ReadTar(resp.Body, func(wnid string, folderTar io.Reader, offset int64) {
		if !strings.HasSuffix(wnid, ".tar") {
			return
		}
		wnid = strings.TrimSuffix(wnid, ".tar")
		log.Println("Processing wnid:", wnid)
		err := ReadTar(folderTar, func(imageName string, img io.Reader, subOffset int64) {
			// Only the length is needed, so count the bytes while
			// discarding them instead of buffering the whole image.
			n, err := io.Copy(ioutil.Discard, img)
			if err != nil {
				// A partial read would yield a wrong byte range; skip it.
				log.Println(err)
				return
			}
			// offset locates the nested tar's data in the outer stream and
			// subOffset locates the image's data inside the nested tar.
			start := offset + subOffset
			fmt.Printf("%s/%s,%d-%d\n", wnid, imageName, start, start+n)
		})
		if err != nil {
			log.Println("Sub-error:", err)
		}
	})
	if err != nil {
		log.Println("Final error:", err)
	}
}
// ReadTar walks an uncompressed tar stream read from r and invokes cb once
// per entry, in order.
//
// cb receives the entry name, a reader limited to the entry's data, and the
// byte offset of that data measured from the point in r where ReadTar
// started. cb may read all, some, or none of the data; whatever it leaves
// unread is skipped before the next header is parsed. The data reader must
// not be used after cb returns.
//
// ReadTar returns nil when it meets an entry with an empty name (treated as
// the end of the archive) or when r ends cleanly on a header boundary. It
// returns io.ErrUnexpectedEOF when r ends inside an entry's data or padding,
// and any other header or read error unchanged.
func ReadTar(r io.Reader, cb func(name string, data io.Reader, offset int64)) error {
	const blockSize = 512

	var offset int64
	for {
		name, size, err := ReadTarHeader(r)
		if err == io.EOF {
			// No bytes at all were available for the next header: the
			// archive simply has no trailing end-of-archive blocks.
			return nil
		}
		if err != nil {
			return err
		}
		if name == "" {
			return nil
		}
		if size < 0 {
			// A negative size would move offset backwards and break the
			// padding arithmetic below.
			return fmt.Errorf("tar entry %q: negative size %d", name, size)
		}

		body := &io.LimitedReader{R: r, N: size}
		cb(name, body, offset+blockSize)
		// Skip whatever the callback left unread.
		if _, err := io.Copy(ioutil.Discard, body); err != nil {
			return err
		}
		if body.N != 0 {
			// r ran dry before the declared size was consumed.
			return io.ErrUnexpectedEOF
		}
		offset += blockSize + size

		// Entry data is padded up to a whole number of blocks.
		if rem := size % blockSize; rem != 0 {
			pad := blockSize - rem
			if _, err := io.CopyN(ioutil.Discard, r, pad); err != nil {
				if err == io.EOF {
					err = io.ErrUnexpectedEOF
				}
				return err
			}
			offset += pad
		}
	}
}
// ReadTarHeader reads one 512-byte tar header block from r and returns the
// entry name (bytes 0-99, NUL-terminated) and the entry size (octal text in
// bytes 124-135).
//
// A block made up entirely of zero bytes is the end-of-archive marker; it is
// reported as an empty name, zero size and a nil error.
//
// Read errors are returned unchanged, so io.EOF means no bytes were available
// and io.ErrUnexpectedEOF means the block was cut short. An unparsable size
// field yields an error wrapping the strconv error.
func ReadTarHeader(r io.Reader) (name string, size int64, err error) {
	buf := make([]byte, 512)
	if _, err := io.ReadFull(r, buf); err != nil {
		return "", 0, err
	}

	// The end-of-archive block has an empty size field, which ParseInt
	// rejects, so it has to be recognised before any field is parsed.
	allZero := true
	for _, b := range buf {
		if b != 0 {
			allZero = false
			break
		}
	}
	if allZero {
		return "", 0, nil
	}

	name = NullTermStr(buf[:100])
	// Numeric fields may be terminated or padded with spaces as well as NUL.
	field := strings.Trim(NullTermStr(buf[124:136]), " ")
	size, err = strconv.ParseInt(field, 8, 64)
	if err != nil {
		return "", 0, fmt.Errorf("tar header %q: invalid size field: %w", name, err)
	}
	return name, size, nil
}
// NullTermStr returns the bytes of data that precede the first zero byte, as
// a string. When data contains no zero byte the whole slice is converted.
func NullTermStr(data []byte) string {
	end := len(data)
	for pos := 0; pos < len(data); pos++ {
		if data[pos] == 0 {
			end = pos
			break
		}
	}
	return string(data[:end])
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment