Created
December 9, 2017 05:40
-
-
Save unixpickle/7304c78032c9f433e28a87409f4d5aca to your computer and use it in GitHub Desktop.
Index imagenet tar
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"fmt" | |
"io" | |
"io/ioutil" | |
"log" | |
"net/http" | |
"strconv" | |
"strings" | |
) | |
func main() { | |
resp, err := http.Get("http://www.image-net.org/challenges/LSVRC/2012/nnoupb/ILSVRC2012_img_train.tar") | |
if err != nil { | |
log.Println(err) | |
return | |
} | |
err = ReadTar(resp.Body, func(wnid string, folderTar io.Reader, offset int64) { | |
if !strings.HasSuffix(wnid, ".tar") { | |
return | |
} | |
wnid = wnid[:len(wnid)-4] | |
log.Println("Processing wnid:", wnid) | |
err := ReadTar(folderTar, func(imageName string, img io.Reader, subOffset int64) { | |
imgData, err := ioutil.ReadAll(img) | |
if err != nil { | |
log.Println(err) | |
} | |
fmt.Printf("%s/%s,%d-%d\n", wnid, imageName, subOffset+offset, | |
subOffset+offset+int64(len(imgData))) | |
}) | |
if err != nil { | |
log.Println("Sub-error:", err) | |
} | |
}) | |
log.Println("Final error:", err) | |
} | |
func ReadTar(r io.Reader, cb func(name string, data io.Reader, offset int64)) error { | |
var offset int64 | |
for { | |
name, size, err := ReadTarHeader(r) | |
if err != nil { | |
return err | |
} | |
if name == "" { | |
return nil | |
} | |
limited := io.LimitReader(r, size) | |
cb(name, limited, offset+512) | |
if _, err := io.Copy(ioutil.Discard, limited); err != nil { | |
return err | |
} | |
offset += 512 + size | |
if size%512 != 0 { | |
extra := 512 - (size % 512) | |
if _, err := io.Copy(ioutil.Discard, io.LimitReader(r, extra)); err != nil { | |
return err | |
} | |
offset += extra | |
} | |
} | |
} | |
// ReadTarHeader reads one 512-byte tar header block from r and extracts the
// entry name (bytes 0-99, NUL-terminated) and the entry size (bytes 124-135,
// octal ASCII).
//
// A block consisting only of zero bytes is the end-of-archive marker and is
// reported as an empty name with size 0 and a nil error. The size field may
// be terminated by a NUL and padded with spaces on either side.
//
// Errors from reading r are returned unchanged (io.EOF if no bytes were
// available, io.ErrUnexpectedEOF for a partial block). A size field that is
// not a non-negative octal number yields the strconv parse error.
func ReadTarHeader(r io.Reader) (name string, size int64, err error) {
	buf := make([]byte, 512)
	if _, err := io.ReadFull(r, buf); err != nil {
		return "", 0, err
	}

	// The end-of-archive block has an empty size field too, so it has to be
	// recognised before any parsing is attempted.
	allZero := true
	for _, b := range buf {
		if b != 0 {
			allZero = false
			break
		}
	}
	if allZero {
		return "", 0, nil
	}

	name = string(buf[:100])
	if i := strings.IndexByte(name, 0); i >= 0 {
		name = name[:i]
	}

	field := string(buf[124:136])
	if i := strings.IndexByte(field, 0); i >= 0 {
		field = field[:i]
	}
	field = strings.TrimSpace(field)

	// Parsing as unsigned with a 63-bit limit rejects negative values while
	// guaranteeing the result fits in an int64.
	n, err := strconv.ParseUint(field, 8, 63)
	if err != nil {
		return "", 0, err
	}
	return name, int64(n), nil
}
// NullTermStr converts a fixed-width, NUL-padded field to a string. The
// result holds the bytes of data that precede the first zero byte, or all of
// data when it contains no zero byte.
func NullTermStr(data []byte) string {
	end := len(data)
	for idx := 0; idx < len(data); idx++ {
		if data[idx] == 0 {
			end = idx
			break
		}
	}
	return string(data[:end])
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment