Last active
February 2, 2020 11:01
-
-
Save kawasin73/fa2daacfd138e72f002c72839f300571 to your computer and use it in GitHub Desktop.
SECCON 2019 tanuki
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"compress/gzip" | |
"crypto/sha1" | |
"encoding/base64" | |
"hash" | |
"hash/crc32" | |
"io" | |
"log" | |
"net/http" | |
_ "net/http/pprof" | |
"os" | |
"unicode/utf8" | |
) | |
func main() { | |
go func() { | |
log.Println(http.ListenAndServe("localhost:6060", nil)) | |
}() | |
filename := "tanuki.txt.gz.gz.gz.gz.gz.gz.gz.gz.gz.gz.gz.gz" | |
f, err := os.Open(filename) | |
if err != nil { | |
log.Panic(err) | |
} | |
defer f.Close() | |
r, err := gzip.NewReader(f) | |
if err != nil { | |
log.Panic(err) | |
} | |
//m := newPreviousMap(10) | |
buffer := newChanneledBuffer() | |
original := buffer | |
for i := 0; i < 11; i++ { | |
//pow := 3 | |
//for j := 0; j < 11-i; j++ { | |
// pow *= 3 | |
//} | |
next := newChanneledBuffer() | |
size := 3 | |
//if i < 8 { | |
// size = 1 | |
//} | |
go Run(buffer, next, i, size) | |
//if i < 11 { | |
// go Run(buffer, next, i) | |
//} else { | |
// go RunParallel(buffer, next, i, 4) | |
//} | |
buffer = next | |
} | |
go func() { | |
buf := make([]byte, 1024*1024) | |
for { | |
n, err := r.Read(buf) | |
if n == 0 && err == io.EOF { | |
log.Println("first level finish") | |
original.Close() | |
break | |
} else if err != nil && err != io.EOF { | |
log.Panic(err) | |
} | |
log.Println("read unique") | |
if _, err = original.Write(buf[:n]); err != nil { | |
log.Panic(err) | |
} | |
buf = make([]byte, 1024*1024) | |
//if m.Count(buf[:n]) { | |
// | |
//} | |
} | |
}() | |
var ( | |
rest []byte | |
result string | |
) | |
for { | |
buf, err := buffer.GetAll() | |
if err == io.EOF { | |
log.Println("finish :", result) | |
break | |
} | |
log.Println("get", len(buf)) | |
h := crc32.NewIEEE() | |
h.Write(buf) | |
log.Println("chksum :", h.Sum32()) | |
var head int | |
if len(rest) > 0 { | |
size := 10 | |
if len(buf) < 10 { | |
size = len(buf) | |
} | |
restSize := len(rest) | |
rest = append(rest, buf[:size]...) | |
if isTa(rest) { | |
head = 3 - restSize | |
rest = nil | |
} else { | |
r, n := utf8.DecodeRune(rest) | |
if r == utf8.RuneError { | |
continue | |
} | |
str := string(rest[:n]) | |
log.Println("new rune :", r, str) | |
result += str | |
log.Println("current :", result) | |
head = n - restSize | |
rest = nil | |
} | |
} | |
for { | |
if isTa(buf[head:]) { | |
head += 3 | |
} else { | |
r, n := utf8.DecodeRune(buf[head:]) | |
if r == utf8.RuneError { | |
rest = buf[head:] | |
log.Println("rest :", len(rest)) | |
break | |
} | |
str := string(buf[head : head+n]) | |
log.Println("new rune :", r, str) | |
result += str | |
log.Println("current :", result) | |
head += n | |
} | |
} | |
} | |
} | |
// isTa reports whether buf begins with the three bytes E3 81 9F, the UTF-8
// encoding of "た" (U+305F).
func isTa(buf []byte) bool {
	if len(buf) < 3 {
		return false
	}
	return buf[0] == 0xE3 && buf[1] == 0x81 && buf[2] == 0x9F
}
const (
	// bufSize is the length in bytes (50 MiB) of each staging buffer that
	// Run allocates.
	bufSize = 50 << 20

	// maxGzipSize is not referenced anywhere in the visible source.
	maxGzipSize = 400000
)
//func RunParallel(r *ChanneledBuffer, w io.WriteCloser, level, num int) { | |
// for { | |
// var outputs []*ChanneledBuffer | |
// for i := 0; i < num; i++ { | |
// buf, err := r.GetAll() | |
// if err == io.EOF { | |
// break | |
// } else if err != nil { | |
// log.Panic("level", level, err) | |
// } | |
// | |
// in := newChanneledBuffer() | |
// out := newChanneledBuffer() | |
// outputs = append(outputs, out) | |
// | |
// go Run(in, out, level, 0) | |
// _, err = in.Write(buf) | |
// if err != nil { | |
// log.Panic("level", level, err) | |
// } | |
// in.Close() | |
// } | |
// | |
// for _, out := range outputs { | |
// for { | |
// buf, err := out.GetAll() | |
// if err == io.EOF { | |
// break | |
// } else if err != nil { | |
// log.Panic("level", level, err) | |
// } | |
// _, err = w.Write(buf) | |
// if err != nil { | |
// log.Panic("level", level, err) | |
// } | |
// } | |
// } | |
// if len(outputs) != num { | |
// w.Close() | |
// break | |
// } | |
// } | |
//} | |
func Run(r io.Reader, w io.WriteCloser, level int, group int) { | |
// 1 MB | |
buf := make([]byte, bufSize) | |
prev := make([]uint32, group) | |
current := make([]uint32, group) | |
//prev := make([]string, group) | |
//current := make([]string, group) | |
//m := newPreviousMap(10) | |
g, err := gzip.NewReader(r) | |
if err != nil { | |
log.Panic("level", level, "create gzip : ", err) | |
} | |
defer g.Close() | |
var ( | |
idx int | |
size int | |
) | |
var count int | |
for { | |
n, err := g.Read(buf[size:]) | |
if n == 0 { | |
if err != io.EOF { | |
log.Panic("level", level, "n == 0 but ", err) | |
} | |
if size > 0 { | |
if _, err = w.Write(buf[:size]); err != nil { | |
log.Panic("level", level, "write to writer : ", err) | |
} | |
} | |
if err = w.Close(); err != nil { | |
log.Panic("level", level, "close writer : ", err) | |
} | |
log.Println("level", level, "finish loop") | |
break | |
} | |
if n+size == len(buf) { | |
log.Panic("level", level, "buf size is small") | |
} | |
if err != nil && err != io.EOF { | |
log.Panic("level", level, n, size, err, count) | |
} | |
if len(buf) <= size+n { | |
log.Panic(len(buf), size+n, size, n) | |
} | |
current[idx] = getHash32(buf[size : size+n]) | |
//current[idx] = getHashString(buf[size : size+n]) | |
size += n | |
idx++ | |
if idx == group { | |
if isSameGroup(prev, current) { | |
// skip next layer | |
if group == 1 { | |
if _, err = w.Write(buf[:size]); err != nil { | |
log.Panic("level", level, "write to writer : ", err) | |
} | |
} | |
idx = 0 | |
size = 0 | |
} else { | |
count++ | |
if _, err = w.Write(buf[:size]); err != nil { | |
log.Panic("level", level, "write to writer : ", err) | |
} | |
if level == 2 { | |
log.Println("count", count) | |
} | |
buf = make([]byte, bufSize) | |
prev, current = current, prev | |
idx = 0 | |
size = 0 | |
} | |
} | |
} | |
} | |
// getHash32 returns the CRC-32 (IEEE polynomial) checksum of buf.
func getHash32(buf []byte) uint32 {
	return crc32.ChecksumIEEE(buf)
}
// getHashString returns the SHA-1 digest of buf encoded as standard
// (padded) base64.
func getHashString(buf []byte) string {
	digest := sha1.Sum(buf)
	return base64.StdEncoding.EncodeToString(digest[:])
}
// isSameGroup reports whether prev and current hold the same hashes in the
// same order. Slices of different length are never the same group, so a
// length mismatch returns false instead of indexing out of range.
func isSameGroup(prev, current []uint32) bool {
	if len(prev) != len(current) {
		return false
	}
	for i := range prev {
		if prev[i] != current[i] {
			return false
		}
	}
	return true
}
// isSameGroupString reports whether prev and current hold the same strings
// in the same order. Slices of different length are never the same group,
// so a length mismatch returns false instead of indexing out of range.
func isSameGroupString(prev, current []string) bool {
	if len(prev) != len(current) {
		return false
	}
	for i := range prev {
		if prev[i] != current[i] {
			return false
		}
	}
	return true
}
// ChanneledBuffer is an in-memory pipe that moves byte chunks from a writer
// goroutine to a reader goroutine over a channel.
//
// The current field is accessed without any locking, so Read and GetAll
// should be called from a single goroutine.
type ChanneledBuffer struct {
	current []byte      // unread remainder of the chunk last taken from ch
	ch      chan []byte // queued chunks; closed by Close to mark end of stream
}

// newChanneledBuffer returns an empty ChanneledBuffer whose channel can hold
// one chunk before Write blocks.
func newChanneledBuffer() *ChanneledBuffer {
	return &ChanneledBuffer{ch: make(chan []byte, 1)}
}

// Write queues a copy of buf as one chunk, blocking while the channel is
// full, and returns len(buf) with a nil error.
//
// The data is copied because io.Writer implementations must not retain the
// caller's slice; callers are free to reuse buf as soon as Write returns.
// An empty buf is treated as a caller bug and aborts via log.Panic. Calling
// Write after Close panics, since it sends on a closed channel.
func (b *ChanneledBuffer) Write(buf []byte) (int, error) {
	if len(buf) == 0 {
		log.Panic("write zero buffer")
	}
	chunk := make([]byte, len(buf))
	copy(chunk, buf)
	b.ch <- chunk
	return len(buf), nil
}

// Read copies bytes from the current chunk into buf, first waiting for a
// new chunk when the current one is used up. It never spans two chunks in a
// single call. It returns 0, io.EOF once the buffer has been closed and
// every queued chunk has been consumed.
func (b *ChanneledBuffer) Read(buf []byte) (int, error) {
	if !b.fill() {
		return 0, io.EOF
	}
	n := copy(buf, b.current)
	b.current = b.current[n:]
	return n, nil
}

// GetAll returns the whole unread remainder of the current chunk without
// copying it, first waiting for a new chunk when the current one is used
// up. It returns nil, io.EOF once the buffer has been closed and drained.
func (b *ChanneledBuffer) GetAll() ([]byte, error) {
	if !b.fill() {
		return nil, io.EOF
	}
	chunk := b.current
	b.current = nil
	return chunk, nil
}

// Close marks the end of the stream by closing the channel. It always
// returns nil. Closing twice panics, as closing a closed channel does.
func (b *ChanneledBuffer) Close() error {
	close(b.ch)
	return nil
}

// fill makes sure current holds unread data, blocking on the channel when
// it is empty. It reports false when the channel is closed and drained.
func (b *ChanneledBuffer) fill() bool {
	if len(b.current) > 0 {
		return true
	}
	p, ok := <-b.ch
	if !ok {
		return false
	}
	b.current = p
	return true
}
// UniqueMap counts how often each distinct message has been seen, keyed by
// the base64 form of the message's CRC-32 checksum.
type UniqueMap struct {
	hashmap map[string]int // checksum key -> number of times seen
	h       hash.Hash      // reusable CRC-32 state, reset after every Count
}

// newUniqueMap returns an empty UniqueMap backed by a CRC-32 (IEEE) hash.
func newUniqueMap() *UniqueMap {
	m := new(UniqueMap)
	m.hashmap = map[string]int{}
	m.h = crc32.NewIEEE()
	return m
}

// Count records one occurrence of buf and reports whether buf had been seen
// fewer than ten times before this call. A hash write error aborts via
// log.Panic.
func (u *UniqueMap) Count(buf []byte) bool {
	// Checksum the message, then reset the hash for the next call.
	if _, err := u.h.Write(buf); err != nil {
		log.Panic(err)
	}
	key := base64.StdEncoding.EncodeToString(u.h.Sum(nil))
	u.h.Reset()

	// A key that is not yet present reads as zero prior occurrences.
	seen := u.hashmap[key]
	u.hashmap[key] = seen + 1
	return seen < 10
}
type PreviousMap struct { | |
prev []string | |
max int | |
h hash.Hash | |
} | |
func newPreviousMap(max int) *PreviousMap { | |
return &PreviousMap{ | |
h: crc32.NewIEEE(), | |
max: max, | |
} | |
} | |
func (u *PreviousMap) Count(buf []byte) bool { | |
// calc checksum hash of one message | |
_, err := u.h.Write(buf) | |
if err != nil { | |
log.Panic(err) | |
} | |
sum := u.h.Sum(nil) | |
strsum := base64.StdEncoding.EncodeToString(sum) | |
u.h.Reset() | |
//strsum := base64.StdEncoding.EncodeToString(buf) | |
if len(u.prev) == u.max { | |
u.prev = append(u.prev[1:], strsum) | |
} else { | |
u.prev = append(u.prev, strsum) | |
} | |
return !havePeriod(u.prev) | |
} | |
// havePeriod reports whether base ends with some block of one or more
// elements that is immediately preceded by an identical block, i.e. whether
// the last k elements equal the k elements before them for any k from 1 to
// len(base)/2.
func havePeriod(base []string) bool {
	total := len(base)
	if total < 2 {
		return false
	}
	for period := 1; period <= total/2; period++ {
		repeats := true
		// Walk backwards from the end, comparing each element with the one
		// exactly `period` positions earlier.
		for off := 1; off <= period; off++ {
			if base[total-off-period] != base[total-off] {
				repeats = false
				break
			}
		}
		if repeats {
			return true
		}
	}
	return false
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
$ go run main.go | |
2019/10/21 14:44:31 read unique | |
2019/10/21 14:44:31 first level finish | |
2019/10/21 14:44:31 count 1 | |
2019/10/21 14:44:31 count 2 | |
2019/10/21 14:44:31 get 98304 | |
2019/10/21 14:44:31 chksum : 27181800 | |
2019/10/21 14:44:31 rest : 0 | |
2019/10/21 14:44:31 get 98304 | |
2019/10/21 14:44:31 chksum : 3146956325 | |
2019/10/21 14:44:31 new rune : 83 S | |
2019/10/21 14:44:31 current : S | |
2019/10/21 14:44:31 rest : 2 | |
2019/10/21 14:44:31 get 98304 | |
2019/10/21 14:44:31 chksum : 844742614 | |
2019/10/21 14:44:31 rest : 2 | |
2019/10/21 14:44:31 get 98304 | |
2019/10/21 14:44:31 chksum : 2416082288 | |
2019/10/21 14:44:31 new rune : 69 E | |
2019/10/21 14:44:31 current : SE | |
2019/10/21 14:44:31 rest : 1 | |
2019/10/21 14:44:31 get 98304 | |
2019/10/21 14:44:31 chksum : 966525959 | |
2019/10/21 14:44:31 rest : 1 | |
2019/10/21 14:44:31 get 98304 | |
2019/10/21 14:44:31 chksum : 579621287 | |
2019/10/21 14:44:31 new rune : 67 C | |
2019/10/21 14:44:31 current : SEC | |
2019/10/21 14:44:31 rest : 0 | |
2019/10/21 14:44:31 get 98304 | |
2019/10/21 14:44:31 chksum : 27181800 | |
2019/10/21 14:44:31 rest : 0 | |
2019/10/21 14:44:32 get 98304 | |
2019/10/21 14:44:32 chksum : 3841342476 | |
2019/10/21 14:44:32 new rune : 67 C | |
2019/10/21 14:44:32 current : SECC | |
2019/10/21 14:44:32 rest : 2 | |
2019/10/21 14:44:32 get 98304 | |
2019/10/21 14:44:32 chksum : 844742614 | |
2019/10/21 14:44:32 rest : 2 | |
2019/10/21 14:44:32 get 98304 | |
2019/10/21 14:44:32 chksum : 3668103898 | |
2019/10/21 14:44:32 new rune : 79 O | |
2019/10/21 14:44:32 current : SECCO | |
2019/10/21 14:44:32 rest : 1 | |
2019/10/21 14:44:32 get 98304 | |
2019/10/21 14:44:32 chksum : 966525959 | |
2019/10/21 14:44:32 rest : 1 | |
2019/10/21 14:44:32 get 98304 | |
2019/10/21 14:44:32 chksum : 445702597 | |
2019/10/21 14:44:32 new rune : 78 N | |
2019/10/21 14:44:32 current : SECCON | |
2019/10/21 14:44:32 rest : 0 | |
2019/10/21 14:44:32 get 98304 | |
2019/10/21 14:44:32 chksum : 27181800 | |
2019/10/21 14:44:32 rest : 0 | |
2019/10/21 14:44:33 get 98304 | |
2019/10/21 14:44:33 chksum : 424982357 | |
2019/10/21 14:44:33 new rune : 123 { | |
2019/10/21 14:44:33 current : SECCON{ | |
2019/10/21 14:44:33 rest : 2 | |
2019/10/21 14:44:33 get 98304 | |
2019/10/21 14:44:33 chksum : 844742614 | |
2019/10/21 14:44:33 rest : 2 | |
2019/10/21 14:44:33 get 98304 | |
2019/10/21 14:44:33 chksum : 4186290212 | |
2019/10/21 14:44:33 new rune : 68 D | |
2019/10/21 14:44:33 current : SECCON{D | |
2019/10/21 14:44:33 rest : 1 | |
2019/10/21 14:44:33 get 98304 | |
2019/10/21 14:44:33 chksum : 966525959 | |
2019/10/21 14:44:33 rest : 1 | |
2019/10/21 14:44:33 get 98304 | |
2019/10/21 14:44:33 chksum : 3852859570 | |
2019/10/21 14:44:33 new rune : 51 3 | |
2019/10/21 14:44:33 current : SECCON{D3 | |
2019/10/21 14:44:33 rest : 0 | |
2019/10/21 14:44:33 get 98304 | |
2019/10/21 14:44:33 chksum : 27181800 | |
2019/10/21 14:44:33 rest : 0 | |
2019/10/21 14:44:33 get 98304 | |
2019/10/21 14:44:33 chksum : 1346560736 | |
2019/10/21 14:44:33 new rune : 70 F | |
2019/10/21 14:44:33 current : SECCON{D3F | |
2019/10/21 14:44:33 rest : 2 | |
2019/10/21 14:44:33 get 98304 | |
2019/10/21 14:44:33 chksum : 844742614 | |
2019/10/21 14:44:33 rest : 2 | |
2019/10/21 14:44:34 get 98304 | |
2019/10/21 14:44:34 chksum : 3358212964 | |
2019/10/21 14:44:34 new rune : 76 L | |
2019/10/21 14:44:34 current : SECCON{D3FL | |
2019/10/21 14:44:34 rest : 1 | |
2019/10/21 14:44:34 get 98304 | |
2019/10/21 14:44:34 chksum : 966525959 | |
2019/10/21 14:44:34 rest : 1 | |
2019/10/21 14:44:34 get 98304 | |
2019/10/21 14:44:34 chksum : 1213316851 | |
2019/10/21 14:44:34 new rune : 97 a | |
2019/10/21 14:44:34 current : SECCON{D3FLa | |
2019/10/21 14:44:34 rest : 0 | |
2019/10/21 14:44:34 get 98304 | |
2019/10/21 14:44:34 chksum : 27181800 | |
2019/10/21 14:44:34 rest : 0 | |
2019/10/21 14:44:34 get 98304 | |
2019/10/21 14:44:34 chksum : 3198682266 | |
2019/10/21 14:44:34 new rune : 84 T | |
2019/10/21 14:44:34 current : SECCON{D3FLaT | |
2019/10/21 14:44:34 rest : 2 | |
2019/10/21 14:44:34 get 98304 | |
2019/10/21 14:44:34 chksum : 844742614 | |
2019/10/21 14:44:34 rest : 2 | |
2019/10/21 14:44:35 get 98304 | |
2019/10/21 14:44:35 chksum : 1351333787 | |
2019/10/21 14:44:35 new rune : 51 3 | |
2019/10/21 14:44:35 current : SECCON{D3FLaT3 | |
2019/10/21 14:44:35 rest : 1 | |
2019/10/21 14:44:35 get 98304 | |
2019/10/21 14:44:35 chksum : 966525959 | |
2019/10/21 14:44:35 rest : 1 | |
2019/10/21 14:44:35 get 98304 | |
2019/10/21 14:44:35 chksum : 3674589378 | |
2019/10/21 14:44:35 new rune : 95 _ | |
2019/10/21 14:44:35 current : SECCON{D3FLaT3_ | |
2019/10/21 14:44:35 rest : 0 | |
2019/10/21 14:44:35 get 98304 | |
2019/10/21 14:44:35 chksum : 27181800 | |
2019/10/21 14:44:35 rest : 0 | |
2019/10/21 14:44:35 get 98304 | |
2019/10/21 14:44:35 chksum : 470335582 | |
2019/10/21 14:44:35 new rune : 49 1 | |
2019/10/21 14:44:35 current : SECCON{D3FLaT3_1 | |
2019/10/21 14:44:35 rest : 2 | |
2019/10/21 14:44:35 get 98304 | |
2019/10/21 14:44:35 chksum : 844742614 | |
2019/10/21 14:44:35 rest : 2 | |
2019/10/21 14:44:35 get 98304 | |
2019/10/21 14:44:35 chksum : 3485074450 | |
2019/10/21 14:44:35 new rune : 115 s | |
2019/10/21 14:44:35 current : SECCON{D3FLaT3_1s | |
2019/10/21 14:44:35 rest : 1 | |
2019/10/21 14:44:35 get 98304 | |
2019/10/21 14:44:35 chksum : 966525959 | |
2019/10/21 14:44:35 rest : 1 | |
2019/10/21 14:44:35 get 98304 | |
2019/10/21 14:44:35 chksum : 3350359246 | |
2019/10/21 14:44:35 new rune : 95 _ | |
2019/10/21 14:44:35 current : SECCON{D3FLaT3_1s_ | |
2019/10/21 14:44:35 rest : 0 | |
2019/10/21 14:44:35 get 98304 | |
2019/10/21 14:44:35 chksum : 27181800 | |
2019/10/21 14:44:35 rest : 0 | |
2019/10/21 14:44:36 count 3 | |
2019/10/21 14:44:36 count 4 | |
2019/10/21 14:44:36 count 5 | |
2019/10/21 14:44:36 count 6 | |
2019/10/21 14:44:36 count 7 | |
2019/10/21 14:44:36 count 8 | |
2019/10/21 14:44:36 count 9 | |
2019/10/21 14:44:36 count 10 | |
2019/10/21 14:44:36 count 11 | |
2019/10/21 14:44:36 count 12 | |
2019/10/21 14:44:36 count 13 | |
2019/10/21 14:44:36 count 14 | |
2019/10/21 14:44:36 count 15 | |
2019/10/21 14:44:36 count 16 | |
2019/10/21 14:44:36 count 17 | |
2019/10/21 14:44:36 count 18 | |
2019/10/21 14:44:36 count 19 | |
2019/10/21 14:44:36 count 20 | |
2019/10/21 14:44:36 count 21 | |
2019/10/21 14:44:36 count 22 | |
2019/10/21 14:44:36 count 23 | |
2019/10/21 14:44:36 count 24 | |
2019/10/21 14:44:36 count 25 | |
2019/10/21 14:44:36 count 26 | |
2019/10/21 14:44:36 count 27 | |
2019/10/21 14:44:36 count 28 | |
2019/10/21 14:44:36 count 29 | |
2019/10/21 14:44:36 count 30 | |
2019/10/21 14:44:36 count 31 | |
2019/10/21 14:44:36 count 32 | |
2019/10/21 14:44:36 count 33 | |
2019/10/21 14:44:36 count 34 | |
2019/10/21 14:44:36 count 35 | |
2019/10/21 14:44:36 count 36 | |
2019/10/21 14:44:36 count 37 | |
2019/10/21 14:44:36 count 38 | |
2019/10/21 14:44:36 count 39 | |
2019/10/21 14:44:36 count 40 | |
2019/10/21 14:44:36 count 41 | |
2019/10/21 14:44:36 count 42 | |
2019/10/21 14:44:36 count 43 | |
2019/10/21 14:44:36 count 44 | |
2019/10/21 14:44:36 count 45 | |
2019/10/21 14:44:36 count 46 | |
2019/10/21 14:44:36 count 47 | |
2019/10/21 14:44:36 count 48 | |
2019/10/21 14:44:36 count 49 | |
2019/10/21 14:44:36 level3 25699 32768 flate: corrupt input before offset 4788729 20 | |
panic: level3 25699 32768 flate: corrupt input before offset 4788729 20 | |
goroutine 22 [running]: | |
log.Panic(0xc0034baf40, 0x6, 0x6) | |
/usr/local/Cellar/go/1.12.7/libexec/src/log/log.go:333 +0xac | |
main.Run(0x1481720, 0xc0000c8440, 0x1484180, 0xc0000c8480, 0x3, 0x3) | |
/Users/kawasin73/work/sample-go/gzipdecode/main.go:238 +0x91a | |
created by main.main | |
/Users/kawasin73/work/sample-go/gzipdecode/main.go:50 +0x243 | |
exit status 2 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment