Last active
October 14, 2022 10:05
-
-
Save jeffguorg/0705f7157a3d4cb13db4c2643b4a2ffd to your computer and use it in GitHub Desktop.
baseX encoding - 演示用任意字符集合编码数据
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package baseX | |
import ( | |
"errors" | |
"math" | |
) | |
// Encoder converts between raw bytes and runes drawn from a caller-supplied
// alphabet whose size is a power of two. Values are created with New; the
// zero value has no alphabet and must not be used.
type Encoder struct {
	// encodeMap is the alphabet: the rune at index i represents the value i.
	encodeMap []rune
	// decodeMap is the inverse of encodeMap (rune -> value).
	decodeMap map[rune]int
	// padding is the rune used to fill an incomplete trailing group.
	// The value -1 disables padding altogether.
	padding rune
	// n is the number of runes in one full group.
	n int
	// d is the number of bytes in one full group; every rune therefore
	// carries 8*d/n bits.
	d int
}
// Sentinel errors returned by New and Encoder.Decode.
var (
	// ErrIsNotPowerOf2 is returned by New when the alphabet size is not a
	// power of two.
	ErrIsNotPowerOf2 = errors.New("length of encode map is not power of 2")
	// ErrTooShort is returned by New when the alphabet has fewer than 2 runes.
	ErrTooShort = errors.New("length of encode map is too short. need 2 unicode characters at least")
	// ErrTooLong is returned by New when the alphabet has more than 128 runes.
	ErrTooLong = errors.New("length of encode map is too long. need 128 unicode characters at most")
	// ErrPaddingInMap is returned by New when the padding rune is also part
	// of the alphabet.
	ErrPaddingInMap = errors.New("padding rune should not appear in encodeMap")
	// ErrCorrupted is returned by Decode when the input cannot have been
	// produced by the encoder.
	ErrCorrupted = errors.New("encoded content is corrupted")
)
func New(encodeMap []rune, padding rune) (Encoder, error) { | |
if len(encodeMap) < 2 { | |
return Encoder{}, ErrTooShort | |
} | |
if len(encodeMap) > 128 { | |
return Encoder{}, ErrTooLong | |
} | |
l := math.Log2(float64(len(encodeMap))) | |
if l != math.Floor(l) { | |
return Encoder{}, ErrIsNotPowerOf2 | |
} | |
n := 8 | |
d := int(l) | |
for d%2 == 0 { | |
n /= 2 | |
d /= 2 | |
} | |
encoder := Encoder{ | |
encodeMap: encodeMap, | |
decodeMap: make(map[rune]int), | |
padding: padding, | |
n: n, | |
d: d, | |
} | |
for i, r := range encodeMap { | |
encoder.decodeMap[r] = i | |
} | |
if _, ok := encoder.decodeMap[padding]; ok { | |
return Encoder{}, ErrPaddingInMap | |
} | |
return encoder, nil | |
} | |
func (encoder Encoder) Encode(bytes []byte) (result []rune) { | |
length := int(math.Ceil(float64(len(bytes))/3)) * 3 | |
for rangeStart, encodeLength := 0, 8*encoder.d/encoder.n; rangeStart < 8*length; rangeStart += encodeLength { | |
runeIdx := 0 | |
encoded := false | |
for encodeStart := 0; encodeStart < encodeLength; encodeStart += 1 { | |
cIdx := (rangeStart + encodeStart) / 8 | |
cOffset := 8 - (rangeStart+encodeStart)%8 - 1 | |
if cIdx < len(bytes) { | |
encoded = true | |
c := bytes[cIdx] | |
digit := (int(c) >> cOffset) & 1 | |
runeIdx = (runeIdx << 1) | digit | |
} else if encoded { | |
runeIdx <<= 1 | |
} | |
} | |
if !encoded { | |
if encoder.padding != -1 { | |
result = append(result, encoder.padding) | |
} | |
} else { | |
result = append(result, encoder.encodeMap[runeIdx]) | |
} | |
} | |
return | |
} | |
func (encoder Encoder) Decode(encoded []rune) (result []byte, err error) { | |
if encoder.padding != -1 && len(encoded)%encoder.n != 0 { | |
return nil, ErrCorrupted | |
} | |
length := len(encoded) * encoder.d / encoder.n | |
padLen := 0 | |
if encoder.padding != -1 { | |
for _, r := range encoded[len(encoded)-3:] { | |
if r == encoder.padding { | |
padLen += 1 | |
} | |
} | |
} | |
rIdx := make([]int, len(encoded)-padLen) | |
for idx, r := range encoded[:len(encoded)-padLen] { | |
if i, ok := encoder.decodeMap[r]; ok { | |
rIdx[idx] = i | |
} else { | |
return nil, ErrCorrupted | |
} | |
} | |
result = make([]byte, len(encoded)*encoder.d/encoder.n-padLen) | |
cycle := encoder.d * 8 / encoder.n | |
for idx := 0; idx < (length-padLen)*8; idx++ { | |
rCycleOffset := idx / cycle | |
rBitOffset := idx % cycle | |
dCycleOffset := idx / 8 | |
dBitOffset := idx % 8 | |
bit := 1 & (rIdx[rCycleOffset] >> (cycle - 1 - rBitOffset)) | |
result[dCycleOffset] |= byte(bit) << (7 - dBitOffset) | |
} | |
return | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package baseX | |
import ( | |
"bytes" | |
"encoding/base64" | |
"fmt" | |
"log" | |
"testing" | |
) | |
func TestEncoder(t *testing.T) { | |
for _, s := range []string{"123", "Hello, World", "1", "11"} { | |
base64Encoder, err := New([]rune("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"), '=') | |
if err != nil { | |
t.Fatal(err) | |
} | |
t.Run(fmt.Sprintf("base64 std padding: %v", s), func(t *testing.T) { | |
result := string(base64Encoder.Encode([]byte(s))) | |
if b64Result := base64.StdEncoding.EncodeToString([]byte(s)); result != b64Result { | |
t.Fatal("result not matched: ", result, b64Result) | |
} | |
log.Println(result) | |
decoded, err := base64Encoder.Decode([]rune(result)) | |
if err != nil { | |
t.Fatal("failed to decode: ", err) | |
} | |
if bytes.Compare(decoded, []byte(s)) != 0 { | |
t.Fatalf("decoded is not equal to original: %v vs %v", decoded, []byte(s)) | |
} | |
}) | |
} | |
for _, s := range []string{"123", "Hello, World", "1", "11"} { | |
base64Encoder, err := New([]rune("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"), -1) | |
if err != nil { | |
t.Fatal(err) | |
} | |
t.Run(fmt.Sprintf("base64 no padding: %v", s), func(t *testing.T) { | |
result := string(base64Encoder.Encode([]byte(s))) | |
if b64Result := base64.RawStdEncoding.EncodeToString([]byte(s)); result != b64Result { | |
t.Fatal("result not matched: ", result, b64Result) | |
} | |
log.Println(result) | |
decoded, err := base64Encoder.Decode([]rune(result)) | |
if err != nil { | |
t.Fatal("failed to decode: ", err) | |
} | |
if bytes.Compare(decoded, []byte(s)) != 0 { | |
t.Fatalf("decoded is not equal to original: %v vs %v", decoded, []byte(s)) | |
} | |
}) | |
} | |
for _, encodeMap := range []string{"喵呜猫咪", "阿巴", "阿比巴布"} { | |
for _, s := range []string{"123", "Hello, World"} { | |
mewEncoder, err := New([]rune(encodeMap), '!') | |
if err != nil { | |
t.Fatal(err) | |
} | |
t.Run(fmt.Sprintf("%v no padding: %v", encodeMap, s), func(t *testing.T) { | |
result := string(mewEncoder.Encode([]byte(s))) | |
log.Println(result) | |
decoded, err := mewEncoder.Decode([]rune(result)) | |
if err != nil { | |
t.Fatal("failed to decode: ", err) | |
} | |
if bytes.Compare(decoded, []byte(s)) != 0 { | |
t.Fatalf("decoded is not equal to original: %v vs %v", decoded, []byte(s)) | |
} | |
}) | |
} | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
=== RUN TestEncoder | |
=== RUN TestEncoder/base64_std_padding:_123 | |
2020/11/19 23:02:11 MTIz | |
=== RUN TestEncoder/base64_std_padding:_Hello,_World | |
2020/11/19 23:02:11 SGVsbG8sIFdvcmxk | |
=== RUN TestEncoder/base64_std_padding:_1 | |
2020/11/19 23:02:11 MQ== | |
=== RUN TestEncoder/base64_std_padding:_11 | |
2020/11/19 23:02:11 MTE= | |
=== RUN TestEncoder/base64_no_padding:_123 | |
2020/11/19 23:02:11 MTIz | |
=== RUN TestEncoder/base64_no_padding:_Hello,_World | |
2020/11/19 23:02:11 SGVsbG8sIFdvcmxk | |
=== RUN TestEncoder/base64_no_padding:_1 | |
2020/11/19 23:02:11 MQ | |
=== RUN TestEncoder/base64_no_padding:_11 | |
2020/11/19 23:02:11 MTE | |
=== RUN TestEncoder/喵呜猫咪_no_padding:_123 | |
2020/11/19 23:02:11 喵咪喵呜喵咪喵猫喵咪喵咪 | |
=== RUN TestEncoder/喵呜猫咪_no_padding:_Hello,_World | |
2020/11/19 23:02:11 呜喵猫喵呜猫呜呜呜猫咪喵呜猫咪喵呜猫咪咪喵猫咪喵喵猫喵喵呜呜呜咪呜猫咪咪呜咪喵猫呜猫咪喵呜猫呜喵 | |
=== RUN TestEncoder/阿巴_no_padding:_123 | |
2020/11/19 23:02:11 阿阿巴巴阿阿阿巴阿阿巴巴阿阿巴阿阿阿巴巴阿阿巴巴 | |
=== RUN TestEncoder/阿巴_no_padding:_Hello,_World | |
2020/11/19 23:02:11 阿巴阿阿巴阿阿阿阿巴巴阿阿巴阿巴阿巴巴阿巴巴阿阿阿巴巴阿巴巴阿阿阿巴巴阿巴巴巴巴阿阿巴阿巴巴阿阿阿阿巴阿阿阿阿阿阿巴阿巴阿巴巴巴阿巴巴阿巴巴巴巴阿巴巴巴阿阿巴阿阿巴巴阿巴巴阿阿阿巴巴阿阿巴阿阿 | |
=== RUN TestEncoder/阿比巴布_no_padding:_123 | |
2020/11/19 23:02:11 阿布阿比阿布阿巴阿布阿布 | |
=== RUN TestEncoder/阿比巴布_no_padding:_Hello,_World | |
2020/11/19 23:02:11 比阿巴阿比巴比比比巴布阿比巴布阿比巴布布阿巴布阿阿巴阿阿比比比布比巴布布比布阿巴比巴布阿比巴比阿 | |
--- PASS: TestEncoder (0.00s) | |
--- PASS: TestEncoder/base64_std_padding:_123 (0.00s) | |
--- PASS: TestEncoder/base64_std_padding:_Hello,_World (0.00s) | |
--- PASS: TestEncoder/base64_std_padding:_1 (0.00s) | |
--- PASS: TestEncoder/base64_std_padding:_11 (0.00s) | |
--- PASS: TestEncoder/base64_no_padding:_123 (0.00s) | |
--- PASS: TestEncoder/base64_no_padding:_Hello,_World (0.00s) | |
--- PASS: TestEncoder/base64_no_padding:_1 (0.00s) | |
--- PASS: TestEncoder/base64_no_padding:_11 (0.00s) | |
--- PASS: TestEncoder/喵呜猫咪_no_padding:_123 (0.00s) | |
--- PASS: TestEncoder/喵呜猫咪_no_padding:_Hello,_World (0.00s) | |
--- PASS: TestEncoder/阿巴_no_padding:_123 (0.00s) | |
--- PASS: TestEncoder/阿巴_no_padding:_Hello,_World (0.00s) | |
--- PASS: TestEncoder/阿比巴布_no_padding:_123 (0.00s) | |
--- PASS: TestEncoder/阿比巴布_no_padding:_Hello,_World (0.00s) | |
PASS |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment