Created
December 30, 2019 01:32
-
-
Save cmcaine/5bba9f6272d12a84e518ee9a1714e362 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using Base64: base64encode | |
# From Base64 but copied here for readability | |
const BASE64_ENCODE = [UInt8(x) for x in ['A':'Z'; 'a':'z'; '0':'9'; '+'; '/']] | |
encode(x::UInt8) = @inbounds return BASE64_ENCODE[(x & 0x3f) + 1] | |
encodepadding() = UInt8('=') | |
""" | |
Spec: | |
base64 encodes arbitrary bits in a textual format that embeds up to 24 bits in blocks of 4 characters. | |
e.g. to convert 8 bits, you encode the first 6 as one character, then take the remaining 2 bits and treat them as the high bits for character 2 with the others filled with zero. | |
The final two characters are represented with padding ('='). | |
Layout: | |
123456781234567812345678 | |
char1|char2|char3|char4| | |
Algorithm: | |
calculate required length. | |
allocate buffer | |
load each triplet into buffer | |
Could replace all use of unsafe_load and store with normal array indexing. | |
Create a Vector{UInt8} from a string with unsafe_wrap(Vector{UInt8}, str) | |
""" | |
function encode_triple!(src::Ptr{UInt8}, dst::Ptr{UInt8}, n) | |
val(n) = unsafe_load(src, n) | |
enc(v, n) = unsafe_store!(dst, encode(v), n) | |
if n == 3 | |
enc(val(1) >> 2, 1) | |
enc(val(1) << 4 | val(2) >> 4, 2) | |
enc(val(2) << 2 | val(3) >> 6, 3) | |
enc(val(3), 4) | |
elseif n == 2 | |
enc(val(1) >> 2, 1) | |
enc(val(1) << 4 | val(2) >> 4, 2) | |
enc(val(2) << 2, 3) | |
unsafe_store!(dst, encodepadding(), 4) | |
elseif n == 1 | |
enc(val(1) >> 2, 1) | |
enc(val(1) << 4, 2) | |
unsafe_store!(dst, encodepadding(), 3) | |
unsafe_store!(dst, encodepadding(), 4) | |
else | |
error("n must be in 1:3") | |
end | |
end | |
# Save a little time when you know you'll have a full triple (benchmarked) | |
function encode_triple!(src::Ptr{UInt8}, dst::Ptr{UInt8}) | |
val(n) = unsafe_load(src, n) | |
enc(v, n) = unsafe_store!(dst, encode(v), n) | |
enc(val(1) >> 2, 1) | |
enc(val(1) << 4 | val(2) >> 4, 2) | |
enc(val(2) << 2 | val(3) >> 6, 3) | |
enc(val(3), 4) | |
end | |
# Only valid when pointer(src) isa Ptr{UInt8} | |
function b64enc(src) | |
dst = Vector{UInt8}(undef, ceil(Int, sizeof(src) / 3) * 4) | |
src_idx = 1 | |
dst_idx = 1 | |
while src_idx + 2 <= sizeof(src) | |
encode_triple!(pointer(src, src_idx), pointer(dst, dst_idx)) | |
src_idx += 3 | |
dst_idx += 4 | |
end | |
if sizeof(src) >= src_idx | |
encode_triple!(pointer(src, src_idx), pointer(dst, dst_idx), sizeof(src) - src_idx + 1) | |
end | |
return String(dst) | |
end | |
# Tests | |
@assert b64enc("a") == "YQ==" | |
@assert b64enc("aa") == "YWE=" | |
@assert b64enc("aaa") == "YWFh" | |
@assert b64enc("aaa") == "YWFh" | |
@assert b64enc("aaaa") == "YWFhYQ==" | |
const testdata = rand(UInt8, 1000); | |
@assert b64enc(testdata) == base64encode(testdata) | |
# Benchmarks | |
using BenchmarkTools | |
@btime b64enc($(repeat("a", 131072))); | |
@btime base64encode($(repeat("a", 131072))); | |
@btime b64enc($testdata); | |
@btime base64encode($testdata); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Providing a non-branching encode_triple! option for when n is 3 seems to help. Using
@inline
may also help, but could be measurement error.The next potential improvement is aping some other libraries by reinterpreting the byte stream as a stream of 64 bit ints and maybe unrolling the loop some more. https://github.com/aklomp/base64/blob/b8b3c58dd26283c9bef543376d61830914d8df87/lib/arch/generic/64/enc_loop.c
Nice article on this: http://0x80.pl/notesen/2015-12-27-base64-encoding.html
And by the same talented person for SSE: http://0x80.pl/notesen/2016-01-12-sse-base64-encoding.html
Worth noting that Java is very fast and at most packs three bytes into a 32 bit int. http://hg.openjdk.java.net/jdk8/jdk8/jdk/raw-file/687fd7c7986d/src/share/classes/java/util/Base64.java (if that's the relevant source).
So there's probably something else going on.