Skip to content

Instantly share code, notes, and snippets.

@linuskmr
Last active July 29, 2021 08:47
Show Gist options
  • Save linuskmr/46f204eb91c22d0698b113e6c7d1db19 to your computer and use it in GitHub Desktop.
Save linuskmr/46f204eb91c22d0698b113e6c7d1db19 to your computer and use it in GitHub Desktop.
A pure-Python implementation of Base64 encoding, written for learning purposes. In real code, prefer the `base64` module from the standard library.
import string
from itertools import zip_longest
from typing import Any, Iterable, Iterator, List, Optional, Tuple, TypeVar, Union
T = TypeVar('T')


def grouper(
    iterable: Iterable[T],
    chunk_size: int,
    fill_value: Optional[T] = None,
) -> Iterator[Tuple[Optional[T], ...]]:
    """Collect data into fixed-length chunks.

    Adapted from the itertools recipes:
    https://docs.python.org/3/library/itertools.html#itertools-recipes

    Args:
        iterable: Source of the items to group.
        chunk_size: Number of items in every chunk.
        fill_value: Value used to pad the last chunk when the input runs
            out before the chunk is complete.

    Returns:
        An iterator of tuples (not lists), each of length ``chunk_size``.
    """
    # The *same* iterator object is repeated chunk_size times, so zip_longest
    # pulls consecutive items into the slots of each tuple.
    shared_iterator = iter(iterable)
    slots = [shared_iterator] * chunk_size
    return zip_longest(*slots, fillvalue=fill_value)
def _triple_to_base64_values(byte_triple: bytes) -> bytes:
base64_values = bytearray(4)
base64_values[0] = (byte_triple[0] & 0b1111_1100) >> 2
base64_values[1] = ((byte_triple[0] & 0b0000_0011) << 4) | ((byte_triple[1] & 0b1111_0000) >> 4)
base64_values[2] = ((byte_triple[1] & 0b0000_1111) << 2) | ((byte_triple[2] & 0b1100_0000) >> 6)
base64_values[3] = byte_triple[2] & 0b0011_1111
return bytes(base64_values)
def _base64_byte_to_ascii(byte: bytearray) -> bytearray:
# Approx. 15 times faster than if else statements for each byte
trans = bytes.maketrans(
bytes(range(64)),
bytes(string.ascii_uppercase + string.ascii_lowercase + string.digits + '+/', encoding='ascii')
)
return byte.translate(trans)
def _to_base64(input_bytes: bytes) -> bytes:
    """Encode ``input_bytes`` as padded base64.

    Args:
        input_bytes: The raw bytes to encode.

    Returns:
        The base64 text as ASCII bytes. Its length is a multiple of four; the
        last one or two characters are ``=`` when the input length is not a
        multiple of three. Empty input gives ``b''``.
    """
    # Number of zero bytes grouper() appends to complete the final triple.
    # Each of them must show up as one '=' at the end of the output:
    # len % 3 == 0 -> 0, len % 3 == 1 -> 2, len % 3 == 2 -> 1.
    fill_chars = -len(input_bytes) % 3

    # Split the input into chunks of three bytes, zero-padding the last one.
    byte_triple_chunks = grouper(input_bytes, chunk_size=3, fill_value=0)

    # Extract four base64 values from each triple and collect them in order.
    base64_values = bytearray()
    for triple in byte_triple_chunks:
        base64_values += _triple_to_base64_values(bytes(triple))

    # Map the base64 values to their ASCII characters.
    base64_chars = _base64_byte_to_ascii(base64_values)

    # The characters produced purely by padding bytes are replaced with '='.
    if fill_chars:
        base64_chars[-fill_chars:] = b'=' * fill_chars
    return bytes(base64_chars)
def to_base64(data: Any) -> bytes:
    """Encode ``data`` as base64.

    Args:
        data: The value to encode. Bytes-like objects (``bytes``,
            ``bytearray``, ``memoryview``, and their subclasses) are encoded
            as-is. Anything else is converted with ``str()`` and encoded as
            UTF-8 first.

    Returns:
        The base64 encoding as ASCII bytes.
    """
    # isinstance() rather than an exact type check: otherwise a bytearray or
    # memoryview would be turned into its repr text and that text encoded.
    if isinstance(data, (bytes, bytearray, memoryview)):
        return _to_base64(bytes(data))
    # str(data) returns a plain str unchanged, so one branch covers both text
    # and arbitrary objects.
    return _to_base64(str(data).encode('utf-8'))
# Demo: encode a sample string and print the result (b'SGVsbG8gV29ybGQ=').
print(to_base64(data='Hello World'))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment