Last active
July 29, 2021 08:47
-
-
Save linuskmr/46f204eb91c22d0698b113e6c7d1db19 to your computer and use it in GitHub Desktop.
Implementation of base64 in Python. Prefer using base64 from the standard library.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import string
from itertools import zip_longest
from typing import TypeVar, Iterable, Optional, Iterator, List, Tuple, Union, Any
T = TypeVar('T')


def grouper(iterable: Iterable[T], chunk_size: int, fill_value: Optional[T] = None) -> Iterator[Tuple[Optional[T], ...]]:
    """Collect data into fixed-length chunks or blocks.

    From https://docs.python.org/3/library/itertools.html#itertools-recipes

    Args:
        iterable: Source of the items to group.
        chunk_size: Number of items per chunk.
        fill_value: Value used to pad the last chunk when the input length
            is not a multiple of ``chunk_size``.

    Returns:
        A lazy iterator of tuples (not lists), each exactly ``chunk_size``
        long. A ``chunk_size`` below 1 produces an empty iterator.
    """
    # The *same* iterator object is repeated chunk_size times, so zip_longest
    # pulls consecutive items from it to fill each position of one tuple.
    args = [iter(iterable)] * chunk_size
    return zip_longest(*args, fillvalue=fill_value)
def _triple_to_base64_values(byte_triple: bytes) -> bytes: | |
base64_values = bytearray(4) | |
base64_values[0] = (byte_triple[0] & 0b1111_1100) >> 2 | |
base64_values[1] = ((byte_triple[0] & 0b0000_0011) << 4) | ((byte_triple[1] & 0b1111_0000) >> 4) | |
base64_values[2] = ((byte_triple[1] & 0b0000_1111) << 2) | ((byte_triple[2] & 0b1100_0000) >> 6) | |
base64_values[3] = byte_triple[2] & 0b0011_1111 | |
return bytes(base64_values) | |
def _base64_byte_to_ascii(byte: bytearray) -> bytearray: | |
# Approx. 15 times faster than if else statements for each byte | |
trans = bytes.maketrans( | |
bytes(range(64)), | |
bytes(string.ascii_uppercase + string.ascii_lowercase + string.digits + '+/', encoding='ascii') | |
) | |
return byte.translate(trans) | |
def _to_base64(input_bytes: bytes) -> bytes:
    """Encode ``input_bytes`` as padded base64 using the standard alphabet.

    Args:
        input_bytes: Raw bytes to encode.

    Returns:
        The base64 text as ASCII bytes. Its length is always a multiple of
        four; an empty input gives an empty result.
    """
    # Number of zero bytes grouper() has to add to complete the last triple
    # (0, 1 or 2). Exactly that many trailing output characters encode only
    # filler bits and must be replaced by '='.
    fill_chars = -len(input_bytes) % 3

    # Split byte array into chunks of size 3, zero-filling the last one
    byte_triple_chunks = grouper(input_bytes, chunk_size=3, fill_value=0)

    # Extract 4 base64 values from each byte triple and concatenate them
    base64_values = bytearray()
    for triple in byte_triple_chunks:
        base64_values += _triple_to_base64_values(bytes(triple))

    # Map base64 values to ascii chars
    base64_chars = _base64_byte_to_ascii(base64_values)

    # Set padded base64 values to `=`
    if fill_chars:
        base64_chars[-fill_chars:] = b'=' * fill_chars
    return bytes(base64_chars)
def to_base64(data: Any) -> bytes:
    """Encode ``data`` as base64.

    Args:
        data: The value to encode.
            * ``bytes``, ``bytearray`` and ``memoryview`` are encoded as-is.
            * ``str`` is encoded as UTF-8 first.
            * Anything else is converted with ``str()`` and then encoded as
              UTF-8.

    Returns:
        The base64 encoding as ASCII bytes.
    """
    # isinstance (rather than an exact type check) so that bytes-like objects
    # and subclasses are encoded by content, not by their str() representation.
    if isinstance(data, (bytes, bytearray, memoryview)):
        return _to_base64(bytes(data))
    if isinstance(data, str):
        return _to_base64(bytes(data, encoding='utf-8'))
    return _to_base64(bytes(str(data), encoding='utf-8'))
# Demo output; guarded so that importing this module does not print anything.
if __name__ == '__main__':
    print(to_base64('Hello World'))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment