Skip to content

Instantly share code, notes, and snippets.

@onecrayon
Created January 29, 2024 23:06
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save onecrayon/402f45446ae78ee7ba00d4cc36ea9a93 to your computer and use it in GitHub Desktop.
Save onecrayon/402f45446ae78ee7ba00d4cc36ea9a93 to your computer and use it in GitHub Desktop.
BaseEmoji CLI
#!/usr/bin/env python3
"""BaseEmoji CLI
Inspired by this lovely little gem: https://github.com/amoallim15/base-emoji
But using something a little closer to the base64 algorithm (effectively base128, using emojis).
Comparison of a 13 character/13 byte string in two encodings:
$ base-64 <<< 'Hello, world!'
SGVsbG8sIHdvcmxkIQo=
20 characters in 20 bytes.
$ base-emoji <<< 'Hello, world!'
😤😙😭🙆🤣😼🤘😠😻🤛🤮😦🤣😐🙂😊
16 characters in 64 bytes. What a savings!
This script works by breaking up normal 8-bit bytes into 7-bit bytes and translating those into
emoji via a lookup table (very similar to the base64 algorithm, except using base128). Can be
used for both text and arbitrary binary data.
"""
import argparse
import os
import sys
# LEFTOVER_MASK[n] keeps only the lowest n bits of an integer (n = 0..7),
# i.e. (0b0, 0b1, 0b11, ..., 0b1111111).
LEFTOVER_MASK = tuple((1 << bit_count) - 1 for bit_count in range(8))
# Maps the number of zero bits padded onto the final 7-bit group (1-6) to the
# emoji appended to the output to record that amount.
PADDING_TO_EMOJI = dict(
    enumerate(
        map(chr, (0x1F447, 0x1F448, 0x1F449, 0x1F44A, 0x1F44D, 0x1F44E)),
        start=1,
    )
)
# Reverse lookup: padding emoji -> number of padded bits.
EMOJI_TO_PADDING = dict(zip(PADDING_TO_EMOJI.values(), PADDING_TO_EMOJI.keys()))
def to_emoji(value: int) -> str:
    """Return the emoji that represents a single 7-bit value.

    The 128 possible values are spread over four Unicode ranges:

    * 0 - 79: U+1F600 - U+1F64F
    * 80 - 111: U+1F910 - U+1F92F
    * 112 - 122: U+1F970 - U+1F97A
    * 123 - 127: U+1F9D0 - U+1F9D4

    Args:
        value: Integer in ``range(0, 128)``.

    Returns:
        A one-character string holding the matching emoji.

    Raises:
        ValueError: If ``value`` is outside ``range(0, 128)``.
    """
    # Reject out-of-range input up front; otherwise it would be mapped onto an
    # arbitrary code point that can never be decoded again.
    if not 0 <= value <= 127:
        raise ValueError(f"value must be in range(0, 128), got {value!r}")
    if value <= 79:
        return chr(0x1F600 + value)
    if value <= 111:
        return chr(0x1F910 + (value - 80))
    if value <= 122:
        return chr(0x1F970 + (value - 112))
    return chr(0x1F9D0 + (value - 123))
def to_bits(char: str) -> int:
    """Return the 7-bit value represented by a single data emoji.

    The four accepted Unicode ranges and the values they map to are:

    * U+1F600 - U+1F64F: 0 - 79
    * U+1F910 - U+1F92F: 80 - 111
    * U+1F970 - U+1F97A: 112 - 122
    * U+1F9D0 - U+1F9D4: 123 - 127

    Args:
        char: A one-character string.

    Returns:
        Integer in ``range(0, 128)``.

    Raises:
        ValueError: If ``char`` is not inside one of the four ranges above
            (for example whitespace or any other non-data character).
    """
    code_point = ord(char)
    if 0x1F600 <= code_point <= 0x1F64F:
        return code_point - 0x1F600
    if 0x1F910 <= code_point <= 0x1F92F:
        return (code_point - 0x1F910) + 80
    if 0x1F970 <= code_point <= 0x1F97A:
        return (code_point - 0x1F970) + 112
    if 0x1F9D0 <= code_point <= 0x1F9D4:
        return (code_point - 0x1F9D0) + 123
    # Anything else would otherwise turn into a negative or oversized number
    # and silently corrupt the decoded output.
    raise ValueError(f"{char!r} (U+{code_point:04X}) is not a BaseEmoji data character")
def encode_base_emoji(data: str | bytes) -> bytes:
    """Encode text or binary data as BaseEmoji.

    The input is treated as one continuous bit stream that is cut into 7-bit
    groups; each group becomes one emoji. When the stream does not divide
    evenly, the last group is right-padded with zero bits and a padding emoji
    recording the number of padded bits is appended.

    Args:
        data: The data to encode; ``str`` input is encoded as UTF-8 first.

    Returns:
        The BaseEmoji text, encoded as UTF-8 bytes.
    """
    payload = data.encode("utf-8") if isinstance(data, str) else data

    groups = []
    pending = 0  # bits read from the input but not yet emitted
    pending_width = 0  # how many bits ``pending`` currently holds
    for octet in payload:
        pending = (pending << 8) | octet
        pending_width += 8
        # Peel complete 7-bit groups off the top of the accumulator
        while pending_width >= 7:
            pending_width -= 7
            groups.append(pending >> pending_width)
            pending &= (1 << pending_width) - 1

    # A partial trailing group is shifted left so the padding is in its low bits
    pad_width = 0
    if pending_width > 0:
        pad_width = 7 - pending_width
        groups.append(pending << pad_width)

    symbols = [to_emoji(group) for group in groups]
    # The padding emoji tells the decoder how many low bits to discard
    if pad_width:
        symbols.append(PADDING_TO_EMOJI[pad_width])
    return "".join(symbols).encode("utf-8")
def decode_base_emoji(data: str | bytes) -> bytes:
    """Decode BaseEmoji text back into the bytes it was encoded from.

    Args:
        data: BaseEmoji text, either as ``str`` or as UTF-8 encoded ``bytes``.
            Whitespace (such as the trailing newline added by ``echo`` or a
            shell here-string) is ignored.

    Returns:
        The decoded bytes; ``b""`` when ``data`` contains no emoji.

    Raises:
        ValueError: If the input consists of a padding emoji only, or if the
            padding emoji does not match the amount of data preceding it.
    """
    if isinstance(data, bytes):
        data = data.decode("utf-8")
    # Whitespace never carries data; drop it so piped or line-wrapped input decodes
    data = "".join(data.split())
    if not data:
        return b""
    # Check for a padding suffix
    if data[-1] in EMOJI_TO_PADDING:
        if len(data) < 2:
            raise ValueError("padding emoji must follow at least one data emoji")
        final_bits_padding = EMOJI_TO_PADDING[data[-1]]
        # The emoji before the padding marker holds the last real bits in its
        # high end; shift the zero padding away
        final_bits = to_bits(data[-2]) >> final_bits_padding
        final_bits_length = 7 - final_bits_padding
        data = data[:-2]
    else:
        final_bits = 0
        final_bits_length = 0
    # Convert emojis into their 7-bit equivalents, and recombine into 8-bit bytes
    leftover_bits = 0
    leftover_bits_length = 0
    data_bytes = bytearray()
    for char in data:
        value = to_bits(char)
        if leftover_bits_length == 0:
            # Nothing carried over: 7 bits alone cannot complete a byte yet
            leftover_bits_length = 7
            leftover_bits = value
            continue
        # Top up the carried bits to a full byte with the high bits of this value
        shift_amount = 8 - leftover_bits_length
        leftover_bits_length = 7 - shift_amount
        data_bytes.append((leftover_bits << shift_amount) + (value >> leftover_bits_length))
        leftover_bits = value & LEFTOVER_MASK[leftover_bits_length]
    # Add our final bits
    if final_bits_length:
        # Carried bits plus the unpadded final bits must form exactly one byte;
        # anything else means the padding marker does not belong to this data
        if leftover_bits_length + final_bits_length != 8:
            raise ValueError("padding emoji does not match the length of the encoded data")
        data_bytes.append((leftover_bits << final_bits_length) + final_bits)
    # And return our bytes object
    return bytes(data_bytes)
# Command-line entry point: encode (default) or decode the positional argument
# or piped stdin, and write the raw result to stdout.
if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        prog="BaseEmoji",
        description="Encodes and decodes arbitrary data to BaseEmoji. Like base64, but friendly!",
    )
    parser.add_argument(
        "data",
        nargs="?",
        default=None,
        help="The string or bytes you wish to encode to or decode from BaseEmoji (or omit and pass via stdin).",
    )
    group = parser.add_mutually_exclusive_group()
    group.add_argument("-e", "--encode", action="store_true")
    group.add_argument("-d", "--decode", action="store_true")
    args = parser.parse_args()

    # Without a positional argument, read piped stdin as raw bytes so that
    # arbitrary binary data can be encoded
    if args.data is None and not sys.stdin.isatty():
        data = sys.stdin.buffer.read()
    else:
        data = args.data
    if not data:
        parser.error("data is required as positional argument or stdin")

    if args.decode:
        try:
            output = decode_base_emoji(data)
        except (ValueError, IndexError) as exc:
            # Malformed input is a usage problem; report it without a traceback.
            # parser.error() exits the process.
            parser.error(f"unable to decode input as BaseEmoji: {exc}")
    else:
        output = encode_base_emoji(data)

    # Write through the binary layer of the existing stdout rather than
    # reopening (and subsequently closing) file descriptor 1
    sys.stdout.buffer.write(output)
    sys.stdout.buffer.flush()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment