Skip to content

Instantly share code, notes, and snippets.

@gabrielbarros
Last active December 14, 2024 18:40
Show Gist options
  • Save gabrielbarros/9c3adf98037df04a9918aeb3ba72d145 to your computer and use it in GitHub Desktop.
Save gabrielbarros/9c3adf98037df04a9918aeb3ba72d145 to your computer and use it in GitHub Desktop.
Unicode and byte notation in various places and programming languages

Unicode and byte notation

Letter: A
Code points: U+0041
UTF-8 bytes: 41
UTF-16BE bytes: 00 41

Pound sign: £
Code points: U+00A3
UTF-8 bytes: C2 A3
UTF-16BE bytes: 00 A3

Ball emoji: ⚽
Code points: U+26BD
UTF-8 bytes: E2 9A BD
UTF-16BE bytes: 26 BD

Thumbs up emoji: 👍
Code points: U+1F44D
UTF-8 bytes: F0 9F 91 8D
UTF-16BE bytes: D8 3D DC 4D

Brazil flag emoji: 🇧🇷
Code points: U+1F1E7 U+1F1F7
UTF-8 bytes: F0 9F 87 A7 F0 9F 87 B7
UTF-16BE bytes: D8 3C DD E7 D8 3C DD F7

HTML

<!-- Directly -->
<p>Letter: A</p>
<p>Pound: £</p>
<p>Ball: ⚽</p>
<p>Thumbs up: 👍</p>
<p>Brazil: 🇧🇷</p>

<!-- Code point to code point (base 10) -->
<p>Letter: &#65;</p>
<p>Pound: &#163;</p>
<p>Ball: &#9917;</p>
<p>Thumbs up: &#128077;</p>
<p>Brazil: &#127463;&#127479;</p>

<!-- Code point to code point (base 16) -->
<p>Letter: &#x41;</p>
<p>Pound: &#xA3;</p>
<p>Ball: &#x26BD;</p>
<p>Thumbs up: &#x1F44D;</p>
<p>Brazil: &#x1F1E7;&#x1F1F7;</p>

<!-- Alias (only a few code points) -->
<p>Pound: &pound;</p>

CSS

/* Directly */
#letter::after {
    content: 'A';
}

#pound::after {
    content: '£';
}

#ball::after {
    content: '⚽';
}

#thumbs_up::after {
    content: '👍';
}

#brazil::after {
    content: '🇧🇷';
}

/* Code point to code point */
#letter::after {
    content: '\41';
}

#pound::after {
    content: '\A3';
}

#ball::after {
    content: '\26BD';
}

#thumbs_up::after {
    content: '\1F44D';
}

#brazil::after {
    content: '\1F1E7\1F1F7';
}

URL

# Byte to byte (UTF-8)
https://example.com/?letter=A
https://example.com/?letter=%41 # <-- No need to escape the "A" to "%41"
https://example.com/?pound=%C2%A3
https://example.com/?ball=%E2%9A%BD
https://example.com/?thumbs_up=%F0%9F%91%8D
https://example.com/?brazil=%F0%9F%87%A7%F0%9F%87%B7

PHP

<?php
// Directly
$letter = 'A';
$pound = '£';
$ball = '⚽';
$thumbsUp = '👍';
$brazil = '🇧🇷';

// Byte to byte (UTF-8)
$letter = "\x41"; // Or: chr(0x41);
$pound = "\xC2\xA3"; // Or: chr(0xC2) . chr(0xA3);
$ball = "\xE2\x9A\xBD"; // Or: chr(0xE2) . chr(0x9A) . chr(0xBD);
$thumbsUp = "\xF0\x9F\x91\x8D"; // Or: chr(0xF0) . chr(0x9F) . chr(0x91) ...
$brazil = "\xF0\x9F\x87\xA7\xF0\x9F\x87\xB7"; // Or: chr(0xF0) . chr(0x9F) ...

// Code point to code point
$letter = "\u{0041}"; // Or: mb_chr(0x41);
$pound = "\u{00A3}"; // Or: mb_chr(0xA3);
$ball = "\u{26BD}"; // Or: mb_chr(0x26BD);
$thumbsUp = "\u{1F44D}"; // Or: mb_chr(0x1F44D);
$brazil = "\u{1F1E7}\u{1F1F7}"; // Or: mb_chr(0x1F1E7) . mb_chr(0x1F1F7);

JavaScript

// Directly
let letter = 'A';
let pound = '£';
let ball = '⚽';
let thumbsUp = '👍';
let brazil = '🇧🇷';

// Code unit to code unit (UTF-16; each \uXXXX escape matches one pair of UTF-16BE bytes)
let letter = '\u0041';
let pound = '\u00A3';
let ball = '\u26BD';
let thumbsUp = '\uD83D\uDC4D';
let brazil = '\uD83C\uDDE7\uD83C\uDDF7';

// Byte to byte (UTF-8)
const decoder = new TextDecoder('UTF-8');

let letter = decoder.decode(new Uint8Array([0x41]));
let pound = decoder.decode(new Uint8Array([0xC2, 0xA3]));
let ball = decoder.decode(new Uint8Array([0xE2, 0x9A, 0xBD]));
let thumbsUp = decoder.decode(new Uint8Array([0xF0, 0x9F, 0x91, 0x8D]));
let brazil = decoder.decode(new Uint8Array([0xF0, 0x9F, 0x87, 0xA7, 0xF0, 0x9F, 0x87, 0xB7]));

// Code point to code point
let letter = '\u{0041}';
let pound = '\u{00A3}';
let ball = '\u{26BD}';
let thumbsUp = '\u{1F44D}';
let brazil = '\u{1F1E7}\u{1F1F7}';

Python

# Directly
letter = 'A'
pound = '£'
ball = '⚽'
thumbs_up = '👍'
brazil = '🇧🇷'

# Byte to byte (UTF-8)
letter = b"\x41".decode('utf-8')
pound = b"\xC2\xA3".decode('utf-8')
ball = b"\xE2\x9A\xBD".decode('utf-8')
thumbs_up = b"\xF0\x9F\x91\x8D".decode('utf-8')
brazil = b"\xF0\x9F\x87\xA7\xF0\x9F\x87\xB7".decode('utf-8')

# Code point to code point
letter = "\u0041"
pound = "\u00A3"
ball = "\u26BD"
thumbs_up = "\U0001F44D"
brazil = "\U0001F1E7\U0001F1F7"

Bash

# Directly
letter='A'
pound='£'
ball='⚽'
thumbs_up='👍'
brazil='🇧🇷'

# Byte to byte (UTF-8)
letter=$'\x41'
pound=$'\xC2\xA3'
ball=$'\xE2\x9A\xBD'
thumbs_up=$'\xF0\x9F\x91\x8D'
brazil=$'\xF0\x9F\x87\xA7\xF0\x9F\x87\xB7'

# Code point to code point
letter=$'\u0041'
pound=$'\u00A3'
ball=$'\u26BD'
thumbs_up=$'\U0001F44D'
brazil=$'\U0001F1E7\U0001F1F7'
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment