Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Unicode Sandwich, demonstrating multiple encodings.
# -*- encoding: utf-8 -*-
import re
import sys
# Unicode sandwich: decode bytes on input, work on Unicode text in the
# middle, encode back to bytes on output.

# Byte string containing an Icelandic pangram encoded in mac_iceland.
# The explicit b prefix keeps this a byte string on Python 3 as well as
# on Python 2 (where an unprefixed literal is already bytes).
mac_bytes = b'Svo h\x9alt, yxna k\xe0r \xdfeg\xddi j\x9c um d\x97p \x92 f\x8e \x87 b\xbe.'

# Create a Unicode object from the bytes, decoding with the mac_iceland
# encoding.
u_string = mac_bytes.decode('mac_iceland')

# Mask every run of four word characters. re.sub returns a NEW string and
# leaves its argument untouched, so the result has to be bound back to
# u_string; otherwise the substitution is silently lost. re.UNICODE makes
# \w match non-ASCII letters such as the Icelandic ones on Python 2.
u_string = re.sub(r'\w{4}', u'xxxx', u_string, flags=re.UNICODE)

# Encode once at the output boundary and reuse the bytes for both sinks.
utf8_bytes = u_string.encode('UTF-8')

# Print as UTF-8, which your terminal probably understands. On Python 3
# sys.stdout is a text stream whose .buffer attribute accepts bytes; on
# Python 2 sys.stdout itself accepts bytes, hence the getattr fallback.
stdout_bytes = getattr(sys.stdout, 'buffer', sys.stdout)
stdout_bytes.write(utf8_bytes + b'\n')

# Write the new unicode string to file using UTF-8 encoding. The file is
# opened in binary mode because the text has already been encoded.
with open('output.txt', 'wb') as out_file:
    out_file.write(utf8_bytes)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment