Unicode Sandwich, demonstrating multiple encodings.
# -*- encoding: utf-8 -*-
# "Unicode sandwich": decode bytes to text at the input boundary, do all
# processing on text, and encode back to bytes only at the output boundary.
# Written to run unchanged on Python 2.7 and Python 3.
import re
import sys

# Byte string containing an Icelandic pangram encoded in mac_iceland.
# The b'' prefix keeps this a byte string on Python 3 as well as Python 2.
encoded_pangram = (
    b'Svo h\x9alt, yxna k\xe0r \xdfeg\xddi j\x9c um d\x97p '
    b'\x92 f\x8e \x87 b\xbe.'
)

# Bottom slice of the sandwich: bytes -> text, using the source encoding.
u_string = encoded_pangram.decode('mac_iceland')

# Filling: work on text only.  re.sub returns a NEW string (strings are
# immutable), so the result must be bound back to the name; otherwise the
# substitution is silently thrown away.  re.UNICODE makes \w match the
# accented Icelandic letters on Python 2 (it is the default on Python 3).
u_string = re.sub(r'\w{4}', u'xxxx', u_string, flags=re.UNICODE)

# Top slice of the sandwich: text -> bytes, encoded once and reused for
# every output below.
utf8_bytes = u_string.encode('UTF-8')

# Print as UTF-8, which your terminal probably understands.  Python 3's
# sys.stdout is a text stream, so raw bytes go to its underlying binary
# buffer; Python 2's sys.stdout accepts byte strings directly.
stdout_bytes = getattr(sys.stdout, 'buffer', sys.stdout)
stdout_bytes.write(utf8_bytes + b'\n')

with open('output.txt', 'wb') as out_file:
    # Write the new unicode string to file using UTF-8 encoding.
    out_file.write(utf8_bytes)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment