Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save joereddington/358ea23802c5a2e2388049dc7693dc85 to your computer and use it in GitHub Desktop.
Save joereddington/358ea23802c5a2e2388049dc7693dc85 to your computer and use it in GitHub Desktop.
Working out what encode/decode really do.
#!/usr/bin/python
# -*- coding: utf-8 -*-
from unittest import TestCase
import unittest
import io
class unicodeTest(TestCase):
def test_ascii_start(self):
uni_str=u'hello'
print uni_str
print uni_str.decode()
print uni_str.encode()
self.assertEqual("","")
def test_complex_start(self):
uni_str=u'hellø'
print uni_str
print uni_str.encode("utf8") # Okay, encode's argument is to identify the code you are writing. So if it's ascii, there are errors when there isn't an ascii option.
self.assertEqual("","")
# str.encode([encoding[, errors]])
#
# Return an encoded version of the string. Default encoding is the current default string encoding. errors may be given to set a different error handling scheme. The default for errors is 'strict', meaning that encoding errors raise a UnicodeError. Other possible values are 'ignore', 'replace', 'xmlcharrefreplace', 'backslashreplace' and any other name registered via codecs.register_error(), see section Codec Base Classes. For a list of possible encodings, see section Standard Encodings.
def test_emoji(self):
uni_st=u"à"
print len(uni_st)
encoded_str=uni_st.encode('utf8')
print "({})".format(encoded_str)
def test_writing_a_file(self):
filename="Ελπίζω" #I expected this to automatically be a unicode string but it wasn't
print type(filename)
filename_u=u"Ελπίζω"
print type(filename_u)
print filename
print filename_u
#but both print fine because :shrug
f = open(filename+"s.txt", "w")
f.write("Now the file has more content!")
f.close()
f = open(filename_u+"u.txt", "w")
f.write("Now the file has more content!")
f.close()
def test_writing_inside_a_file(self):
filename="Ελπίζω" #I expected this to automatically be a unicode string but it wasn't
print type(filename)
filename_u=u"Ελπίζω"
print type(filename_u)
print filename
print filename_u
#but both print fine because :shrug
f = open(filename+"si.txt", "w")
f.write(filename)
f.close()
f = io.open(filename_u+"ui.txt", "w")
f.write(filename_u)
f.close()
# Run every test in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment