Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save jay-johnson/a9fda72d907fa8e5805de3a5d5ed42f8 to your computer and use it in GitHub Desktop.
Save jay-johnson/a9fda72d907fa8e5805de3a5d5ed42f8 to your computer and use it in GitHub Desktop.
Python 2 vs Python 3 - Understanding Unicode Strings

Porting to Python 3 - git diff

Here's an example of the changes needed when porting code that handles binary strings which are nothing more than serialized dictionaries (response.rendered_content). In Python 2, json.loads(response.rendered_content) works just fine, but in Python 3 rendered_content is a bytes object, so I had to add .decode("utf-8"):

         # save to log
         try:
-            self.request.log.response = json.loads(response.rendered_content)
+            self.request.log.response = response.rendered_content.decode("utf-8")
         except Exception as w:
+            lg("Mixin Tracker Failed to decode content as utf-8 ex=" + str(w), 0, request) 
             self.request.log.response = response.rendered_content
+        # end of trying to assign
 
         self.request.log.status_code = response.status_code
         self.request.log.response_ms = response_ms
-        self.request.log.save()
+
+        try:
+            self.request.log.save()
+        except Exception as w:
+            lg("Mixin Tracker failed to save record with ex=" + str(w), 0, request) 
+            try:
+                self.request.log.response = str(response.rendered_content)
+                self.request.log.save()
+            except Exception as h:
+                lg("Mixin Tracker unable to save record with ex=" + str(h), 0, request) 
+            # end of second save...stop here
+        # end of first save attempt

Defaults per Python version

python 2: response.rendered_content => "{'algo_name':'xgb-regressor' ... rest of dictionary in a string}"

python 3: response.rendered_content => b'{"algo_name":"xgb-regressor" ... rest of dictionary in a string of bytes}'

-  with open(tmp_file, "w") as output_file:
-      output_file.write(zlib.compress(pickle.dumps(analysis_rec["Record"])))
+  validate_bytes = 0    
+  decompressed_obj = None
 
-  self.lg("Validating Serialization", 5)
-  validate_bytes      = open(tmp_file).read()
-  decompressed_obj    = pickle.loads(zlib.decompress(validate_bytes))
+  # python 2 is byte strings
+  if sys.version_info < (2, 8):
+      lg("py2 write", 5)
+      with open(tmp_file, "w") as output_file:
+          output_file.write(zlib.compress(pickle.dumps(analysis_rec["Record"])))
+
+      lg("Validating Serialization", 5)
+      validate_bytes = open(tmp_file).read()
+      decompressed_obj = pickle.loads(zlib.decompress(validate_bytes))
+  # python 3 is byte strings so make sure to set the byte flag
+  else:
+      lg("py3 write", 5)
+      with open(tmp_file, "wb") as output_file:
+          output_file.write(zlib.compress(pickle.dumps(analysis_rec["Record"])))
+
+      lg("Validating Serialization", 5)
+      validate_bytes = open(tmp_file, "rb").read()
+      decompressed_obj = pickle.loads(zlib.decompress(validate_bytes))
+  # end of py2/py3

Commands I used to start the port

  1. port python 2 implementation for: print ""
sed -i '/print / s/$/)/' $(grep -r "print " * | grep -v os.system | sed -e 's/:/ /g' | awk '{print $1}' | sort | uniq | grep -v '\.pyc' | grep -v '\.sh' | grep '\.py' | grep -v Binary)
sed -i 's/print /print(/g' $(grep -r "print " * | grep -v os.system | sed -e 's/:/ /g' | awk '{print $1}' | sort | uniq | grep -v '\.pyc' | grep -v '\.sh' | grep '\.py' | grep -v Binary)
sed -i 's/print((/print(/g' $(grep -r "print " * | grep -v os.system | sed -e 's/:/ /g' | awk '{print $1}' | sort | uniq | grep -v '\.pyc' | grep -v '\.sh' | grep '\.py' | grep -v Binary)

Example from the git diff after running these sed commands:

-        print ""
-        print "-----------------------------------------------------------------------"
-        print "Date(" + str(datetime.datetime.now().strftime("%d-%m-%Y %H:%M:%S")) + ") Starting State Machine"
+        print("")
+        print("-----------------------------------------------------------------------")
+        print("Date(" + str(datetime.datetime.now().strftime("%d-%m-%Y %H:%M:%S")) + ") Starting State Machine")
  1. Port python 2 exceptions
sed -i 's/Exception,  /Exception as /g' $(find . | grep py)
sed -i 's/Exception,  /Exception as /g' $(find . | grep py | grep -v pycache)
sed -i 's/Exception, /Exception as /g' $(find . | grep py | grep -v pycache)
sed -i 's/Exception,/Exception as /g' $(find . | grep py | grep -v pycache)
sed -i 's/ValueError,  /ValueError as /g' $(find . | grep py | grep -v pycache)
sed -i 's/ValueError, /ValueError as /g' $(find . | grep py | grep -v pycache)
sed -i 's/ValueError,/ValueError as /g' $(find . | grep py | grep -v pycache)
sed -i 's/KeyError,  /KeyError as /g' $(find . | grep py | grep -v pycache)
sed -i 's/KeyError, /KeyError as /g' $(find . | grep py | grep -v pycache)
sed -i 's/KeyError,/KeyError as /g' $(find . | grep py | grep -v pycache)
sed -i 's/from HTMLParser/from html.parser/g' $(find . | grep py | grep -v pycache)
sed -i 's/import HTMLParser/import html.parser/g' $(find . | grep py | grep -v pycache)

Find places I opened a file

grep -r 'open(' *

Helper function for reading files

    def read_file(self, filename, debug=False):
        """Read an entire file and return its contents.

        On Python 2 the file is opened in the default text mode; on
        Python 3 it is opened in binary mode ("rb"), so the result is a
        ``str`` on Python 2 and ``bytes`` on Python 3.

        Args:
            filename: Path of the file to read.
            debug: When true, print the filename before reading.

        Returns:
            The file contents, or ``None`` if the file could not be read
            (the error is printed, not raised).
        """

        # py2/py3 file reading: pick the open mode and debug label once
        if sys.version_info < (2, 8):
            mode = "r"
            debug_label = "2.7 file="
        else:
            mode = "rb"
            debug_label = "file="
        # end of py2/py3

        try:

            if debug:
                print(debug_label + str(filename))

            # the context manager closes the handle even if read() raises,
            # instead of leaking it until garbage collection
            with open(filename, mode) as input_file:
                return input_file.read()

        except Exception as k:
            # best-effort helper: report the failure and signal it with None
            err_msg = "Failed to read file=" + str(filename) + " ex=" + str(k)
            print("ERROR: " + str(err_msg))
            return None
        # end of try/ex
    # end of read_file

and in use:

part = MIMEApplication(self.read_file(filename, debug))
  1. porting xrange to range with int casting
-    for idx, i in enumerate(xrange((len(self.m_cstick_window) / self.m_range))):
+    for idx, i in enumerate(range(int(len(self.m_cstick_window) / self.m_range))):
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment