Created
February 18, 2013 04:01
-
-
Save rw/4975046 to your computer and use it in GitHub Desktop.
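JSON doesn't handle arbitrary bytes: under Python 2.7, json.dumps() tries to decode a byte string as UTF-8 and raises UnicodeDecodeError on the first invalid byte.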
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
In [1]: arb = open('random-crap', 'rb').read()
In [2]: import json
In [3]: arb
Out[3]: '\x88\xb5rz\xe8(\xcd\x7f\xbc\x8e\xae\xfe\xfcA\xd7Q\xa8NOPQg\rZ\xe4\xb7\xf7p\xc4U<\xc6.\x94\xc8\xc3M\x91\xcdN\x91%j\xa8\xfd\xef\xcaI\xa1\xb2XB\xa5#\xae~\n\xc1\x8b0B\xe1^F.E\xdc\xea`<\xd7\x88&pR\xf0\xd8\xde\xabA\\\xc1\x10\x91\x8bsQ\x8a[\x15\x0c%G2\x85\x14\x97\t\xbe\x99T\xc8\x02\xac\xc6\xec\x1a\xe4\xa0x\x99]`\x90\xca&\x9a\x15\x08\x84W\x9eO\\\xa3NE\xd0YyA\xed|\xde\x1dl\xb8\xccU^\xb9\x83\xfe4\xd8\xa4\x0c2\xfb;\xff\xf3Qw\xeeX\xd6&\xef\x0f\xdc\xc7i\x93\xbd\xbav\xcb\x9a\x85%\x0c\xe2\x1dc\xb6\x8dcLN\x98\x9a?\x18{\x80`o\x0c\x07\xdf\xf9\x03\x01\xd6H\xa1\x11r\x16Gm\xdfW\xbd\xac\xb0!\x8f8\xa8\x99\n\x1c\x16(\x00\xdb~\xe3\x11\x8d?\xba\xc5\xc9\x94\x963(:\x99\xa8\x99\xc6\xae\x1f\xf1\xa2\x1c\xfc\x9b\x92\xb4\xb0\xf5!\x19\x01@\xb8\xb7\x8bOx\x19\x0fP\xfe\x86\xb7\x04\x05\xcd\xac\x8e@qC\xa7\xdb\x85\xae\x92\xa9:\x81_\xf1\x1b\xfd\xcb\x87\xb9\x88r\x94\xe6 \xfeG\xfe\xcd\xf2\x1d\x152\x04g\xd0\xe9O\xd0\xf5\x04\xc0\x9c\xb44\xa3\x84\x9f\xbd\xd0\xe3b\x90UaS\x00\xe4\xdd\xb2\x95\xcc\xe7\xd7l\xba\xdd\xb1\x1d\x81\xe4\x0e\xad\xee\x7fB+N\x18\xf5\x8c&\x0c\xf9\x94R\x86A\x1a\xbe\xdas\x98\x12*\x16\xfc\xba\xc7j\x10\x87E\xc8\xde\xc6\xea\x1d&\x17K6\x86\xc6\x99Y\x14"x\x06t\xd0\x89R\xafg\x8e\xca\x95BN\xcb\xa6\x84\x16\xaf\t\xd3\t\x7f\xe4\xfe\xf4>\xed\x93\xff\xed^\x00kd?\x8b~w\x05\xbe\xc4l$\xff\x1d\xb5\\#\x8d\x96\xa6\xde\xae1\x8f\x0e\x19D\xcf\xd2\x16\x17\xbe\x1c\xb5\xb1\xab\xc5\x07.\'\xb8\x01p\xbekUo\x1d\x8f\xecx\xaeK\xde\x9f\xa2\xffB\xe8\xdd\xd5r\x89WJ\xf6w\x17aG:WU^\xa1H,\x98\xd5\xc7\xf2\xd4\x1f\xf6\xc6\x0bi\xf6\x91,N6\xf3\xa2\x8b\xcc\xb3\xed\t5\xc0{\xcc\xe4\xf7\xe6\x02\x06\xe9.)\x90\x10G\xb69B\x9e\xd7\x14{\x8c\xe4\xaa\xf6\xfb\xf70\xef\xd3\xdd\x1a\xc1\xaa\x92\xad\x0cll\xf6\xd1`\xe4m4\xa8{\x896\x11\xd0:=\n~\x08S\x93Zu\xfb\x86\xe9\xed\xde\x1e5\x9d_\x8a\xd8\x8a\x03H\xd5\\\xc1\xab\x9a\xc7_\xbd[W\xc8\xb4\xe8=U\x8f\xe3\xf0\t\xccL^\xf0\xf3\xd3\xa2\xa5X)\x81S\x82\x9a\x10w1Qk,C\xfd\xa6\x0c\x91aej\x03\r\xcf\xbc\xd9\xc0\xba\x1b]\x03\xd3\x82P\xdd/\xb6\xf5v\xa7\x9c\xf0.\x10\xc0\x8d5\xaf\xfdv\x18a\xe9\x0e\xc4\xdd\xb0\
x15\x90\x12\x98#\xf69U\xf1\x8e\x1a\xce\x95\\\x91\x02\xbe\x13\x88\xa6i\xe2\x1f7\xb3\xdb\x0c}\xc2\xe5\x97A\xb8\x90l\xad\x9f\xf7CD\xb4\x8f\x0b\xe9#X\x15\x8ei\x89\xafyQ)\xc0\xe1\x95\xe6/\xc5c\xe0\x14\x80H\xf7\xb1M\xbf\xe8\xbf\xd96\xac\xb6b@\xcd>\x19\xb6y4\xe5\xed\xb3<\x1a\xf0\xfb\xe1\xe1\xee3\xa8[c\xda\xd8\xa3CS\xa0\x86\x98KG,\x80o\xc0\x7f\xf3\xf0\xba"Zm\x95\xa9U#\xd2\xa3Q\xcac\xe7\x10\x8fk\xe2W\xb1;\xc5\x8eU\xaf\xd2G\x9f\xc0\t\x96\x1b\x08\xa8;\xf1y\xe1\x06\x1bi\x98\x82\xbd\x05\x93\xdbZ\xddIW{\xb8^\x1a\x92\x1bKlD\xe0\xa4\xd1\x14\xff\xb8L\\\x18\xcd\xc6\xe7\x8e\x93\xc5f\x0bz\x88\xd4f\xb2T\x17h\x1e\xe2VU\xc2\x88\xdf\t\xe2\xb0\xaes\x89V\xd0\xb4\xd6\x8c\x9a\x1e-7\\\xc2ly\xf9\xd1\xc1aj\x90\x9e\xb3\x9bLj\xa1\xe1n\xfd\x1b^\xd6Bf(\x16\xbb\x80\x85\x82k\xfa\x14\x88\xd3[\xdc\x95\xa3\x19rL\x92)\x0ba\x82Q)U|\x7fT\xa4R\x98\x93\xb3\xe3\xa7\xb8\xd0}@\xe1\xff\x07\xac\xff\xba\x99\x0ehk.\xed\r\'~\xc2\xac\x98r<\xbbUNs\xfb\xb5\xdc>\x1f\xde\xab\x0fq\xd2\xf6:\x87\xaf\xa5\xa7$\x03\xc2W\xa1' | |
In [4]: msg = json.dumps(arb)
--------------------------------------------------------------------------- | |
UnicodeDecodeError Traceback (most recent call last) | |
<ipython-input-4-845ccbae793b> in <module>() | |
----> 1 msg = json.dumps(arb) | |
/usr/local/Cellar/python/2.7.2/lib/python2.7/json/__init__.pyc in dumps(obj, skipkeys, ensure_ascii, check_circular, allow_nan, cls, indent, separators, encoding, default, **kw) | |
229 cls is None and indent is None and separators is None and | |
230 encoding == 'utf-8' and default is None and not kw): | |
--> 231 return _default_encoder.encode(obj) | |
232 if cls is None: | |
233 cls = JSONEncoder | |
/usr/local/Cellar/python/2.7.2/lib/python2.7/json/encoder.pyc in encode(self, o) | |
193 o = o.decode(_encoding) | |
194 if self.ensure_ascii: | |
--> 195 return encode_basestring_ascii(o) | |
196 else: | |
197 return encode_basestring(o) | |
UnicodeDecodeError: 'utf8' codec can't decode byte 0x88 in position 0: invalid start byte
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
dd if=/dev/urandom of=random-crap count=1000 bs=1
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment