Last active
August 29, 2015 14:28
-
-
Save upbit/d5c8dcfacdf1ec0b1337 to your computer and use it in GitHub Desktop.
使用raw_unicode_escape解析转义过的unicode字符串
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding:utf-8 -*- # | |
import sys

# Python 2 only: force the interpreter-wide default encoding to UTF-8 so that
# implicit str <-> unicode conversions elsewhere in this script do not fail on
# non-ASCII text. `reload` is a builtin only on Python 2 and
# `sys.setdefaultencoding` does not exist on Python 3 (whose default encoding
# is already UTF-8), so the hack is skipped there instead of raising NameError.
if sys.version_info[0] == 2:
    reload(sys)  # noqa: F821 - Python 2 builtin
    sys.setdefaultencoding("utf-8")
import operator | |
import urllib | |
import json | |
# Copyright: Liao Xuefeng (https://github.com/michaelliao/sinaweibopy/blob/master/weibo.py) | |
class JsonDict(dict):
    """Dictionary whose keys can also be read, written and deleted as attributes.

    ``obj.name`` is equivalent to ``obj["name"]``. A missing key surfaces as
    ``AttributeError`` (not ``KeyError``) when accessed through attribute
    syntax, so ``getattr(obj, name, default)`` and ``hasattr`` behave as usual.
    """

    def __getattr__(self, attr):
        # Only called when normal attribute lookup fails, so real dict
        # methods (keys, items, ...) are never shadowed by stored keys.
        try:
            return self[attr]
        except KeyError:
            raise AttributeError(
                "'%s' object has no attribute '%s'" % (type(self).__name__, attr)
            )

    def __setattr__(self, attr, value):
        # Every attribute assignment is stored as a dict item.
        self[attr] = value

    def __delattr__(self, attr):
        # Mirror __setattr__: attributes live in the dict, so deleting one
        # must remove the key rather than look in the instance __dict__.
        try:
            del self[attr]
        except KeyError:
            raise AttributeError(
                "'%s' object has no attribute '%s'" % (type(self).__name__, attr)
            )
def parse_json(s):
    """Parse the JSON document ``s`` and return it with objects as ``JsonDict``.

    Every JSON object, at any nesting depth, is converted to a ``JsonDict``
    whose keys have been passed through ``str()``, so fields can be read with
    attribute syntax. Arrays, strings, numbers, booleans and null are returned
    as ``json.loads`` produces them.

    Raises whatever ``json.loads`` raises for malformed input.
    """
    def _obj_hook(pairs):
        # `.items()` instead of `.iteritems()`: the latter exists only on
        # Python 2, while `.items()` is available on both major versions.
        return JsonDict((str(key), value) for key, value in pairs.items())

    return json.loads(s, object_hook=_obj_hook)
def main():
    """Print the unescaped ``text`` field of each JSON line in ``sys.argv[1]``.

    Every line of the input file is parsed with ``parse_json``. Its ``text``
    value may still contain literal ``\\uXXXX`` sequences; these are expanded
    with the ``raw_unicode_escape`` codec before the result is printed.
    A line that cannot be parsed or has no usable ``text`` is echoed with an
    ``[ERROR]`` prefix and processing continues with the next line.
    """
    # The context manager guarantees the file is closed, even on Ctrl-C.
    with open(sys.argv[1]) as source:
        for line in source:
            try:
                text = parse_json(line).text
                # Encode explicitly before decoding: characters below U+0100
                # become single bytes and everything else becomes \uXXXX, so
                # the decode step expands the literal escapes while leaving
                # text that was already real Unicode intact. This does not
                # depend on the process-wide default encoding and works for
                # both Python 2 `unicode` and Python 3 `str`.
                print(text.encode("raw_unicode_escape").decode("raw_unicode_escape"))
            except Exception:
                # Best effort: report the offending line and keep going.
                # `Exception` (not a bare except) lets KeyboardInterrupt and
                # SystemExit propagate. `line` keeps its trailing newline.
                print("[ERROR] %s" % line)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment