Skip to content

Instantly share code, notes, and snippets.

@mondeja
Last active February 3, 2018 23:09
Show Gist options
  • Save mondeja/a8215d34ee1c0c850d7b3f64ab6b2260 to your computer and use it in GitHub Desktop.
Save mondeja/a8215d34ee1c0c850d7b3f64ab6b2260 to your computer and use it in GitHub Desktop.
Comparison of parsing broken JSON text with the json decoder and the re module.
from re import sub
from timeit import timeit
from json import loads
from requests import get
def test(res):
return type(res) == list and \
type(res[0]) == dict and \
type(res[0]["last_updated"]) == int
# Endpoint whose JSON is "badly formatted": per the recorded output at the end
# of this file, numeric fields arrive as quoted strings.
url = "https://api.coinmarketcap.com/v1/ticker/"

# Download the payload once and reuse it for every parsing strategy, so all
# three results are decoded from the same data and only one request is made.
# The timeout keeps the script from hanging forever on an unresponsive server.
response = get(url, timeout=10)

# Parsing raw text response: drop the quotes around numeric literals before
# decoding, so the decoder yields int/float values instead of str.
raw_text = loads(sub(r'"(-*\d+(?:\.\d+)?)"', r"\1", response.text))

# Parsing JSON responses: both variants keep the numbers as strings.
raw_json_1 = response.json()
raw_json_2 = loads(response.text)
# One entry per parsing strategy. Keys, as consumed by the report loop below:
#   title    - label printed for the strategy
#   response - data already parsed with that strategy
#   setup    - extra import statements appended to the timeit setup string
#   code     - snippet handed to timeit and echoed in the report
responses = [
    {
        "title": "Parsed with: re.sub()",
        "response": raw_text,
        "setup": "from re import sub;from json import loads;",
        # Single raw literal: the regex backslashes (\d, \., \1) are kept
        # verbatim without relying on invalid escape sequences in a normal
        # string, which newer Python versions warn about.
        "code": r'''loads(sub(r'"(-*\d+(?:\.\d+)?)"', r'\1', get(url).text))''',
    },
    {
        "title": "Parsed with: requests.get(...).json()",
        "response": raw_json_1,
        "setup": "",
        "code": "get(url).json()",
    },
    {
        "title": "Parsed with: json.loads()",
        "response": raw_json_2,
        "setup": "from json import loads",
        "code": "loads(get(url).text)",
    },
]
# Report: for each parsing strategy print its title, the timed snippet, a
# 200-character preview of the parsed data, the type-check verdict and a
# timing figure, with a divider line between strategies.
divider = "\n============================================\n"
timeit_setup = "url='%s';from requests import get;%s"

print(divider)
for entry in responses:
    print("\t%s" % entry["title"])
    print("\n>>> %s" % entry["code"])

    preview = str(entry["response"])[:200]
    print("%s ..." % preview)

    verdict = "passed" if test(entry["response"]) else "failed"
    print("\n--------> Test %s! <--------" % verdict)

    # Each snippet is executed exactly once (number=1). The snippets in
    # `responses` call get(url) themselves, so the figure presumably includes
    # the HTTP round trip and not just the parsing time.
    elapsed = timeit(
        entry["code"],
        setup=timeit_setup % (url, entry["setup"]),
        number=1,
    )
    print("Benchmark: %f" % elapsed)
    print(divider)
""" Output:
============================================
Parsed with: re.sub()
>>> loads(sub(r'"(\d+(?:\.\d+)?)"', r'\1', get(url).text))
[{'id': 'bitcoin', 'name': 'Bitcoin', 'symbol': 'BTC', 'rank': 1, 'price_usd': 9308.99, 'price_btc': 1.0, '24h_volume_usd': 7456650000.0, 'market_cap_usd': 156795386599, 'available_supply': 16843437.0 ...
--------> Test passed! <--------
Benchmark: 0.151925
============================================
Parsed with: requests.get(...).json()
>>> get(url).json()
[{'id': 'bitcoin', 'name': 'Bitcoin', 'symbol': 'BTC', 'rank': '1', 'price_usd': '9308.99', 'price_btc': '1.0', '24h_volume_usd': '7456650000.0', 'market_cap_usd': '156795386599', 'available_supply': ...
--------> Test failed! <--------
Benchmark: 0.136815
============================================
Parsed with: json.loads()
>>> loads(get(url).text)
[{'id': 'bitcoin', 'name': 'Bitcoin', 'symbol': 'BTC', 'rank': '1', 'price_usd': '9308.99', 'price_btc': '1.0', '24h_volume_usd': '7456650000.0', 'market_cap_usd': '156795386599', 'available_supply': ...
--------> Test failed! <--------
Benchmark: 0.162506
============================================
"""
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment