Last active
January 4, 2017 02:01
-
-
Save Evolution0/cca774249c60afa1a571938b3f5527f5 to your computer and use it in GitHub Desktop.
Convert embedded JavaScript dictionary to JSON
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import demjson | |
import re | |
class BandcampJSON:
    """Pull a JavaScript object literal out of a page and convert it to JSON.

    The steps are: find the inline ``<script>`` element whose text mentions
    ``var_name`` (``get_js``), cut out the value assigned to that variable
    (``extract_data``), then decode and re-encode it with ``demjson`` so the
    result is strict JSON (``js_to_json``).

    NOTE(review): ``body`` is presumably a BeautifulSoup document or tag. The
    code only needs a ``find(name, attrs, text=...)`` method whose result
    exposes ``.string`` -- confirm against the caller.
    """

    def __init__(self, body, var_name: str, js_data=None) -> None:
        """
        Store the parsed page and the name of the JS variable to extract
        :param body: Parsed document that get_js() searches
        :param var_name: Name of the JavaScript variable holding the data
        :param js_data: Optional initial value of the js_data attribute
        """
        self.body = body
        self.var_name = var_name
        # Overwritten by get_js() and extract_data() with their latest result.
        self.js_data = js_data

    def get_js(self) -> str:
        """
        Get <script> element containing the data we need and return the raw JS
        :return js_data: Raw JS as str
        :raises AttributeError: If no matching <script> element is found
        """
        # var_name is a literal identifier, not a pattern: escape it so that
        # characters such as "$" (legal in JS names) are matched verbatim.
        name_pattern = re.compile(re.escape(self.var_name))
        # Presumably {"src": False} limits the search to inline scripts, i.e.
        # tags without a src attribute (BeautifulSoup semantics -- confirm).
        script = self.body.find("script", {"src": False}, text=name_pattern)
        if script is None:
            # AttributeError is what the unguarded ``.string`` access raised
            # before; the type is kept so existing callers still work.
            raise AttributeError(
                "no inline <script> element mentions {!r}".format(self.var_name)
            )
        self.js_data = script.string
        return self.js_data

    def extract_data(self, js: str) -> str:
        """
        Extract values from JS dictionary
        :param js: Raw JS
        :return: Contents of dictionary as str
        :raises AttributeError: If js has no "var <var_name> = " assignment
        """
        # The look-behind keeps the "var <name> = " prefix out of the match.
        # Raw strings avoid the invalid "\s" escape, and re.escape() stops
        # regex metacharacters in var_name from changing the pattern.
        pattern = r"(?<=var\s" + re.escape(self.var_name) + r"\s=\s)[^;]*"
        found = re.search(pattern, js)
        if found is None:
            # Same exception type the unguarded ``.group()`` call raised.
            raise AttributeError(
                "no 'var {} = ...' assignment found in script".format(self.var_name)
            )
        # NOTE: the value ends at the first ";", so a semicolon inside a
        # string literal truncates the result.
        # Removing '" + "' merges string literals written as "a" + "b".
        self.js_data = found.group().replace('" + "', '')
        return self.js_data

    def js_to_json(self) -> str:
        """
        Convert JavaScript dictionary to JSON
        :return: JSON as str
        :raises AttributeError: If the script or the assignment is not found
        """
        raw_js = self.get_js()
        literal = self.extract_data(raw_js)
        # Decode with demjson first to reformat keys and lists
        decoded = demjson.decode(literal)
        # Encode to make valid JSON
        return demjson.encode(decoded)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment