Skip to content

Instantly share code, notes, and snippets.

Last active February 13, 2020 20:23
Show Gist options
  • Save Thomas-Rosenkrans-Vestergaard/524dfc5d4d922226631ebe89943914c6 to your computer and use it in GitHub Desktop.
Save Thomas-Rosenkrans-Vestergaard/524dfc5d4d922226631ebe89943914c6 to your computer and use it in GitHub Desktop.
import re
import subprocess
# Pattern that captures the object literal assigned to `var profile` inside a
# <script> element. Group 1 starts at the opening "{" and, being greedy,
# extends to the last position followed by "</script>" that can be reached
# without crossing an "å" character.
# NOTE(review): "å" looks like a sentinel assumed never to occur in the
# payload -- confirm against real input.
# Written as a raw string so that "\s" reaches the regex engine verbatim
# instead of relying on Python passing an invalid string escape through.
regex = r"""<script>var[\s]+profile[\s]*=[\s]*({[^å]+)(?=</script>)"""
test_str = ("""
<script>var profile = {"env":null,
"matrixData":[{"label":"Mærke ","value":"Xxxxx"},
"text":"2.139 kr."}},
"channel":"xxxx", "target":"_blank","adLayout":"xxxx"}},
def to_json(javascript):
    """Convert a JavaScript expression to JSON text by evaluating it in Node.js.

    The expression is embedded in a ``console.log(JSON.stringify(...))``
    program and run with ``node -e``.

    WARNING: ``javascript`` is executed as code by Node.js. Only pass text
    from sources you trust.

    Args:
        javascript: Source text of a JavaScript expression (for example an
            object literal). Trailing spaces, semicolons and line breaks
            are stripped before evaluation.

    Returns:
        A ``(ok, output)`` tuple. ``ok`` is ``True`` and ``output`` is the
        text printed by node when it exits successfully; ``ok`` is ``False``
        and ``output`` is node's captured output when it exits with a
        non-zero status.

    Raises:
        OSError: If the ``node`` executable cannot be started.
    """
    # A trailing ";" would end the expression inside JSON.stringify(...) early.
    javascript = javascript.rstrip(' ;\n\r')
    cmd = ['node', '-e', """console.log(JSON.stringify(""" + javascript + """))"""]
    try:
        # stderr is merged into stdout so that node's error text is available
        # through ``exc.output`` on failure.
        out = subprocess.check_output(cmd, stderr=subprocess.STDOUT, universal_newlines=True)
    except subprocess.CalledProcessError as exc:
        # this was not valid javascript
        return (False, exc.output)
    # this was valid javascript
    return (True, out)
matches = re.finditer(regex, test_str, re.MULTILINE | re.DOTALL)
for matchNum, match in enumerate(matches, start=1):
for groupNum in range(0, len(match.groups())):
groupNum = groupNum + 1
output = to_json(
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment