Skip to content

Instantly share code, notes, and snippets.

@evansd
Last active August 29, 2015 14:01
Show Gist options
  • Save evansd/41ea9dfc90d87f6afde1 to your computer and use it in GitHub Desktop.
Save evansd/41ea9dfc90d87f6afde1 to your computer and use it in GitHub Desktop.
from __future__ import absolute_import
import json
import re
from django import template
from django.utils.html import format_html_join
# Template tag library for this module; the `json` tag defined below is
# registered on it via `@register.simple_tag`.
register = template.Library()
# Note: this is much more restrictive than the actual rules for
# JavaScript variables, but we prefer to err on the side of
# caution here.
# - The character classes spell out both letter cases instead of relying on
#   re.IGNORECASE, because with Unicode patterns IGNORECASE makes `[a-z]`
#   also match a handful of non-ASCII letters (e.g. U+212A KELVIN SIGN).
# - The pattern ends with `\Z` rather than `$`, because `$` also matches
#   just before a trailing newline and would accept a name like 'foo\n'.
JS_VARIABLE_RE = re.compile(r'^[A-Za-z_$][0-9A-Za-z_$]*\Z')
# Use a regex to do character replacements so we can do them in a single pass.
# Each key is a character that is risky inside a `<script>` element; each
# value is the equivalent JSON unicode escape sequence.
REPLACEMENTS = {'<': r'\u003c', '&': r'\u0026', '>': r'\u003e'}
REPLACE_RE = re.compile('|'.join(map(re.escape, REPLACEMENTS.keys())))
def replace(match):
    """
    Substitution callback for `REPLACE_RE`: look up the matched text in
    `REPLACEMENTS` and return its escape sequence
    """
    matched_text = match.group(0)
    return REPLACEMENTS[matched_text]
def escape_json_for_script(json_string):
    """
    Return `json_string` with the characters `<`, `&` and `>` rewritten as
    JSON unicode escapes, so the result can be placed inside a `<script>`
    element of an HTML or XHTML document

    The set of escaped characters follows these recommendations:
    http://www.w3.org/TR/html5/scripting-1.html#restrictions-for-contents-of-script-elements
    https://www.owasp.org/index.php/XSS_(Cross_Site_Scripting)_Prevention_Cheat_Sheet

    < : should be escaped in all contexts
    & : only relevant for XHTML documents
    > : only relevant within CDATA blocks or HTML comments

    In valid JSON these characters can only appear inside string values, so
    swapping them for their unicode escapes leaves the decoded data unchanged.
    """
    # One regex pass; `replace` maps each matched character to its escape
    escaped = REPLACE_RE.sub(replace, json_string)
    return escaped
@register.simple_tag(name='json')
def json_encode(data, var=None, indent=None, **kwargs):
    """
    Embed `data` as JSON encoded string with a `<script>` element of type
    `application/json` or, if 'var' is supplied, as a global JavaScript
    variable

    Any extra keyword arguments are added as attributes on the element
    (underscores in keyword names are replaced with dashes). The `type`
    attribute is always set by this tag and overrides any supplied value.

    Arguments:
        data: any object serialisable by `json.dumps`
        var: optional JavaScript variable name to assign the data to; must
            match `JS_VARIABLE_RE`
        indent: passed through to `json.dumps`
        **kwargs: extra attributes for the `<script>` element

    Returns the `<script>` element as a string marked safe for HTML output.

    Raises ValueError if `var` is given but is not an accepted variable name.
    """
    # Imported here so this function is self-contained with respect to the
    # module's import list.
    from django.utils.safestring import mark_safe

    encoded = json.dumps(data, indent=indent)
    safe_encoded = escape_json_for_script(encoded)
    attrs = {key.replace('_', '-'): value for (key, value) in kwargs.items()}
    # Note: the format string is deliberately not called `template`, which
    # would shadow the imported `django.template` module.
    if var is not None:
        if not JS_VARIABLE_RE.match(var):
            raise ValueError(u'Invalid JavaScript variable name: %s' % var)
        attrs['type'] = 'application/javascript'
        markup = u'<script {attrs}>var {var} = {data};</script>'
    else:
        attrs['type'] = 'application/json'
        markup = u'<script {attrs}>{data}</script>'
    html = markup.format(
        # format_html_join HTML-escapes attribute names and values
        attrs=format_html_join(u' ', u'{0}="{1}"', attrs.items()),
        data=safe_encoded,
        var=var)
    # `str.format` yields a plain string even though the pieces are already
    # escaped. Django's simple_tag autoescapes plain strings (Django 1.9+),
    # which would turn the markup into visible text, so mark it safe.
    return mark_safe(html)
@beniwohli
Copy link

@evansd is there a reason you switched from Unicode escapes to hex escapes? I'm getting errors like this with the hex escapes:

SyntaxError: JSON.parse: bad escaped character 

When I use Unicode escapes, everything seems to work.

@evansd
Copy link
Author

evansd commented Feb 25, 2015

@piquadrat You're absolutely right. Hex escapes aren't valid JSON, although they are valid JavaScript, so if you're "parsing" the JSON by interpreting it as JavaScript then it happens to work.

Thanks for spotting this!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment