Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
A Sanitize gem transformer that sanitizes any CSS in an HTML document.
# Sanitize transformer that vets the CSS carried by a node.
#
# For a <style> element the element's text content is checked; for any other
# element the value of its inline style attribute is checked. The check itself
# is delegated to good_css?.
#
# Reads env[:node], env[:node_name] and env[:is_whitelisted].
#
# Returns nil when the node is skipped (already whitelisted, not an element,
# or carrying no CSS) and when the node was removed because its CSS was
# rejected. Returns {:node_whitelist => [node]} when the CSS was accepted.
#
# NOTE(review): whitelisting the node presumably exempts the whole element,
# including its other attributes, from further Sanitize cleaning -- confirm
# against the Sanitize version in use.
check_css = lambda do |env|
  node = env[:node]

  # Don't continue if this node is already whitelisted or is not an element.
  return if env[:is_whitelisted] || !node.element?

  style_element = env[:node_name] == 'style'
  return unless style_element || node['style']

  css = style_element ? node.content : node['style']

  unless good_css?(css)
    # Drop the offending node from the document entirely.
    node.unlink
    return
  end

  {:node_whitelist => [node]}
end
# Heuristically decides whether a piece of CSS is safe to keep.
#
# The text is rejected when it is not validly encoded, when it uses comment
# tricks that can hide tokens from a filter, or when (after comments and
# newlines have been removed) it contains script-like keywords, markup
# characters, backslash escapes, character references or unexpected bytes.
#
# @param text [String, nil] stylesheet text or the value of a style attribute;
#   nil is treated as empty CSS
# @return [Boolean] true when nothing suspicious was found, false otherwise
def good_css?(text)
  css = text.to_s

  # Broken byte sequences are suspect, and regexp matching against them may
  # raise, so reject them before any pattern is applied.
  return false unless css.valid_encoding?

  return false if css =~ %r{\w//}   # a// comment immediately following a word character
  return false if css =~ %r{\w/+\*} # a/* comment immediately following a word character
  return false if css =~ %r{/\*/}   # /*/ --> hack attempt

  # Strip /* ... */ comments (the m flag lets a comment span several lines),
  # then join the remaining lines so keywords split across lines are caught.
  stripped = css.gsub(%r{/\*.*?\*/}m, '').delete("\n")

  # High bytes (DEL and anything non-ASCII) -- suspect. Scanning bytes avoids
  # a \xff escape in a regexp literal, which is a syntax error in UTF-8 source.
  return false if stripped.each_byte.any? { |byte| byte >= 0x7f }

  evil = [
    # Suspicious javascript-type words. No trailing \b after "data:", so that
    # "data:;base64,..." and "data:,..." are caught as well.
    /\bdata:|eval|cookie|\bwindow\b|\bparent\b|\bthis\b/i,
    # Scriptable CSS features, at-rules, and backslash escapes of word characters.
    /behaviou?r|expression|moz-binding|@import|@charset|(java|vb)?script|\\\w/i,
    /[<>]/,                          # html tags
    /[\x00-\x08\x0B\x0C\x0E-\x1F]/,  # low bytes -- suspect
    /&\#/,                           # numeric character references
  ]

  evil.none? { |pattern| stripped =~ pattern }
end
@taketin

This comment has been minimized.

Copy link

@taketin taketin commented Oct 26, 2012

Hi. I found a bug: s/node_whitelist/whitelist_node/g. It is fixed in my fork.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment