Skip to content

Instantly share code, notes, and snippets.

@itochan
Created June 20, 2011 19:24
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save itochan/1036353 to your computer and use it in GitHub Desktop.
Save itochan/1036353 to your computer and use it in GitHub Desktop.
# -*- coding: utf-8 -*-
class CGI
# Default character encoding used by CGI::unescape when the caller does not
# pass one. It is only assigned when no value has been defined yet, so an
# already-defined setting is left untouched.
@@accept_charset="UTF-8" unless defined?(@@accept_charset)
# URL-encode a string.
#
# Every byte outside the set [a-zA-Z0-9_.-] is replaced by %XX (uppercase
# hex digits) and each space becomes '+'. The input is scanned as raw bytes,
# so a string containing invalid byte sequences is escaped byte by byte
# instead of raising ArgumentError. +string+ itself is not modified and the
# result is tagged with the encoding of +string+.
#
#   url_encoded_string = CGI::escape("'Stop!' said Fred")
#      # => "%27Stop%21%27+said+Fred"
def CGI::escape(string)
  encoding = string.encoding
  # Work on a binary copy: the regexp then matches single bytes and never
  # trips over malformed multibyte sequences.
  bytes = string.dup.force_encoding(Encoding::ASCII_8BIT)
  escaped = bytes.gsub(/([^ a-zA-Z0-9_.-]+)/) do |run|
    '%' + run.unpack('H2' * run.bytesize).join('%').upcase
  end
  escaped.tr(' ', '+').force_encoding(encoding)
end
# URL-decode a string, optionally tagging the result with +encoding+.
#
# '+' is turned back into a space and every run of %XX escapes is replaced
# by the bytes it denotes. The decoded bytes are labelled with +encoding+;
# if they are not valid in that encoding, the label falls back to the
# encoding of the original +string+.
#
#   string = CGI::unescape("%27Stop%21%27+said+Fred")
#      # => "'Stop!' said Fred"
def CGI::unescape(string, encoding = @@accept_charset)
  raw = string.tr('+', ' ').force_encoding(Encoding::ASCII_8BIT)
  decoded = raw.gsub(/((?:%[0-9a-fA-F]{2})+)/) { |run| [run.delete('%')].pack('H*') }
  decoded.force_encoding(encoding)
  return decoded if decoded.valid_encoding?

  decoded.force_encoding(string.encoding)
end
# The set of special characters and their escaped values.
#
# Maps a single character to its HTML character reference. The inverse
# table below is derived from it, so every entry added here can also be
# unescaped. Both tables are frozen because they are shared lookup data.
TABLE_FOR_ESCAPE_HTML__ = {
  # Characters that are significant in HTML markup.
  '"' => '&quot;',
  "'" => '&#39;',
  '&' => '&amp;',
  '<' => '&lt;',
  '>' => '&gt;',
  # Latin-1 symbols and letters.
  "\u00A0" => '&nbsp;', # U+00A0 NO-BREAK SPACE, written as an escape so it is visible
  '¡' => '&iexcl;',
  '¢' => '&cent;',
  '£' => '&pound;',
  '¤' => '&curren;',
  '¥' => '&yen;',
  '¦' => '&brvbar;',
  '§' => '&sect;',
  '¨' => '&uml;',
  '©' => '&copy;',
  'ª' => '&ordf;',
  '«' => '&laquo;',
  '¬' => '&not;',
  '®' => '&reg;',
  '¯' => '&macr;',
  '°' => '&deg;',
  '±' => '&plusmn;',
  '²' => '&sup2;',
  '³' => '&sup3;',
  '´' => '&acute;',
  'µ' => '&micro;',
  '¶' => '&para;',
  '·' => '&middot;',
  '¸' => '&cedil;',
  '¹' => '&sup1;',
  'º' => '&ordm;',
  '»' => '&raquo;',
  '¼' => '&frac14;',
  '½' => '&frac12;',
  '¾' => '&frac34;',
  '¿' => '&iquest;',
  '×' => '&times;',
  '÷' => '&divide;',
  'À' => '&Agrave;',
  'Á' => '&Aacute;',
  'Â' => '&Acirc;',
  'Ã' => '&Atilde;',
  'Ä' => '&Auml;',
  'Å' => '&Aring;',
  'Æ' => '&AElig;',
  'Ç' => '&Ccedil;',
  'È' => '&Egrave;',
  'É' => '&Eacute;',
  'Ê' => '&Ecirc;',
  'Ë' => '&Euml;',
  'Ì' => '&Igrave;',
  'Í' => '&Iacute;',
  'Î' => '&Icirc;',
  'Ï' => '&Iuml;',
  'Ð' => '&ETH;',
  'Ñ' => '&Ntilde;',
  'Ò' => '&Ograve;',
  'Ó' => '&Oacute;',
  'Ô' => '&Ocirc;',
  'Õ' => '&Otilde;',
  'Ö' => '&Ouml;',
  'Ø' => '&Oslash;',
  'Ù' => '&Ugrave;',
  'Ú' => '&Uacute;',
  'Û' => '&Ucirc;',
  'Ü' => '&Uuml;',
  'Ý' => '&Yacute;',
  'Þ' => '&THORN;',
  'ß' => '&szlig;',
  'à' => '&agrave;',
  'á' => '&aacute;',
  'â' => '&acirc;',
  'ã' => '&atilde;',
  'ä' => '&auml;',
  'å' => '&aring;',
  'æ' => '&aelig;',
  'ç' => '&ccedil;',
  'è' => '&egrave;',
  'é' => '&eacute;',
  'ê' => '&ecirc;',
  'ë' => '&euml;',
  'ì' => '&igrave;',
  'í' => '&iacute;',
  'î' => '&icirc;',
  'ï' => '&iuml;',
  'ð' => '&eth;',
  'ñ' => '&ntilde;',
  'ò' => '&ograve;',
  'ó' => '&oacute;',
  'ô' => '&ocirc;',
  'õ' => '&otilde;',
  'ö' => '&ouml;',
  'ø' => '&oslash;',
  'ù' => '&ugrave;',
  'ú' => '&uacute;',
  'û' => '&ucirc;',
  'ü' => '&uuml;',
  'ý' => '&yacute;',
  'þ' => '&thorn;',
  'ÿ' => '&yuml;',
  # Mathematical operators.
  '∀' => '&forall;',
  '∂' => '&part;',
  '∃' => '&exist;',
  '∅' => '&empty;',
  '∇' => '&nabla;',
  '∈' => '&isin;',
  '∉' => '&notin;',
  '∋' => '&ni;',
  '∏' => '&prod;',
  '∑' => '&sum;',
  '−' => '&minus;',
  '∗' => '&lowast;',
  '√' => '&radic;',
  '∝' => '&prop;',
  '∞' => '&infin;',
  '∠' => '&ang;',
  '∧' => '&and;',
  '∨' => '&or;',
  '∩' => '&cap;',
  '∪' => '&cup;',
  '∫' => '&int;',
  '∴' => '&there4;',
  '∼' => '&sim;',
  '≅' => '&cong;',
  '≈' => '&asymp;',
  '≠' => '&ne;',
  '≡' => '&equiv;',
  '≤' => '&le;',
  '≥' => '&ge;',
  '⊂' => '&sub;',
  '⊃' => '&sup;',
  '⊄' => '&nsub;',
  '⊆' => '&sube;',
  '⊇' => '&supe;',
  '⊕' => '&oplus;',
  '⊗' => '&otimes;',
  '⊥' => '&perp;',
  '⋅' => '&sdot;',
  # Greek letters.
  'Α' => '&Alpha;',
  'Β' => '&Beta;',
  'Γ' => '&Gamma;',
  'Δ' => '&Delta;',
  'Ε' => '&Epsilon;',
  'Ζ' => '&Zeta;',
  'Η' => '&Eta;',
  'Θ' => '&Theta;',
  'Ι' => '&Iota;',
  'Κ' => '&Kappa;',
  'Λ' => '&Lambda;',
  'Μ' => '&Mu;',
  'Ν' => '&Nu;',
  'Ξ' => '&Xi;',
  'Ο' => '&Omicron;',
  'Π' => '&Pi;',
  'Ρ' => '&Rho;',
  'Σ' => '&Sigma;',
  'Τ' => '&Tau;',
  'Υ' => '&Upsilon;',
  'Φ' => '&Phi;',
  'Χ' => '&Chi;',
  'Ψ' => '&Psi;',
  'Ω' => '&Omega;',
  'α' => '&alpha;',
  'β' => '&beta;',
  'γ' => '&gamma;',
  'δ' => '&delta;',
  'ε' => '&epsilon;',
  'ζ' => '&zeta;',
  'η' => '&eta;',
  'θ' => '&theta;',
  'ι' => '&iota;',
  'κ' => '&kappa;',
  'λ' => '&lambda;',
  'μ' => '&mu;',
  'ν' => '&nu;',
  'ξ' => '&xi;',
  'ο' => '&omicron;',
  'π' => '&pi;',
  'ρ' => '&rho;',
  'ς' => '&sigmaf;',
  'σ' => '&sigma;',
  'τ' => '&tau;',
  'υ' => '&upsilon;',
  'φ' => '&phi;',
  'χ' => '&chi;',
  'ψ' => '&psi;',
  'ω' => '&omega;',
  'ϑ' => '&thetasym;',
  'ϒ' => '&upsih;',
  'ϖ' => '&piv;',
  # Other letters, punctuation, arrows and symbols.
  'Œ' => '&OElig;',
  'œ' => '&oelig;',
  'Š' => '&Scaron;',
  'š' => '&scaron;',
  'Ÿ' => '&Yuml;',
  'ƒ' => '&fnof;',
  'ˆ' => '&circ;',
  '˜' => '&tilde;',
  '–' => '&ndash;',
  '—' => '&mdash;',
  '‘' => '&lsquo;',
  '’' => '&rsquo;',
  '‚' => '&sbquo;',
  '“' => '&ldquo;',
  '”' => '&rdquo;',
  '„' => '&bdquo;',
  '†' => '&dagger;',
  '‡' => '&Dagger;',
  '•' => '&bull;',
  '…' => '&hellip;',
  '‰' => '&permil;',
  '′' => '&prime;',
  '″' => '&Prime;',
  '‹' => '&lsaquo;',
  '›' => '&rsaquo;',
  '‾' => '&oline;',
  '€' => '&euro;',
  '™' => '&trade;',
  '←' => '&larr;',
  '↑' => '&uarr;',
  '→' => '&rarr;',
  '↓' => '&darr;',
  '↔' => '&harr;',
  '↵' => '&crarr;',
  '⌈' => '&lceil;',
  '⌉' => '&rceil;',
  '⌊' => '&lfloor;',
  '⌋' => '&rfloor;',
  '◊' => '&loz;',
  '♠' => '&spades;',
  '♣' => '&clubs;',
  '♥' => '&hearts;',
  '♦' => '&diams;',
}.freeze
# Reverse lookup: character reference (e.g. "&amp;") => the character it denotes.
TABLE_FOR_UNESCAPE_HTML__ = TABLE_FOR_ESCAPE_HTML__.invert.freeze
# Escape the characters that are significant in HTML markup, namely '&"<>
#
# The single quote is escaped as well so that the result is safe inside
# attribute values delimited by either kind of quote. Replacements are taken
# from TABLE_FOR_ESCAPE_HTML__; all other characters are left untouched.
#
#   CGI::escapeHTML('Usage: foo "bar" <baz>')
#      # => "Usage: foo &quot;bar&quot; &lt;baz&gt;"
def CGI::escapeHTML(string)
  string.gsub(/['&"<>]/, TABLE_FOR_ESCAPE_HTML__)
end
# Unescape a string that has been HTML-escaped.
#
# Handles named references found in TABLE_FOR_UNESCAPE_HTML__ as well as
# decimal (&#38;) and hexadecimal (&#x26;) numeric references. A reference
# that is unknown, or whose character cannot be represented in the encoding
# of +string+, is left in the output instead of being dropped or raising.
#
#   CGI::unescapeHTML("Usage: foo &quot;bar&quot; &lt;baz&gt;")
#      # => "Usage: foo \"bar\" <baz>"
def CGI::unescapeHTML(string)
  enc = string.encoding
  if [Encoding::UTF_16BE, Encoding::UTF_16LE, Encoding::UTF_32BE, Encoding::UTF_32LE].include?(enc)
    # ASCII-incompatible encodings: the pattern and every replacement have to
    # be expressed in the string's own encoding.
    return string.gsub(Regexp.new('&([0-9A-Za-z]+|#[0-9]+|#x[0-9A-Fa-f]+);'.encode(enc))) do
      original = $& # saved first: the `when` regexps below overwrite $~
      name = $1.encode("US-ASCII")
      begin
        case name
        when /\A#0*(\d+)\z/ then $1.to_i.chr(enc)
        when /\A#x([0-9a-f]+)\z/i then $1.hex.chr(enc)
        else
          replacement = TABLE_FOR_UNESCAPE_HTML__["&#{name};"]
          replacement ? replacement.encode(enc) : original
        end
      rescue RangeError, EncodingError
        # Code point out of range or not convertible: keep the reference.
        original
      end
    end
  end
  asciicompat = Encoding.compatible?(string, "a")
  string.gsub(/&([0-9A-Za-z]+|\#[0-9]+|\#x[0-9A-Fa-f]+);/) do
    original = $& # saved first: the `when` regexps below overwrite $~
    match = $1.dup
    begin
      case match
      when /\A#0*(\d+)\z/
        n = $1.to_i
        if enc == Encoding::UTF_8 ||
           (enc == Encoding::ISO_8859_1 && n < 256) ||
           (asciicompat && n < 128)
          n.chr(enc)
        else
          "&##{$1};"
        end
      when /\A#x([0-9a-f]+)\z/i
        n = $1.hex
        if enc == Encoding::UTF_8 ||
           (enc == Encoding::ISO_8859_1 && n < 256) ||
           (asciicompat && n < 128)
          n.chr(enc)
        else
          "&#x#{$1};"
        end
      else
        # Named reference. The table holds UTF-8 strings, so anything that is
        # not plain ASCII must be transcoded for other target encodings.
        replacement = TABLE_FOR_UNESCAPE_HTML__["&#{match};"]
        if replacement.nil?
          original
        elsif enc == Encoding::UTF_8 || replacement.ascii_only?
          replacement
        else
          replacement.encode(enc)
        end
      end
    rescue RangeError, EncodingError
      # Code point out of range or not representable in +enc+: keep the reference.
      original
    end
  end
end
# Snake-case synonym for CGI::escapeHTML(str).
#
# Forwards +str+ unchanged and returns whatever escapeHTML returns.
def CGI.escape_html(str)
  escapeHTML(str)
end
# Snake-case synonym for CGI::unescapeHTML(str).
#
# Forwards +str+ unchanged and returns whatever unescapeHTML returns.
def CGI.unescape_html(str)
  unescapeHTML(str)
end
# Escape only the tags of certain HTML elements in +string+.
#
# The elements may be given as separate arguments or as one array. Each is
# named without angle brackets, and both its start tag and its end tag are
# matched (case-insensitively). The whole tag is passed through
# CGI::escapeHTML, so the quotes around attribute values are escaped too.
# With no element names +string+ is returned as is.
#
#   print CGI::escapeElement('<BR><A HREF="url"></A>', "A", "IMG")
#     # "<BR>&lt;A HREF=&quot;url&quot;&gt;&lt;/A&gt;"
#
#   print CGI::escapeElement('<BR><A HREF="url"></A>', ["A", "IMG"])
#     # "<BR>&lt;A HREF=&quot;url&quot;&gt;&lt;/A&gt;"
def CGI::escapeElement(string, *elements)
  names = elements[0].kind_of?(Array) ? elements[0] : elements
  return string if names.empty?

  tag_pattern = /<\/?(?:#{names.join("|")})(?!\w)(?:.|\n)*?>/i
  string.gsub(tag_pattern) { CGI::escapeHTML($&) }
end
# Undo escaping such as that done by CGI::escapeElement().
#
# Only escaped start and end tags of the named elements are passed through
# CGI::unescapeHTML; everything else stays escaped. The elements may be
# given as separate arguments or as one array. With no element names
# +string+ is returned as is.
#
#   print CGI::unescapeElement(
#           CGI::escapeHTML('<BR><A HREF="url"></A>'), "A", "IMG")
#     # "&lt;BR&gt;<A HREF="url"></A>"
#
#   print CGI::unescapeElement(
#           CGI::escapeHTML('<BR><A HREF="url"></A>'), ["A", "IMG"])
#     # "&lt;BR&gt;<A HREF="url"></A>"
def CGI::unescapeElement(string, *elements)
  names = elements[0].kind_of?(Array) ? elements[0] : elements
  return string if names.empty?

  escaped_tag = /&lt;\/?(?:#{names.join("|")})(?!\w)(?:.|\n)*?&gt;/i
  string.gsub(escaped_tag) { CGI::unescapeHTML($&) }
end
# Snake-case synonym for CGI::escapeElement(str, *elements).
#
# The element names are forwarded as given (separate arguments or a single
# array); without them escapeElement has nothing to escape and returns
# +str+ unchanged.
def CGI::escape_element(str, *elements)
  escapeElement(str, *elements)
end
# Snake-case synonym for CGI::unescapeElement(str, *elements).
#
# The element names are forwarded as given (separate arguments or a single
# array); without them unescapeElement has nothing to unescape and returns
# +str+ unchanged.
def CGI::unescape_element(str, *elements)
  unescapeElement(str, *elements)
end
# Abbreviated day-of-week names specified by RFC 822, indexed by Time#wday
# (0 = Sunday). Frozen so the shared list cannot be modified by accident.
RFC822_DAYS = %w[ Sun Mon Tue Wed Thu Fri Sat ].freeze
# Abbreviated month names specified by RFC 822, indexed by Time#month - 1.
# Frozen so the shared list cannot be modified by accident.
RFC822_MONTHS = %w[ Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec ].freeze
# Format a +Time+ object as a String using the format specified by RFC 1123.
#
# The time is converted to GMT on a copy, so the +time+ passed in keeps its
# own zone. Day and month names come from RFC822_DAYS and RFC822_MONTHS.
#
#   CGI::rfc1123_date(Time.now)
#     # Sat, 01 Jan 2000 00:00:00 GMT
def CGI::rfc1123_date(time)
  utc = time.clone.gmtime
  day_name = RFC822_DAYS[utc.wday]
  month_name = RFC822_MONTHS[utc.month - 1]
  format("%s, %.2d %s %.4d %.2d:%.2d:%.2d GMT",
         day_name, utc.day, month_name, utc.year,
         utc.hour, utc.min, utc.sec)
end
# Prettify (indent) an HTML string.
#
# +string+ is the HTML string to indent. +shift+ is the indentation
# unit to use; it defaults to two spaces.
#
# A closing tag that has no matching opening tag before it is left at its
# current indentation instead of raising or leaving markers in the output.
#
#   print CGI::pretty("<HTML><BODY></BODY></HTML>")
#     # <HTML>
#     #   <BODY>
#     #   </BODY>
#     # </HTML>
#
#   print CGI::pretty("<HTML><BODY></BODY></HTML>", "\t")
#     # <HTML>
#     #         <BODY>
#     #         </BODY>
#     # </HTML>
#
def CGI::pretty(string, shift = "  ")
  # Put every tag on a line of its own: a newline before each tag that is not
  # at the very start, and one after each tag not already followed by one.
  lines = string.gsub(/(?!\A)<.*?>/m, "\n\\0").gsub(/<.*?>(?!\n)/m, "\\0\n")
  end_pos = 0
  # Walk the closing tags in order. "__" marks tags that have been handled so
  # that the /^<\// search does not find them again.
  while (end_pos = lines.index(/^<\/(\w+)/, end_pos))
    element = $1.dup
    start_pos = lines.rindex(/^\s*<#{element}/i, end_pos)
    if start_pos.nil?
      # No opening tag to pair with: just mark the closing tag as handled.
      lines[end_pos, 0] = "__"
      next
    end
    # Indent everything between the opening and the closing tag by one unit.
    lines[start_pos...end_pos] = "__" + lines[start_pos...end_pos].gsub(/\n(?!\z)/, "\n" + shift) + "__"
  end
  # Strip the markers, keeping the indentation that precedes them.
  lines.gsub(/^((?:#{Regexp::quote(shift)})*)__(?=<\/?\w)/, '\1')
end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment