Created
May 23, 2009 06:59
-
-
Save comboy/116523 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# = rfc2047.rb | |
# | |
# An implementation of RFC 2047 decoding and encoding. | |
# | |
# This module depends on the iconv library by Nobuyoshi Nakada, which | |
# I've heard may be distributed as a standard part of Ruby 1.8. Many | |
# thanks to him for helping with building and using iconv. | |
# | |
# Thanks to "Josef 'Jupp' Schugt" <jupp@gmx.de> for pointing out an error | |
# with stateful character sets. | |
# | |
# Copyright (c) Sam Roberts <sroberts@uniserve.com> 2004 | |
# | |
# This file is distributed under the same terms as Ruby. | |
# | |
# See RFC2047 for documentation. | |
#-- | |
# $Id: rfc2047.rb,v 1.4 2003/04/18 20:55:56 sam Exp $ | |
#++ | |
begin
  require 'iconv'
rescue LoadError
  # iconv was removed from the standard library in Ruby 2.0 and may not be
  # installed; Iconv is simply left undefined in that case.
end
# The RFC2047 module provides for the decoding (#decode_to) and encoding (#encode_to) | |
# of binary data using a Base64 or Quoted-Printable representation. | |
# | |
# == Example | |
# | |
# A simple encoding and decoding. | |
# | |
# require 'rfc2047' | |
# | |
# plain = RFC2047.decode_to('ISO8859-1', '=?iso-8859-1?q?M=E5ns_Rullg=E5rd?=') | |
# # -> 'Måns Rullgård'
# plain = RFC2047.decode_to('ISO8859-1', 'Peter =?UTF-8?B?S8O2aGxtYW5u?=') | |
# # -> 'Peter Köhlmann' | |
# | |
# The purpose of using Base64 (for arbitrary binary data) or
# Quoted-Printable (for mostly-textual data) is to translate the data into
# purely printable characters. Both encodings are specified in RFC 2045;
# their use in message headers is specified in RFC 2047
# (http://www.faqs.org/rfcs/rfc2047.html).
# | |
# == To-do | |
# | |
# Method +encode_to+ is pending as yet. | |
module RFC2047
  module_function

  # Matches one RFC 2047 encoded word, "=?charset?encoding?text?=".
  # Captures: 1 = charset, 2 = encoding (B or Q, either case), 3 = encoded text.
  #
  # The "#" in the charset class is escaped on purpose: an unescaped "#$\"
  # inside a regexp literal is read by Ruby as interpolation of the $\ global,
  # which would drop "#" and "$" from the class.
  WORD = %r{=\?([!\#$%&'*+-/0-9A-Z\\^\`a-z{|}~]+)\?([BbQq])\?([!->@-~]+)\?=} #' :nodoc:

  # Matches an encoded word, the whitespace after it, and (as a lookahead only)
  # a following encoded word. The lookahead leaves the second word unconsumed,
  # so a run of three or more adjacent words is joined pair by pair.
  WORDSEQ = %r{(#{WORD.source})\s+(?=#{WORD.source})} # :nodoc:

  # Decodes a string, +from+, containing RFC 2047 encoded words into a
  # target character set, +target+, and returns the decoded copy.
  #
  # Whitespace between two adjacent encoded words is removed before decoding.
  # Each encoded word is Base64- or Quoted-Printable-decoded and then converted
  # from its declared charset to +target+. If an encoded word cannot be
  # converted to the target encoding (unknown charset, invalid byte sequence,
  # character not representable in +target+), it is left in its encoded form.
  #
  # Conversion uses Iconv when the Iconv constant is defined (run iconv -l for
  # the supported encodings) and String#encode otherwise.
  def decode_to(target, from)
    from.gsub(WORDSEQ, '\1').gsub(WORD) do |word|
      charset, encoding, text = $1, $2, $3

      # B64 or QP decode, as necessary. No else branch is needed because WORD
      # cannot match any other encoding letter.
      case encoding
      when 'b', 'B'
        text = text.unpack('m*').first
      when 'q', 'Q'
        # RFC 2047 has a variant of quoted printable where a ' ' character
        # can be represented as an '_', rather than =20, so convert any of
        # these before doing the QP decoding.
        text = text.tr('_', ' ').unpack('M*').first
      end

      # A nil result means "could not convert": fall back to the entire
      # matched encoded word, which makes the substitution a no-op.
      RFC2047.transcode(target, charset, text) || word
    end
  end

  # Converts +text+ (raw decoded bytes) from +charset+ to +target+.
  # Returns the converted string, or nil when the conversion is not possible.
  #
  # Defined on the module only (not as a module_function), so that it is not
  # one of RFC2047's private instance methods and Deprecated does not generate
  # a wrapper for it.
  def self.transcode(target, charset, text) # :nodoc:
    if defined?(::Iconv)
      begin
        # Remember - Iconv.iconv(to, from, ...)!
        ::Iconv.iconv(target, charset, text).join
      rescue Errno::EINVAL, ::Iconv::Failure
        # Iconv::Failure is mixed into every Iconv conversion error
        # (IllegalSequence, InvalidCharacter, InvalidEncoding, ...).
        nil
      end
    else
      begin
        # +text+ is a fresh string produced by unpack, so retagging it in
        # place does not affect the caller's data.
        tagged = text.force_encoding(charset)
        # String#encode does not validate when source and target encodings
        # are the same, so reject malformed input explicitly.
        tagged.valid_encoding? ? tagged.encode(target) : nil
      rescue ArgumentError, EncodingError
        # ArgumentError: unknown charset name; EncodingError: no converter,
        # invalid byte sequence, or character undefined in the target.
        nil
      end
    end
  end

  # Wrappers that keep the old receiver-less calls working: for each private
  # instance method that module_function created on RFC2047, a public method
  # of the same name prints a deprecation warning and then delegates via super.
  module Deprecated # :nodoc:
    include RFC2047

    RFC2047.private_instance_methods(false).each do |m|
      module_eval %{
        def #{m}(*args)
          warn("\#{caller(1)[0]}: #{m} is deprecated; use RFC2047.#{m} instead")
          super
        end
      }
    end
  end
end
# Mix the deprecated wrappers into the top level so that legacy code calling
# the RFC2047 methods without the RFC2047 receiver keeps working; each such
# call prints a deprecation warning before delegating.
include RFC2047::Deprecated # :nodoc:
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment