# Created May 23, 2009 06:59
# = rfc2047.rb
# An implementation of RFC 2047 decoding and encoding.
# This module depends on the iconv library by Nobuyoshi Nakada, which
# I've heard may be distributed as a standard part of Ruby 1.8. Many
# thanks to him for helping with building and using iconv.
# Thanks to "Josef 'Jupp' Schugt" <> for pointing out an error
# with stateful character sets.
# Copyright (c) Sam Roberts <> 2004
# This file is distributed under the same terms as Ruby.
# See RFC2047 for documentation.
# $Id: rfc2047.rb,v 1.4 2003/04/18 20:55:56 sam Exp $
require 'iconv'
# The RFC2047 module provides for the decoding (#decode_to) and encoding (#encode_to)
# of binary data using a Base64 or Quoted-Printable representation.
# == Example
# A simple encoding and decoding.
# require 'rfc2047'
# plain = RFC2047.decode_to('ISO8859-1', '=?iso-8859-1?q?M=E5ns_Rullg=E5rd?=')
# # -> 'Måns Rullgård'
# plain = RFC2047.decode_to('ISO8859-1', 'Peter =?UTF-8?B?S8O2aGxtYW5u?=')
# # -> 'Peter Köhlmann'
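# The purpose of using Base64 (for arbitrary binary data) or
# Quoted-Printable (for data that is mostly text) is to translate the data
# into purely printable characters. The encodings are specified in RFC 2045
# and their use in message headers in RFC 2047
# (https://www.rfc-editor.org/rfc/rfc2045 and https://www.rfc-editor.org/rfc/rfc2047).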
# == To-do
# Method +encode_to+ is pending as yet.
module RFC2047
  # Matches one RFC 2047 encoded word, "=?charset?encoding?text?=", capturing
  # the charset, the encoding flag (B or Q, in either case) and the encoded
  # text. '#' and '$' are escaped inside the character class so that the
  # sequence "#$\" cannot be read as an interpolation of the $\ global.
  WORD = %r{=\?([!\#\$%&'*+-/0-9A-Z\\^\`a-z{|}~]+)\?([BbQq])\?([!->@-~]+)\?=} #' :nodoc:

  # Matches an encoded word followed by whitespace and then another encoded
  # word; the first word is captured as group 1 and the second is only
  # looked ahead at, so it stays available for the next match.
  WORDSEQ = %r{(#{WORD.source})\s+(?=#{WORD.source})} # :nodoc:

  # Make the methods below callable as RFC2047.decode_to(...), which is how
  # the file's own examples and the deprecation warning refer to them. The
  # instance-method copies become private.
  module_function

  # Decodes a string, +from+, containing RFC 2047 encoded words into a
  # target character set, +target+. Run iconv -l for information on
  # the supported target encodings. If one of the encoded words cannot be
  # converted to the target encoding, it is left in its encoded form.
  #
  # Returns a new string; +from+ itself is not modified.
  def decode_to(target, from)
    # Drop the whitespace that separates two adjacent encoded words, keeping
    # only the first word of each pair (group 1 of WORDSEQ).
    from = from.gsub(WORDSEQ, '\1')

    from.gsub(WORD) do |word|
      charset, encoding, text = $1, $2, $3

      # B64 or QP decode, as necessary:
      case encoding
      when 'b', 'B'
        text = text.unpack('m*')[0]
      when 'q', 'Q'
        # RFC 2047 has a variant of quoted printable where a ' '
        # character can be represented as an '_', so convert any of
        # these that we find before doing the QP decoding.
        text = text.tr('_', ' ').unpack('M*')[0]
        # No else branch is needed: WORD only matches B, b, Q or q here.
      end

      # Convert. Note the argument order: Iconv.iconv(to, from, text).
      begin
        Iconv.iconv(target, charset, text).join
      rescue Errno::EINVAL, Iconv::IllegalSequence
        # Replace with the entire matched encoded word, a NOOP.
        word
      end
    end
  end

  # Wrappers that let the methods be called without the RFC2047 receiver.
  # Each wrapper prints a deprecation warning naming the call site and then
  # delegates to the real implementation through +super+.
  module Deprecated # :nodoc:
    include RFC2047

    RFC2047.private_instance_methods(false).each do |m|
      module_eval %{
        def #{m}(*args)
          warn("\#{caller(1)[0]}: #{m} is deprecated; use RFC2047.#{m} instead")
          super
        end
      }
    end
  end
end

# Mixes the deprecated wrappers into the top-level object.
include RFC2047::Deprecated # :nodoc:
