comboy/gist:116523

## gistfile1.rb
# = rfc2047.rb
#
# An implementation of RFC 2047 decoding and encoding.
#
# This module depends on the iconv library by Nobuyoshi Nakada, which
# I've  heard may be distributed as a standard part of Ruby 1.8. Many
# thanks to him for helping with building and using iconv.
#
# Thanks to "Josef 'Jupp' Schugt" <jupp@gmx.de> for pointing out an error
# with stateful character sets.
#
# Copyright (c) Sam Roberts <sroberts@uniserve.com> 2004
#
# This file is distributed under the same terms as Ruby.
#
# See RFC2047 for documentation.
#--
# $Id: rfc2047.rb,v 1.4 2003/04/18 20:55:56 sam Exp $
#++

require 'iconv'

# The RFC2047 module provides for the decoding (#decode_to) and encoding (#encode_to)
# of binary data using a Base64 or Quoted-Printable representation.
#
# == Example
#
# A simple encoding and decoding.
#
#     require 'rfc2047'
#
#     plain = RFC2047.decode_to('ISO8859-1', '=?iso-8859-1?q?M=E5ns_Rullg=E5rd?=')
#                         # -> 'Måns Rullgård'
#     plain = RFC2047.decode_to('ISO8859-1', 'Peter =?UTF-8?B?S8O2aGxtYW5u?=')
#                         # -> 'Peter Köhlmann'
#
# The purpose of encoding data with Base64 (suited to arbitrary binary
# data) or Quoted-Printable (suited to mostly-ASCII text) is to represent
# it using only printable characters.  Both encodings are specified in
# RFC 2045, and their use in encoded words in RFC 2047
# (http://www.faqs.org/rfcs/rfc2047.html).
#
# == To-do
#
# Method +encode_to+ is pending as yet.

module RFC2047
  module_function

  # Matches a single encoded word, <tt>=?charset?encoding?encoded-text?=</tt>.
  # Captures: 1 = charset, 2 = encoding letter (B or Q, either case),
  # 3 = encoded text.
  #
  # Two details of the charset class are deliberate:
  # * The '#' is escaped. An unescaped '#' directly followed by '$' and a
  #   backslash is read by Ruby as interpolation of the output record
  #   separator global, which silently removes '#' and '$' from the class.
  # * '_' is listed explicitly; it sits between '^' and '`' in ASCII and is
  #   needed for charset names such as ks_c_5601-1987.
  # The '+-/' range is kept as is (it also admits ',' and '.'), so the
  # decoder stays liberal in what it accepts.
  WORD = %r{=\?([!\#$%&'*+-/0-9A-Z\\^_\`a-z{|}~]+)\?([BbQq])\?([!->@-~]+)\?=} # :nodoc:

  # An encoded word (capture 1) followed by whitespace that separates it
  # from another encoded word.
  WORDSEQ = %r{(#{WORD.source})\s+(?=#{WORD.source})} # :nodoc:

  # Decodes a string, +from+, containing RFC 2047 encoded words into a
  # target character set, +target+. Run iconv -l for information on
  # the supported target encodings.
  #
  # Text outside encoded words is returned untouched. If an encoded word
  # cannot be converted to the target encoding (unknown charset, illegal or
  # incomplete byte sequence, ...), it is left in its encoded form.
  #
  # Returns the decoded string.
  def decode_to(target, from)
    # Whitespace between two adjacent encoded words only separates them and
    # is not part of the text, so remove it before decoding.
    from = from.gsub(WORDSEQ, '\1')

    from.gsub(WORD) do |word|
      charset, encoding, text = $1, $2, $3

      # B64 or QP decode, as necessary. No else branch is needed because
      # WORD cannot match any other encoding letter.
      case encoding
      when 'b', 'B'
        text = text.unpack('m*')[0]
      when 'q', 'Q'
        # RFC 2047 uses a variant of quoted printable in which a space may
        # be written as '_' (rather than =20), so translate those before
        # the regular QP decoding.
        text = text.tr('_', ' ').unpack('M*')[0]
      end

      # Convert. Remember - Iconv.iconv(to, from, ...)!
      begin
        Iconv.iconv(target, charset, text).join
      rescue Errno::EINVAL, Iconv::Failure
        # Iconv::Failure is mixed into every Iconv exception class, so this
        # covers unknown charsets as well as bad byte sequences. Replace
        # with the entire matched encoded word, a NOOP.
        word
      end
    end
  end

  # Wrappers that let legacy code call the module functions as plain
  # methods; each call prints a deprecation warning naming the caller and
  # then delegates to the real implementation through +super+.
  module Deprecated  # :nodoc:
    include RFC2047

    # module_function leaves the instance-method copies private, so this
    # list is exactly the set of module functions defined above.
    RFC2047.private_instance_methods(false).each do |m|
      module_eval %{
        def #{m}(*args)
          warn("\#{caller(1)[0]}: #{m} is deprecated; use RFC2047.#{m} instead")
          super
        end
      }
    end
  end

end

# Backward compatibility: mix the deprecated wrappers into the top level so
# that code calling the module functions without the "RFC2047." prefix keeps
# working; each such call prints a deprecation warning before delegating.
include RFC2047::Deprecated  # :nodoc:
	# = rfc2047.rb
	#
	# An implementation of RFC 2047 decoding and encoding.
	#
	# This module depends on the iconv library by Nobuyoshi Nakada, which
	# I've heard may be distributed as a standard part of Ruby 1.8. Many
	# thanks to him for helping with building and using iconv.
	#
	# Thanks to "Josef 'Jupp' Schugt" <jupp@gmx.de> for pointing out an error
	# with stateful character sets.
	#
	# Copyright (c) Sam Roberts <sroberts@uniserve.com> 2004
	#
	# This file is distributed under the same terms as Ruby.
	#
	# See RFC2047 for documentation.
	#--
	# $Id: rfc2047.rb,v 1.4 2003/04/18 20:55:56 sam Exp $
	#++

	require 'iconv'

	# The RFC2047 module provides for the decoding (#decode_to) and encoding (#encode_to)
	# of binary data using a Base64 or Quoted-Printable representation.
	#
	# == Example
	#
	# A simple encoding and decoding.
	#
	# require 'rfc2047'
	#
	# plain = RFC2047.decode_to('ISO8859-1', '=?iso-8859-1?q?M=E5ns_Rullg=E5rd?=')
	# # -> 'Måns Rullgård'
	# plain = RFC2047.decode_to('ISO8859-1', 'Peter =?UTF-8?B?S8O2aGxtYW5u?=')
	# # -> 'Peter Köhlmann'
	#
	# The purpose of encoding data with Base64 (suited to arbitrary binary
	# data) or Quoted-Printable (suited to mostly-ASCII text) is to represent
	# it using only printable characters. Both encodings are specified in
	# RFC 2045, and their use in encoded words in RFC 2047
	# (http://www.faqs.org/rfcs/rfc2047.html).
	#
	# == To-do
	#
	# Method +encode_to+ is pending as yet.

	module RFC2047
	  module_function

	  # Matches a single encoded word, <tt>=?charset?encoding?encoded-text?=</tt>.
	  # Captures: 1 = charset, 2 = encoding letter (B or Q, either case),
	  # 3 = encoded text.
	  #
	  # Two details of the charset class are deliberate:
	  # * The '#' is escaped. An unescaped '#' directly followed by '$' and a
	  #   backslash is read by Ruby as interpolation of the output record
	  #   separator global, which silently removes '#' and '$' from the class.
	  # * '_' is listed explicitly; it sits between '^' and '`' in ASCII and is
	  #   needed for charset names such as ks_c_5601-1987.
	  # The '+-/' range is kept as is (it also admits ',' and '.'), so the
	  # decoder stays liberal in what it accepts.
	  WORD = %r{=\?([!\#$%&'*+-/0-9A-Z\\^_\`a-z{|}~]+)\?([BbQq])\?([!->@-~]+)\?=} # :nodoc:

	  # An encoded word (capture 1) followed by whitespace that separates it
	  # from another encoded word.
	  WORDSEQ = %r{(#{WORD.source})\s+(?=#{WORD.source})} # :nodoc:

	  # Decodes a string, +from+, containing RFC 2047 encoded words into a
	  # target character set, +target+. Run iconv -l for information on
	  # the supported target encodings.
	  #
	  # Text outside encoded words is returned untouched. If an encoded word
	  # cannot be converted to the target encoding (unknown charset, illegal or
	  # incomplete byte sequence, ...), it is left in its encoded form.
	  #
	  # Returns the decoded string.
	  def decode_to(target, from)
	    # Whitespace between two adjacent encoded words only separates them and
	    # is not part of the text, so remove it before decoding.
	    from = from.gsub(WORDSEQ, '\1')

	    from.gsub(WORD) do |word|
	      charset, encoding, text = $1, $2, $3

	      # B64 or QP decode, as necessary. No else branch is needed because
	      # WORD cannot match any other encoding letter.
	      case encoding
	      when 'b', 'B'
	        text = text.unpack('m*')[0]
	      when 'q', 'Q'
	        # RFC 2047 uses a variant of quoted printable in which a space may
	        # be written as '_' (rather than =20), so translate those before
	        # the regular QP decoding.
	        text = text.tr('_', ' ').unpack('M*')[0]
	      end

	      # Convert. Remember - Iconv.iconv(to, from, ...)!
	      begin
	        Iconv.iconv(target, charset, text).join
	      rescue Errno::EINVAL, Iconv::Failure
	        # Iconv::Failure is mixed into every Iconv exception class, so this
	        # covers unknown charsets as well as bad byte sequences. Replace
	        # with the entire matched encoded word, a NOOP.
	        word
	      end
	    end
	  end

	  # Wrappers that let legacy code call the module functions as plain
	  # methods; each call prints a deprecation warning naming the caller and
	  # then delegates to the real implementation through +super+.
	  module Deprecated # :nodoc:
	    include RFC2047

	    # module_function leaves the instance-method copies private, so this
	    # list is exactly the set of module functions defined above.
	    RFC2047.private_instance_methods(false).each do |m|
	      module_eval %{
	        def #{m}(*args)
	          warn("\#{caller(1)[0]}: #{m} is deprecated; use RFC2047.#{m} instead")
	          super
	        end
	      }
	    end
	  end

	end

	# Backward compatibility: mix the deprecated wrappers into the top level so
	# that code calling the module functions without the "RFC2047." prefix keeps
	# working; each such call prints a deprecation warning before delegating.
	include RFC2047::Deprecated # :nodoc: