jun66j5/mime-encoding.py

## mime-encoding.py
# -*- coding: utf-8 -*-

import re
import sys
from email.header import Header, decode_header
from email.Charset import Charset, QP


MAX_LENGTH = 76  # upper bound, in characters, for each folded output line

# Matches a token that consists solely of 7-bit ASCII characters.
ASCII_RE = re.compile(r'[\x00-\x7f]+\Z')


def encode_header(text):
    """Encode *text* as a folded header value using RFC 2047 encoded-words.

    Control characters are replaced by spaces and whitespace runs are
    collapsed to one space.  Runs of characters in U+0021..U+007F are
    emitted verbatim; every other run (together with the spaces around it)
    is emitted as UTF-8 quoted-printable encoded-words.  The tokens are
    joined with single spaces and folded so that no output line is longer
    than ``MAX_LENGTH``, unless a single token is itself too long to fit.

    :param text: the header value, as text or as UTF-8 encoded bytes.
    :return: the encoded value; folded lines are separated by ``'\\n '``
             (a newline followed by one space).
    :raises UnicodeDecodeError: if *text* is bytes that are not valid UTF-8.
    """
    if isinstance(text, bytes):
        text = text.decode('utf-8')
    # Control characters (CR and LF included) become spaces, then the value
    # is trimmed and whitespace runs are squeezed to a single space.
    text = re.sub(r'[\x00-\x1f]', ' ', text)
    text = text.strip()
    text = re.sub(r'\s{2,}', ' ', text)

    charset = Charset()
    charset.input_charset = 'utf-8'
    charset.output_charset = 'utf-8'
    charset.input_codec = 'utf-8'
    charset.output_codec = 'utf-8'
    charset.header_encoding = QP
    charset.body_encoding = QP

    # The capture group keeps the delimiter runs in the split result.  A
    # space lies outside \x21-\x7f, so every space is a delimiter as well.
    tokens = []
    for token in re.split(r'( *[^\x21-\x7f]+ *)', text):
        if not token:
            # re.split() yields '' next to a leading or trailing match.
            continue
        if ASCII_RE.match(token):
            token = token.strip()
            if token:
                tokens.append(token)
        else:
            h = Header(token.encode('utf-8'), charset, MAX_LENGTH)
            tokens.extend(h.encode().split())

    lines = []
    buf = []
    width = 0  # len(' '.join(buf)), maintained incrementally
    for token in tokens:
        # Every line after the first is emitted with a one-space indent.
        indent = 1 if lines else 0
        needed = width + 1 + len(token) if buf else len(token)
        # Never flush an empty buffer: an oversized token simply gets a
        # line of its own instead of producing an empty line before it.
        if buf and indent + needed > MAX_LENGTH:
            lines.append(' '.join(buf))
            buf = [token]
            width = len(token)
        else:
            buf.append(token)
            width = needed
    if buf:
        lines.append(' '.join(buf))
    return '\n '.join(lines)


if __name__ == '__main__':
    # Demo / self-check: encode each command-line argument (or a set of
    # built-in samples), print the encoded form, then print whether
    # decoding it again yields the original text.
    args = sys.argv[1:]
    if not args:
        args = [
            u'short-name',
            (u'Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed '
             u'do eiusmod tempor incididunt ut labore et dolore magna aliqua. '
             u'Ut enim ad minim veniam, quis nostrud exercitation ullamco '
             u'laboris nisi ut aliquip ex ea commodo consequat. Duis aute '
             u'irure dolor in reprehenderit in voluptate velit esse cillum '
             u'dolore eu fugiat nulla pariatur. Excepteur sint occaecat '
             u'cupidatat non proident, sunt in culpa qui officia deserunt '
             u'mollit anim id est laborum'),
            (u'Lorem ipsum dolor sit amet, conséctetur adipisicing elit, sed '
             u'do eiusmod tempor incididunt út labore et dolore magna aliqua. '
             u'Ut enim ad minim veniam, quis nostrud exercitatioń ullamco'),
            (u'ア イウ エオカ キクケコ サシスセソ タチツテトナ ニヌネノハヒフ '
             u'ヘホマミムメモヤユ ヨラリルレロワヰヱヲ'),
        ]
    for arg in args:
        # On Python 2 command-line arguments are byte strings; decode them
        # (as UTF-8, like encode_header does) so the comparison below is
        # text against text.
        if isinstance(arg, bytes):
            arg = arg.decode('utf-8')
        encoded = encode_header(arg)
        print(encoded)
        # decode_header() may hand back bytes or text parts depending on
        # the Python version and on whether any encoded-word was found.
        decoded = u''.join(
            (part.decode(charset or 'utf-8')
             if isinstance(part, bytes) else part).replace('\n ', ' ')
            for part, charset in decode_header(encoded))
        print(decoded == arg)
        # print('') emits a blank line under both the print statement and
        # the print function; a bare ``print`` is a no-op on Python 3.
        print('')
	# -*- coding: utf-8 -*-

	import re
	import sys
	from email.header import Header, decode_header
	from email.Charset import Charset, QP


	MAX_LENGTH = 76

	ASCII_RE = re.compile(r'[\x00-\x7f]+\Z')


	def encode_header(text):
		"""Encode *text* as a folded header value using RFC 2047 encoded-words.

		Control characters are replaced by spaces and whitespace runs are
		collapsed to one space.  Runs of characters in U+0021..U+007F are
		emitted verbatim; every other run (together with the spaces around
		it) is emitted as UTF-8 quoted-printable encoded-words.  The tokens
		are joined with single spaces and folded so that no output line is
		longer than ``MAX_LENGTH``, unless a single token is itself too long.

		:param text: the header value, as text or as UTF-8 encoded bytes.
		:return: the encoded value; folded lines are separated by ``'\\n '``
		         (a newline followed by one space).
		:raises UnicodeDecodeError: if *text* is bytes that are not valid
		         UTF-8.
		"""
		if isinstance(text, bytes):
			text = text.decode('utf-8')
		# Control characters (CR and LF included) become spaces, then the
		# value is trimmed and whitespace runs are squeezed to one space.
		text = re.sub(r'[\x00-\x1f]', ' ', text)
		text = text.strip()
		text = re.sub(r'\s{2,}', ' ', text)

		charset = Charset()
		charset.input_charset = 'utf-8'
		charset.output_charset = 'utf-8'
		charset.input_codec = 'utf-8'
		charset.output_codec = 'utf-8'
		charset.header_encoding = QP
		charset.body_encoding = QP

		# The capture group keeps the delimiter runs in the split result.
		# A space lies outside \x21-\x7f, so every space is a delimiter too.
		tokens = []
		for token in re.split(r'( *[^\x21-\x7f]+ *)', text):
			if not token:
				# re.split() yields '' next to a leading or trailing match.
				continue
			if ASCII_RE.match(token):
				token = token.strip()
				if token:
					tokens.append(token)
			else:
				h = Header(token.encode('utf-8'), charset, MAX_LENGTH)
				tokens.extend(h.encode().split())

		lines = []
		buf = []
		width = 0  # len(' '.join(buf)), maintained incrementally
		for token in tokens:
			# Every line after the first is emitted with a one-space indent.
			indent = 1 if lines else 0
			needed = width + 1 + len(token) if buf else len(token)
			# Never flush an empty buffer: an oversized token simply gets
			# a line of its own instead of producing an empty line first.
			if buf and indent + needed > MAX_LENGTH:
				lines.append(' '.join(buf))
				buf = [token]
				width = len(token)
			else:
				buf.append(token)
				width = needed
		if buf:
			lines.append(' '.join(buf))
		return '\n '.join(lines)


	if __name__ == '__main__':
		# Demo / self-check: encode each command-line argument (or a set of
		# built-in samples), print the encoded form, then print whether
		# decoding it again yields the original text.
		args = sys.argv[1:]
		if not args:
			args = [
				u'short-name',
				(u'Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed '
				 u'do eiusmod tempor incididunt ut labore et dolore magna aliqua. '
				 u'Ut enim ad minim veniam, quis nostrud exercitation ullamco '
				 u'laboris nisi ut aliquip ex ea commodo consequat. Duis aute '
				 u'irure dolor in reprehenderit in voluptate velit esse cillum '
				 u'dolore eu fugiat nulla pariatur. Excepteur sint occaecat '
				 u'cupidatat non proident, sunt in culpa qui officia deserunt '
				 u'mollit anim id est laborum'),
				(u'Lorem ipsum dolor sit amet, conséctetur adipisicing elit, sed '
				 u'do eiusmod tempor incididunt út labore et dolore magna aliqua. '
				 u'Ut enim ad minim veniam, quis nostrud exercitatioń ullamco'),
				(u'アイウエオカキクケコサシスセソタチツテトナニヌネノハヒフ '
				 u'ヘホマミムメモヤユヨラリルレロワヰヱヲ'),
			]
		for arg in args:
			# On Python 2 command-line arguments are byte strings; decode
			# them (as UTF-8, like encode_header does) so the comparison
			# below is text against text.
			if isinstance(arg, bytes):
				arg = arg.decode('utf-8')
			encoded = encode_header(arg)
			print(encoded)
			# decode_header() may hand back bytes or text parts depending on
			# the Python version and on whether any encoded-word was found.
			decoded = u''.join(
				(part.decode(charset or 'utf-8')
				 if isinstance(part, bytes) else part).replace('\n ', ' ')
				for part, charset in decode_header(encoded))
			print(decoded == arg)
			# print('') emits a blank line under both the print statement
			# and the print function; a bare ``print`` is a no-op on
			# Python 3.
			print('')