Skip to content

Instantly share code, notes, and snippets.

@jun66j5
Created January 8, 2020 03:51
Show Gist options
  • Save jun66j5/ebfe88700ea67ec55eba91bddcf4e27a to your computer and use it in GitHub Desktop.
Save jun66j5/ebfe88700ea67ec55eba91bddcf4e27a to your computer and use it in GitHub Desktop.
# -*- coding: utf-8 -*-
import re
import sys
from email.header import Header, decode_header
from email.Charset import Charset, QP
# Target maximum length of every physical line of a folded header value.
MAX_LENGTH = 76
# Matches a non-empty string made up solely of 7-bit ASCII characters.
ASCII_RE = re.compile(r'[\x00-\x7f]+\Z')


def _fold_tokens(tokens, max_length):
    """Greedily pack *tokens* into lines of at most *max_length* characters.

    Tokens on one line are separated by a single space.  Every line after
    the first is assumed to be emitted with a one-character continuation
    indent, which is counted against *max_length*.  A token that is longer
    than *max_length* on its own is placed alone on a line, never split.

    Returns the list of lines without the continuation indent.
    """
    lines = []
    current = []
    width = 0  # len(' '.join(current)), maintained incrementally
    for token in tokens:
        indent = 1 if lines else 0
        needed = len(token) + (1 if current else 0)  # token + separator
        # Only flush a non-empty line; flushing an empty one would emit a
        # spurious blank line in front of an overlong token.
        if current and indent + width + needed > max_length:
            lines.append(' '.join(current))
            current = [token]
            width = len(token)
        else:
            current.append(token)
            width += needed
    if current:
        lines.append(' '.join(current))
    return lines


def encode_header(text):
    """Return *text* encoded and folded for use as a mail header value.

    Byte strings are decoded as UTF-8.  Control characters are replaced by
    spaces, surrounding whitespace is stripped and runs of whitespace are
    collapsed to one space.  Runs of characters outside printable ASCII
    (together with the spaces adjoining them) are turned into UTF-8
    quoted-printable encoded-words; everything else is kept verbatim.

    The resulting tokens are packed into lines of at most MAX_LENGTH
    characters, joined with a newline followed by one space.

    NOTE(review): a word that mixes ASCII and non-ASCII characters is split
    into separate tokens that are re-joined with a space, so recovering the
    original text relies on the decoder dropping whitespace next to
    encoded-words -- verify against the decoder in use.
    """
    if isinstance(text, bytes):
        text = text.decode('utf-8')
    # Control characters (CR/LF included) must not reach the header as-is.
    text = re.sub(r'[\x00-\x1f]', ' ', text)
    text = text.strip()
    text = re.sub(r'\s{2,}', ' ', text)

    # Configure the charset by hand: UTF-8 in and out, quoted-printable for
    # both header and body encoding.
    charset = Charset()
    charset.input_charset = 'utf-8'
    charset.output_charset = 'utf-8'
    charset.input_codec = 'utf-8'
    charset.output_codec = 'utf-8'
    charset.header_encoding = QP
    charset.body_encoding = QP

    tokens = []
    # The capturing group keeps the separators (spaces and non-ASCII runs)
    # in the result so that they can be encoded rather than discarded.
    for token in re.split(r'( *[^\x21-\x7f]+ *)', text):
        if ASCII_RE.match(token):
            token = token.strip()
            if token:
                tokens.append(token)
        else:
            h = Header(token.encode('utf-8'), charset, MAX_LENGTH)
            # Header may itself fold a long run into several encoded-words.
            tokens.extend(h.encode().split())
    return '\n '.join(_fold_tokens(tokens, MAX_LENGTH))
if __name__ == '__main__':
    # Self-check driver: encode each command-line argument (or a built-in
    # set of samples when none is given), print the encoded value, then
    # print whether decoding it yields the original text again.
    args = sys.argv[1:]
    if not args:
        args = [
            u'short-name',
            (u'Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed '
             u'do eiusmod tempor incididunt ut labore et dolore magna aliqua. '
             u'Ut enim ad minim veniam, quis nostrud exercitation ullamco '
             u'laboris nisi ut aliquip ex ea commodo consequat. Duis aute '
             u'irure dolor in reprehenderit in voluptate velit esse cillum '
             u'dolore eu fugiat nulla pariatur. Excepteur sint occaecat '
             u'cupidatat non proident, sunt in culpa qui officia deserunt '
             u'mollit anim id est laborum'),
            (u'Lorem ipsum dolor sit amet, conséctetur adipisicing elit, sed '
             u'do eiusmod tempor incididunt út labore et dolore magna aliqua. '
             u'Ut enim ad minim veniam, quis nostrud exercitatioń ullamco'),
            (u'ア イウ エオカ キクケコ サシスセソ タチツテトナ ニヌネノハヒフ '
             u'ヘホマミムメモヤユ ヨラリルレロワヰヱヲ'),
        ]
    for arg in args:
        encoded = encode_header(arg)
        print(encoded)
        # encode_header() treats byte strings as UTF-8; decode the argument
        # the same way so the comparison below is text against text.
        if isinstance(arg, bytes):
            arg = arg.decode('utf-8')
        # decode_header() may hand back byte strings or text depending on
        # the Python version; only byte strings need decoding.
        decoded = ''.join(
            (part.decode(part_charset or 'utf-8')
             if isinstance(part, bytes) else part).replace('\n ', ' ')
            for part, part_charset in decode_header(encoded))
        print(decoded == arg)
        # Blank separator line between samples (a bare ``print`` statement
        # emits nothing on Python 3).
        print('')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment