Created
January 8, 2020 03:51
-
-
Save jun66j5/ebfe88700ea67ec55eba91bddcf4e27a to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
import re
import sys
from email.header import Header, decode_header

try:
    # Lowercase module name: available on Python 2.5+ and on Python 3.
    from email.charset import Charset, QP
except ImportError:
    # Legacy capitalised module name (Python 2 only).
    from email.Charset import Charset, QP
# Maximum length of one physical line of the encoded header value.
MAX_LENGTH = 76
# Matches a string that consists solely of 7-bit ASCII characters.
ASCII_RE = re.compile(r'[\x00-\x7f]+\Z')


def _utf8_qp_charset():
    """Return a UTF-8 ``Charset`` using quoted-printable for header and body."""
    charset = Charset()
    charset.input_charset = 'utf-8'
    charset.output_charset = 'utf-8'
    charset.input_codec = 'utf-8'
    charset.output_codec = 'utf-8'
    charset.header_encoding = QP
    charset.body_encoding = QP
    return charset


def _tokenize(text, charset):
    """Split *text* into unbreakable tokens: ASCII words and encoded-words."""
    tokens = []
    # The separator class ``[^\x21-\x7f]`` covers spaces as well as non-ASCII
    # characters, so plain ASCII text is split at every space while a
    # non-ASCII run keeps its adjacent and inner spaces.
    for token in re.split(r'( *[^\x21-\x7f]+ *)', text):
        if not token:
            # ``re.split`` yields empty strings at the edges; nothing to emit.
            continue
        if ASCII_RE.match(token):
            # Pure-ASCII piece (possibly only spaces): keep the bare word.
            token = token.strip()
            if token:
                tokens.append(token)
        else:
            # Let ``Header`` build the encoded-word(s); it may return several
            # whitespace-separated words for a long run, each one a token.
            header = Header(token.encode('utf-8'), charset, MAX_LENGTH)
            tokens.extend(header.encode().split())
    return tokens


def _fold(tokens):
    """Join *tokens* with spaces, folding so lines stay within MAX_LENGTH."""
    lines = []
    current = []
    width = 0  # len(' '.join(current)), tracked incrementally
    for token in tokens:
        # Every line after the first is emitted with one leading space.
        indent = 1 if lines else 0
        # Length of the current line if this token were appended, counting
        # the space that joins it to the previous token.
        joined = width + 1 + len(token) if current else len(token)
        if current and indent + joined > MAX_LENGTH:
            lines.append(' '.join(current))
            current = [token]
            width = len(token)
        else:
            # Also taken when ``current`` is empty: an over-long token gets a
            # line of its own rather than producing an empty line before it.
            current.append(token)
            width = joined
    if current:
        lines.append(' '.join(current))
    return '\n '.join(lines)


def encode_header(text):
    """Encode *text* as a folded header value with UTF-8 "Q" encoded-words.

    Byte strings are decoded as UTF-8.  Control characters are replaced by
    spaces, surrounding whitespace is stripped and whitespace runs are
    collapsed to a single space.  ASCII words are emitted unchanged, while
    runs containing non-ASCII characters are turned into ``=?utf-8?q?...?=``
    encoded-words by ``email.header.Header``.  The resulting tokens are
    joined with single spaces and folded with ``'\\n '`` so that a physical
    line (including the leading space of continuation lines) does not exceed
    ``MAX_LENGTH``, unless a single token is itself longer than that.

    NOTE(review): a non-ASCII run in the middle of a word becomes its own
    encoded-word, separated by spaces from the ASCII remainder of that word.
    Whether a decoder re-joins the pieces without inserting spaces depends on
    the decoder -- verify against the intended consumer.

    :param text: header value as text, or as UTF-8 encoded bytes.
    :return: the encoded value; continuation lines start with one space.
    """
    if isinstance(text, bytes):
        text = text.decode('utf-8')
    text = re.sub(r'[\x00-\x1f]', ' ', text)
    text = text.strip()
    text = re.sub(r'\s{2,}', ' ', text)
    return _fold(_tokenize(text, _utf8_qp_charset()))
def decode_encoded_header(encoded):
    """Decode an encoded header value back to text.

    Each part returned by ``email.header.decode_header`` is converted to
    text (parts without a declared charset are decoded as UTF-8), the
    ``'\\n '`` fold sequence inside a part is replaced by a single space, and
    the parts are concatenated without a separator.

    :param encoded: header value, possibly containing encoded-words.
    :return: the decoded text.
    """
    parts = []
    for value, charset in decode_header(encoded):
        # On Python 3 a header without any encoded-word comes back as ``str``
        # and has no ``decode`` method; only byte strings need decoding.
        if isinstance(value, bytes):
            value = value.decode(charset or 'utf-8')
        parts.append(value.replace('\n ', ' '))
    return ''.join(parts)


if __name__ == '__main__':
    # Demo: encode each command-line argument (or the built-in samples),
    # print the encoded form, then print whether decoding it gives back the
    # original text.
    args = sys.argv[1:]
    if not args:
        args = [
            u'short-name',
            (u'Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed '
             u'do eiusmod tempor incididunt ut labore et dolore magna aliqua. '
             u'Ut enim ad minim veniam, quis nostrud exercitation ullamco '
             u'laboris nisi ut aliquip ex ea commodo consequat. Duis aute '
             u'irure dolor in reprehenderit in voluptate velit esse cillum '
             u'dolore eu fugiat nulla pariatur. Excepteur sint occaecat '
             u'cupidatat non proident, sunt in culpa qui officia deserunt '
             u'mollit anim id est laborum'),
            (u'Lorem ipsum dolor sit amet, conséctetur adipisicing elit, sed '
             u'do eiusmod tempor incididunt út labore et dolore magna aliqua. '
             u'Ut enim ad minim veniam, quis nostrud exercitatioń ullamco'),
            (u'ア イウ エオカ キクケコ サシスセソ タチツテトナ ニヌネノハヒフ '
             u'ヘホマミムメモヤユ ヨラリルレロワヰヱヲ'),
        ]
    for arg in args:
        # Python 2 passes command-line arguments as byte strings; decode
        # them so the comparison below is text against text.
        if isinstance(arg, bytes):
            arg = arg.decode('utf-8')
        encoded = encode_header(arg)
        print(encoded)
        print(decode_encoded_header(encoded) == arg)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment