Skip to content

Instantly share code, notes, and snippets.

@pfmoore
Created June 22, 2020 14:56
Show Gist options
  • Save pfmoore/20f3654ca33f8b14f0fcb6dfa1a6b469 to your computer and use it in GitHub Desktop.
Save pfmoore/20f3654ca33f8b14f0fcb6dfa1a6b469 to your computer and use it in GitHub Desktop.
Python packaging metadata parser
import re
import json
from email.message import EmailMessage
from email import message_from_string
class InvalidMetadata(Exception):
    """Raised when packaging metadata is malformed or has unexpected values."""
# Header names that may appear at most once in a metadata file.
# The order here is the order in which the fields are emitted.
SINGLE_USE = [
    # Identification
    "Metadata-Version",
    "Name",
    "Version",
    # Descriptive text
    "Summary",
    "Description",
    "Description-Content-Type",
    # Links
    "Home-page",
    "Download-URL",
    # People
    "Author",
    "Author-email",
    "Maintainer",
    "Maintainer-email",
    # Legal / compatibility
    "License",
    "Requires-Python",
]
# Header names whose values are held as lists.
# The order here is the order in which the fields are emitted.
MULTIPLE_USE = [
    # Platforms and classification
    "Platform",
    "Supported-Platform",
    "Classifier",
    # Dependencies and links
    "Requires-Dist",
    "Requires-External",
    "Project-URL",
    "Provides-Extra",
    "Provides-Dist",
    "Obsoletes-Dist",
    # Technically single-use, but handled specially
    "Keywords",
]
def json_form(val: str) -> str:
    """Return the JSON key for a metadata header name.

    The name is lower-cased and every "-" is replaced by "_",
    e.g. "Home-page" becomes "home_page".
    """
    return val.lower().replace("-", "_")
# JSON-form key names for each group of fields, kept as sets for
# membership tests during validation.
SINGLE_USE_KEYS = {json_form(name) for name in SINGLE_USE}
MULTIPLE_USE_KEYS = {json_form(name) for name in MULTIPLE_USE}
def validate_metadata_dict(meta: dict) -> None:
    """Check that *meta* only holds known keys with values of the right type.

    Keys are expected in JSON form (see ``json_form``). Keys derived from
    ``SINGLE_USE`` must map to a string; keys derived from ``MULTIPLE_USE``
    must map to a list of strings.

    Raises:
        InvalidMetadata: if a key is unknown, or a value (or a list item)
            has the wrong type.
    """
    for key, val in meta.items():
        if key in SINGLE_USE_KEYS:
            if not isinstance(val, str):
                raise InvalidMetadata(f"Non-string value for {key}: {val}")
        elif key in MULTIPLE_USE_KEYS:
            if not isinstance(val, list):
                raise InvalidMetadata(f"Non-list value for {key}: {val}")
            # Reject bad items here instead of letting them fail later
            # during serialization (header creation / keyword joining).
            if not all(isinstance(item, str) for item in val):
                raise InvalidMetadata(f"Non-string item in {key}: {val}")
        else:
            raise InvalidMetadata(f"Unknown key {key}: {val}")
class Metadata:
    """Packaging metadata held as a dict keyed by JSON-form field names.

    Single-use fields are stored as strings and multiple-use fields
    (including ``keywords``) as lists of strings. Instances can be built
    from, and serialized to, both JSON and the RFC 822 style header format.
    """

    def __init__(self, **kw):
        """Validate the keyword arguments and store them as the metadata.

        Raises:
            InvalidMetadata: if a key is unknown or a value has the wrong type.
        """
        validate_metadata_dict(kw)
        self.metadata = kw

    def __eq__(self, other):
        """Compare by metadata content; defer to Python for other types."""
        if not isinstance(other, Metadata):
            # Returning NotImplemented (rather than falling through to None)
            # lets Python try the reflected comparison and then fall back
            # to its default, so `metadata == 5` is a real False.
            return NotImplemented
        return self.metadata == other.metadata

    # Instances are mutable and compare by value, so they are unhashable.
    # (Python already does this when __eq__ is defined; made explicit here.)
    __hash__ = None

    def __repr__(self):
        """Return a ``Metadata(key=value, ...)`` representation."""
        args = ", ".join(f"{key}={val!r}" for key, val in self.metadata.items())
        return f"{type(self).__name__}({args})"

    @classmethod
    def from_json(cls, data):
        """Build an instance from a JSON object string.

        Raises:
            InvalidMetadata: if the decoded keys/values fail validation.
        """
        return cls(**json.loads(data))

    @classmethod
    def from_rfc822(cls, data):
        """Build an instance from RFC 822 style header text.

        Headers become metadata fields; a non-empty message body becomes
        the description, taking precedence over a ``Description`` header.
        The single ``Keywords`` header is split on commas and whitespace.

        Raises:
            InvalidMetadata: if ``Keywords`` appears more than once, or the
                resulting metadata fails validation.
        """
        metadata = {}
        msg = message_from_string(data)

        for field in SINGLE_USE:
            value = msg.get(field)
            if value:
                metadata[json_form(field)] = value

        for field in MULTIPLE_USE:
            value = msg.get_all(field)
            if not value:
                continue
            if field == "Keywords":
                if len(value) > 1:
                    raise InvalidMetadata(
                        f"Keywords given {len(value)} times, expected at most once"
                    )
                # Leading/trailing separators make re.split() yield empty
                # strings; those are not keywords, so drop them.
                value = [
                    word
                    for word in re.split(r"\s*(?:,|\s)\s*", value[0])
                    if word
                ]
                if not value:
                    continue
            metadata[json_form(field)] = value

        payload = msg.get_payload()
        if payload:
            if "description" in metadata:
                print("Both Description and payload given - ignoring Description")
            metadata["description"] = payload
        return cls(**metadata)

    def as_json(self):
        """Return the metadata serialized as a JSON object string."""
        return json.dumps(self.metadata)

    def as_rfc822(self):
        """Return the metadata serialized as RFC 822 style header text.

        Fields are written in ``SINGLE_USE`` then ``MULTIPLE_USE`` order.
        The description is written as the message body rather than as a
        header, and keywords are joined into one comma-separated header.
        """
        msg = EmailMessage()
        for field in SINGLE_USE + MULTIPLE_USE:
            value = self.metadata.get(json_form(field))
            if not value:
                continue
            if field == "Description":
                # Special case - put in payload
                msg.set_payload(value)
                continue
            if field == "Keywords":
                value = ", ".join(value)
            if isinstance(value, str):
                value = [value]
            for item in value:
                msg.add_header(field, item)
        # EmailMessage's default policy folds headers at 78 columns and
        # RFC 2047-encodes non-ASCII text. from_rfc822() parses with the
        # legacy policy, which undoes neither, so long or non-ASCII values
        # would not round-trip. Emit unfolded UTF-8 headers instead.
        policy = msg.policy.clone(utf8=True, max_line_length=0)
        return msg.as_string(policy=policy)
if __name__ == '__main__':
    # Smoke test: serialize one sample to both formats, print the results,
    # then parse each back and check nothing changed along the way.
    m = Metadata(
        name="foo",
        version="1.0",
        keywords=["a", "b", "c"],
        description="Hello\nworld",
    )
    rfc822_data = m.as_rfc822()
    json_data = m.as_json()
    print(rfc822_data)
    print(json_data)
    m2 = Metadata.from_json(json_data)
    m3 = Metadata.from_rfc822(rfc822_data)
    assert m == m2, "Metadata changed"
    assert m == m3, "Metadata changed"
@MrMino
Copy link

MrMino commented Jan 5, 2021

Hi. Would you be willing to add a license to this? It's exactly what I need for my project, wheelfile.

@pfmoore
Copy link
Author

pfmoore commented Jan 5, 2021

Sure, but the plan is that there will ultimately be a "better" version in the packaging library - although I think work on that is a bit stalled at the moment. How will you be licensing your project?

@MrMino
Copy link

MrMino commented Jan 5, 2021

Thanks! I'll be adding the MIT license.

@pfmoore
Copy link
Author

pfmoore commented Jan 5, 2021

You can consider this code as under MIT as well.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment