Skip to content

Instantly share code, notes, and snippets.

@sultaniman
Last active March 17, 2020 20:22
Show Gist options
  • Save sultaniman/c852e74305c6064e6fba954b777d491b to your computer and use it in GitHub Desktop.
Save sultaniman/c852e74305c6064e6fba954b777d491b to your computer and use it in GitHub Desktop.
def from_packages_list(data: str) -> Generator:
    """Parse CRAN package metadata into one dictionary per package.

    The expected input is the text served at
    https://cran.r-project.org/src/contrib/PACKAGES, i.e. a sequence of
    ``Field: value`` lines. Blank lines are ignored; a new package record
    starts as soon as a field name repeats one already present in the
    record being built (in practice, the next ``Package:`` line).

    Any line containing a colon is treated as a ``Field: value`` line;
    a line without one is treated as the continuation of the previous
    field's value and is appended to it, separated by a single space.

    Args:
        data (str): raw text from the package list

    Returns:
        (Generator): each entry from packages as a ``{field: value}``
            dictionary, in input order
    """
    record = {}
    last_field = None

    for line in data.splitlines():
        stripped = line.strip()
        if not stripped:
            continue

        if ":" not in line:
            # Dangling line, like the tail of a long dependency list:
            # `R (>= 2.15.0), xtable, pbapply ... \n and more`.
            # It is dropped when no field has been seen yet.
            if last_field is not None:
                record[last_field] += f" {stripped}"
            continue

        # Split on the FIRST colon only, so values that themselves
        # contain colons (URLs, timestamps) are kept intact.
        field, _, value = line.partition(":")
        field = field.strip()
        value = value.strip()

        # Seeing a field name again means the previous package is
        # complete: emit it and start accumulating a fresh record.
        if field in record:
            yield record
            record = {}

        record[field] = value
        last_field = field

    # The last package has no following record to trigger its yield.
    if record:
        yield record
def to_cran_format(metadata: Dict) -> Optional[str]:
    """Render a metadata dictionary as CRAN ``Field: value`` lines.

    Each dictionary item becomes one ``key: value`` line; lines follow
    the dictionary's iteration order and are joined with newlines, with
    no trailing newline. For example:

        Package: A3
        Version: 1.0.0
        Depends: R (>= 2.15.0), xtable, pbapply
        Suggests: randomForest, e1071
        License: GPL (>= 2)
        MD5sum: 027ebdd8affce8f0effaecfcd5f5ade2
        NeedsCompilation: no

    Args:
        metadata (Dict): package fields mapped to their values

    Returns:
        (Optional[str]): the package record as text; an empty string
            for an empty dictionary
    """
    rendered_fields = (
        "{}: {}".format(field, content)
        for field, content in metadata.items()
    )
    return "\n".join(rendered_fields)
def from_cran_format(metadata: str) -> Dict:
    """Parse the metadata of a single CRAN package.

    Note: this is a shorthand for `from_packages_list` that unwraps
    the one record it produces. The input must describe exactly one
    package.

    Input should be in the following format, which is the R package
    metadata description,
    see: https://cran.r-project.org/src/contrib/PACKAGES

        Package: A3
        Version: 1.0.0
        Depends: R (>= 2.15.0), xtable, pbapply
        Suggests: randomForest, e1071
        License: GPL (>= 2)
        MD5sum: 027ebdd8affce8f0effaecfcd5f5ade2
        NeedsCompilation: no

    Args:
        metadata (str): metadata text information for one package

    Returns:
        (Dict): the package fields as a ``{field: value}`` dictionary

    Raises:
        ValueError: if ``metadata`` contains no package record, or
            more than one
    """
    records = iter(from_packages_list(metadata))

    package = next(records, None)
    if package is None:
        raise ValueError("metadata does not contain a package record")

    # Only peek at one more record; there is no need to parse the
    # rest of the input just to reject it.
    if next(records, None) is not None:
        raise ValueError("metadata contains more than one package record")

    return package
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment