Last active
March 17, 2020 20:22
-
-
Save sultaniman/c852e74305c6064e6fba954b777d491b to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def from_packages_list(data: str) -> Generator:
    """Parse CRAN package metadata into dictionaries, one per package.

    The expected input is the text served at
    https://cran.r-project.org/src/contrib/PACKAGES, i.e. a sequence
    of ``Field: value`` lines where long values may wrap onto
    following lines.

    A new package record starts as soon as a field name repeats
    (in practice the next ``Package:`` line); blank lines are ignored.

    Args:
        data (str): raw text from the package list

    Returns:
        (Generator): each entry from packages as dictionary mapping
            field name to its (whitespace-stripped) value. Wrapped
            values are joined with a single space.
    """
    seen_fields = set()
    record = {}
    # Name of the field most recently stored in `record`; wrapped
    # lines are appended to it.
    last_field = None
    # Indentation of the most recent field line. A line indented
    # deeper than this is a wrapped value even if it contains a colon
    # (for example a URL inside a folded description).
    field_indent = 0

    for raw_line in data.splitlines():
        text = raw_line.strip()
        if not text:
            continue

        indent = len(raw_line) - len(raw_line.lstrip())
        wrapped = ":" not in raw_line or (
            last_field is not None and indent > field_indent
        )
        if wrapped:
            # Dangling lines like the ones with a long dependency
            # list, `R (>= 2.15.0), xtable, pbapply ... \n and more`.
            # A dangling line before any field has nothing to attach
            # to and is dropped.
            if last_field is not None:
                record[last_field] += f" {text}"
            continue

        # Split on the first colon only so that colons inside the
        # value (URLs, `pkg::fun`, times) are preserved.
        name, _, content = raw_line.partition(":")
        field = name.strip()

        if field in seen_fields:
            # Seeing a known field again means the previous package
            # is complete: emit it and start a fresh record.
            yield record
            record = {}
            seen_fields = set()

        record[field] = content.strip()
        seen_fields.add(field)
        last_field = field
        field_indent = indent

    # We also need to return the metadata for the last parsed package.
    if record:
        yield record
def to_cran_format(metadata: Dict) -> Optional[str]:
    """Render a metadata dictionary as ``Field: value`` lines.

    Each dictionary entry becomes one line, in the dictionary's
    iteration order, for example

        Package: A3
        Version: 1.0.0
        Depends: R (>= 2.15.0), xtable, pbapply
        Suggests: randomForest, e1071
        License: GPL (>= 2)
        MD5sum: 027ebdd8affce8f0effaecfcd5f5ade2
        NeedsCompilation: no

    Args:
        metadata (Dict): package metadata keyed by field name

    Returns:
        (Optional[str]): the newline-joined record; an empty string
            when `metadata` has no entries
    """
    rendered = []
    for name, content in metadata.items():
        rendered.append(f"{name}: {content}")
    # Lines are separated by, not terminated with, a newline.
    return "\n".join(rendered)
def from_cran_format(metadata: str) -> Dict:
    """Parse the metadata text of a single package.

    This is a convenience wrapper around `from_packages_list` for
    input describing exactly one package, in the R package metadata
    layout (see https://cran.r-project.org/src/contrib/PACKAGES):

        Package: A3
        Version: 1.0.0
        Depends: R (>= 2.15.0), xtable, pbapply
        Suggests: randomForest, e1071
        License: GPL (>= 2)
        MD5sum: 027ebdd8affce8f0effaecfcd5f5ade2
        NeedsCompilation: no

    Args:
        metadata (str): metadata text information

    Returns:
        (Dict): the parsed package record

    Raises:
        ValueError: if the text yields no record or more than one
    """
    records = list(from_packages_list(metadata))
    # Unpacking into a one-element target enforces "exactly one record".
    (only_record,) = records
    return only_record
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment