Skip to content

Instantly share code, notes, and snippets.

@bnewbold
Created May 5, 2020 20:58
Show Gist options
  • Save bnewbold/9918634282f6013e13174badbce64a93 to your computer and use it in GitHub Desktop.
Save bnewbold/9918634282f6013e13174badbce64a93 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
"""
Depends on:
- articlemetaapi
Refs:
- https://github.com/scieloorg/articlemetaapi/blob/master/articlemeta/client.py
- https://github.com/scieloorg/xylose/blob/master/xylose/scielodocument.py
"""
import os, sys, json, argparse
from articlemeta.client import RestfulClient
from xylose.scielodocument import Article, Journal, Issue, Citation
def reduce_dict(d):
    """
    Strip every entry whose value is None from ``d`` and hand ``d`` back.

    The dict is modified in place; the returned object is the very same
    dict. Meant for dicts that are about to be serialized as JSON, where
    null-valued entries only add verbosity.
    """
    # Collect the keys up front: deleting entries while iterating over the
    # dict itself would raise RuntimeError.
    empty_keys = [name for name, value in d.items() if value is None]
    for name in empty_keys:
        del d[name]
    return d
def obj_to_dict(obj):
    """
    Convert a xylose Article, Journal, Issue or Citation into a plain dict.

    Journal ref: https://github.com/scieloorg/xylose/blob/master/xylose/scielodocument.py#L688

    Every public name reported by dir() is copied into the result. Bound
    methods are called with no arguments and their return value is stored;
    everything else (the @property values we are mainly after) is stored
    as-is. 'data', 'bibliographic_legends' and 'any_issn' are skipped
    explicitly because they don't work right with this generic approach.

    The 'citations', 'journal' and 'issue' entries, when truthy, are
    converted recursively; citation dicts also get their None values
    removed via reduce_dict().

    Anything that is not an instance of one of the four xylose types is
    returned unchanged.
    """
    # sometimes... Citations? are not objects
    # isinstance (rather than an exact type match) so subclasses convert too
    if not isinstance(obj, (Article, Journal, Issue, Citation)):
        return obj

    d = {}
    for key in dir(obj):
        if key.startswith('_') or key in ('data', 'bibliographic_legends', 'any_issn'):
            continue
        val = getattr(obj, key)
        if type(val).__name__ == 'method':
            # a plain (non-@property) bound method: call it with no arguments
            d[key] = val()
        else:
            d[key] = val

    # article specific
    if d.get('citations'):
        citations = []
        for c in d['citations']:
            c = obj_to_dict(c)
            # obj_to_dict passes non-xylose values through untouched; only
            # an actual dict can have its None entries stripped.
            citations.append(reduce_dict(c) if isinstance(c, dict) else c)
        d['citations'] = citations
    if d.get('journal'):
        d['journal'] = obj_to_dict(d['journal'])
    if d.get('issue'):
        d['issue'] = obj_to_dict(d['issue'])
    return d
def run_articles():
    """
    Print every document yielded by RestfulClient.documents_bulk() to
    stdout, one key-sorted JSON object per line.
    """
    client = RestfulClient()
    for document in client.documents_bulk():
        print(json.dumps(obj_to_dict(document), sort_keys=True))
def run_article_ids():
    """
    Print each item yielded by
    RestfulClient.documents_by_identifiers(only_identifiers=True) to
    stdout, one per print() call.
    """
    client = RestfulClient()
    identifiers = client.documents_by_identifiers(only_identifiers=True)
    for identifier in identifiers:
        print(identifier)
def run_journals():
    """
    Print every journal yielded by RestfulClient.journals() to stdout,
    one key-sorted JSON object per line.
    """
    client = RestfulClient()
    for record in client.journals():
        print(json.dumps(obj_to_dict(record), sort_keys=True))
def main():
    """
    Command-line entry point: parse the sub-command and run its handler.

    Sub-commands:
      journals     -> run_journals()
      articles     -> run_articles()
      article-ids  -> run_article_ids()

    When no sub-command is given, a hint is written to stderr and the
    process exits via sys.exit(-1).
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    subparsers = parser.add_subparsers()

    sub = subparsers.add_parser('journals',
        help="print all journals to stdout as JSON-per-line")
    sub.set_defaults(func=run_journals)

    sub = subparsers.add_parser('articles',
        help="print all articles to stdout as JSON-per-line")
    sub.set_defaults(func=run_articles)

    # This command prints bare identifiers, not JSON documents.
    sub = subparsers.add_parser('article-ids',
        help="print all article identifiers to stdout, one per line")
    sub.set_defaults(func=run_article_ids)

    args = parser.parse_args()
    # 'func' is only set when a sub-command was selected.
    func = getattr(args, "func", None)
    if not func:
        # stdout carries the data stream, so diagnostics go to stderr
        print("tell me what to do! (try --help)", file=sys.stderr)
        sys.exit(-1)
    func()
# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment