Skip to content

Instantly share code, notes, and snippets.

@jamilatta
Created May 18, 2018 19:22
Show Gist options
  • Save jamilatta/44ab0aa4e8230992df7d067a64a01f9f to your computer and use it in GitHub Desktop.
Save jamilatta/44ab0aa4e8230992df7d067a64a01f9f to your computer and use it in GitHub Desktop.
# coding: utf-8
import time
import pprint
from opac_proc.web import config
from opac_proc.extractors.source_clients.thrift import am_clients
from articlemeta.client import ThriftClient
COLLECTION = 'spa'
ISSN = '0034-8910'
'''
Approximate evaluation times
SPA['RSP'](ids):
Time to get ids journal: 0.13660001754760742 seconds.
Time to get ids issue: 2.8168540000915527 seconds.
Time to get ids article: 4.821263074874878 seconds.
SPA['RSP'](objects):
Time to get ids journal: 0.21854615211486816 seconds.
Time to get ids issue: 14.316105127334595 seconds.
Time to get ids article: 1258.500862121582 seconds.
'''
def main(cl, articlemeta):
    """Run the benchmark, announcing the start and end of each phase.

    Only the full-object phase is active. Both arguments are handed
    unchanged to ``objects_evaluation``.

    :param cl: client used for the journal, issue and document queries.
    :param articlemeta: client used to list the article identifiers.
    """
    # The identifier-only phase is switched off. To run it as well, call
    # ``ids_evaluation(cl, articlemeta)`` here, bracketed by the messages
    # "Init ids evaluation" and "Finish ids evaluation".
    print("Init objects evaluation")
    objects_evaluation(cl, articlemeta)
    print("Finish objects evaluation")
def objects_evaluation(cl, articlemeta):
    """Time the retrieval of full journal, issue and article objects.

    Journals and issues are fetched through ``cl`` for the module-level
    ``ISSN`` / ``COLLECTION`` pair, materialised into lists, dumped with
    ``pprint`` and counted. Articles are fetched one by one with
    ``cl.document`` for every identifier returned by
    ``articlemeta.get_article_identifiers``; each article's
    ``publisher_id`` is printed as it arrives. A summary of the three
    elapsed times is printed at the end.

    :param cl: client exposing ``journals``, ``issues`` and ``document``.
    :param articlemeta: client exposing ``get_article_identifiers``.
    """
    # Journals: the clock covers the query and draining its result.
    started = time.time()
    journal_list = list(cl.journals(issn=ISSN, collection=COLLECTION))
    journal_elapsed = time.time() - started
    pprint.pprint(journal_list)
    print("Journal total: %s" % len(journal_list))

    # Issues: measured the same way as journals.
    started = time.time()
    issue_list = list(cl.issues(issn=ISSN, collection=COLLECTION))
    issue_elapsed = time.time() - started
    pprint.pprint(issue_list)
    print("Issue total: %s" % len(issue_list))

    # The identifier lookup is issued before the article clock starts.
    # NOTE(review): if the returned iterable is lazy, the fetching of the
    # identifiers still happens inside the timed loop below -- confirm.
    identifiers = articlemeta.get_article_identifiers(
        collection=COLLECTION,
        issn=ISSN)

    # Articles: one request per identifier. The per-article print is
    # inside the timed region, so console output is part of the figure.
    started = time.time()
    for code in identifiers:
        document = cl.document(code=code, collection=COLLECTION, body=True)
        print(document.publisher_id)
    article_elapsed = time.time() - started

    separator = 80 * '*'
    print(separator)
    print("Time to get object journal: {0} seconds.".format(journal_elapsed))
    print("Time to get object issue: {0} seconds.".format(issue_elapsed))
    print("Time to get object article: {0} seconds.".format(article_elapsed))
    print(separator)
def ids_evaluation(cl, articlemeta=None):
    """Time the retrieval of journal, issue and article identifiers.

    Each of the three queries is issued through ``cl`` with
    ``only_identifiers=True`` for the module-level ``ISSN`` /
    ``COLLECTION`` pair. The result is materialised into a list, dumped
    with ``pprint`` and counted, and a summary of the three elapsed times
    is printed at the end.

    :param cl: client exposing ``journals``, ``issues`` and ``documents``.
    :param articlemeta: never read by this function. It is kept so that
        existing two-argument calls keep working, and made optional so
        that a call passing only ``cl`` is valid as well.
    """
    # Journals: the clock covers the query and draining its result.
    journal_start = time.time()
    journal_list = list(
        cl.journals(issn=ISSN, only_identifiers=True, collection=COLLECTION))
    journal_end = time.time()
    pprint.pprint(journal_list)
    print("Journal total: %s" % len(journal_list))

    # Issues.
    issue_start = time.time()
    issue_list = list(
        cl.issues(issn=ISSN, only_identifiers=True, collection=COLLECTION))
    issue_end = time.time()
    pprint.pprint(issue_list)
    print("Issue total: %s" % len(issue_list))

    # Articles: a single bulk query, not one request per article.
    article_start = time.time()
    article_list = list(
        cl.documents(issn=ISSN, collection=COLLECTION, only_identifiers=True))
    article_end = time.time()
    pprint.pprint(article_list)
    print("Article total: %s" % len(article_list))

    print(80 * '*')
    print("Time to get ids journal: {0} seconds.".format(journal_end - journal_start))
    print("Time to get ids issue: {0} seconds.".format(issue_end - issue_start))
    print("Time to get ids article: {0} seconds.".format(article_end - article_start))
    print(80 * '*')
if __name__ == "__main__":
    # Client used for the journal, issue and document queries.
    thrift_client = ThriftClient()
    # Client used to list article identifiers; its endpoint comes from
    # the opac_proc web configuration.
    am_client = am_clients.ArticleMeta(
        config.ARTICLE_META_THRIFT_DOMAIN,
        config.ARTICLE_META_THRIFT_PORT,
    )
    main(thrift_client, am_client)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment