Skip to content

Instantly share code, notes, and snippets.

@caseydm
Created March 10, 2023 19:09
Show Gist options
  • Save caseydm/ee529ae09b33555bdd967f4d2a64ceb4 to your computer and use it in GitHub Desktop.
Save caseydm/ee529ae09b33555bdd967f4d2a64ceb4 to your computer and use it in GitHub Desktop.
Parseland stats
import requests
# The OpenAlex query below asks for a 1000-work sample at 100 per page,
# so 10 pages walk the whole sample.
_OPENALEX_PAGES = 10
_PARSER_URL = "http://127.0.0.1:5000/parse-publisher"
# Seconds to wait on any single HTTP call before giving up on it, so one
# stalled connection cannot hang the whole run.
_REQUEST_TIMEOUT = 60


def _field_flags(message):
    """Report which fields the parser filled in for one landing page.

    Args:
        message: The dict found under the "message" key of a parser response.

    Returns:
        A dict mapping each stat name to a bool. The author-level flags look
        only at the first author in the list, so they mean "the first author
        has this field", not "every author has it".
    """
    authors = message.get("authors")
    first_author = authors[0] if authors else {}
    # Key order here fixes the order in which counters first appear in the
    # printed result dict.
    return {
        "has_authors": bool(first_author.get("name")),
        "has_affiliations": bool(first_author.get("affiliations")),
        "has_is_corresponding": bool(first_author.get("is_corresponding")),
        "has_abstract": bool(message.get("abstract")),
    }


def main():
    """Print coverage stats for the local publisher parser.

    Walks a fixed-seed sample of OpenAlex works that have a DOI, sends each
    DOI to the parser service at 127.0.0.1:5000, and counts how many
    responses contain author names, affiliations, a corresponding-author
    flag, and an abstract. DOIs whose landing page is reported as missing
    from S3 are counted separately; responses whose "message" is a plain
    string are skipped without being counted.

    Raises:
        requests.HTTPError: If OpenAlex answers with an error status.
    """
    result = {}
    doi_count = 0
    no_landing_page_in_s3 = 0

    for page in range(1, _OPENALEX_PAGES + 1):
        url = (
            f"https://api.openalex.org/works?page={page}"
            "&per-page=100&sample=1000&seed=23&filter=has_doi:true"
        )
        r1 = requests.get(url, timeout=_REQUEST_TIMEOUT)
        # Fail loudly on an OpenAlex error instead of hitting an opaque
        # KeyError on "results" a line later.
        r1.raise_for_status()
        dois = [item["doi"] for item in r1.json()["results"]]

        for doi in dois:
            print(f"checking doi {doi}")
            doi_count += 1
            try:
                # Pass the DOI through `params` so it is URL-encoded; DOIs
                # may contain characters such as "&" or "#" that would
                # otherwise cut the query string short.
                r2 = requests.get(
                    _PARSER_URL,
                    params={"doi": doi},
                    timeout=_REQUEST_TIMEOUT,
                )
                payload = r2.json()  # decode once, reuse below
                if (
                    "error" in payload
                    and "file not found on S3" in payload["error"]
                ):
                    no_landing_page_in_s3 += 1
                    continue
                message = payload.get("message")
                if isinstance(message, str):
                    # A string message carries no parsed fields to count.
                    continue
                flags = _field_flags(message)
            except Exception as e:
                # Best-effort per-DOI boundary: one bad response (network
                # failure, non-JSON body, unexpected shape) must not abort
                # the whole run.
                print(f"error: {e}")
                continue

            for name, present in flags.items():
                if present:
                    result[name] = result.get(name, 0) + 1

    print(f"doi_count: {doi_count}")
    print(f"no_landing_page_in_s3: {no_landing_page_in_s3}")
    print(f"result: {result}")
# Run the stats collection only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment