Last active
September 20, 2018 09:17
-
-
Save geekdinazor/01fa97b2b11fa046905d110641bf955b to your computer and use it in GitHub Desktop.
Feed Zenodo metadata fields from Web of Science (WOS)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from wos import WosClient | |
from wos.utils import query | |
from bs4 import BeautifulSoup | |
from slugify import slugify | |
from iso639 import to_iso639_2 | |
from suds.client import Client | |
import json | |
# Web of Science API credentials. Both are blank in this file; fill them in
# (or leave them empty if no username/password is needed) before running.
WOS_API_USER = ""
WOS_API_PASS = ""
def generate_metadata(wos_ids):
    """Yield Zenodo deposit metadata for Web of Science (WOS) records.

    For every id in *wos_ids* the record is fetched through the WOS API,
    parsed, and reshaped into the ``{"metadata": {...}}`` structure that
    Zenodo accepts. The result can be POSTed directly when creating a
    deposit, but the Zenodo API requests themselves are up to the caller.

    Example output: https://yapistir.io/x2BE/Raw/

    Args:
        wos_ids: Iterable of WOS record identifiers; each one is looked up
            with the query ``UT=<id>``.

    Yields:
        One JSON string per id, in input order. Optional fields
        (description, doi, keywords, journal volume/issue/pages, creator
        affiliation/orcid) are omitted when the record does not carry them.
    """
    # Use the module-level credentials instead of hard-coded empty strings.
    with WosClient(WOS_API_USER, WOS_API_PASS) as client:
        for wos_id in wos_ids:
            raw_xml = query(client, "UT=%s" % wos_id, '', 1, 1, 1)
            soup = BeautifulSoup(raw_xml, "lxml")
            record = soup.records.rec
            static_data = record.static_data
            summary = static_data.summary
            full = static_data.fullrecord_metadata

            # Keys are inserted in a fixed order so the JSON layout is stable.
            metadata = {
                "upload_type": 'publication',
                "publication_type": 'article',
                "license": "cc-by-sa",
            }

            lang = full.languages.find("language", {"type": "primary"}).text
            metadata["language"] = to_iso639_2(lang, type='B')
            metadata["title"] = summary.titles.find(
                "title", {"type": "item"}).text

            # Not every record has an abstract; skip the field rather than
            # failing on the missing tag.
            abstract = full.abstract
            abstract_text = (
                abstract.abstract_text if abstract is not None else None)
            if abstract_text is not None:
                metadata["description"] = "".join(
                    str(p) for p in abstract_text.find_all("p"))

            dynamic_data = record.dynamic_data
            doi_tag = None
            if dynamic_data is not None:
                doi_tag = dynamic_data.find("identifier", {"type": "doi"})
            if doi_tag is not None and doi_tag.get("value") is not None:
                metadata["doi"] = doi_tag["value"]

            if full.keywords is not None:
                metadata["keywords"] = [
                    keyword.text
                    for keyword in full.keywords.find_all("keyword")
                ]

            metadata["journal_title"] = summary.titles.find(
                "title", {"type": "source"}).text

            pub_info = summary.pub_info
            volume = pub_info.get("vol")
            if volume is not None:
                metadata["journal_volume"] = volume
            issue = pub_info.get("issue")
            if issue is not None:
                metadata["journal_issue"] = issue
            metadata["publication_date"] = pub_info["sortdate"]

            # The <page> tag itself may be absent, not only its attributes.
            page = summary.page
            if page is not None:
                begin, end = page.get("begin"), page.get("end")
                if begin is not None and end is not None:
                    metadata["journal_pages"] = "%s - %s" % (begin, end)

            metadata["creators"] = _build_creators(static_data)

            cited = Client.dict(client.citedReferences(wos_id))
            references = []
            for cited_ref in cited.get("references") or []:
                formatted = _format_reference(cited_ref)
                if formatted is not None:
                    references.append(formatted)
            metadata["references"] = references

            yield json.dumps({"metadata": metadata})


def _build_creators(static_data):
    """Return the Zenodo ``creators`` list for one parsed WOS record."""
    full = static_data.fullrecord_metadata
    address_names = full.addresses.find_all("address_name")

    # Map each address number to its preferred organization name.
    affiliations = {}
    for address_name in address_names:
        organization = address_name.find("organization", {"pref": "Y"})
        address_spec = address_name.address_spec
        if organization is None or address_spec is None:
            continue
        addr_no = address_spec.get("addr_no")
        if addr_no is not None:
            affiliations[addr_no] = organization.text

    # Index ORCID ids by slugified display name once, instead of rescanning
    # the contributor list for every author. Later entries overwrite earlier
    # ones, matching a sequential scan where the last match wins.
    orcids = {}
    contributors = static_data.contributors
    if contributors is not None:
        for contributor in contributors.find_all("contributor"):
            # `.find("name")` is required: `.name` is the tag's own name.
            name_tag = contributor.find("name")
            display_name = contributor.display_name
            if name_tag is None or display_name is None:
                continue
            orcid = name_tag.get("orcid_id")
            if orcid is not None:
                orcids[slugify(display_name.text)] = orcid

    creators = []
    for address_name in address_names:
        for name in address_name.find_all("name"):
            display_name = name.display_name.text
            creator = {"name": display_name}
            affiliation = affiliations.get(name.get("addr_no"))
            if affiliation is not None:
                creator["affiliation"] = affiliation
            orcid = orcids.get(slugify(display_name))
            if orcid is not None:
                creator["orcid"] = orcid
            creators.append(creator)
    return creators


def _format_reference(cited_ref):
    """Format one cited reference as a citation string, or return None.

    References lacking any of the needed fields (author with a comma
    separated first name, year, title, work, volume) are skipped on a
    best-effort basis, so None is returned for them.
    """
    try:
        last, _, rest = cited_ref["citedAuthor"].partition(",")
        # Keep the first letter as-is and lower-case the remainder.
        last = "%s%s" % (last[0], last[1:].lower())
        # First letter of the given name, tolerant of spacing after the comma.
        initial = rest.strip()[0]
        return "%s, %s., [et al.] (%s) %s. %s, %s" % (
            last,
            initial,
            cited_ref["year"],
            cited_ref["citedTitle"],
            cited_ref["citedWork"],
            cited_ref["volume"],
        )
    except (AttributeError, IndexError, KeyError, TypeError):
        # Missing field or unexpected author format: drop this reference.
        return None
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment