Skip to content

Instantly share code, notes, and snippets.

@geekdinazor
Last active September 20, 2018 09:17
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save geekdinazor/01fa97b2b11fa046905d110641bf955b to your computer and use it in GitHub Desktop.
Save geekdinazor/01fa97b2b11fa046905d110641bf955b to your computer and use it in GitHub Desktop.
Feed Zenodo metadata fields from Web of Science (WOS)
from wos import WosClient
from wos.utils import query
from bs4 import BeautifulSoup
from slugify import slugify
from iso639 import to_iso639_2
from suds.client import Client
import json
# Web of Science (WOS) API credentials. Both are empty placeholders here;
# presumably they are meant to be filled in before running -- confirm.
WOS_API_USER=""
WOS_API_PASS=""
def _orcid_lookup(record):
    """Return a dict mapping slugified contributor names to ORCID iDs.

    Contributors whose ``name`` tag carries no ``orcid_id`` attribute are
    left out. When several contributors share a slug, the last one wins.
    """
    contributors = record.static_data.contributors
    if contributors is None:
        return {}
    orcids = {}
    for contributor in contributors.find_all("contributor"):
        # ``contributor.name`` would be the tag's own name, hence find().
        name_tag = contributor.find("name")
        orcid = name_tag.get("orcid_id") if name_tag is not None else None
        if orcid is not None:
            orcids[slugify(contributor.display_name.text)] = orcid
    return orcids


def _build_creators(full, orcids):
    """Build the ``creators`` list from the address section of a record.

    ``full`` is the ``fullrecord_metadata`` tag; ``orcids`` is the mapping
    produced by ``_orcid_lookup``. One entry is emitted per ``name`` tag
    found under each ``address_name``.
    """
    address_names = full.addresses.find_all("address_name")

    # First pass: address number -> preferred organization name. This is a
    # separate pass so that every address is known before names are resolved.
    affiliations = {}
    for address_name in address_names:
        organization = address_name.find("organization", {"pref": "Y"}).text
        affiliations[address_name.address_spec["addr_no"]] = organization

    # Second pass: one creator per listed name.
    creators = []
    for address_name in address_names:
        for name in address_name.find_all("name"):
            display_name = name.display_name.text
            creator = {
                "name": display_name,
                "affiliation": affiliations[name["addr_no"]],
            }
            orcid = orcids.get(slugify(display_name))
            if orcid is not None:
                creator["orcid"] = orcid
            creators.append(creator)
    return creators


def _format_reference(ref):
    """Format one cited-reference record as a citation string.

    Returns ``None`` when the record cannot be formatted: a field is
    missing, or the author is not in "LAST, First" form.
    """
    try:
        author_parts = ref["citedAuthor"].split(",")
        last_name = author_parts[0]
        # Keep the first letter as-is and lower-case the rest ("SMITH" -> "Smith").
        last_name = "%s%s" % (last_name[0], last_name[1:].lower())
        # First non-blank character after the comma is the given-name initial.
        first_initial = author_parts[1].strip()[0]
        year = ref["year"]
        title = ref["citedTitle"]
        work = ref["citedWork"]
        volume = ref["volume"]
    except (AttributeError, IndexError, KeyError, TypeError):
        # Incomplete references are skipped on purpose (best effort), but
        # unrelated errors such as KeyboardInterrupt are no longer swallowed.
        return None
    return "%s, %s., [et al.] (%s) %s. %s, %s" % (
        last_name, first_initial, year, title, work, volume)


def generate_metadata(wos_ids):
    """
    Fetch article metadata from the Web of Science (WOS) API, parse it and
    format it into the structure accepted by Zenodo.

    You can POST the result directly when creating a deposit, but you have
    to handle the Zenodo API requests yourself.
    Example output: https://yapistir.io/x2BE/Raw/

    Args:
        wos_ids: iterable of WOS record identifiers; each one is looked up
            with the query ``UT=<id>``.

    Yields:
        One JSON string per identifier, of the form ``{"metadata": {...}}``.
        Optional fields (description, doi, keywords, journal volume, issue
        and pages) are left out when the record does not provide them.
    """
    with WosClient(WOS_API_USER, WOS_API_PASS) as client:
        for wos_id in wos_ids:
            response = query(client, "UT=%s" % wos_id, '', 1, 1, 1)
            record = BeautifulSoup(response, "lxml").records.rec
            summary = record.static_data.summary
            full = record.static_data.fullrecord_metadata
            pub_info = summary.pub_info

            metadata = {}
            metadata["upload_type"] = 'publication'
            metadata["publication_type"] = 'article'
            metadata["license"] = "cc-by-sa"

            lang = full.languages.find("language", {"type": "primary"}).text
            metadata["language"] = to_iso639_2(lang, type='B')
            metadata["title"] = summary.titles.find("title", {"type": "item"}).text

            # Not every record has an abstract; skip the field instead of
            # failing on the missing tag.
            abstract = full.abstract
            abstract_text = abstract.abstract_text if abstract is not None else None
            if abstract_text is not None:
                metadata["description"] = "".join(
                    str(p) for p in abstract_text.find_all("p"))

            doi_tag = record.dynamic_data.find("identifier", {"type": "doi"})
            if doi_tag is not None:
                metadata["doi"] = doi_tag["value"]

            if full.keywords is not None:
                metadata["keywords"] = [
                    keyword.text for keyword in full.keywords.find_all("keyword")]

            metadata["journal_title"] = summary.titles.find("title", {"type": "source"}).text

            for field, attribute in (("journal_volume", "vol"),
                                     ("journal_issue", "issue")):
                value = pub_info.get(attribute)
                if value is not None:
                    metadata[field] = value

            metadata["publication_date"] = pub_info["sortdate"]

            try:
                metadata["journal_pages"] = "%s - %s" % (
                    summary.page["begin"], summary.page["end"])
            except KeyError:
                pass

            metadata["creators"] = _build_creators(full, _orcid_lookup(record))

            cited = Client.dict(client.citedReferences(wos_id))
            references = []
            # A response without a "references" entry yields an empty list.
            for ref in cited.get("references") or []:
                citation = _format_reference(ref)
                if citation is not None:
                    references.append(citation)
            metadata["references"] = references

            yield json.dumps({"metadata": metadata})
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment