# computron/esdr_scopus_to_endnote.py

## esdr_scopus_to_endnote.py
from scopus import ScopusAuthor, ScopusAbstract

# Roster of authors to export, one "Display Name, Scopus author ID" pair per
# line. The display name is only used to derive the output file name, so a
# numeric suffix ("Duncan Callaway 2") sends a further ID to its own file
# (presumably an additional Scopus profile of the same person). A "?" in
# place of the ID marks an author without a known ID; the script reports such
# entries and skips them. Each pair is listed once so that no author is
# fetched twice.
AUTHOR_SCOPUS_ID = """
Sumanjeet Kaur, 15071548400
Sean Lubner, 55895288700
Fuduo Ma, 55390403200
Ravi Prasher, 7004221271
Peiyuan Yu, 56412335300
Vivek Subramanian, 56504639000
David Suich, 55804751900
Doug Black, 7402568194
Duncan Callaway, 26038994100
Duncan Callaway 2, 56979042000
Duncan Callaway 3, 56454080500
Goncalo Ferreira Cardoso, 55820471800
Jonathan Coignard, 57191476825
Nicholas Deforest, 55213847400
Miguel Heleno, 53463606400
Jason MacDonald, 36844248300
Salman Mashayekh, 36651050900
Ciaran Roberts, 57191095405
Samveg Saxena, 36895020700
Bin Wang, 56907931500
Dai Wang, 55777604400
Dai Wang 2, 57191477838
Rongxin Yin, 24067993200
Rohini Bala Chandran, 56428746800
Vincent Battaglia, 7004855730
Guoying Chen, 35267923600
Marca Doeff, 7003642480
Yanbao Fu, 9238959800
Atetegeb Haregewoin, 55892318700
Ivana Hasa, 55641177800
Kenneth Higa, 56604768400
Yoon Hwa, 55952764700
Robert Kostecki, 7004240372
Rung-Chuan Lee, 55998569400
Bryan McCloskey, 7003337275
Lalit Pant, 54951562700
Xiangyun Song, 7402269668
Emily Tow, 56015886000
Tianyue Zheng, 57194395762
Jian Zhu, 55503418800
George Chan, 7202355292
Jhanis Gonzalez, 7404493853
Xianglei Mao, 7402841260
Dayana Oropeza, 23390342400
Vasileia Zormpa, 6508092944
Nemanja Danilovic, 26534274700
Michael Gerhardt, 55885192800
Douglas Kushner, 37031221800
Ahmet Kusoglu, 15061383700
Grace Lau, 15065725300
Vi Rapp, 36970695200
Michael Tucker, 7201659500
Adam Weber, 7403707400
Kristin Persson, 56586964900
Shyam Dwaraknath, 56636510400
Benjamin Ellis, 57059877300
Alireza Faghaninia, 56416508500
Patrick Huck, 57007920900
Ning Li, 56518674200
Gao Liu, 55535145500
Miao Liu, 56424190400
Joseph Montoya, 16310120000
Anubhav Jain, 7404463800
Nav Nidhi Rajput, 55276437100
Trevor Seguin, 56708627600
Yongwoo Shin, 57189032233
Meiling Sun, 56121049400
Wei Tong, 57037876300
Donald Winston, 57007929900
Nils Zimmermann, 23089386300
Elton Cairns, 35570527800
Md. Khan, ?
Jhi-Young Joo, ?
"""

def entry_to_ris(ab, include_abstract=True):
    """
    Converts a ScopusAbstract object to a RIS string in Endnote format. The
    Scopus API does this but leaves out abstracts and non-journal article
    files, so we are implementing the function here.

    Every tag is written at the start of its own line as "XX  - value".
    Fields whose value is None are left out of the record rather than being
    written as the literal text "None"; the AB line is likewise left out
    when include_abstract is False.

    Args:
        ab: (ScopusAbstract)
        include_abstract: (bool) - whether to include abstract info

    Returns:
        (str) - RIS string for the ScopusAbstract, or None when the
            aggregation type of the entry has no RIS type mapping

    """

    type_map = {"Journal": "JOUR",
                "Trade Journal": "JOUR",
                "Conference Proceeding": "CONF",
                "Book": "BOOK"}

    ris_type = type_map.get(ab.aggregationType)
    if ris_type is None:
        # Unsupported aggregation type: nothing to export.
        return None

    cover_date = ab.coverDate
    doi = ab.doi

    # (tag, value) pairs in output order; None values are dropped below.
    fields = [
        ("TY", ris_type),
        ("TI", ab.title),
        ("JO", ab.publicationName),
        ("VL", ab.volume),
        ("DA", cover_date),
        ("SP", ab.pageRange),
        # The year is the first four characters of the cover date.
        ("PY", cover_date[0:4] if cover_date is not None else None),
        ("DO", doi),
        ("UR", u"http://dx.doi.org/{}".format(doi)
         if doi is not None else None),
        ("AB", ab.abstract if include_abstract else None),
    ]
    fields.extend(("AU", au.indexed_name) for au in ab.authors)
    fields.append(("IS", ab.issueIdentifier))

    lines = [u"{}  - {}".format(tag, value)
             for tag, value in fields if value is not None]
    lines.append(u"ER  - ")
    # A blank line after the end-of-record tag separates consecutive records.
    return u"\n".join(lines) + u"\n\n"


def get_ris_str(author_id, refresh=False):
    """
    Builds a single RIS string covering the documents of one Scopus author.

    Documents whose aggregation type is not one of the supported types are
    not exported; a "Skipping" notice with the ASCII-only title is printed
    for each of them instead.

    Args:
        author_id: identifier handed to ScopusAuthor
        refresh: forwarded to the Scopus document lookups

    Returns:
        (str) - concatenated RIS records of all supported documents

    """
    supported_types = ('Journal', 'Trade Journal',
                       'Conference Proceeding', 'Book')
    records = []
    for eid in ScopusAuthor(author_id).get_document_eids(refresh=refresh):
        doc = ScopusAbstract(eid, refresh=refresh)
        if doc.aggregationType not in supported_types:
            # Drop non-ASCII characters so the notice prints on any console.
            ascii_title = doc.title.encode('ascii', 'ignore').decode('ascii')
            print(u"Skipping: {} - type is {} (unsupported)".format(
                ascii_title, doc.aggregationType))
            continue
        records.append(entry_to_ris(doc))

    return "".join(records)


if __name__ == "__main__":
    # Script entry point: writes one "<name>.ris" file per roster entry into
    # the current working directory.
    # Forwarded unchanged to get_ris_str for every author.
    refresh = False

    for line in AUTHOR_SCOPUS_ID.split("\n"):
        if not line.strip():
            # Blank lines at the start/end of the roster string.
            continue
        # Split on the last comma: the ID is always the right-most field.
        name, author_id = (part.strip() for part in line.rsplit(",", 1))
        if author_id == "?":
            print("NO AUTHOR ID SPECIFIED FOR: {}".format(name))
            continue
        ris_str = get_ris_str(int(author_id), refresh=refresh)
        file_name = name.lower().replace(" ", "_") + ".ris"
        # The payload is UTF-8 encoded bytes, so the file must be opened in
        # binary mode; writing bytes to a text-mode file raises TypeError on
        # Python 3.
        with open(file_name, "wb") as f:
            f.write(ris_str.encode('utf8'))
	from scopus import ScopusAuthor, ScopusAbstract

	# Roster of authors to export, one "Display Name, Scopus author ID" pair
	# per line. The display name is only used to derive the output file name,
	# and a "?" in place of the ID marks an author without a known ID; the
	# script reports such entries and skips them. Each pair is listed once so
	# that no author is fetched twice.
	AUTHOR_SCOPUS_ID = """
	Sumanjeet Kaur, 15071548400
	Sean Lubner, 55895288700
	Fuduo Ma, 55390403200
	Ravi Prasher, 7004221271
	Peiyuan Yu, 56412335300
	Vivek Subramanian, 56504639000
	David Suich, 55804751900
	Doug Black, 7402568194
	Duncan Callaway, 26038994100
	Duncan Callaway 2, 56979042000
	Duncan Callaway 3, 56454080500
	Goncalo Ferreira Cardoso, 55820471800
	Jonathan Coignard, 57191476825
	Nicholas Deforest, 55213847400
	Miguel Heleno, 53463606400
	Jason MacDonald, 36844248300
	Salman Mashayekh, 36651050900
	Ciaran Roberts, 57191095405
	Samveg Saxena, 36895020700
	Bin Wang, 56907931500
	Dai Wang, 55777604400
	Dai Wang 2, 57191477838
	Rongxin Yin, 24067993200
	Rohini Bala Chandran, 56428746800
	Vincent Battaglia, 7004855730
	Guoying Chen, 35267923600
	Marca Doeff, 7003642480
	Yanbao Fu, 9238959800
	Atetegeb Haregewoin, 55892318700
	Ivana Hasa, 55641177800
	Kenneth Higa, 56604768400
	Yoon Hwa, 55952764700
	Robert Kostecki, 7004240372
	Rung-Chuan Lee, 55998569400
	Bryan McCloskey, 7003337275
	Lalit Pant, 54951562700
	Xiangyun Song, 7402269668
	Emily Tow, 56015886000
	Tianyue Zheng, 57194395762
	Jian Zhu, 55503418800
	George Chan, 7202355292
	Jhanis Gonzalez, 7404493853
	Xianglei Mao, 7402841260
	Dayana Oropeza, 23390342400
	Vasileia Zormpa, 6508092944
	Nemanja Danilovic, 26534274700
	Michael Gerhardt, 55885192800
	Douglas Kushner, 37031221800
	Ahmet Kusoglu, 15061383700
	Grace Lau, 15065725300
	Vi Rapp, 36970695200
	Michael Tucker, 7201659500
	Adam Weber, 7403707400
	Kristin Persson, 56586964900
	Shyam Dwaraknath, 56636510400
	Benjamin Ellis, 57059877300
	Alireza Faghaninia, 56416508500
	Patrick Huck, 57007920900
	Ning Li, 56518674200
	Gao Liu, 55535145500
	Miao Liu, 56424190400
	Joseph Montoya, 16310120000
	Anubhav Jain, 7404463800
	Nav Nidhi Rajput, 55276437100
	Trevor Seguin, 56708627600
	Yongwoo Shin, 57189032233
	Meiling Sun, 56121049400
	Wei Tong, 57037876300
	Donald Winston, 57007929900
	Nils Zimmermann, 23089386300
	Elton Cairns, 35570527800
	Md. Khan, ?
	Jhi-Young Joo, ?
	"""

	def entry_to_ris(ab, include_abstract=True):
	    """
	    Converts a ScopusAbstract object to a RIS string in Endnote format. The
	    Scopus API does this but leaves out abstracts and non-journal article
	    files, so we are implementing the function here.

	    Every tag is written at the start of its own line as "XX  - value"
	    (two spaces before the hyphen, as the RIS format requires). Fields
	    whose value is None are left out of the record rather than being
	    written as the literal text "None"; the AB line is likewise left out
	    when include_abstract is False.

	    Args:
	        ab: (ScopusAbstract)
	        include_abstract: (bool) - whether to include abstract info

	    Returns:
	        (str) - RIS string for the ScopusAbstract, or None when the
	            aggregation type of the entry has no RIS type mapping

	    """

	    type_map = {"Journal": "JOUR",
	                "Trade Journal": "JOUR",
	                "Conference Proceeding": "CONF",
	                "Book": "BOOK"}

	    ris_type = type_map.get(ab.aggregationType)
	    if ris_type is None:
	        # Unsupported aggregation type: nothing to export.
	        return None

	    cover_date = ab.coverDate
	    doi = ab.doi

	    # (tag, value) pairs in output order; None values are dropped below.
	    fields = [
	        ("TY", ris_type),
	        ("TI", ab.title),
	        ("JO", ab.publicationName),
	        ("VL", ab.volume),
	        ("DA", cover_date),
	        ("SP", ab.pageRange),
	        # The year is the first four characters of the cover date.
	        ("PY", cover_date[0:4] if cover_date is not None else None),
	        ("DO", doi),
	        ("UR", u"http://dx.doi.org/{}".format(doi)
	         if doi is not None else None),
	        ("AB", ab.abstract if include_abstract else None),
	    ]
	    fields.extend(("AU", au.indexed_name) for au in ab.authors)
	    fields.append(("IS", ab.issueIdentifier))

	    lines = [u"{}  - {}".format(tag, value)
	             for tag, value in fields if value is not None]
	    lines.append(u"ER  - ")
	    # A blank line after the end-of-record tag separates consecutive records.
	    return u"\n".join(lines) + u"\n\n"


	def get_ris_str(author_id, refresh=False):
	    """
	    Builds a single RIS string covering the documents of one Scopus author.

	    Documents whose aggregation type is not one of the supported types are
	    not exported; a "Skipping" notice with the ASCII-only title is printed
	    for each of them instead.

	    Args:
	        author_id: identifier handed to ScopusAuthor
	        refresh: forwarded to the Scopus document lookups

	    Returns:
	        (str) - concatenated RIS records of all supported documents

	    """
	    supported_types = ('Journal', 'Trade Journal',
	                       'Conference Proceeding', 'Book')
	    records = []
	    for eid in ScopusAuthor(author_id).get_document_eids(refresh=refresh):
	        doc = ScopusAbstract(eid, refresh=refresh)
	        if doc.aggregationType not in supported_types:
	            # Drop non-ASCII characters so the notice prints on any console.
	            ascii_title = doc.title.encode('ascii', 'ignore').decode('ascii')
	            print(u"Skipping: {} - type is {} (unsupported)".format(
	                ascii_title, doc.aggregationType))
	            continue
	        records.append(entry_to_ris(doc))

	    return "".join(records)


	if __name__ == "__main__":
	    # Script entry point: writes one "<name>.ris" file per roster entry
	    # into the current working directory.
	    # Forwarded unchanged to get_ris_str for every author.
	    refresh = False

	    for line in AUTHOR_SCOPUS_ID.split("\n"):
	        if not line.strip():
	            # Blank or whitespace-only lines of the roster string.
	            continue
	        # Split on the last comma: the ID is always the right-most field.
	        name, author_id = (part.strip() for part in line.rsplit(",", 1))
	        if author_id == "?":
	            print("NO AUTHOR ID SPECIFIED FOR: {}".format(name))
	            continue
	        ris_str = get_ris_str(int(author_id), refresh=refresh)
	        file_name = name.lower().replace(" ", "_") + ".ris"
	        # The payload is UTF-8 encoded bytes, so the file must be opened in
	        # binary mode; writing bytes to a text-mode file raises TypeError
	        # on Python 3.
	        with open(file_name, "wb") as f:
	            f.write(ris_str.encode('utf8'))