neuromusic/make_mendeley_collection_from_PMIDs.py

## make_mendeley_collection_from_PMIDs.py
#! /usr/bin/python

# Requires Python 2.7 (newer than 2.7.2); not Python 3 compatible as written (e.g. it imports urlparse).

# CAUTION: this script will create a new collection in your Mendeley library and add a bunch of documents to it.

import json

import urlparse
from mendeley_client import MendeleyClient
from Bio import Entrez, Medline


def update_uploaded_list(mendeley, group_id):
    """Return the PMIDs of the documents already present in a Mendeley group.

    Args:
        mendeley: client object exposing ``group_documents(group_id, items=...)``
            and ``group_doc_details(group_id, doc_id)``.
        group_id: identifier of the group to inspect.

    Returns:
        A list holding the ``'pmid'`` identifier of every listed document
        that has one, in the order the documents are listed.
    """
    # NOTE(review): items='5000' presumably caps how many documents are
    # listed in one call -- confirm against the Mendeley client.
    docs = mendeley.group_documents(group_id, items='5000')
    uploaded_PMIDs = []
    for doc_id in docs['document_ids']:
        # One details request per document.
        doc_details = mendeley.group_doc_details(group_id, doc_id)
        # A document without an 'identifiers' entry (or with a None/empty
        # one) simply has no PMID; do not abort the whole scan with KeyError.
        identifiers = doc_details.get('identifiers') or {}
        if 'pmid' in identifiers:
            uploaded_PMIDs.append(identifiers['pmid'])
    return uploaded_PMIDs

# OAuth consumer credentials for the Mendeley API (placeholders).
consumer_key = 'XXXXXXXXXXXXXXXX'
consumer_secret = 'XXXXXXXXXXXXXXXX'

mendeley = MendeleyClient(consumer_key, consumer_secret)

# Try previously saved keys first; when loading fails with IOError, request
# new ones and save them for the next run.
try:
    mendeley.load_keys()
except IOError:
    mendeley.get_required_keys()
    mendeley.save_keys()

# check to see if there is already a group w/ 'ABCD' in the title
# (case-insensitive; when several groups match, the last one listed wins)
abcd_group_id = None
groups = mendeley.groups()
for gr in groups:
    if 'abcd' in gr['name'].lower():
        abcd_group_id = gr['id']

if not abcd_group_id:
    # create an invite-only group for ABCD
    response = mendeley.create_group(group=json.dumps({'name': 'ABCD', 'type': 'invite'}))
    if 'error' in response:
        # Same error convention as the other API calls below; without a group
        # there is nothing to upload into, so stop with a readable message.
        raise SystemExit("Error creating group: %s" % response['error'])
    abcd_group_id = response['group_id']

# load ABCD dump
with open("abcd_connections.json", "rb") as f:
    connections = json.load(f)

# Unique identifiers in first-seen order; the sets make the duplicate check
# O(1) instead of scanning the lists.
PMIDs = []
ASINs = []  # collected for completeness; only PMIDs are uploaded below
seen_PMIDs = set()
seen_ASINs = set()
# for each citation in the ABCD connection dump
for cnxn_info in connections.values():
    if 'citations' not in cnxn_info:
        continue
    for cite in cnxn_info['citations']:
        if 'PMID' in cite:
            if cite['PMID'] not in seen_PMIDs:
                seen_PMIDs.add(cite['PMID'])
                PMIDs.append(cite['PMID'])
        elif 'ASIN' in cite:
            if cite['ASIN'] not in seen_ASINs:
                seen_ASINs.add(cite['ASIN'])
                ASINs.append(cite['ASIN'])

print("%s PMIDs referenced in the ABCD" % len(PMIDs))

print("updating the list of uploaded PMIDS...")
already_uploaded = update_uploaded_list(mendeley, abcd_group_id)
print("%s PMIDs already uploaded" % len(already_uploaded))
uploaded_PMIDs = set(already_uploaded)

# Response fields that are stripped before re-posting the document to the group.
fields_to_remove = ['error',
                    'mendeley_url',
                    'stats',
                    'uuid',
                    'publication_outlet',
                    'categories',
                    'groups',
                    ]

for identifier in PMIDs:
    # if it exists in the mendeley group, then skip it
    if identifier in uploaded_PMIDs:
        print("skip %s: %s" % ('pmid', identifier))
        continue

    print("adding %s: %s" % ('pmid', identifier))
    document = mendeley.details(identifier, type='pmid')
    if 'error' in document:
        print("Error getting document: %s" % document['error'])
        print("since PMID not found in Mendeley, extracting basic info on %s from pubmed" % identifier)
        # NOTE(review): NCBI asks E-utilities users to set Entrez.email;
        # it is not set anywhere in this script.
        handle = Entrez.efetch(db="pubmed", id=identifier, rettype="medline", retmode="text")
        try:
            records = list(Medline.parse(handle))
        finally:
            handle.close()

        if not records:
            # Nothing came back from PubMed either; move on to the next PMID
            # instead of crashing on records[0].
            print("Error getting document: no PubMed record found for %s" % identifier)
            continue

        record = records[0]
        # Not every MEDLINE record carries an abstract (AB) or full author
        # names (FAU); fall back to short author names (AU) / empty values.
        document = {'title': record.get('TI', ''),
                    'authors': record.get('FAU') or record.get('AU', []),
                    'abstract': record.get('AB', ''),
                    'identifiers': {'pmid': identifier},
                    'type': 'Journal Article',
                    }

    # strip extra fields to prep to add to group
    for field in fields_to_remove:
        document.pop(field, None)
    document['group_id'] = abcd_group_id

    # add the citation to the group
    response = mendeley.create_document(document=json.dumps(document))
    if 'error' in response:
        print("Error adding document: %s" % response['error'])
    else:
        uploaded_PMIDs.add(identifier)
	#! /usr/bin/python

	# Requires Python >2.7.2

	# CAUTION: this script will create a new collection in your Mendeley library and add a bunch of documents to it.

	import json

	import urlparse
	from mendeley_client import MendeleyClient
	from Bio import Entrez, Medline


	def update_uploaded_list(mendeley, group_id):
	    """Return the PMIDs of the documents already present in a Mendeley group.

	    Args:
	        mendeley: client object exposing ``group_documents(group_id, items=...)``
	            and ``group_doc_details(group_id, doc_id)``.
	        group_id: identifier of the group to inspect.

	    Returns:
	        A list holding the ``'pmid'`` identifier of every listed document
	        that has one, in the order the documents are listed.
	    """
	    # NOTE(review): items='5000' presumably caps how many documents are
	    # listed in one call -- confirm against the Mendeley client.
	    docs = mendeley.group_documents(group_id, items='5000')
	    uploaded_PMIDs = []
	    for doc_id in docs['document_ids']:
	        # One details request per document.
	        doc_details = mendeley.group_doc_details(group_id, doc_id)
	        # A document without an 'identifiers' entry (or with a None/empty
	        # one) simply has no PMID; do not abort the whole scan with KeyError.
	        identifiers = doc_details.get('identifiers') or {}
	        if 'pmid' in identifiers:
	            uploaded_PMIDs.append(identifiers['pmid'])
	    return uploaded_PMIDs

	# OAuth consumer credentials for the Mendeley API (placeholders).
	consumer_key = 'XXXXXXXXXXXXXXXX'
	consumer_secret = 'XXXXXXXXXXXXXXXX'

	mendeley = MendeleyClient(consumer_key, consumer_secret)

	# Try previously saved keys first; when loading fails with IOError, request
	# new ones and save them for the next run.
	try:
	    mendeley.load_keys()
	except IOError:
	    mendeley.get_required_keys()
	    mendeley.save_keys()

	# check to see if there is already a group w/ 'ABCD' in the title
	# (case-insensitive; when several groups match, the last one listed wins)
	abcd_group_id = None
	groups = mendeley.groups()
	for gr in groups:
	    if 'abcd' in gr['name'].lower():
	        abcd_group_id = gr['id']

	if not abcd_group_id:
	    # create an invite-only group for ABCD
	    response = mendeley.create_group(group=json.dumps({'name': 'ABCD', 'type': 'invite'}))
	    if 'error' in response:
	        # Same error convention as the other API calls below; without a group
	        # there is nothing to upload into, so stop with a readable message.
	        raise SystemExit("Error creating group: %s" % response['error'])
	    abcd_group_id = response['group_id']

	# load ABCD dump
	with open("abcd_connections.json", "rb") as f:
	    connections = json.load(f)

	# Unique identifiers in first-seen order; the sets make the duplicate check
	# O(1) instead of scanning the lists.
	PMIDs = []
	ASINs = []  # collected for completeness; only PMIDs are uploaded below
	seen_PMIDs = set()
	seen_ASINs = set()
	# for each citation in the ABCD connection dump
	for cnxn_info in connections.values():
	    if 'citations' not in cnxn_info:
	        continue
	    for cite in cnxn_info['citations']:
	        if 'PMID' in cite:
	            if cite['PMID'] not in seen_PMIDs:
	                seen_PMIDs.add(cite['PMID'])
	                PMIDs.append(cite['PMID'])
	        elif 'ASIN' in cite:
	            if cite['ASIN'] not in seen_ASINs:
	                seen_ASINs.add(cite['ASIN'])
	                ASINs.append(cite['ASIN'])

	print("%s PMIDs referenced in the ABCD" % len(PMIDs))

	print("updating the list of uploaded PMIDS...")
	already_uploaded = update_uploaded_list(mendeley, abcd_group_id)
	print("%s PMIDs already uploaded" % len(already_uploaded))
	uploaded_PMIDs = set(already_uploaded)

	# Response fields that are stripped before re-posting the document to the group.
	fields_to_remove = ['error',
	                    'mendeley_url',
	                    'stats',
	                    'uuid',
	                    'publication_outlet',
	                    'categories',
	                    'groups',
	                    ]

	for identifier in PMIDs:
	    # if it exists in the mendeley group, then skip it
	    if identifier in uploaded_PMIDs:
	        print("skip %s: %s" % ('pmid', identifier))
	        continue

	    print("adding %s: %s" % ('pmid', identifier))
	    document = mendeley.details(identifier, type='pmid')
	    if 'error' in document:
	        print("Error getting document: %s" % document['error'])
	        print("since PMID not found in Mendeley, extracting basic info on %s from pubmed" % identifier)
	        # NOTE(review): NCBI asks E-utilities users to set Entrez.email;
	        # it is not set anywhere in this script.
	        handle = Entrez.efetch(db="pubmed", id=identifier, rettype="medline", retmode="text")
	        try:
	            records = list(Medline.parse(handle))
	        finally:
	            handle.close()

	        if not records:
	            # Nothing came back from PubMed either; move on to the next PMID
	            # instead of crashing on records[0].
	            print("Error getting document: no PubMed record found for %s" % identifier)
	            continue

	        record = records[0]
	        # Not every MEDLINE record carries an abstract (AB) or full author
	        # names (FAU); fall back to short author names (AU) / empty values.
	        document = {'title': record.get('TI', ''),
	                    'authors': record.get('FAU') or record.get('AU', []),
	                    'abstract': record.get('AB', ''),
	                    'identifiers': {'pmid': identifier},
	                    'type': 'Journal Article',
	                    }

	    # strip extra fields to prep to add to group
	    for field in fields_to_remove:
	        document.pop(field, None)
	    document['group_id'] = abcd_group_id

	    # add the citation to the group
	    response = mendeley.create_document(document=json.dumps(document))
	    if 'error' in response:
	        print("Error adding document: %s" % response['error'])
	    else:
	        uploaded_PMIDs.add(identifier)