Skip to content

Instantly share code, notes, and snippets.

@neuromusic
Created September 7, 2012 20:41
Show Gist options
  • Save neuromusic/3669439 to your computer and use it in GitHub Desktop.
Save neuromusic/3669439 to your computer and use it in GitHub Desktop.
demo of hacked-together script for creating a Mendeley collection from a set of PMIDs. uses Mendeley API
#! /usr/bin/python
# Requires Python >2.7.2
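# CAUTION: this script modifies your Mendeley account: unless a group with 'ABCD' in its name already exists it creates a new 'ABCD' group, and it then adds one document to that group for every PMID not already there.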
import json
import urlparse
from mendeley_client import MendeleyClient
from Bio import Entrez, Medline
def update_uploaded_list(mendeley, group_id):
    """Return the PMIDs of the documents already in a Mendeley group.

    Args:
        mendeley: client object exposing ``group_documents(group_id, items=...)``
            and ``group_doc_details(group_id, doc_id)``.
        group_id: id of the group to inspect.

    Returns:
        A list with the ``'pmid'`` identifier of every listed document that
        has one, in the order the documents are listed.  Documents without a
        PMID are ignored.
    """
    # NOTE(review): items='5000' looks like a page-size request; a group with
    # more documents than that would presumably be truncated -- confirm.
    docs = mendeley.group_documents(group_id, items='5000')
    uploaded_PMIDs = []
    for doc_id in docs['document_ids']:
        doc_details = mendeley.group_doc_details(group_id, doc_id)
        # Tolerate details with a missing or empty 'identifiers' entry
        # instead of raising KeyError and aborting the whole scan.
        identifiers = doc_details.get('identifiers') or {}
        if 'pmid' in identifiers:
            uploaded_PMIDs.append(identifiers['pmid'])
    return uploaded_PMIDs
# Mendeley API consumer credentials (placeholders -- substitute your own).
consumer_key = 'XXXXXXXXXXXXXXXX'
consumer_secret = 'XXXXXXXXXXXXXXXX'

mendeley = MendeleyClient(consumer_key, consumer_secret)

# Try to load previously saved keys; if that fails with IOError, obtain the
# required keys and save them.
try:
    mendeley.load_keys()
except IOError:
    mendeley.get_required_keys()
    mendeley.save_keys()
# Look for an existing group with 'abcd' in its name (case-insensitive).
# Every group is scanned, so when several match the last one listed wins.
abcd_group_id = None
groups = mendeley.groups()
for gr in groups:
    group_name = gr['name'].lower()
    if 'abcd' in group_name:
        abcd_group_id = gr['id']

# Nothing matched: create an invite-only group named 'ABCD' and use its id.
if not abcd_group_id:
    new_group = json.dumps({'name': 'ABCD', 'type': 'invite'})
    response = mendeley.create_group(group=new_group)
    abcd_group_id = response['group_id']
# Load the ABCD dump: a JSON object mapping connection ids to connection info.
with open("abcd_connections.json", "rb") as f:
    connections = json.load(f)

# Collect every distinct PMID / ASIN cited in the dump, in first-seen order.
# The companion sets make each duplicate check O(1) instead of rescanning the
# list for every citation.
PMIDs = []
ASINs = []
seen_PMIDs = set()
seen_ASINs = set()
for cnxn_info in connections.values():
    if 'citations' not in cnxn_info:
        continue
    for cite in cnxn_info['citations']:
        # A citation carrying both keys is recorded as a PMID only.
        if 'PMID' in cite:
            if cite['PMID'] not in seen_PMIDs:
                seen_PMIDs.add(cite['PMID'])
                PMIDs.append(cite['PMID'])
        elif 'ASIN' in cite:
            if cite['ASIN'] not in seen_ASINs:
                seen_ASINs.add(cite['ASIN'])
                ASINs.append(cite['ASIN'])

print("%s PMIDs referenced in the ABCD" % len(PMIDs))
print("updating the list of uploaded PMIDS...")
uploaded_PMIDs = update_uploaded_list(mendeley, abcd_group_id)
print("%s PMIDs already uploaded" % len(uploaded_PMIDs))

# Set mirror of uploaded_PMIDs so the per-PMID membership test is O(1).
already_uploaded = set(uploaded_PMIDs)

# Fields stripped from a document before it is submitted to the group.
fields_to_remove = [
    'error',
    'mendeley_url',
    'stats',
    'uuid',
    'publication_outlet',
    'categories',
    'groups',
]

for identifier in PMIDs:
    # If it already exists in the Mendeley group, skip it.
    if identifier in already_uploaded:
        print("skip %s: %s" % ('pmid', identifier))
        continue

    print("adding %s: %s" % ('pmid', identifier))
    document = mendeley.details(identifier, type='pmid')

    if 'error' in document:
        print("Error getting document: %s" % document['error'])
        # The original message was never interpolated and printed the raw
        # '%(identifier)s' placeholder.
        print("since PMID not found in Mendley, extracting basic info on %(identifier)s from pubmed"
              % {'identifier': identifier})
        # NOTE(review): Biopython expects Entrez.email to be set before
        # querying NCBI; this script never sets it -- confirm and add one.
        handle = Entrez.efetch(db="pubmed", id=identifier, rettype="medline", retmode="text")
        try:
            records = list(Medline.parse(handle))
        finally:
            # Always release the connection, even if parsing fails.
            handle.close()
        if not records:
            # Nothing came back from PubMed either; skip rather than crash.
            print("Error getting document: no PubMed record for %s" % identifier)
            continue
        record = records[0]
        # Records without an abstract or full author names omit those keys,
        # so fall back instead of raising KeyError.
        document = {
            'title': record.get('TI', ''),
            'authors': record.get('FAU', record.get('AU', [])),
            'abstract': record.get('AB', ''),
            'identifiers': {'pmid': identifier},
            'type': 'Journal Article',
        }

    # Strip extra fields to prep the document for adding to the group.
    for field in fields_to_remove:
        document.pop(field, None)
    document['group_id'] = abcd_group_id

    # Add the citation to the group.
    response = mendeley.create_document(document=json.dumps(document))
    if 'error' in response:
        print("Error adding document: %s" % response['error'])
    else:
        # Track the upload locally instead of re-fetching the group listing.
        uploaded_PMIDs.append(identifier)
        already_uploaded.add(identifier)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment