Created
June 7, 2018 21:36
-
-
Save nrweir/0f9f456ac8c1b67bc7a72a29241fc1fe to your computer and use it in GitHub Desktop.
Using the PMC API and parsing its XML output
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# imports | |
import ftplib as FTP
import os
import subprocess
import urllib
import urllib.request
from xml.etree import ElementTree as ET

import pandas as pd
# ESearch request against the PMC database. Everything after the first "?" is
# the query string; the search expression itself goes in the `term=` parameter,
# URL-encoded (%22 is a double quote, %20 is a space).
# NOTE(review): the `%22open%20access%22[filter]` clause is sent as its own
# unnamed `&`-separated parameter instead of being part of `term=`, so it is
# probably ignored by the server -- confirm, and if the open-access filter is
# wanted, fold it into `term` with AND.
requestURL = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=PMC&%22open%20access%22[filter]&term=%22western%20blot%22[Figure/Table%20Caption]&usehistory=y&retmax=50000"

# Perform the request and parse the XML reply. The `with` block closes the
# HTTP response once the whole document has been read into the element tree.
with urllib.request.urlopen(requestURL) as response:
    root = ET.parse(response).getroot()

# Pull out every <Id> element under <IdList> and keep its text content.
IDs = root.findall('IdList/Id')
id_list = [ID.text for ID in IDs]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment