Skip to content

Instantly share code, notes, and snippets.

@yk-tanigawa
Last active August 7, 2020 00:41
Show Gist options
  • Save yk-tanigawa/8bc3330bd44cce12e2d6b82c74318bdf to your computer and use it in GitHub Desktop.
Save yk-tanigawa/8bc3330bd44cce12e2d6b82c74318bdf to your computer and use it in GitHub Desktop.

Example usage of the figshare API

Yosuke Tanigawa

2020/5/22

To upload large files to figshare, one can use the figshare API.

This notebook summarizes the basic usage of the figshare API.

We copied the example Python file from the official website, turned some of its constants into function arguments, and saved the resulting functions in a separate module, figshare_API_misc.py.

import hashlib
import json
import os
import requests
from requests.exceptions import HTTPError
def read_token(token_file):
    """Return the access token stored in *token_file*.

    The whole file is read and trailing whitespace (including the final
    newline) is stripped. A token can be generated from
    https://nih.figshare.com/account/applications and should be saved to a
    safe place (like /home/users/ytanigaw/.figshare.token.NIH.txt).
    """
    with open(token_file) as handle:
        contents = handle.read()
    return contents.rstrip()
# The functions below were copied from the example Python file at
# https://docs.figshare.com/#upload_files_example_upload_on_figshare
# and saved in this separate file, with its constants turned into arguments.
def raw_issue_request(method, url, token, data=None, binary=False):
    """Send an authenticated HTTP request and return the decoded response body.

    Args:
        method: HTTP verb handed to ``requests.request`` (e.g. 'GET', 'POST').
        url: Full URL to request.
        token: Access token, sent as the ``Authorization: token <token>`` header.
        data: Optional request payload. It is JSON-encoded before sending
            unless *binary* is true.
        binary: When true, *data* is sent unchanged (used for raw file chunks).

    Returns:
        The response body parsed as JSON, or the raw response content when
        the body is not valid JSON.

    Raises:
        HTTPError: Re-raised from ``response.raise_for_status()`` after the
            error and the response body have been printed.
    """
    headers = {'Authorization': 'token ' + token}
    if data is not None and not binary:
        data = json.dumps(data)
    response = requests.request(method, url, headers=headers, data=data)
    try:
        response.raise_for_status()
    except HTTPError as error:
        # Python 3 exceptions have no ``.message`` attribute; reading it here
        # raised AttributeError and hid the diagnostics. Format the exception
        # itself instead.
        print('Caught an HTTPError: {}'.format(error))
        print('Body:\n', response.content)
        raise
    # Not every endpoint answers with JSON; fall back to the raw content.
    try:
        return json.loads(response.content)
    except ValueError:
        return response.content
def issue_request(method, base_URL, endpoint, token, *args, **kwargs):
    """Issue a request against an API endpoint.

    *base_URL* is a format string containing an ``{endpoint}`` placeholder;
    it is filled in with *endpoint* and the request is delegated to
    ``raw_issue_request`` together with any extra arguments.
    """
    url = base_URL.format(endpoint=endpoint)
    return raw_issue_request(method, url, token, *args, **kwargs)
def list_articles(base_URL, token):
    """Print the URL and title of every article returned by 'account/articles'."""
    articles = issue_request('GET', base_URL, 'account/articles', token)
    print('Listing current articles:')
    if not articles:
        print(' No articles.')
    else:
        for article in articles:
            print(u' {url} - {title}'.format(**article))
    # Trailing blank line separates this listing from later output.
    print('')
def create_article(title, base_URL, token):
    """Create a new article with the given title and return its ID.

    The POST response carries the new article's location; that location is
    then fetched to obtain the article's 'id' field.
    """
    # You may add any other information about the article here as you wish.
    payload = {'title': title}
    created = issue_request('POST', base_URL, 'account/articles', token, data=payload)
    print('Created article:', created['location'], '\n')
    article = raw_issue_request('GET', created['location'], token)
    return article['id']
def list_files_of_article(base_URL, article_id, token):
    """Print the ID and name of every file attached to the given article."""
    endpoint = 'account/articles/{}/files'.format(article_id)
    files = issue_request('GET', base_URL, endpoint, token)
    print('Listing files for article {}:'.format(article_id))
    if not files:
        print(' No files.')
    else:
        for entry in files:
            print(' {id} - {name}'.format(**entry))
    # Trailing blank line separates this listing from later output.
    print('')
def get_file_check_data(file_name, chunk_size):
    """Return ``(md5_hexdigest, size_in_bytes)`` for the file at *file_name*.

    The file is read in binary mode, *chunk_size* bytes at a time, so the
    whole file never has to be held in memory at once.
    """
    digest = hashlib.md5()
    total_bytes = 0
    with open(file_name, 'rb') as stream:
        # iter() with a sentinel keeps calling read() until it returns b''.
        for chunk in iter(lambda: stream.read(chunk_size), b''):
            total_bytes += len(chunk)
            digest.update(chunk)
    return digest.hexdigest(), total_bytes
def initiate_new_upload(base_URL, article_id, file_name, token, chunk_size=1048576):
    """Register a new file upload for an article and return its file info.

    The file's base name, MD5 checksum and size are POSTed to the article's
    files endpoint; the location from that response is then fetched and the
    result returned. *chunk_size* is only used while computing the checksum
    and size.
    """
    endpoint = 'account/articles/{}/files'.format(article_id)
    checksum, n_bytes = get_file_check_data(file_name, chunk_size)
    payload = {
        'name': os.path.basename(file_name),
        'md5': checksum,
        'size': n_bytes,
    }
    created = issue_request('POST', base_URL, endpoint, token, data=payload)
    print('Initiated file upload:', created['location'], '\n')
    return raw_issue_request('GET', created['location'], token)
def complete_upload(base_URL, article_id, file_id, token):
    """POST to the article's file endpoint to complete the file upload."""
    endpoint = 'account/articles/{}/files/{}'.format(article_id, file_id)
    issue_request('POST', base_URL, endpoint, token)
def upload_parts(file_info, file_name, token):
    """Upload every part of *file_name* listed at the file's upload URL.

    The upload URL is taken from ``file_info['upload_url']``; the response
    to a GET on it supplies the 'parts' list, and each part is sent with
    ``upload_part`` from a single open file handle.
    """
    upload_url = '{upload_url}'.format(**file_info)
    upload_status = raw_issue_request('GET', upload_url, token)
    print('Uploading parts:')
    with open(file_name, 'rb') as stream:
        for part in upload_status['parts']:
            upload_part(file_info, stream, part, token)
    print('')
def upload_part(file_info, stream, part, token):
    """PUT one part of the file to ``<upload_url>/<partNo>``.

    The bytes are read from *stream* starting at ``part['startOffset']``;
    the ``+ 1`` in the length treats ``part['endOffset']`` as inclusive.
    """
    # Merge so the URL template can use keys from both mappings;
    # keys in *part* take precedence over those in *file_info*.
    fields = {**file_info, **part}
    part_url = '{upload_url}/{partNo}'.format(**fields)
    first_byte = part['startOffset']
    stream.seek(first_byte)
    chunk = stream.read(part['endOffset'] - first_byte + 1)
    raw_issue_request('PUT', part_url, token, data=chunk, binary=True)
    print(' Uploaded part {partNo} from {startOffset} to {endOffset}'.format(**part))
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from figshare_API_misc import *\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# basic usage of figshare API\n",
"\n",
"## token for figshare API\n",
"\n",
"You can generate a token from https://nih.figshare.com/account/applications\n",
"and save it to a safe place"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"token_file = '/@@@@@@@@@@@@@@@@@@@/.figshare.token.NIH.txt'\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## define constants"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"base_URL = 'https://api.figshare.com/v2/{endpoint}'\n",
"token = read_token(token_file)\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## show the list of \"articles\" (~= datasets)\n"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Listing current articles:\n",
" https://api.figshare.com/v2/account/articles/11369166 - Gene-based test results used for the analysis described in 'Rare protein-altering variants in ANGPTL7 lower intraocular pressure and protect against glaucoma'\n",
" https://api.figshare.com/v2/account/articles/11368022 - Genome-wide summary statistics used for the analysis described in 'Rare protein-altering variants in ANGPTL7 lower intraocular pressure and protect against glaucoma'\n",
" https://api.figshare.com/v2/account/articles/9202247 - Decomposed matrices used for the analysis described in 'Components of genetic associations across 2,138 phenotypes in the UK Biobank highlight adipocyte biology'\n",
"\n"
]
}
],
"source": [
"list_articles(base_URL, token)\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## show the list of files in the specified \"article\"\n",
"\n",
"You should specify the article ID from the output above"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Listing files for article 9202247:\n",
" 16758284 - dev_allNonMHC_z_center_p0001_100PCs_20180129.npz\n",
" 16758281 - dev_codingNonMHC_z_center_p0001_100PCs_20180129.npz\n",
" 16758278 - dev_PTVsNonMHC_z_center_p0001_100PCs_20180129.npz\n",
"\n"
]
}
],
"source": [
"article_id = 9202247\n",
"\n",
"list_files_of_article(base_URL, article_id, token)\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## upload a file using the figshare API"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"# # specify an article ID\n",
"# article_id = -1\n",
"\n",
"# # Then we upload the file.\n",
"# file_info = initiate_new_upload(base_URL, article_id, file_name, token)\n",
"\n",
"# # Until here we used the figshare API; following lines use the figshare upload service API.\n",
"# upload_parts(file_info, file_name, token)\n",
"\n",
"# # We return to the figshare API to complete the file upload process.\n",
"# complete_upload(base_URL, article_id, file_info['id'], token)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment