Skip to content

Instantly share code, notes, and snippets.

@johnsolk
Created August 14, 2018 18:13
Show Gist options
  • Save johnsolk/0f0f5d1bb3fa606a008473c8dafc2b21 to your computer and use it in GitHub Desktop.
Save johnsolk/0f0f5d1bb3fa606a008473c8dafc2b21 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Make a .bib file from doi "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"https://github.com/bibcure/doi2bib\n",
"\n",
"Install doi2bib:"
]
},
{
"cell_type": "code",
"execution_count": 50,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"#!pip install doi2bib"
]
},
{
"cell_type": "code",
"execution_count": 51,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import path\n",
"import subprocess\n",
"from subprocess import Popen, PIPE"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"* Previously-created .bib file from Mendeley, but not formatted correctly.\n",
"* DOI could be obtained separately, and compiled in a list."
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
"filename = \"MMETSP.bib\"\n",
"with open(filename) as f:\n",
" content = f.readlines()\n",
"content = [x.strip() for x in content] "
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [],
"source": [
"doi_list = []\n",
"for i in content:\n",
" if i.startswith(\"doi\"):\n",
" doi = i.split(\"{\")\n",
" doi = doi[1].split(\"}\")\n",
" if doi not in doi_list:\n",
" doi_list.append(doi[0])\n",
" else:\n",
" print(\"Already in list:\",doi)"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['10.1073/pnas.1421993112', '10.1073/pnas.1518165112', '10.1002/bies.201600213', '10.1038/srep39734', '10.1371/journal.pgen.1004365', '10.1089/cmb.2012.0021', '10.1093/bioinformatics/btu170', '10.1186/2047-217X-2-10', '10.6084/M9.FIGSHARE.878460.V2', '10.1093/bioinformatics/btw218', '10.1038/nrmicro.2016.160', '10.1186/s12864-017-4379-x', '10.1371/journal.pone.0094825', '10.1186/s13059-016-0881-8', '10.12688/f1000research.6924.1', '10.1016/j.margen.2015.05.014', '10.1111/jpy.12441', '10.1093/bioinformatics/btw354', '10.1093/nar/gkv1344', '10.3389/fmicb.2014.00375', '10.1093/nar/gkn766', '10.1016/j.cpb.2017.12.004', '10.1038/ismej.2016.129', '10.1098/rspb.2014.2332', '10.1038/nbt.1883', '10.1371/currents.tol.c24b6054aebf3602748ac042ccc8f2e9', '10.1371/journal.pone.0129081', '10.3389/fmicb.2017.01279', '10.1093/molbev/msw166', '10.1111/ede.12230', '10.1101/056846', '10.5281/ZENODO.1212585', '10.1101/039230', '10.1371/journal.pbio.1001889', '10.1371/journal.pone.0097801', '10.1093/nar/gkq1019', '10.1186/s13059-014-0553-5', '10.1016/J.RESMIC.2011.04.006', '10.7287/peerj.preprints.505v1', '10.3389/fgene.2014.00013', '10.7717/peerj.5428', '10.1186/s12864-016-3451-2', '10.1186/s13742-016-0138-1', '10.1186/s12920-017-0289-7', '10.3389/fgene.2015.00361', '10.1371/journal.pone.0184167', '10.1186/1471-2164-14-465', '10.1038/nmeth.4197', '10.1093/bioinformatics/btt219', '10.1371/journal.pone.0153104', '10.1038/nmeth.1517', '10.1093/bioinformatics/bts094', '10.1038/srep24375', '10.1038/s41559-017-0145', '10.1093/bioinformatics/btv351', '10.1101/gr.196469.115', '10.1038/nbt.3442', '10.1038/sdata.2016.51', '10.1016/j.cell.2016.06.020', '10.21105/joss.00027', '10.1186/2041-9139-4-16', '10.1016/J.MARGEN.2017.09.002', '10.1093/bioinformatics/btw625', '10.1093/bioinformatics/btu077', '10.1016/S2095-3119(16)61461-2', '10.1038/srep36877', '10.1093/nar/gkw1119', '10.7287/peerj.preprints.890v1']\n",
"68\n"
]
}
],
"source": [
"print(doi_list)\n",
"print(len(doi_list))"
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {},
"outputs": [],
"source": [
"out_filename = \"MMETSP_formatted.bib\"\n",
"with open(out_filename,'w') as f:\n",
" for d in doi_list:\n",
" command = ['doi2bib',d]\n",
" result = subprocess.run(command, stdout=subprocess.PIPE)\n",
" f.write(result.stdout.decode('utf-8'))\n",
"with open(out_filename) as f:\n",
" content = f.readlines()\n",
" clean_lines = [l.strip() for l in content if l.strip()]\n",
"with open(out_filename, \"w\") as f:\n",
" f.writelines('\\n'.join(clean_lines))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment