johnsolk/doi2bib.ipynb

## doi2bib.ipynb
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Make a .bib file from a list of DOIs"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "https://github.com/bibcure/doi2bib\n",
    "\n",
    "Install doi2bib:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Uncomment to install doi2bib into this kernel's environment:\n",
    "# %pip install doi2bib"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import path\n",
    "import subprocess\n",
    "from subprocess import Popen, PIPE"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "* Input: a previously created .bib file exported from Mendeley, which is not formatted correctly.\n",
    "* Alternatively, the DOIs could be obtained separately and compiled into a list."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the Mendeley export as a list of whitespace-stripped lines.\n",
    "filename = \"MMETSP.bib\"\n",
    "with open(filename) as bib_file:\n",
    "    content = [raw_line.strip() for raw_line in bib_file]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Collect the DOI of every line that starts with \"doi\", skipping repeats.\n",
    "doi_list = []\n",
    "for line in content:\n",
    "    if not line.startswith(\"doi\"):\n",
    "        continue\n",
    "    # The DOI is the text between the first \"{\" and the \"}\" that follows it.\n",
    "    # Reduce it to a plain string *before* the membership test: comparing the\n",
    "    # list returned by split() against a list of strings never matches, so\n",
    "    # duplicates would slip through.\n",
    "    doi = line.split(\"{\", 1)[1].split(\"}\", 1)[0]\n",
    "    if doi not in doi_list:\n",
    "        doi_list.append(doi)\n",
    "    else:\n",
    "        print(\"Already in list:\", doi)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['10.1073/pnas.1421993112', '10.1073/pnas.1518165112', '10.1002/bies.201600213', '10.1038/srep39734', '10.1371/journal.pgen.1004365', '10.1089/cmb.2012.0021', '10.1093/bioinformatics/btu170', '10.1186/2047-217X-2-10', '10.6084/M9.FIGSHARE.878460.V2', '10.1093/bioinformatics/btw218', '10.1038/nrmicro.2016.160', '10.1186/s12864-017-4379-x', '10.1371/journal.pone.0094825', '10.1186/s13059-016-0881-8', '10.12688/f1000research.6924.1', '10.1016/j.margen.2015.05.014', '10.1111/jpy.12441', '10.1093/bioinformatics/btw354', '10.1093/nar/gkv1344', '10.3389/fmicb.2014.00375', '10.1093/nar/gkn766', '10.1016/j.cpb.2017.12.004', '10.1038/ismej.2016.129', '10.1098/rspb.2014.2332', '10.1038/nbt.1883', '10.1371/currents.tol.c24b6054aebf3602748ac042ccc8f2e9', '10.1371/journal.pone.0129081', '10.3389/fmicb.2017.01279', '10.1093/molbev/msw166', '10.1111/ede.12230', '10.1101/056846', '10.5281/ZENODO.1212585', '10.1101/039230', '10.1371/journal.pbio.1001889', '10.1371/journal.pone.0097801', '10.1093/nar/gkq1019', '10.1186/s13059-014-0553-5', '10.1016/J.RESMIC.2011.04.006', '10.7287/peerj.preprints.505v1', '10.3389/fgene.2014.00013', '10.7717/peerj.5428', '10.1186/s12864-016-3451-2', '10.1186/s13742-016-0138-1', '10.1186/s12920-017-0289-7', '10.3389/fgene.2015.00361', '10.1371/journal.pone.0184167', '10.1186/1471-2164-14-465', '10.1038/nmeth.4197', '10.1093/bioinformatics/btt219', '10.1371/journal.pone.0153104', '10.1038/nmeth.1517', '10.1093/bioinformatics/bts094', '10.1038/srep24375', '10.1038/s41559-017-0145', '10.1093/bioinformatics/btv351', '10.1101/gr.196469.115', '10.1038/nbt.3442', '10.1038/sdata.2016.51', '10.1016/j.cell.2016.06.020', '10.21105/joss.00027', '10.1186/2041-9139-4-16', '10.1016/J.MARGEN.2017.09.002', '10.1093/bioinformatics/btw625', '10.1093/bioinformatics/btu077', '10.1016/S2095-3119(16)61461-2', '10.1038/srep36877', '10.1093/nar/gkw1119', '10.7287/peerj.preprints.890v1']\n",
      "68\n"
     ]
    }
   ],
   "source": [
    "# Show the collected DOIs, then how many there are (one item per line).\n",
    "print(doi_list, len(doi_list), sep=\"\\n\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Look up each DOI with the doi2bib command-line tool and write the entries,\n",
    "# stripped of blank lines and surrounding whitespace, to one .bib file.\n",
    "out_filename = \"MMETSP_formatted.bib\"\n",
    "clean_lines = []   # non-blank, stripped lines of every retrieved entry\n",
    "failed_dois = []   # DOIs where doi2bib exited non-zero or printed nothing\n",
    "for d in doi_list:\n",
    "    # The DOI is passed as its own argv item (no shell involved), so\n",
    "    # characters such as parentheses in a DOI need no quoting.\n",
    "    result = subprocess.run(['doi2bib', d], stdout=subprocess.PIPE)\n",
    "    stripped = [s.strip() for s in result.stdout.decode('utf-8').splitlines()]\n",
    "    entry_lines = [s for s in stripped if s]\n",
    "    if result.returncode != 0 or not entry_lines:\n",
    "        failed_dois.append(d)\n",
    "    clean_lines.extend(entry_lines)\n",
    "# Write once, explicitly as UTF-8 (the encoding the tool output was decoded\n",
    "# with). The lines are accumulated in memory so the global `content` from the\n",
    "# loading cell is left intact and the earlier cells stay re-runnable.\n",
    "with open(out_filename, 'w', encoding='utf-8') as f:\n",
    "    f.write('\\n'.join(clean_lines))\n",
    "if failed_dois:\n",
    "    print(\"No BibTeX entry retrieved for:\", failed_dois)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
	{
	"cells": [
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"# Make a .bib file from a list of DOIs"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"https://github.com/bibcure/doi2bib\n",
	"\n",
	"Install doi2bib:"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 50,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"# Uncomment to install doi2bib into this kernel's environment:\n",
	"# %pip install doi2bib"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 51,
	"metadata": {},
	"outputs": [],
	"source": [
	"import os\n",
	"import path\n",
	"import subprocess\n",
	"from subprocess import Popen, PIPE"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"* Input: a previously created .bib file exported from Mendeley, which is not formatted correctly.\n",
	"* Alternatively, the DOIs could be obtained separately and compiled into a list."
	]
	},
	{
	"cell_type": "code",
	"execution_count": 17,
	"metadata": {},
	"outputs": [],
	"source": [
	"# Load the Mendeley export as a list of whitespace-stripped lines.\n",
	"filename = \"MMETSP.bib\"\n",
	"with open(filename) as bib_file:\n",
	"    content = [raw_line.strip() for raw_line in bib_file]"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 26,
	"metadata": {},
	"outputs": [],
	"source": [
	"# Collect the DOI of every line that starts with \"doi\", skipping repeats.\n",
	"doi_list = []\n",
	"for line in content:\n",
	"    if not line.startswith(\"doi\"):\n",
	"        continue\n",
	"    # The DOI is the text between the first \"{\" and the \"}\" that follows it.\n",
	"    # Reduce it to a plain string *before* the membership test: comparing the\n",
	"    # list returned by split() against a list of strings never matches, so\n",
	"    # duplicates would slip through.\n",
	"    doi = line.split(\"{\", 1)[1].split(\"}\", 1)[0]\n",
	"    if doi not in doi_list:\n",
	"        doi_list.append(doi)\n",
	"    else:\n",
	"        print(\"Already in list:\", doi)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 28,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"['10.1073/pnas.1421993112', '10.1073/pnas.1518165112', '10.1002/bies.201600213', '10.1038/srep39734', '10.1371/journal.pgen.1004365', '10.1089/cmb.2012.0021', '10.1093/bioinformatics/btu170', '10.1186/2047-217X-2-10', '10.6084/M9.FIGSHARE.878460.V2', '10.1093/bioinformatics/btw218', '10.1038/nrmicro.2016.160', '10.1186/s12864-017-4379-x', '10.1371/journal.pone.0094825', '10.1186/s13059-016-0881-8', '10.12688/f1000research.6924.1', '10.1016/j.margen.2015.05.014', '10.1111/jpy.12441', '10.1093/bioinformatics/btw354', '10.1093/nar/gkv1344', '10.3389/fmicb.2014.00375', '10.1093/nar/gkn766', '10.1016/j.cpb.2017.12.004', '10.1038/ismej.2016.129', '10.1098/rspb.2014.2332', '10.1038/nbt.1883', '10.1371/currents.tol.c24b6054aebf3602748ac042ccc8f2e9', '10.1371/journal.pone.0129081', '10.3389/fmicb.2017.01279', '10.1093/molbev/msw166', '10.1111/ede.12230', '10.1101/056846', '10.5281/ZENODO.1212585', '10.1101/039230', '10.1371/journal.pbio.1001889', '10.1371/journal.pone.0097801', '10.1093/nar/gkq1019', '10.1186/s13059-014-0553-5', '10.1016/J.RESMIC.2011.04.006', '10.7287/peerj.preprints.505v1', '10.3389/fgene.2014.00013', '10.7717/peerj.5428', '10.1186/s12864-016-3451-2', '10.1186/s13742-016-0138-1', '10.1186/s12920-017-0289-7', '10.3389/fgene.2015.00361', '10.1371/journal.pone.0184167', '10.1186/1471-2164-14-465', '10.1038/nmeth.4197', '10.1093/bioinformatics/btt219', '10.1371/journal.pone.0153104', '10.1038/nmeth.1517', '10.1093/bioinformatics/bts094', '10.1038/srep24375', '10.1038/s41559-017-0145', '10.1093/bioinformatics/btv351', '10.1101/gr.196469.115', '10.1038/nbt.3442', '10.1038/sdata.2016.51', '10.1016/j.cell.2016.06.020', '10.21105/joss.00027', '10.1186/2041-9139-4-16', '10.1016/J.MARGEN.2017.09.002', '10.1093/bioinformatics/btw625', '10.1093/bioinformatics/btu077', '10.1016/S2095-3119(16)61461-2', '10.1038/srep36877', '10.1093/nar/gkw1119', '10.7287/peerj.preprints.890v1']\n",
	"68\n"
	]
	}
	],
	"source": [
	"# Show the collected DOIs, then how many there are (one item per line).\n",
	"print(doi_list, len(doi_list), sep=\"\\n\")"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 49,
	"metadata": {},
	"outputs": [],
	"source": [
	"# Look up each DOI with the doi2bib command-line tool and write the entries,\n",
	"# stripped of blank lines and surrounding whitespace, to one .bib file.\n",
	"out_filename = \"MMETSP_formatted.bib\"\n",
	"clean_lines = []   # non-blank, stripped lines of every retrieved entry\n",
	"failed_dois = []   # DOIs where doi2bib exited non-zero or printed nothing\n",
	"for d in doi_list:\n",
	"    # The DOI is passed as its own argv item (no shell involved), so\n",
	"    # characters such as parentheses in a DOI need no quoting.\n",
	"    result = subprocess.run(['doi2bib', d], stdout=subprocess.PIPE)\n",
	"    stripped = [s.strip() for s in result.stdout.decode('utf-8').splitlines()]\n",
	"    entry_lines = [s for s in stripped if s]\n",
	"    if result.returncode != 0 or not entry_lines:\n",
	"        failed_dois.append(d)\n",
	"    clean_lines.extend(entry_lines)\n",
	"# Write once, explicitly as UTF-8 (the encoding the tool output was decoded\n",
	"# with). The lines are accumulated in memory so the global `content` from the\n",
	"# loading cell is left intact and the earlier cells stay re-runnable.\n",
	"with open(out_filename, 'w', encoding='utf-8') as f:\n",
	"    f.write('\\n'.join(clean_lines))\n",
	"if failed_dois:\n",
	"    print(\"No BibTeX entry retrieved for:\", failed_dois)"
	]
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 3",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.6.3"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 2
	}