ericmjl/preprocess-compile-notebook.ipynb

## preprocess-compile-notebook.ipynb
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "import nbformat\n",
    "from nbconvert import MarkdownExporter, PDFExporter\n",
    "from nbconvert.preprocessors import ExecutePreprocessor\n",
    "from pyprojroot import here"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "import yaml\n",
    "from pyprojroot import here\n",
    "\n",
    "\n",
    "def read_mkdocs():\n",
    "    \"\"\"\n",
    "    Load the project's mkdocs.yml and return its parsed contents.\n",
    "\n",
    "    The file is looked up in the directory returned by here().\n",
    "    \"\"\"\n",
    "    # Plain read mode: the previous \"r+\" also demanded write permission\n",
    "    # on a file that is never written to.\n",
    "    with open(here() / \"mkdocs.yml\", \"r\", encoding=\"utf-8\") as f:\n",
    "        # safe_load reads straight from the open stream.\n",
    "        mkdocs_config = yaml.safe_load(f)\n",
    "    return mkdocs_config\n",
    "\n",
    "\n",
    "mkdocs_config = read_mkdocs()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "nav = mkdocs_config[\"nav\"]\n",
    "docroot = here() / \"docs\"\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[('Prerequisites', 'preface/prereqs.ipynb'),\n",
       " ('Learning Goals', 'preface/goals.md'),\n",
       " ('Chapter 1: Introduction to Graphs', 'introduction/graphs.md'),\n",
       " ('Chapter 2: The NetworkX API', 'introduction/networkx-intro.ipynb'),\n",
       " ('Chapter 3: Graph Visualization', 'introduction/viz.ipynb'),\n",
       " ('Chapter 4: Hubs', 'algorithms/hubs.ipynb'),\n",
       " ('Chapter 5: Paths', 'algorithms/paths.ipynb'),\n",
       " ('Chapter 6: Structures', 'algorithms/structures.ipynb'),\n",
       " ('Chapter 7: Graph I/O', 'practical/io.ipynb'),\n",
       " ('Chapter 8: Testing', 'practical/testing.md'),\n",
       " ('Further Learning', 'learn-more.md'),\n",
       " ('Style Guide', 'devdocs/style.md')]"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# The goal here is to flatten the tree structure into a list of 2-tuples,\n",
    "# where the title is the first element and the filename is the second element.\n",
    "accumulator = []\n",
    "\n",
    "def parse_navigation(nav, accumulator=None):\n",
    "    \"\"\"\n",
    "    Flatten a nav tree into a list of (title, filename) 2-tuples.\n",
    "\n",
    "    Every {title: filename} leaf is collected in document order,\n",
    "    whatever the file extension; {title: [...]} sections are walked\n",
    "    recursively. Items that are not dicts are skipped.\n",
    "\n",
    "    The list passed as accumulator is extended in place and returned;\n",
    "    a fresh list is created when it is omitted.\n",
    "    \"\"\"\n",
    "    if accumulator is None:\n",
    "        accumulator = []\n",
    "    for item in nav:\n",
    "        if not isinstance(item, dict):\n",
    "            continue\n",
    "        for k, v in item.items():\n",
    "            if isinstance(v, list):\n",
    "                # Nested section: recurse into the same accumulator.\n",
    "                parse_navigation(v, accumulator)\n",
    "            elif isinstance(v, str):\n",
    "                accumulator.append((k, v))\n",
    "    return accumulator\n",
    "    \n",
    "accumulated = parse_navigation(nav, accumulator)\n",
    "accumulated = accumulated[2:]\n",
    "accumulated"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "def read_markdown(fpath):\n",
    "    \"\"\"Return the full text of the markdown file at fpath.\"\"\"\n",
    "    # Read-only mode: \"r+\" would fail on files without write permission.\n",
    "    # The encoding is pinned so the result does not depend on the platform default.\n",
    "    with open(fpath, \"r\", encoding=\"utf-8\") as f:\n",
    "        md = f.read()\n",
    "    return md"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "\"Hey, thanks for picking up this book!\\n\\nNetwork Analysis Made Simple is a collection of Jupyter notebooks\\ndesigned to help you get up and running with the NetworkX package\\nin the Python programming langauge.\\nIt's written by programmers for programmers,\\nand will give you a basic introduction to\\ngraph theory, applied network science,\\nand their connection to linear algebra.\\n\\nWe hope you enjoy learning from it.\\n\\n## Feedback\\n\\nIf you have feedback for the eBook,\\nplease head over to our [GitHub repository][repo] and raise an issue there.\\n\\n[repo]: https://github.com/ericmjl/Network-Analysis-Made-Simple\\n\\n## Support us!\\n\\nIf you find the book useful,\\nyou can support the creators in the following ways:\\n\\n1. Star the repository! It costs you nothing,\\nand helps raise the profile of the book.\\n1. Share it with your colleagues! It also costs you nothing,\\nand helps share _the good stuff_ with those you think might benefit from it.\\n1. Take the official companion courses and projects on DataCamp!\\nIt does cost some money, so we totally understand if you'd prefer not to.\\n1. Support [Eric Ma on Patreon][patreon] with a monthly coffee pledge\\nto keep him caffeinated,\\nwhich helps him make more good material to share.\\n1. Follow Eric and Mridul on Twitter at [@ericmjl][ericmjl_twitter] and [@Mridul_Seth][mridul_twitter]\\n\\n[patreon]: https://www.patreon.com/ericmjl\\n[ericmjl_twitter]: https://twitter.com/ericmjl\\n[mridul_twitter]: https://twitter.com/Mridul_Seth\\n\""
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "read_markdown(here() / \"docs/index.md\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [],
   "source": [
    "def read_notebook(fpath):\n",
    "    \"\"\"Parse the notebook file at fpath and return it as an nbformat v4 node.\"\"\"\n",
    "    # Read-only mode: \"r+\" would fail on files without write permission.\n",
    "    # The encoding is pinned so parsing does not depend on the platform default.\n",
    "    with open(fpath, \"r\", encoding=\"utf-8\") as f:\n",
    "        nb = nbformat.reads(f.read(), as_version=4)\n",
    "    return nb"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [],
   "source": [
    "nb = read_notebook(here() / \"docs/practical/io.ipynb\")\n",
    "# nb[\"cells\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "dict_keys(['cells', 'metadata', 'nbformat', 'nbformat_minor'])"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "nb.keys()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'kernelspec': {'display_name': 'nams', 'language': 'python', 'name': 'nams'},\n",
       " 'language_info': {'codemirror_mode': {'name': 'ipython', 'version': 3},\n",
       "  'file_extension': '.py',\n",
       "  'mimetype': 'text/x-python',\n",
       "  'name': 'python',\n",
       "  'nbconvert_exporter': 'python',\n",
       "  'pygments_lexer': 'ipython3',\n",
       "  'version': '3.7.7'}}"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "nb.metadata"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "This is how we are going to approach the problem. We are going to create ONE GIANT NOTEBOOK\n",
    "and use the PDFExporter to do exporting.\n",
    "\n",
    "God Bless Me as I attempt this..."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "def md2nbcell(md):\n",
    "    \"\"\"Wrap a markdown string in a notebook markdown-cell node.\"\"\"\n",
    "    return nbformat.NotebookNode(\n",
    "        cell_type=\"markdown\",\n",
    "        metadata={},\n",
    "        source=md,\n",
    "    )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "def compile_code_cells(accumulated):\n",
    "    \"\"\"\n",
    "    Build one flat list of notebook cells from (title, filename) pairs.\n",
    "\n",
    "    Each pair contributes a \"# title\" markdown cell, followed by the\n",
    "    file's content: a single markdown cell for \".md\" files, or all of\n",
    "    the notebook's cells for \".ipynb\" files. Any other extension gets\n",
    "    the title cell only. Filenames are resolved under here() / \"docs\".\n",
    "    \"\"\"\n",
    "    compiled = []\n",
    "    for title, file in accumulated:\n",
    "        source_path = here() / \"docs\" / file\n",
    "        compiled.append(md2nbcell(f\"# {title}\"))\n",
    "\n",
    "        if file.endswith(\".md\"):\n",
    "            compiled.append(md2nbcell(read_markdown(source_path)))\n",
    "        elif file.endswith(\".ipynb\"):\n",
    "            compiled.extend(read_notebook(source_path).cells)\n",
    "    return compiled"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "cells = compile_code_cells(accumulated)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "def make_compiled_notebook(cells):\n",
    "    \"\"\"\n",
    "    Create a v4 notebook that holds the given cells.\n",
    "\n",
    "    The notebook metadata is fixed: the 'nams' kernelspec plus the\n",
    "    Python language_info block written out below.\n",
    "    \"\"\"\n",
    "    kernelspec = {'display_name': 'nams', 'language': 'python', 'name': 'nams'}\n",
    "    language_info = {\n",
    "        'codemirror_mode': {'name': 'ipython', 'version': 3},\n",
    "        'file_extension': '.py',\n",
    "        'mimetype': 'text/x-python',\n",
    "        'name': 'python',\n",
    "        'nbconvert_exporter': 'python',\n",
    "        'pygments_lexer': 'ipython3',\n",
    "        'version': '3.7.7',\n",
    "    }\n",
    "\n",
    "    compiled_nb = nbformat.v4.new_notebook()\n",
    "    compiled_nb.metadata = {'kernelspec': kernelspec, 'language_info': language_info}\n",
    "    compiled_nb.cells = cells\n",
    "    return compiled_nb"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "compiled_nb = make_compiled_notebook(cells)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "def to_pdf(nb, timeout=600, kernel_name='nams'):\n",
    "    \"\"\"\n",
    "    Execute a notebook and render it with PDFExporter.\n",
    "\n",
    "    nb is first run through ExecutePreprocessor; the return value of\n",
    "    preprocess() is not used, and the same nb object is then handed to\n",
    "    the exporter.\n",
    "\n",
    "    timeout and kernel_name are forwarded to ExecutePreprocessor; their\n",
    "    defaults are the values that used to be hard-coded here.\n",
    "\n",
    "    Returns the (body, resources) pair from\n",
    "    PDFExporter.from_notebook_node.\n",
    "    \"\"\"\n",
    "    ep = ExecutePreprocessor(timeout=timeout, kernel_name=kernel_name)\n",
    "    ep.preprocess(nb)\n",
    "    pdf_exporter = PDFExporter()\n",
    "    body, resources = pdf_exporter.from_notebook_node(nb)\n",
    "    return body, resources"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "body, resources = to_pdf(compiled_nb)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "def write_pdf(body, fpath):\n",
    "    \"\"\"Save body to fpath, opening the file in binary write mode.\"\"\"\n",
    "    with open(fpath, \"wb\") as pdf_file:\n",
    "        pdf_file.write(body)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "write_pdf(body, \"output.pdf\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "nams",
   "language": "python",
   "name": "nams"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
	{
	"cells": [
	{
	"cell_type": "code",
	"execution_count": 20,
	"metadata": {},
	"outputs": [],
	"source": [
	"import nbformat\n",
	"from nbconvert import MarkdownExporter, PDFExporter\n",
	"from nbconvert.preprocessors import ExecutePreprocessor\n",
	"from pyprojroot import here"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 21,
	"metadata": {},
	"outputs": [],
	"source": [
	"import yaml\n",
	"from pyprojroot import here\n",
	"\n",
	"\n",
	"def read_mkdocs():\n",
	"    \"\"\"\n",
	"    Load the project's mkdocs.yml and return its parsed contents.\n",
	"\n",
	"    The file is looked up in the directory returned by here().\n",
	"    \"\"\"\n",
	"    # Plain read mode: the previous \"r+\" also demanded write permission\n",
	"    # on a file that is never written to.\n",
	"    with open(here() / \"mkdocs.yml\", \"r\", encoding=\"utf-8\") as f:\n",
	"        # safe_load reads straight from the open stream.\n",
	"        mkdocs_config = yaml.safe_load(f)\n",
	"    return mkdocs_config\n",
	"\n",
	"\n",
	"mkdocs_config = read_mkdocs()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 22,
	"metadata": {},
	"outputs": [],
	"source": [
	"nav = mkdocs_config[\"nav\"]\n",
	"docroot = here() / \"docs\"\n"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 23,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"[('Prerequisites', 'preface/prereqs.ipynb'),\n",
	" ('Learning Goals', 'preface/goals.md'),\n",
	" ('Chapter 1: Introduction to Graphs', 'introduction/graphs.md'),\n",
	" ('Chapter 2: The NetworkX API', 'introduction/networkx-intro.ipynb'),\n",
	" ('Chapter 3: Graph Visualization', 'introduction/viz.ipynb'),\n",
	" ('Chapter 4: Hubs', 'algorithms/hubs.ipynb'),\n",
	" ('Chapter 5: Paths', 'algorithms/paths.ipynb'),\n",
	" ('Chapter 6: Structures', 'algorithms/structures.ipynb'),\n",
	" ('Chapter 7: Graph I/O', 'practical/io.ipynb'),\n",
	" ('Chapter 8: Testing', 'practical/testing.md'),\n",
	" ('Further Learning', 'learn-more.md'),\n",
	" ('Style Guide', 'devdocs/style.md')]"
	]
	},
	"execution_count": 23,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"# The goal here is to flatten the tree structure into a list of 2-tuples,\n",
	"# where the title is the first element and the filename is the second element.\n",
	"accumulator = []\n",
	"\n",
	"def parse_navigation(nav, accumulator=None):\n",
	"    \"\"\"\n",
	"    Flatten a nav tree into a list of (title, filename) 2-tuples.\n",
	"\n",
	"    Every {title: filename} leaf is collected in document order,\n",
	"    whatever the file extension; {title: [...]} sections are walked\n",
	"    recursively. Items that are not dicts are skipped.\n",
	"\n",
	"    The list passed as accumulator is extended in place and returned;\n",
	"    a fresh list is created when it is omitted.\n",
	"    \"\"\"\n",
	"    if accumulator is None:\n",
	"        accumulator = []\n",
	"    for item in nav:\n",
	"        if not isinstance(item, dict):\n",
	"            continue\n",
	"        for k, v in item.items():\n",
	"            if isinstance(v, list):\n",
	"                # Nested section: recurse into the same accumulator.\n",
	"                parse_navigation(v, accumulator)\n",
	"            elif isinstance(v, str):\n",
	"                accumulator.append((k, v))\n",
	"    return accumulator\n",
	"\n",
	"accumulated = parse_navigation(nav, accumulator)\n",
	"accumulated = accumulated[2:]\n",
	"accumulated"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 24,
	"metadata": {},
	"outputs": [],
	"source": [
	"def read_markdown(fpath):\n",
	"    \"\"\"Return the full text of the markdown file at fpath.\"\"\"\n",
	"    # Read-only mode: \"r+\" would fail on files without write permission.\n",
	"    # The encoding is pinned so the result does not depend on the platform default.\n",
	"    with open(fpath, \"r\", encoding=\"utf-8\") as f:\n",
	"        md = f.read()\n",
	"    return md"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 25,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"\"Hey, thanks for picking up this book!\\n\\nNetwork Analysis Made Simple is a collection of Jupyter notebooks\\ndesigned to help you get up and running with the NetworkX package\\nin the Python programming langauge.\\nIt's written by programmers for programmers,\\nand will give you a basic introduction to\\ngraph theory, applied network science,\\nand their connection to linear algebra.\\n\\nWe hope you enjoy learning from it.\\n\\n## Feedback\\n\\nIf you have feedback for the eBook,\\nplease head over to our [GitHub repository][repo] and raise an issue there.\\n\\n[repo]: https://github.com/ericmjl/Network-Analysis-Made-Simple\\n\\n## Support us!\\n\\nIf you find the book useful,\\nyou can support the creators in the following ways:\\n\\n1. Star the repository! It costs you nothing,\\nand helps raise the profile of the book.\\n1. Share it with your colleagues! It also costs you nothing,\\nand helps share _the good stuff_ with those you think might benefit from it.\\n1. Take the official companion courses and projects on DataCamp!\\nIt does cost some money, so we totally understand if you'd prefer not to.\\n1. Support [Eric Ma on Patreon][patreon] with a monthly coffee pledge\\nto keep him caffeinated,\\nwhich helps him make more good material to share.\\n1. Follow Eric and Mridul on Twitter at [@ericmjl][ericmjl_twitter] and [@Mridul_Seth][mridul_twitter]\\n\\n[patreon]: https://www.patreon.com/ericmjl\\n[ericmjl_twitter]: https://twitter.com/ericmjl\\n[mridul_twitter]: https://twitter.com/Mridul_Seth\\n\""
	]
	},
	"execution_count": 25,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"read_markdown(here() / \"docs/index.md\")"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 26,
	"metadata": {},
	"outputs": [],
	"source": [
	"def read_notebook(fpath):\n",
	"    \"\"\"Parse the notebook file at fpath and return it as an nbformat v4 node.\"\"\"\n",
	"    # Read-only mode: \"r+\" would fail on files without write permission.\n",
	"    # The encoding is pinned so parsing does not depend on the platform default.\n",
	"    with open(fpath, \"r\", encoding=\"utf-8\") as f:\n",
	"        nb = nbformat.reads(f.read(), as_version=4)\n",
	"    return nb"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 30,
	"metadata": {},
	"outputs": [],
	"source": [
	"nb = read_notebook(here() / \"docs/practical/io.ipynb\")\n",
	"# nb[\"cells\"]"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 9,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"dict_keys(['cells', 'metadata', 'nbformat', 'nbformat_minor'])"
	]
	},
	"execution_count": 9,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"nb.keys()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 10,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"{'kernelspec': {'display_name': 'nams', 'language': 'python', 'name': 'nams'},\n",
	" 'language_info': {'codemirror_mode': {'name': 'ipython', 'version': 3},\n",
	" 'file_extension': '.py',\n",
	" 'mimetype': 'text/x-python',\n",
	" 'name': 'python',\n",
	" 'nbconvert_exporter': 'python',\n",
	" 'pygments_lexer': 'ipython3',\n",
	" 'version': '3.7.7'}}"
	]
	},
	"execution_count": 10,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"nb.metadata"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"This is how we are going to approach the problem. We are going to create ONE GIANT NOTEBOOK\n",
	"and use the PDFExporter to do exporting.\n",
	"\n",
	"God Bless Me as I attempt this..."
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": []
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": []
	},
	{
	"cell_type": "code",
	"execution_count": 11,
	"metadata": {},
	"outputs": [],
	"source": [
	"def md2nbcell(md):\n",
	"    \"\"\"Convert markdown to Jupyter notebook cell.\"\"\"\n",
	"    # Same three fields as before, with the body indentation restored.\n",
	"    data = {\n",
	"        \"cell_type\": \"markdown\",\n",
	"        \"metadata\": {},\n",
	"        \"source\": md,\n",
	"    }\n",
	"    cell = nbformat.NotebookNode(**data)\n",
	"    return cell"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 12,
	"metadata": {},
	"outputs": [],
	"source": [
	"def compile_code_cells(accumulated):\n",
	"    \"\"\"\n",
	"    Build one flat list of notebook cells from (title, filename) pairs.\n",
	"\n",
	"    Each pair contributes a \"# title\" markdown cell, followed by the\n",
	"    file's content: a single markdown cell for \".md\" files, or all of\n",
	"    the notebook's cells for \".ipynb\" files. Any other extension gets\n",
	"    the title cell only. Filenames are resolved under here() / \"docs\".\n",
	"    \"\"\"\n",
	"    cells = []\n",
	"    for title, file in accumulated:\n",
	"        fpath = here() / \"docs\" / file\n",
	"        titlecell = md2nbcell(f\"# {title}\")\n",
	"        cells.append(titlecell)\n",
	"\n",
	"        if file.endswith(\".md\"):\n",
	"            md = read_markdown(fpath)\n",
	"            cell = md2nbcell(md)\n",
	"            cells.append(cell)\n",
	"        elif file.endswith(\".ipynb\"):\n",
	"            nb = read_notebook(fpath)\n",
	"            cells.extend(nb.cells)\n",
	"    return cells"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 13,
	"metadata": {
	"scrolled": true
	},
	"outputs": [],
	"source": [
	"cells = compile_code_cells(accumulated)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 14,
	"metadata": {},
	"outputs": [],
	"source": [
	"def make_compiled_notebook(cells):\n",
	"    \"\"\"\n",
	"    Create a v4 notebook that holds the given cells.\n",
	"\n",
	"    The notebook metadata is fixed: the 'nams' kernelspec plus the\n",
	"    Python language_info block written out below.\n",
	"    \"\"\"\n",
	"    metadata = {\n",
	"        'kernelspec': {'display_name': 'nams', 'language': 'python', 'name': 'nams'},\n",
	"        'language_info': {\n",
	"            'codemirror_mode': {'name': 'ipython', 'version': 3},\n",
	"            'file_extension': '.py',\n",
	"            'mimetype': 'text/x-python',\n",
	"            'name': 'python',\n",
	"            'nbconvert_exporter': 'python',\n",
	"            'pygments_lexer': 'ipython3',\n",
	"            'version': '3.7.7',\n",
	"        },\n",
	"    }\n",
	"\n",
	"    compiled_nb = nbformat.v4.new_notebook()\n",
	"    compiled_nb.metadata = metadata\n",
	"    compiled_nb.cells = cells\n",
	"    return compiled_nb"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 15,
	"metadata": {},
	"outputs": [],
	"source": [
	"compiled_nb = make_compiled_notebook(cells)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 16,
	"metadata": {},
	"outputs": [],
	"source": [
	"def to_pdf(nb, timeout=600, kernel_name='nams'):\n",
	"    \"\"\"\n",
	"    Execute a notebook and render it with PDFExporter.\n",
	"\n",
	"    nb is first run through ExecutePreprocessor; the return value of\n",
	"    preprocess() is not used, and the same nb object is then handed to\n",
	"    the exporter.\n",
	"\n",
	"    timeout and kernel_name are forwarded to ExecutePreprocessor; their\n",
	"    defaults are the values that used to be hard-coded here.\n",
	"\n",
	"    Returns the (body, resources) pair from\n",
	"    PDFExporter.from_notebook_node.\n",
	"    \"\"\"\n",
	"    ep = ExecutePreprocessor(timeout=timeout, kernel_name=kernel_name)\n",
	"    ep.preprocess(nb)\n",
	"    pdf_exporter = PDFExporter()\n",
	"    body, resources = pdf_exporter.from_notebook_node(nb)\n",
	"    return body, resources"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 17,
	"metadata": {},
	"outputs": [],
	"source": [
	"body, resources = to_pdf(compiled_nb)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 18,
	"metadata": {},
	"outputs": [],
	"source": [
	"def write_pdf(body, fpath):\n",
	"    \"\"\"Save body to fpath, opening the file in binary write mode.\"\"\"\n",
	"    with open(fpath, \"wb\") as f:\n",
	"        f.write(body)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 19,
	"metadata": {},
	"outputs": [],
	"source": [
	"write_pdf(body, \"output.pdf\")"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": []
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "nams",
	"language": "python",
	"name": "nams"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.7.7"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 4
	}