# Source: GitHub gist Joshuaek/b3182e996f9b851c811a182264ccd5a4 (last active January 3, 2021).
import os
import sys
import argparse
import fnmatch
import logging
import papermill as pm
from datetime import datetime
import time
# This gist has been upgraded to a repository now!
# The repo version has fixes and improvements!
# Read more in the repository version (the link was not preserved in this copy).
# Notebook Scheduler
# ---------------------------------------
# This script automates the processing of Jupyter Notebooks via papermill (https://github.com/nteract/papermill).
# The script can work in two ways:
# 1. With an external scheduler, you can schedule execution of all notebooks in one of the hourly/daily/weekly directories.
# 2. If no directory is specified, it uses the `schedule` library to run its own scheduler, processing the notebooks in each of the hourly/daily/weekly directories.
# This means you can either use your operating system's task scheduler or, if it's easier, just let the script do the scheduling.
# The folder structure is as follows
# <script_folder>/
# ├── hourly/
# │   ├── notebook1.ipynb
# │   ├── notebook2.ipynb
# │   └── snapshots/
# │       ├── notebook1/
# │       │   └── notebook1 2019-03-22 21:22:58.439005.ipynb
# │       └── notebook2/
# │           └── notebook2 2019-03-22 21:23:14.5315001.ipynb
# ├── daily/
# │   ├── notebook1.ipynb
# │   ├── notebook2.ipynb
# │   └── snapshots/
# │       ├── notebook1/
# │       │   └── notebook1 2019-03-22 21:22:58.439005.ipynb
# │       └── notebook2/
# │           └── notebook2 2019-03-22 21:23:14.5315001.ipynb
# └── weekly/
#     ├── notebook1.ipynb
#     ├── notebook2.ipynb
#     └── snapshots/
#         ├── notebook1/
#         │   └── notebook1 2019-03-22 21:22:58.439005.ipynb
#         └── notebook2/
#             └── notebook2 2019-03-22 21:23:14.5315001.ipynb
# The hourly / daily / weekly folders are created next to this script if they don't already exist.
# To use, just put notebooks into the hourly / daily / weekly folders and run this script without any arguments.
# Name of the sub-directory (inside each notebook folder) that holds executed snapshots
snapshotDir = "snapshots"
def findFiles(directory, pattern):
    """Yield the paths of entries in ``directory`` whose names match ``pattern``.

    Matching is case-insensitive on every platform: both the entry name and
    the pattern are lower-cased before comparison, so ``*.ipynb`` and
    ``*.IPYNB`` select the same entries. Entries are yielded in sorted name
    order so that repeated runs process them in the same sequence.

    Args:
        directory: Path of the directory to list (not searched recursively).
        pattern: ``fnmatch``-style glob pattern, e.g. ``'*.ipynb'``.

    Yields:
        ``os.path.join(directory, name)`` for each matching entry name.

    Raises:
        OSError: Propagated from ``os.listdir`` when the directory cannot be
            listed. Because this is a generator, it is raised on first
            iteration rather than at call time.
    """
    # Lower-case the pattern once; fnmatchcase then compares without any
    # platform-dependent case normalisation.
    loweredPattern = pattern.lower()
    for filename in sorted(os.listdir(directory)):
        if fnmatch.fnmatchcase(filename.lower(), loweredPattern):
            yield os.path.join(directory, filename)
def processNotebooks(notebookDirectory, params=None):
    """Execute every notebook in ``notebookDirectory`` and save a snapshot of each run.

    For each ``*.ipynb`` file found directly inside ``notebookDirectory`` the
    notebook is executed with papermill and the executed copy is written to
    ``<notebookDirectory>/<snapshotDir>/<notebook name>/<notebook name> <timestamp>.ipynb``.
    A failure in one notebook is logged and does not stop the remaining
    notebooks from being processed.

    Args:
        notebookDirectory: Directory containing the notebooks to run.
        params: Optional value handed to papermill as the notebook parameters.
            NOTE(review): the call that consumed ``params`` was lost from the
            damaged source; passing it as ``parameters=`` is an assumption.
    """
    logging.info('Processing ' + notebookDirectory)

    # Each time a notebook is processed a snapshot is saved to a snapshot sub-directory
    # This checks the sub-directory exists and creates it if not
    snapshotRoot = os.path.join(notebookDirectory, snapshotDir)
    if not os.path.isdir(snapshotRoot):
        os.mkdir(snapshotRoot)

    for notebookPath in findFiles(notebookDirectory, '*.ipynb'):
        try:
            nb = os.path.basename(notebookPath)
            # splitext strips only the final extension, whatever its case
            notebookName = os.path.splitext(nb)[0]
            now = datetime.now()

            # Within the snapshot directory, each notebook output is stored in its own sub-directory
            notebookSnapshot = os.path.join(snapshotRoot, notebookName)
            if not os.path.isdir(notebookSnapshot):
                os.mkdir(notebookSnapshot)

            # The snapshot file includes a timestamp
            # NOTE: str(now) contains ':' characters, which are not valid in Windows file names
            output_file = os.path.join(notebookSnapshot, notebookName + " " + str(now) + ".ipynb")

            # Execute the notebook and save the snapshot
            pm.execute_notebook(notebookPath, output_file, parameters=params)
        except Exception:
            # If any errors occur with the notebook processing they will be logged to the log file
            logging.exception("Error processing notebook")
if __name__ == '__main__':
    # Entry point: prepare logging and the notebook folders, then either
    # process one requested directory and exit, or run the built-in scheduler.

    # Ensure we're running in the same directory as the script, so that the
    # relative hourly/daily/weekly folder names resolve next to it
    os.chdir(os.path.dirname(os.path.abspath(__file__)))

    # Set up logger to display to screen and file
    # NOTE(review): the log level and log file name were lost from the damaged
    # source; INFO and 'notebook.log' are assumptions - confirm against the original.
    logFormat = '%(asctime)s %(name)-12s %(levelname)-8s %(message)s'
    dateFormat = '%Y-%m-%d %H:%M:%S'
    logging.basicConfig(level=logging.INFO,
                        format=logFormat,
                        datefmt=dateFormat,
                        filename='notebook.log')
    console = logging.StreamHandler()
    console.setLevel(logging.INFO)
    console.setFormatter(logging.Formatter(logFormat, datefmt=dateFormat))
    logging.getLogger('').addHandler(console)

    # Check if the subfolders for notebooks exist, and create them if they don't
    for directory in ['daily', 'hourly', 'weekly']:
        if not os.path.isdir(directory):
            os.mkdir(directory)

    # Get optional directory passed in via command line. If this is specified, then we just process the requested directory.
    # This is useful if you're scheduling the processing with an external task scheduler
    # If directory is not specified, then we'll set up our own scheduler and process the tasks
    parser = argparse.ArgumentParser(description = "Description for my parser")
    parser.add_argument("-d", "--directory", help = "Which set of notebooks to process - e.g. hourly", required = False, default = False)
    argument = parser.parse_args()

    # If a directory has been specified, we'll just process that one directory now and exit
    if argument.directory:
        processNotebooks(argument.directory)
        sys.exit(0)

    # Only require the schedule module if we're using the internal scheduler
    # Install this with pip install schedule
    import schedule

    # If no directory has been specified, schedule the processing and execute
    # NOTE(review): the time units between every() and the time string were lost
    # from the damaged source. hour/day follow from the folder names; the weekday
    # used for the weekly run (monday) is an assumption - confirm.
    schedule.every().hour.at(':40').do(processNotebooks, notebookDirectory='hourly')
    schedule.every().day.at('13:15').do(processNotebooks, notebookDirectory='daily')
    schedule.every().monday.at('13:15').do(processNotebooks, notebookDirectory='weekly')

    # Run the scheduled tasks
    while True:
        schedule.run_pending()
        # Sleep between polls so the loop does not spin the CPU
        time.sleep(1)
# (GitHub page footer omitted: sign-in / comment prompt, not part of the script.)