Skip to content

Instantly share code, notes, and snippets.

@biermeester
Last active August 16, 2018 10:26
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save biermeester/350a8c9ad2d61b292d55318bc5653fa7 to your computer and use it in GitHub Desktop.
Save biermeester/350a8c9ad2d61b292d55318bc5653fa7 to your computer and use it in GitHub Desktop.
Git hook pre-commit script for stripping output and irrelevant metadata from Jupyter notebooks
#!/usr/bin/env python3
"""
Git pre-commit hook script
Strips output from Jupyter Notebooks
"""
import os
import sys
from subprocess import check_output, check_call
# Collect the staged paths that still exist in the index (added, copied,
# modified or renamed). Deleted files are filtered out because there is
# nothing left on disk to strip.
# `-z` makes git emit raw, NUL-terminated names instead of C-quoting paths
# that contain non-ASCII characters, and os.fsdecode() converts those raw
# bytes to str paths without assuming an ASCII-only repository.
_staged = check_output(
    ['git', 'diff', '--name-only', '--cached', '--diff-filter=ACMR', '-z']
)
files = [os.fsdecode(name) for name in _staged.split(b'\0') if name]
def strip_output(file_path):
    """Strip all outputs from a notebook in place, to reduce git noise.

    Runs ``jq`` over the notebook, replacing every cell's ``outputs`` with an
    empty list and every ``execution_count`` with null, then overwrites the
    file with jq's output (indented with one space).

    Args:
        file_path: Path to the ``.ipynb`` file, absolute or relative to the
            current working directory.

    Raises:
        SystemExit: With status 1 when starting ``jq`` raises
            ``FileNotFoundError`` (typically because jq is not installed).
        subprocess.CalledProcessError: When ``jq`` exits with a non-zero
            status; the notebook is left untouched in that case.
    """
    d, file_name = os.path.split(file_path)

    # NOTE(review): the filter line in the published source appears to be cut
    # off after the execution_count clause; any further clauses (e.g. metadata
    # clean-up) could not be recovered and should be re-added if intended.
    jq_format = (
        '(.cells[] | select(has("outputs")) | .outputs) = []'
        '| (.cells[] | select(has("execution_count")) | .execution_count) = null'
    )

    try:
        # os.path.split() yields '' as the directory for a notebook in the
        # current directory. Passing cwd='' makes the subprocess fail with
        # FileNotFoundError, which would be misreported below as a missing
        # jq binary, so fall back to None (inherit the current directory).
        stripped = check_output(
            ['jq', '--indent', '1', jq_format, file_name],
            cwd=d or None,
        )
    except FileNotFoundError:
        print(file_name)
        print("Please install jq for pre-commit clean-up!")
        print("Aborting commit...")
        sys.exit(1)

    # jq's output is bytes, so write it back in binary mode unchanged.
    with open(file_path, "wb") as f:
        f.write(stripped)
# Strip every staged notebook and re-stage the cleaned file so the commit
# contains the stripped version rather than the one with outputs.
stripped_any = False
for f in files:
    # `files` holds str paths, so the suffix must be a str as well; comparing
    # against a bytes literal is always False and would skip every notebook.
    if not f.endswith('.ipynb'):
        continue
    # A staged deletion leaves no file on disk to strip.
    if not os.path.isfile(f):
        continue
    print(f"Stripping output from {f}...")
    strip_output(f)
    check_call(['git', 'add', f])
    stripped_any = True

# If stripping removed the only differences, the index now matches HEAD and
# there is nothing left to commit. Only check when something was actually
# stripped, so commits that never touched a notebook are not blocked.
if stripped_any:
    remaining = check_output(['git', 'diff', '--name-only', '--cached']).splitlines()
    if not remaining:
        print("No changes after stripping output, aborting commit!")
        sys.exit(1)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment