Skip to content

Instantly share code, notes, and snippets.

@adamrpah
Created March 22, 2015 17:00
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save adamrpah/a4617d0e2153d273fad6 to your computer and use it in GitHub Desktop.
Save adamrpah/a4617d0e2153d273fad6 to your computer and use it in GitHub Desktop.
Python script to remove output and prompt numbering from a notebook file
#!/usr/bin/env python3
'''
File: notebook_cleaner.py
Author: Adam Pah
Description:
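Strips cell outputs and prompt numbers from IPython notebook (.ipynb) files,
either a single notebook or every notebook found under a directory (--fpath).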
'''
#Standard path imports
from __future__ import division, print_function
import argparse
import sys
import os
#Non-standard imports
import json
import IPython.nbformat.current as nbf
#Global directories and variables
def cell_cleaner(nb):
    '''
    Function to "clean" the cells of a notebook, deleting any output and the prompt numbering.
    The notebook is modified in place and nothing is returned.
    input:
        nb - notebook file contents; must provide a 'worksheets' list whose
             entries each hold a 'cells' list
    '''
    #Visit every worksheet rather than only the first one, so a notebook
    #with no worksheets is a no-op and one with several is fully cleaned
    for worksheet in nb['worksheets']:
        for cell in worksheet['cells']:
            #Drop the prompt number if the cell carries one
            cell.pop('prompt_number', None)
            #Only cells that already have an 'outputs' entry get it reset
            if 'outputs' in cell:
                cell['outputs'] = []
def notebook_cleaner(fname):
    '''
    Cleans and writes a notebook. The file at fname is overwritten in place.
    input:
        fname - notebook filename
    '''
    #Read in the notebook; the handle is closed before the same path is
    #reopened for writing
    with open(fname) as infile:
        nb = nbf.read(infile, 'ipynb')
    #Clean it
    cell_cleaner(nb)
    #Save it back; the with-block guarantees the data is flushed and closed
    with open(fname, 'w') as outfile:
        nbf.write(nb, outfile, 'ipynb')
def determine_ipynb(fname):
    '''
    Determine if a file is an ipython notebook file or not
    The check looks only at the file extension (case-insensitive); the file
    itself is never opened.
    input:
        fname - filename
    output:
        returns True/False
    '''
    #The comparison already yields a bool, so return it directly
    return os.path.splitext(fname)[-1].lower() == '.ipynb'
def main(args):
    '''
    Determines if a directory search needs to be performed or not
    Crawls if args.fpath is given
    input:
        args - parsed arguments providing fname (a notebook filename, or the
               starting directory when crawling) and fpath (True to crawl)
    Exits the process with status 1 when a single file is given that does
    not have the .ipynb extension.
    '''
    #If not the fpath option then we should have a notebook
    if not args.fpath:
        if not determine_ipynb(args.fname):
            #Report on stderr and exit non-zero so callers can detect the failure
            print('ERROR: This is not an IPython notebook', file=sys.stderr)
            sys.exit(1)
        notebook_cleaner(args.fname)
        return
    #Else we start walking from the starting fpath
    for root, dirs, files in os.walk(args.fname):
        #Prune checkpoint folders in place so os.walk never descends into them
        dirs[:] = [d for d in dirs if d != '.ipynb_checkpoints']
        #Make sure that the final folder in the root is not a .ipynb_checkpoints folder
        #(this still matters when the starting directory itself is one)
        if os.path.basename(root) == '.ipynb_checkpoints':
            continue
        #Go through each file to find any ipynb files
        for tfile in files:
            #If it's a notebook let's clean it
            if determine_ipynb(tfile):
                notebook_cleaner(os.path.join(root, tfile))
#Command-line entry point: parse the arguments and hand them to main()
if __name__ == '__main__':
    #argparse expands %(prog)s to the program name; the optparse-style
    #%prog placeholder would be printed literally in the help text
    parser = argparse.ArgumentParser(description="python %(prog)s filepath")
    #Positional argument: a notebook file, or a directory when --fpath is set
    parser.add_argument('fname', type=str,
                        help="The notebook filename or filepath when the fpath option is invoked")
    #Flag that switches from single-file mode to a recursive directory crawl
    parser.add_argument('--fpath', default=False, action='store_true',
                        help="Crawl directories from a supplied starting point, converting all notebooks found")
    args = parser.parse_args()
    main(args)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment