# Source: GitHub Gist by @narphorium (narphorium/648b7138da02539d11f84c3482544b9d)
# Created November 18, 2022 07:58
from bs4 import BeautifulSoup
import json
def strip_empty_lines(code: str) -> str:
    """Return *code* with every blank or whitespace-only line removed.

    The text is split on ``'\\n'`` only; surviving lines are kept verbatim
    (their own leading/trailing whitespace is preserved) and re-joined with
    ``'\\n'``, so the result never ends in a newline.
    """
    return '\n'.join(
        line for line in code.split('\n')
        if line.strip()  # whitespace-only lines strip down to '' (falsy)
    )
def convert_html(html, image_base_url='https://third-bit.com/sdxpy/interpreter/'):
    """Convert an HTML page into a Jupyter notebook structure.

    Every ``<div>``, ``<p>`` and ``<figure>`` found anywhere in the document
    is visited in document order:

    * ``<div class="code-sample">`` becomes a code cell holding the element's
      text with blank lines removed;
    * ``<p>`` becomes a markdown cell holding the element's raw HTML;
    * ``<figure>`` becomes a markdown cell holding the element's raw HTML,
      with relative ``src="./..."`` references rewritten against
      *image_base_url*.

    Any other ``<div>`` is ignored.

    Args:
        html: The HTML markup to convert.
        image_base_url: Prefix substituted for ``./`` in ``src`` attributes
            of figures. Defaults to the URL the original script hard-coded.

    Returns:
        A dict with the keys ``nbformat`` (4), ``nbformat_minor`` (1),
        ``cells`` and ``metadata``, ready to be serialized as JSON.
    """
    soup = BeautifulSoup(html, 'lxml')
    notebook = {'nbformat': 4, 'nbformat_minor': 1, 'cells': [], 'metadata': {}}
    cells = notebook['cells']
    for el in soup.find_all(['div', 'p', 'figure']):
        # A <div> without a class attribute has no 'class' key at all, so
        # look it up safely instead of indexing el.attrs directly.
        if el.name == 'div' and 'code-sample' in (el.get('class') or []):
            cells.append({
                'metadata': {},
                'outputs': [],
                'source': [strip_empty_lines(el.get_text())],
                'execution_count': None,
                'cell_type': 'code'
            })
        elif el.name == 'p':
            cells.append({
                'metadata': {},
                'source': [str(el)],
                'cell_type': 'markdown'
            })
        elif el.name == 'figure':
            # Point relative image paths at the hosted copy so they still
            # resolve when the notebook is opened elsewhere.
            code = str(el).replace('src="./', 'src="' + image_base_url)
            cells.append({
                'metadata': {},
                'source': [code],
                'cell_type': 'markdown'
            })
    return notebook
# Script entry: read the chapter HTML, convert it, and write the notebook.
# NOTE(review): assumes ch3.html is UTF-8 encoded — confirm against the source page.
with open("ch3.html", encoding="utf-8") as source:
    html = source.read()
notebook = convert_html(html)
with open('notebook.ipynb', 'w', encoding="utf-8") as output:
    json.dump(notebook, output, indent=2)
# Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment