Skip to content

Instantly share code, notes, and snippets.

@reznov11
Forked from dmattera/soup_prettify2.py
Created December 21, 2018 18:54
Show Gist options
  • Save reznov11/3540afcb20f53a046498a8d99d821228 to your computer and use it in GitHub Desktop.
Save reznov11/3540afcb20f53a046498a8d99d821228 to your computer and use it in GitHub Desktop.
A simple way to set custom indentation levels when using BeautifulSoup's soup.prettify()
# Python == 3.6.2
# bs4 == 4.6.0
# In this version of BeautifulSoup (bs4 4.6.0), soup.prettify() only supports an
# indentation of 1 space per nesting level. This is a simple, reliable way to
# re-indent its output with any number of spaces per level.
import requests
from bs4 import BeautifulSoup
# Demo input: download a page and parse it with the "html.parser" backend.
url = "https://www.google.com"
# requests applies no timeout by default; without one a stalled server would
# block this script forever.
r = requests.get(url, timeout=10)
soup = BeautifulSoup(r.content, "html.parser")
def soup_prettify2(soup, desired_indent: int) -> str:
    """Return ``soup.prettify()`` re-indented to ``desired_indent`` spaces per level.

    ``soup.prettify()`` is split into lines and the nesting depth of each line
    is taken from its number of leading spaces. Each line is then handed to
    ``write_new_line`` to be widened to the requested indentation.

    Args:
        soup: Any object with a ``prettify()`` method returning a string
            (e.g. a ``BeautifulSoup`` document).
        desired_indent: Number of spaces to use per nesting level.

    Returns:
        The re-indented markup, with every line terminated by ``"\n"``.
    """
    pieces = []
    # Depth of the most recent line that starts with a tag. Lines that are not
    # tags (text, script bodies) are never placed deeper than one level below it.
    tag_indent = 0
    for line in soup.prettify().split("\n"):
        text = str(line)
        leading = len(text) - len(text.lstrip(" "))
        # Clamp so the depth never exceeds "last tag + 1". This keeps
        # continuation lines of multi-line text, which carry their own
        # arbitrary whitespace, from being blown up by the scaling.
        current_indent = min(leading, tag_indent + 1)
        # Only lines that actually begin with '<' move the reference depth, so
        # runs of consecutive text lines do not drift deeper line after line.
        if text.startswith("<", leading):
            tag_indent = current_indent
        pieces.append(write_new_line(line, current_indent, desired_indent))
    return "".join(pieces)
def write_new_line(line, current_indent: int, desired_indent: int) -> str:
    """Return ``line`` widened to ``desired_indent`` spaces per level, plus ``"\n"``.

    ``line`` is expected to still carry its original indentation of one space
    per nesting level, so only the difference is prepended.

    Args:
        line: The line to emit; it is converted with ``str()``.
        current_indent: Nesting depth of the line (its 1-space-per-level indent).
        desired_indent: Number of spaces wanted per nesting level.

    Returns:
        The padded line followed by a newline character.
    """
    # The line already has `current_indent` spaces; add the missing remainder.
    spaces_to_add = (current_indent * desired_indent) - current_indent
    # A non-positive remainder (desired_indent <= 1, or depth 0) adds no padding.
    padding = " " * max(spaces_to_add, 0)
    return padding + str(line) + "\n"
# Re-indent the parsed page using 4 spaces per nesting level and print the result.
pretty_soup = soup_prettify2(soup, desired_indent=4)
print(pretty_soup)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment