Skip to content

Instantly share code, notes, and snippets.

@maksimKorzh
Created November 28, 2019 17:18
Show Gist options
  • Save maksimKorzh/d4d17cfb67c479d2478caceff872f36b to your computer and use it in GitHub Desktop.
Save maksimKorzh/d4d17cfb67c479d2478caceff872f36b to your computer and use it in GitHub Desktop.
A simple and minimalist CLI browser
# Libraries
import requests
from bs4 import BeautifulSoup
class Browser:
    """A simple and minimalist CLI browser.

    Interactive commands understood by ``run``:
        fetch <url>                 print the text content of a page
        download <url> <filename>   save the response body to a file
        exit                        leave the browser loop
    """

    # Usage examples printed when the user input cannot be understood
    FETCH_EXAMPLE = 'Fetch URL example: fetch https://www.google.com/'
    DOWNLOAD_EXAMPLE = 'Download URL example: download https://github.com/maksimKorzh/one-time-scrapers/archive/master.zip ots.zip'

    # Make HTTP GET request
    def fetch(self, url, timeout=10):
        """Request *url* with HTTP GET.

        Args:
            url: Address to request.
            timeout: Seconds passed to ``requests.get`` as its timeout, so a
                stalled server cannot hang the prompt forever.

        Returns:
            The response object, or ``None`` if the request failed.
        """
        try:
            return requests.get(url, timeout=timeout)
        # Only request-level failures (bad URL, network error, timeout) mean
        # "fetch failed"; KeyboardInterrupt and real bugs are not swallowed
        except (requests.RequestException, ValueError):
            return None

    # Parse content and pretty print it to screen
    def parse(self, html):
        """Print the text found in *html*, one text node per line.

        Each line is indented by one space per ancestor of the text node.
        Text inside ``<script>`` and ``<style>`` tags is skipped. Text whose
        parent is an ``<a>`` tag with an ``href`` is printed as
        ``[text] <href>``; every other text node is printed as is.

        Args:
            html: Markup to parse (handed to BeautifulSoup with 'lxml').
        """
        content = BeautifulSoup(html, 'lxml')

        # Walk over every node of the document
        for node in content.descendants:
            # Only text nodes are printed; the code identifies them by a
            # missing tag name
            if node.name is not None:
                continue

            # Ignore whitespace-only and empty text
            text = node.strip()
            if not text:
                continue

            parent = node.parent

            # Ignore 'script' and 'style' content and the bare 'html' string
            # NOTE(review): the 'html' check presumably targets the
            # <!DOCTYPE html> node - confirm
            if parent.name in ('script', 'style') or node == 'html':
                continue

            # Calculate indentation: one space per ancestor
            indent = ' ' * len(list(node.parents))

            # Distinguish links from other tags; an anchor without an href
            # (e.g. <a name="top">) is printed as plain text instead of
            # raising KeyError and aborting the whole page
            href = parent.get('href') if parent.name == 'a' else None

            if href is not None:
                print('%s[%s] <%s>' % (indent, text, href))
            else:
                print(indent + text)

    # Download the file
    def download(self, filename, response, chunk_size=8192):
        """Write the body of *response* to *filename* as raw bytes.

        Args:
            filename: Path of the file to create or overwrite.
            response: Object providing ``iter_content(chunk_size=...)``,
                such as the value returned by ``fetch``.
            chunk_size: Value passed to ``iter_content`` as ``chunk_size``.

        Raises:
            ValueError: If *response* is ``None``. This is checked before
                the file is opened, so a failed fetch does not leave an
                empty file behind.
            OSError: If the file cannot be opened or written.
        """
        if response is None:
            raise ValueError('no response to download')

        # Open file stream to write raw bytes
        with open(filename, 'wb') as download_file:
            # Loop over response content
            for chunk in response.iter_content(chunk_size=chunk_size):
                download_file.write(chunk)

    # Print usage help
    def _print_usage(self):
        """Print the usage examples for all supported commands."""
        print('Usage:')
        print(self.FETCH_EXAMPLE)
        print(self.DOWNLOAD_EXAMPLE)

    # Run browser loop
    def run(self):
        """Read commands from the prompt until 'exit' or end of input."""
        while True:
            # Leave cleanly on Ctrl-D / Ctrl-C instead of dumping a traceback
            try:
                user_input = input('browse > ')
            except (EOFError, KeyboardInterrupt):
                print()
                break

            # Parse user input string
            args = user_input.split()

            if args == ['exit']:
                break

            # Every command needs at least a URL; unknown commands get the
            # usage text instead of being silently ignored
            if len(args) < 2 or args[0] not in ('fetch', 'download'):
                self._print_usage()
                continue

            command, url = args[0], args[1]

            # Case "fetch" command
            if command == 'fetch':
                print('Fetching URL: %s' % url)
                response = self.fetch(url)

                if response is None:
                    print('Failed to fetch URL: %s' % url)
                    continue

                # Keep the prompt alive whatever the page contains; this is
                # the top-level boundary, so a broad catch is intentional
                try:
                    self.parse(response.text)
                except Exception:
                    print('Failed to parse content from URL: %s' % url)

            # Case "download" command
            else:
                if len(args) < 3:
                    print(self.DOWNLOAD_EXAMPLE)
                    continue

                filename = args[2]
                response = self.fetch(url)
                print('Downloading data from URL: %s | PATH: "%s"' % (url, filename))

                try:
                    self.download(filename, response)
                except (requests.RequestException, OSError, ValueError):
                    print('Failed to download data from URL: %s' % url)
                    continue

                print('Done')
# Start the interactive browser loop only when executed as a script
if __name__ == '__main__':
    browser = Browser()
    browser.run()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment