maksimKorzh/browser.py

## browser.py
# Libraries
import requests
from bs4 import BeautifulSoup

class Browser:
    """Minimal text-mode web browser driven by an interactive prompt.

    Prompt commands understood by ``run``:
        fetch <url>                 print the readable text of a page
        download <url> <filename>   save the response body to a file
        exit                        leave the prompt
    """

    # Parent tags whose text is code or styling rather than readable content
    HIDDEN_PARENTS = ('script', 'style')

    # Make HTTP GET request
    def fetch(self, url, timeout=30):
        """Send an HTTP GET request to *url*.

        Args:
            url: Address to request.
            timeout: Seconds to wait for the server before giving up;
                forwarded to ``requests.get`` so a dead host cannot hang
                the prompt forever.

        Returns:
            The ``requests`` response object, or ``None`` when the request
            could not be completed.
        """
        try:
            return requests.get(url, timeout=timeout)

        # Only request failures mean "no response"; a bare except would also
        # swallow KeyboardInterrupt and hide programming errors
        except requests.RequestException:
            return None

    # Parse content and pretty print it to screen
    def parse(self, html):
        """Print the text of an HTML document as an indented outline.

        Every non-blank text node is printed on its own line, indented by
        one space per ancestor. Text inside <script>/<style> is skipped and
        text directly inside a link is printed as ``[text] <href>``.

        Args:
            html: Markup handed to BeautifulSoup (parsed with 'lxml').
        """
        content = BeautifulSoup(html, 'lxml')

        # Walk every node of the tree in document order
        for node in content.descendants:
            # Text nodes have no tag name; element nodes are skipped
            if node.name is not None:
                continue

            # Nothing to show for whitespace-only text
            text = node.strip()
            if not text:
                continue

            parent = node.parent

            # Ignore 'script' and 'style' content. The 'html' comparison is
            # kept from the original; presumably it drops the text of a
            # <!DOCTYPE html> node
            if parent.name in self.HIDDEN_PARENTS or node == 'html':
                continue

            # One space of indentation per ancestor of the text node
            indent = ' ' * sum(1 for _ in node.parents)

            # Distinguish links from other tags. An anchor without an href
            # is printed as plain text instead of raising KeyError and
            # aborting the rest of the page
            href = parent.get('href') if parent.name == 'a' else None
            if href is not None:
                print(indent + '[' + text + '] ' + '<' + href + '>')
            else:
                print(indent + text)

    # Download the file
    def download(self, filename, response, chunk_size=8192):
        """Write the body of *response* to *filename* as raw bytes.

        Args:
            filename: Path of the file to write; opened in 'wb' mode.
            response: Object providing ``iter_content(chunk_size=...)``.
            chunk_size: Number of bytes requested per chunk.

        Raises:
            AttributeError: If *response* has no ``iter_content`` (for
                example ``None`` from a failed fetch).
            OSError: If the file cannot be opened or written.
        """
        # Ask for the chunk iterator before touching the disk, so that an
        # unusable response does not leave an empty file behind
        chunks = response.iter_content(chunk_size=chunk_size)

        # Open file stream to write raw bytes
        with open(filename, 'wb') as download_file:
            for chunk in chunks:
                download_file.write(chunk)

    # Print the help shown for unrecognized input
    def _print_usage(self):
        """Print one usage example for each supported command."""
        print('Usage:')
        print('Fetch URL example: fetch https://www.google.com/')
        print('Download URL example: download https://github.com/maksimKorzh/one-time-scrapers/archive/master.zip ots.zip')

    # Handle the "fetch" command
    def _fetch_command(self, url):
        """Fetch *url* and print its text content, reporting failures."""
        print('Fetching URL: %s' % url)
        response = self.fetch(url)

        if response is None:
            print('Failed to fetch URL: %s' % url)
            return

        # Best effort: a page that cannot be parsed must not end the session
        try:
            self.parse(response.text)
        except Exception as error:
            print('Failed to parse URL: %s (%s)' % (url, error))

    # Handle the "download" command
    def _download_command(self, url, filename):
        """Fetch *url* and store its body in *filename*, reporting failures."""
        response = self.fetch(url)

        if response is None:
            print('Failed to download data from URL: %s' % url)
            return

        print('Downloading data from URL: %s | PATH: "%s"' % (url, filename))
        try:
            self.download(filename, response)
        except (OSError, requests.RequestException):
            print('Failed to download data from URL: %s' % url)
        else:
            print('Done')

    # Interpret a single line typed at the prompt
    def _execute(self, user_input):
        """Run one command line.

        Args:
            user_input: Raw text entered by the user.

        Returns:
            ``False`` when the user asked to exit, ``True`` otherwise.
        """
        parts = user_input.split()

        # Splitting first makes 'exit' work with surrounding whitespace
        if parts == ['exit']:
            return False

        # Every remaining command needs at least a URL argument
        if len(parts) < 2 or parts[0] not in ('fetch', 'download'):
            self._print_usage()
            return True

        command, url = parts[0], parts[1]

        if command == 'fetch':
            self._fetch_command(url)
        elif len(parts) >= 3:
            self._download_command(url, parts[2])
        else:
            # "download" was given a URL but no target filename
            print('Download URL example: download https://github.com/maksimKorzh/one-time-scrapers/archive/master.zip ots.zip')

        return True

    # Run browser loop
    def run(self):
        """Read commands from the prompt until the user exits."""
        while True:
            try:
                user_input = input('browse > ')
            except (EOFError, KeyboardInterrupt):
                # Ctrl-D / Ctrl-C at the prompt ends the session cleanly
                print()
                break

            if not self._execute(user_input):
                break


# Start the interactive prompt only when this file is executed as a script
if __name__ == '__main__':
    Browser().run()
	# Libraries
	import requests
	from bs4 import BeautifulSoup

	class Browser:
		"""Minimal text-mode web browser driven by an interactive prompt.

		Prompt commands understood by ``run``:
			fetch <url>                 print the readable text of a page
			download <url> <filename>   save the response body to a file
			exit                        leave the prompt
		"""

		# Parent tags whose text is code or styling rather than readable content
		HIDDEN_PARENTS = ('script', 'style')

		# Make HTTP GET request
		def fetch(self, url, timeout=30):
			"""Send an HTTP GET request to *url*.

			Args:
				url: Address to request.
				timeout: Seconds to wait for the server before giving up;
					forwarded to ``requests.get`` so a dead host cannot hang
					the prompt forever.

			Returns:
				The ``requests`` response object, or ``None`` when the request
				could not be completed.
			"""
			try:
				return requests.get(url, timeout=timeout)

			# Only request failures mean "no response"; a bare except would also
			# swallow KeyboardInterrupt and hide programming errors
			except requests.RequestException:
				return None

		# Parse content and pretty print it to screen
		def parse(self, html):
			"""Print the text of an HTML document as an indented outline.

			Every non-blank text node is printed on its own line, indented by
			one space per ancestor. Text inside <script>/<style> is skipped and
			text directly inside a link is printed as ``[text] <href>``.

			Args:
				html: Markup handed to BeautifulSoup (parsed with 'lxml').
			"""
			content = BeautifulSoup(html, 'lxml')

			# Walk every node of the tree in document order
			for node in content.descendants:
				# Text nodes have no tag name; element nodes are skipped
				if node.name is not None:
					continue

				# Nothing to show for whitespace-only text
				text = node.strip()
				if not text:
					continue

				parent = node.parent

				# Ignore 'script' and 'style' content. The 'html' comparison is
				# kept from the original; presumably it drops the text of a
				# <!DOCTYPE html> node
				if parent.name in self.HIDDEN_PARENTS or node == 'html':
					continue

				# One space of indentation per ancestor of the text node
				indent = ' ' * sum(1 for _ in node.parents)

				# Distinguish links from other tags. An anchor without an href
				# is printed as plain text instead of raising KeyError and
				# aborting the rest of the page
				href = parent.get('href') if parent.name == 'a' else None
				if href is not None:
					print(indent + '[' + text + '] ' + '<' + href + '>')
				else:
					print(indent + text)

		# Download the file
		def download(self, filename, response, chunk_size=8192):
			"""Write the body of *response* to *filename* as raw bytes.

			Args:
				filename: Path of the file to write; opened in 'wb' mode.
				response: Object providing ``iter_content(chunk_size=...)``.
				chunk_size: Number of bytes requested per chunk.

			Raises:
				AttributeError: If *response* has no ``iter_content`` (for
					example ``None`` from a failed fetch).
				OSError: If the file cannot be opened or written.
			"""
			# Ask for the chunk iterator before touching the disk, so that an
			# unusable response does not leave an empty file behind
			chunks = response.iter_content(chunk_size=chunk_size)

			# Open file stream to write raw bytes
			with open(filename, 'wb') as download_file:
				for chunk in chunks:
					download_file.write(chunk)

		# Print the help shown for unrecognized input
		def _print_usage(self):
			"""Print one usage example for each supported command."""
			print('Usage:')
			print('Fetch URL example: fetch https://www.google.com/')
			print('Download URL example: download https://github.com/maksimKorzh/one-time-scrapers/archive/master.zip ots.zip')

		# Handle the "fetch" command
		def _fetch_command(self, url):
			"""Fetch *url* and print its text content, reporting failures."""
			print('Fetching URL: %s' % url)
			response = self.fetch(url)

			if response is None:
				print('Failed to fetch URL: %s' % url)
				return

			# Best effort: a page that cannot be parsed must not end the session
			try:
				self.parse(response.text)
			except Exception as error:
				print('Failed to parse URL: %s (%s)' % (url, error))

		# Handle the "download" command
		def _download_command(self, url, filename):
			"""Fetch *url* and store its body in *filename*, reporting failures."""
			response = self.fetch(url)

			if response is None:
				print('Failed to download data from URL: %s' % url)
				return

			# The pipe is a plain character here; the original line carried a
			# stray backslash before it (an invalid escape sequence)
			print('Downloading data from URL: %s | PATH: "%s"' % (url, filename))
			try:
				self.download(filename, response)
			except (OSError, requests.RequestException):
				print('Failed to download data from URL: %s' % url)
			else:
				print('Done')

		# Interpret a single line typed at the prompt
		def _execute(self, user_input):
			"""Run one command line.

			Args:
				user_input: Raw text entered by the user.

			Returns:
				``False`` when the user asked to exit, ``True`` otherwise.
			"""
			parts = user_input.split()

			# Splitting first makes 'exit' work with surrounding whitespace
			if parts == ['exit']:
				return False

			# Every remaining command needs at least a URL argument
			if len(parts) < 2 or parts[0] not in ('fetch', 'download'):
				self._print_usage()
				return True

			command, url = parts[0], parts[1]

			if command == 'fetch':
				self._fetch_command(url)
			elif len(parts) >= 3:
				self._download_command(url, parts[2])
			else:
				# "download" was given a URL but no target filename
				print('Download URL example: download https://github.com/maksimKorzh/one-time-scrapers/archive/master.zip ots.zip')

			return True

		# Run browser loop
		def run(self):
			"""Read commands from the prompt until the user exits."""
			while True:
				try:
					user_input = input('browse > ')
				except (EOFError, KeyboardInterrupt):
					# Ctrl-D / Ctrl-C at the prompt ends the session cleanly
					print()
					break

				if not self._execute(user_input):
					break


	# Start the interactive prompt only when this file is executed as a script
	if __name__ == '__main__':
		browser = Browser()
		browser.run()