# MiniXC/yahoo_scrape.py

## yahoo_scrape.py
from urllib.request import urlopen, Request
from urllib.error import HTTPError
import logging
from lxml.html import parse

def yahoo_scrape(symbol_string):
    """Scrape sector and listing details for a symbol from Yahoo Finance.

    Fetches ``https://finance.yahoo.com/quote/<symbol>/profile`` and reads
    four text fields out of the page using fixed XPath expressions.

    Args:
        symbol_string: Symbol interpolated verbatim into the profile URL.

    Returns:
        ``[sector, company_name, exchange, currency]`` on success, or
        ``False`` when the request fails with an HTTP error or an expected
        element is missing from the page. ``sector`` is the raw element
        text and may be ``None`` if that element carries no text.

    Raises:
        Exceptions other than ``HTTPError`` raised while fetching or parsing
        (for example ``urllib.error.URLError``) are not caught here.
    """
    url = f'https://finance.yahoo.com/quote/{symbol_string}/profile'
    # Adjacent literals are joined with single spaces. A backslash
    # continuation inside one literal would embed the next line's
    # indentation in the header value.
    user_agent = (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/67.0.3396.99 Safari/537.36'
    )
    request = Request(url, None, {'User-Agent': user_agent})

    try:
        # The context manager closes the HTTP response once it is parsed.
        with urlopen(request) as response:
            tree = parse(response)
    except HTTPError as err:
        logging.getLogger(__name__).debug(
            'HTTP error while fetching %s: %s', url, err)
        return False

    try:
        sector_string = tree.xpath(
            '//*[@id="Col1-0-Profile-Proxy"]/section/div[1]/div/div/p[2]/span[2]'
        )[0].text
        # Keep everything before the first "(" of the header title; any
        # whitespace preceding the parenthesis is left in place.
        company_name = tree.xpath(
            '//div[@id="quote-header-info"]/div/div/div/h1'
        )[0].text.split('(')[0]
        exchange_and_curr = tree.xpath(
            '//div[@id="quote-header-info"]/div/div[1]/div[2]/span'
        )[0].text
        # NOTE(review): assumes this text starts with "<exchange> - " and
        # ends with the currency code -- confirm against a live page.
        exchange = exchange_and_curr.split(' - ')[0]
        currency = exchange_and_curr.split(' ')[-1]
    except (IndexError, AttributeError):
        # IndexError: an XPath matched nothing. AttributeError: the matched
        # element has no text (``.text`` is None), so it cannot be split.
        return False

    return [sector_string, company_name, exchange, currency]
	from urllib.request import urlopen, Request
	from urllib.error import HTTPError
	import logging
	from lxml.html import parse

	def yahoo_scrape(symbol_string):
		"""Scrape sector and listing details for a symbol from Yahoo Finance.

		Fetches ``https://finance.yahoo.com/quote/<symbol>/profile`` and reads
		four text fields out of the page using fixed XPath expressions.

		Args:
			symbol_string: Symbol interpolated verbatim into the profile URL.

		Returns:
			``[sector, company_name, exchange, currency]`` on success, or
			``False`` when the request fails with an HTTP error or an expected
			element is missing from the page. ``sector`` is the raw element
			text and may be ``None`` if that element carries no text.

		Raises:
			Exceptions other than ``HTTPError`` raised while fetching or parsing
			(for example ``urllib.error.URLError``) are not caught here.
		"""
		url = f'https://finance.yahoo.com/quote/{symbol_string}/profile'
		# Adjacent literals are joined with single spaces. A backslash
		# continuation inside one literal would embed the next line's
		# indentation in the header value.
		user_agent = (
			'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
			'AppleWebKit/537.36 (KHTML, like Gecko) '
			'Chrome/67.0.3396.99 Safari/537.36'
		)
		request = Request(url, None, {'User-Agent': user_agent})

		try:
			# The context manager closes the HTTP response once it is parsed.
			with urlopen(request) as response:
				tree = parse(response)
		except HTTPError as err:
			logging.getLogger(__name__).debug(
				'HTTP error while fetching %s: %s', url, err)
			return False

		try:
			sector_string = tree.xpath(
				'//*[@id="Col1-0-Profile-Proxy"]/section/div[1]/div/div/p[2]/span[2]'
			)[0].text
			# Keep everything before the first "(" of the header title; any
			# whitespace preceding the parenthesis is left in place.
			company_name = tree.xpath(
				'//div[@id="quote-header-info"]/div/div/div/h1'
			)[0].text.split('(')[0]
			exchange_and_curr = tree.xpath(
				'//div[@id="quote-header-info"]/div/div[1]/div[2]/span'
			)[0].text
			# NOTE(review): assumes this text starts with "<exchange> - " and
			# ends with the currency code -- confirm against a live page.
			exchange = exchange_and_curr.split(' - ')[0]
			currency = exchange_and_curr.split(' ')[-1]
		except (IndexError, AttributeError):
			# IndexError: an XPath matched nothing. AttributeError: the matched
			# element has no text (``.text`` is None), so it cannot be split.
			return False

		return [sector_string, company_name, exchange, currency]