Skip to content

Instantly share code, notes, and snippets.

@MiniXC
Last active May 25, 2020 19:48
Show Gist options
  • Save MiniXC/14994d8f9eb382ed0f27457f3b313286 to your computer and use it in GitHub Desktop.
Save MiniXC/14994d8f9eb382ed0f27457f3b313286 to your computer and use it in GitHub Desktop.
from urllib.request import urlopen, Request
from urllib.error import HTTPError
import logging
from lxml.html import parse
def yahoo_scrape(symbol_string):
    """
    Scrape the Yahoo Finance profile page of a symbol.

    Requests ``https://finance.yahoo.com/quote/<symbol>/profile`` with a
    browser-like User-Agent header, parses the HTML with lxml and reads the
    sector, company name, exchange and currency via fixed XPath expressions.

    Args:
        symbol_string: ticker symbol inserted verbatim into the profile URL.

    Returns:
        The list ``[sector, company_name, exchange, currency]`` on success.
        ``False`` when the server answers with an HTTP error status, when one
        of the expected elements is absent from the page, or when the header
        elements carry no text.

    Raises:
        Only ``HTTPError`` is handled here; any other exception raised by
        ``urlopen`` or by the parser propagates to the caller.
    """
    url = f'https://finance.yahoo.com/quote/{symbol_string}/profile'
    # Adjacent literals are joined at compile time, so the header value keeps
    # exactly one space between product tokens. A backslash continuation
    # inside a single literal would instead glue the tokens together or embed
    # the following line's indentation.
    user_agent = (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/67.0.3396.99 Safari/537.36'
    )
    request = Request(url, None, {'User-Agent': user_agent})

    try:
        # The context manager closes the HTTP response once parsing is done,
        # including when parsing raises.
        with urlopen(request) as response:
            tree = parse(response)
    except HTTPError:
        return False

    try:
        sector_string = tree.xpath(
            '//*[@id="Col1-0-Profile-Proxy"]/section/div[1]/div/div/p[2]/span[2]'
        )[0].text
        header_title = tree.xpath(
            '//div[@id="quote-header-info"]/div/div/div/h1'
        )[0].text
        exchange_and_curr = tree.xpath(
            '//div[@id="quote-header-info"]/div/div[1]/div[2]/span'
        )[0].text
    except IndexError:
        # An XPath matched nothing: the page lacks the expected element.
        return False

    # An element without a text node yields None; treat it as "not found"
    # rather than failing on the str methods below.
    if header_title is None or exchange_and_curr is None:
        return False

    # The title is split at the first '(' and the part before it is kept.
    company_name = header_title.split('(')[0]
    # The exchange is whatever precedes the first ' - '; the currency is the
    # last space-separated token of the same string.
    exchange = exchange_and_curr.split(' - ')[0]
    currency = exchange_and_curr.split(' ')[-1]
    return [sector_string, company_name, exchange, currency]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment