Skip to content

Instantly share code, notes, and snippets.

@aphi
Last active February 19, 2022 17:51
Show Gist options
  • Save aphi/b68b1fd585db931e455e7285c7702166 to your computer and use it in GitHub Desktop.
Save aphi/b68b1fd585db931e455e7285c7702166 to your computer and use it in GitHub Desktop.
Extract data from Investing.com for use within investpy
"""
Extract stock metadata from Investing.com equity pages for use within investpy.
The output is comma-separated and can be copied directly into stocks.csv to add missing stocks.
$ python search_investing_urls.py
country,name,full_name,tag,isin,id,currency,symbol
united states,Airbnb,Airbnb,airbnb-inc,US0090661010,1167744,USD,ABNB
united kingdom,Deliveroo Holdings,Deliveroo Holdings PLC,deliveroo-holdings,GB00BNC5T391,1172028,GBP,ROO
united states,Nio A ADR,Nio Inc Class A ADR,nio-inc,US62914V1061,1096032,USD,NIO
"""
import re
from collections import Counter, OrderedDict
from urllib.request import Request, urlopen
# Investing.com equity pages to process; edit this list to choose the stocks.
URLS = [
    'https://www.investing.com/equities/airbnb-inc',
    'https://www.investing.com/equities/deliveroo-holdings',
    'https://www.investing.com/equities/nio-inc',
]
def select_match(matches):
    """Return the most frequent element of ``matches``.

    Ties are resolved in favour of the element that appears first in
    ``matches``. An empty (or otherwise falsy) ``matches`` yields the empty
    string ``''``.
    """
    if not matches:
        return ''
    # Count every element once. max() keeps the first item that reaches the
    # highest count, which is what gives the "first in case of tie" rule.
    frequency = Counter(matches)
    return max(matches, key=frequency.get)
# Output field name -> regular expression used to find that field in the
# downloaded page text. Insertion order is the order in which the fields are
# searched. Every pattern has a single capture group accepting ASCII letters,
# digits, '-' and spaces.
# NOTE(review): the backslash-heavy patterns presumably target escaped JSON
# embedded in the page markup - confirm against a live page.
regexs = OrderedDict([
    ('country', r'market\\"\:\{\\"name\\"\:\\"([a-zA-Z0-9- ]*)'),
    ('name', r'name\\\\\\"\:\{\\\\\\"shortName\\\\\\":\\\\\\"([a-zA-Z0-9- ]*)'),
    ('full_name', r'underlyingName\\\\\\"\:\\\\\\"([a-zA-Z0-9- ]*)'),
    ('tag', r'query"\:\{"equity"\:\["([a-zA-Z0-9- ]*)'),
    ('isin', r'isin\\\\\\":\\\\\\"([a-zA-Z0-9- ]*)'),
    ('id', r'instrument_id\\"\:\\"([a-zA-Z0-9- ]*)'),
    ('currency', r'currency\\\\\\":\\\\\\"([a-zA-Z0-9- ]*)'),
    ('symbol', r'SectionInstrument_Ticker\\"\:\\"([a-zA-Z0-9- ]*)'),
])
# Header row; the column names are listed in the same order in which the
# fields of each data row are appended below.
print('country,name,full_name,tag,isin,id,currency,symbol')

# Compile every pattern once up front instead of once per field per URL.
compiled_patterns = [
    (search_field, re.compile(regex)) for search_field, regex in regexs.items()
]

for url in URLS:
    # The request carries an explicit User-Agent header.
    req = Request(url, headers={'User-Agent': 'Mozilla/5.0'})
    # Use the response as a context manager so the connection is closed as
    # soon as the body has been read, even if decoding raises.
    with urlopen(req) as response:
        page_text = response.read().decode('utf-8')

    row = []
    for search_field, pattern in compiled_patterns:
        # A pattern may hit several times in one page; select_match picks
        # the single value to report for this field.
        match = select_match(pattern.findall(page_text))
        if search_field == 'country':  # special case: emitted in lower case
            match = match.lower()
        row.append(match)
    print(','.join(row))
@aphi
Copy link
Author

aphi commented Feb 19, 2022

Usage:

$ python search_investing_urls.py 
country,name,full_name,tag,isin,id,currency,symbol
united states,Airbnb,Airbnb,airbnb-inc,US0090661010,1167744,USD,ABNB
united kingdom,Deliveroo Holdings,Deliveroo Holdings PLC,deliveroo-holdings,GB00BNC5T391,1172028,GBP,ROO
united states,Nio A ADR,Nio Inc Class A ADR,nio-inc,US62914V1061,1096032,USD,NIO

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment