Last active
February 19, 2022 17:51
-
-
Save aphi/b68b1fd585db931e455e7285c7702166 to your computer and use it in GitHub Desktop.
Extract data from Investing.com for use within investpy
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Extract data from Investing.com for use within investpy | |
Output is comma-separated and can be copied directly into stocks.csv to add missing stocks | |
$ python search_investing_urls.py | |
country,name,full_name,tag,isin,id,currency,symbol | |
united states,Airbnb,Airbnb,airbnb-inc,US0090661010,1167744,USD,ABNB | |
united kingdom,Deliveroo Holdings,Deliveroo Holdings PLC,deliveroo-holdings,GB00BNC5T391,1172028,GBP,ROO | |
united states,Nio A ADR,Nio Inc Class A ADR,nio-inc,US62914V1061,1096032,USD,NIO | |
""" | |
import re | |
from collections import Counter, OrderedDict | |
from urllib.request import Request, urlopen | |
# Investing.com equity pages to scrape — add tags here for missing stocks.
_EQUITY_BASE = 'https://www.investing.com/equities/'
URLS = [
    _EQUITY_BASE + tag
    for tag in ('airbnb-inc', 'deliveroo-holdings', 'nio-inc')
]
def select_match(matches):
    """Return the most frequent element of *matches*.

    Ties go to the element first encountered; an empty input yields ''.
    """
    if matches:
        # Counter.most_common orders equal counts by first occurrence,
        # matching the "first in case of tie" contract.
        return Counter(matches).most_common(1)[0][0]
    return ''
# One regex per output column, in stocks.csv column order. The page embeds
# JSON inside JSON strings, hence the multiple levels of backslash escaping.
regexs = OrderedDict([
    ('country', r'market\\"\:\{\\"name\\"\:\\"([a-zA-Z0-9- ]*)'),
    ('name', r'name\\\\\\"\:\{\\\\\\"shortName\\\\\\":\\\\\\"([a-zA-Z0-9- ]*)'),
    ('full_name', r'underlyingName\\\\\\"\:\\\\\\"([a-zA-Z0-9- ]*)'),
    ('tag', r'query"\:\{"equity"\:\["([a-zA-Z0-9- ]*)'),
    ('isin', r'isin\\\\\\":\\\\\\"([a-zA-Z0-9- ]*)'),
    ('id', r'instrument_id\\"\:\\"([a-zA-Z0-9- ]*)'),
    ('currency', r'currency\\\\\\":\\\\\\"([a-zA-Z0-9- ]*)'),
    ('symbol', r'SectionInstrument_Ticker\\"\:\\"([a-zA-Z0-9- ]*)'),
])
# CSV header matching investpy's stocks.csv column layout.
print('country,name,full_name,tag,isin,id,currency,symbol')

# Compile each pattern once, instead of once per field per URL.
compiled = OrderedDict(
    (field, re.compile(regex)) for field, regex in regexs.items()
)

for url in URLS:
    # Browser-like User-Agent: Investing.com rejects the default urllib agent.
    req = Request(url, headers={'User-Agent': 'Mozilla/5.0'})
    # Context manager closes the HTTP response even if read/decode fails
    # (the original leaked the connection).
    with urlopen(req) as response:
        page_text = response.read().decode('utf-8')
    row = []
    for search_field, pattern in compiled.items():
        match = select_match(pattern.findall(page_text))
        if search_field == 'country':  # stocks.csv stores countries lowercased
            match = match.lower()
        row.append(match)
    print(','.join(row))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Usage: