Skip to content

Instantly share code, notes, and snippets.

@meta-ks
Created July 22, 2023 08:19
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save meta-ks/6356e373711283174c106213909ae490 to your computer and use it in GitHub Desktop.
Save meta-ks/6356e373711283174c106213909ae490 to your computer and use it in GitHub Desktop.
NSE utilities. Download data from NSE using jtrader
import io
import os
import time

import pandas as pd
import requests
# Index constituent / market-cap listing endpoints published by NSE.
# Empty entries are placeholders for indices not wired up yet.
nse_urls = {
    'N50': '',
    'N100': '',
    'N200': '',
    'N500': 'https://archives.nseindia.com/content/indices/ind_nifty500list.csv',
    # Market-cap ranking snapshots (xlsx). TOP1000 is the older 31-03-2020 dump;
    # N_ALL is the more recent 31-12-2022 full listing.
    'TOP1000': 'https://static.nseindia.com//s3fs-public/inline-files/MCAP_31032020_TOP1000.xlsx',
    'N_ALL': 'https://static.nseindia.com//s3fs-public/inline-files/MCAP31122022_0.xlsx',
}
def download_nse_data(nse_url):
    """GET *nse_url* with browser-like headers (NSE rejects default clients).

    Returns the ``requests.Response`` on HTTP 200; prints the failure and
    returns None for any other status code.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:108.0) Gecko/20100101 Firefox/108.0',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.5',
        'DNT': '1',
        'Connection': 'keep-alive',
        'Upgrade-Insecure-Requests': '1',
        'Sec-Fetch-Dest': 'document',
        'Sec-Fetch-Mode': 'navigate',
        'Sec-Fetch-Site': 'none',
        'Sec-Fetch-User': '?1',
        'Pragma': 'no-cache',
        'Cache-Control': 'no-cache',
    }
    # Timeout so a stalled NSE endpoint cannot hang the caller forever.
    res = requests.get(nse_url, headers=headers, timeout=30)
    if res.status_code == 200:
        print(f'[*]Got <200> at: {nse_url}')
        return res
    # Previously a non-200 fell through and returned None silently;
    # make the failure visible to the caller's logs.
    print(f'[-]Got {res.status_code} at: {nse_url}')
    return None
def get_BSE_all_active_equities():
    """Download the full list of active BSE equity scrips as a DataFrame.

    Hits BSE's CSV-download API with browser-like headers (the API blocks
    default clients). Returns a pandas DataFrame on HTTP 200, else None.
    """
    bse_all_scrips_url = 'https://api.bseindia.com/BseIndiaAPI/api/LitsOfScripCSVDownload/w'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:108.0) Gecko/20100101 Firefox/108.0',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.5',
        # 'Accept-Encoding': 'gzip, deflate, br',
        'Connection': 'keep-alive',
        'Referer': 'https://www.bseindia.com/',
        'Upgrade-Insecure-Requests': '1',
        'Sec-Fetch-Dest': 'document',
        'Sec-Fetch-Mode': 'navigate',
        'Sec-Fetch-Site': 'same-site',
        'Sec-Fetch-User': '?1',
        'Pragma': 'no-cache',
        'Cache-Control': 'no-cache',
    }
    params = {
        'segment': 'Equity',
        'status': 'Active',
        'industry': '',
        'Group': '',
        'Scripcode': '',
    }
    # Timeout added so a stalled BSE endpoint cannot hang the caller forever.
    csv_res = requests.get(bse_all_scrips_url, params=params, headers=headers, timeout=30)
    if csv_res.status_code == 200:
        print(f'[*]Got <200> at: {bse_all_scrips_url}')
        df = pd.read_csv(io.StringIO(csv_res.text))
    else:
        print(f'[-]Got {csv_res.status_code} at: {bse_all_scrips_url}.....')
        df = None
    return df
def nse_scrip_info(scrip):
    """Fetch the quote-equity JSON for *scrip* from NSE's API.

    Known-flaky: NSE's response is unpredictable without cookies/referer;
    prefer jugaad-data or nsepy for reliable quotes. Returns the parsed
    JSON dict, or None on any failure.
    """
    print(f'--------------------I AM NOT WORKING PROPERLY--------------')
    nse_equity_url = 'https://www.nseindia.com/api/quote-equity'
    # Somehow using referer as variable is not working, omitting for now
    # referer_url = f'https://www.nseindia.com/get-quotes/equity?symbol={scrip}'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:108.0) Gecko/20100101 Firefox/108.0',
        'Accept': '*/*',
        'Accept-Language': 'en-US,en;q=0.5',
        'Connection': 'keep-alive',
        # 'Referer': referer_url,
        'Sec-Fetch-Dest': 'empty',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Site': 'same-origin',
        'Pragma': 'no-cache',
        'Cache-Control': 'no-cache',
    }
    params = {'symbol': scrip}
    # Pre-bind so the except handler below never hits an unbound name:
    # previously, if requests.get() itself raised, referencing res.status_code
    # in the handler raised NameError and masked the real error.
    res = None
    try:
        # Cookies are omitted as of now. Can add later if req
        res = requests.get(nse_equity_url, params=params, headers=headers, timeout=30)
        json_d = res.json()
    except Exception as e:
        status = res.status_code if res is not None else 'no response'
        content = res.content if res is not None else b''
        print(f'[-]Failed to fetch {scrip}. Got <{status}>: {e}: {content}')
        return None
    return json_d
def get_N500():
    """Download the Nifty 500 constituents CSV and return it as a DataFrame.

    Returns None when the download fails (previously this crashed with
    AttributeError on ``.text`` because download_nse_data returns None
    for non-200 responses).
    """
    csv_res = download_nse_data(nse_urls['N500'])
    if csv_res is None:
        print('[-]N500 download failed')
        return None
    return pd.read_csv(io.StringIO(csv_res.text))
def get_Ntop1000():
    """Download the older (31-03-2020) top-1000-by-MCap xlsx listing from NSE.

    Returns a DataFrame, or None when the download fails (previously a failed
    download crashed with AttributeError on ``.content``).
    """
    xlx_res = download_nse_data(nse_urls['TOP1000'])
    if xlx_res is None:
        print('[-]TOP1000 download failed')
        return None
    with io.BytesIO(xlx_res.content) as fh:
        # pd.read_excel is the public API; pd.io.excel.read_excel is internal.
        return pd.read_excel(fh)
def get_nifty_all():
    """Download the recent (31-12-2022) full market-cap xlsx listing from NSE.

    Returns a DataFrame, or None when the download fails (previously a failed
    download crashed with AttributeError on ``.content``).
    """
    xlx_res = download_nse_data(nse_urls['N_ALL'])
    if xlx_res is None:
        print('[-]N_ALL download failed')
        return None
    with io.BytesIO(xlx_res.content) as fh:
        # pd.read_excel is the public API; pd.io.excel.read_excel is internal.
        return pd.read_excel(fh)
def download_equity_data_from_nse(nifty_equities=None, top_n=1000, refresh_data=False, pickle_df=False):
    """Build a summary DataFrame of the top-N NSE equities by market cap.

    Quotes are fetched via jugaad_data's NSELive and cached one pickle per
    symbol under NSE_data/; cached quotes are reused unless refresh_data.

    nifty_equities: optional pre-fetched listing DataFrame with a 'Symbol'
        column and the market-cap column queried below; when None, the
        N_ALL listing is downloaded via get_nifty_all().
    top_n: number of leading symbols (listing assumed MCap-ordered) to process.
    refresh_data: when True, always re-fetch quotes from NSE, ignoring pickles.
    pickle_df: when True, pickle the result to EQUITIES_NSE_DATA_FILEPATH.
        NOTE(review): that constant is not defined anywhere in this file —
        it must exist at module level or this branch raises NameError.
    Returns a pandas DataFrame with one row per symbol.
    """
    from jugaad_data.nse import NSELive
    from utils.serializer_utils import unpickle_py_obj, pickle_py_obj
    if nifty_equities is not None:
        n_all = nifty_equities
    else:
        n_all = get_nifty_all()
    pkl_dest_path = 'NSE_data/'
    if not os.path.exists(pkl_dest_path):
        print(f'[*]Creating destination dir: {pkl_dest_path}')
        os.makedirs(pkl_dest_path)
    pkl_files = os.listdir(pkl_dest_path)
    top_scrips = list(n_all['Symbol'][:top_n])
    n = NSELive()
    rows_list = []
    for ix,scrip in enumerate(top_scrips):
        fp = f'NSE_data/{scrip}'
        empty_pkl = False
        try:
            if not refresh_data:
                # print(f'[{ix}]Unpickling: {scrip}')
                q = unpickle_py_obj(fp)
                # A 'msg' key in the cached quote presumably marks an NSE error
                # payload — logged but still used (re-fetch line is commented out).
                if q.get('msg'):
                    print(f'[-]Missing data: {scrip}: {q}')
                    # empty_pkl = True
        except Exception as e:
            # Unreadable/absent pickle: force a fresh fetch below.
            print(f'[-]Error: {e}')
            empty_pkl = True
        # Fetch from NSE when the pickle was bad, a refresh was requested,
        # or no cached file exists for this symbol.
        if empty_pkl or refresh_data or scrip not in pkl_files:
            q = n.stock_quote(scrip)
            pickle_py_obj(q, fp, print_stat=False)
            print(f'[{ix+1}/{top_n}]Pickled: {scrip}')
            # Throttle between live requests to avoid hammering NSE.
            time.sleep(1)
        #The list is in order of MCap
        mcap_rank = ix+1
        # Bucket the rank into its enclosing index slice: Nifty 50, 100, 150, ...
        nifty_slice = f'Nifty {50*(ix//50+1)}'
        try:
            # Column name is specific to the MCAP31122022 sheet; values are in
            # Rs Lakhs, so /100000 converts to units of 1000 Cr (1000 Cr = 1e5 Lakh).
            mcap_in_1000cr = round(n_all.query(f"Symbol=='{scrip}'")['Market capitalization as on December 30, 2022\n(Rs in Lakhs)'].item()/100000, 3)
        except Exception as e:
            mcap_in_1000cr = 0
            print(f'[-]Error in MCap: {e}')
        # Pull the interesting fields out of the NSE quote payload; .get with
        # defaults tolerates partially-missing sections in the response.
        info = q.get('info', {})
        sym = scrip
        # sym = info.get('symbol')
        meta = q.get('metadata', {})
        lst_date = meta.get('listingDate')
        sector_index = meta.get('pdSectorInd')
        sec_info = q.get('securityInfo', {})
        is_derivatives = sec_info.get('derivatives')
        p_info = q.get('priceInfo', {})
        pband = p_info.get('pPriceBand')
        if pband:
            # 'No Band' (no circuit limit) is mapped to 0 so the column stays numeric.
            pband = int(pband.replace('No Band', '0'))
        lowerCP = p_info.get('lowerCP')
        upperCP = p_info.get('upperCP')
        ind_info = q.get('industryInfo', {})
        macro = ind_info.get('macro')
        sector = ind_info.get('sector')
        industry = ind_info.get('industry')
        basic_ind = ind_info.get('basicIndustry')
        eq_dict = {
            'Symbol':sym, 'Nifty Slice': f'{mcap_rank}: {nifty_slice}',
            # 'rank':mcap_rank, 'nifty slice':nifty_slice,
            'Macro':macro, 'Sector':sector, 'Industry':industry, 'Basic Industry':basic_ind,
            'Sector Index': sector_index, 'Market Cap (in 1000Cr)':mcap_in_1000cr,
            'Price Band':pband, 'MCap Rank': mcap_rank, 'Nifty x':nifty_slice
        }
        rows_list.append(eq_dict)
    eq_df = pd.DataFrame(rows_list)
    if pickle_df:
        pickle_py_obj(eq_df, EQUITIES_NSE_DATA_FILEPATH)
    # pd.set_option('display.max_rows', 20)
    return eq_df
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment