#!/usr/bin/python3
'''
This script parses the HTML page saved from https://www.moneycontrol.com/stocks/marketinfo/dividends_declared/index.php
and writes a tab-separated sheet of the relevant data to stdout.

The sheet contains all upcoming dividends with a record date after the current date whose percentage has been declared.
For each one it calculates the net percentage earned per rupee invested if you buy the stock at the current price and
hold it until the record date.

To run this script, you need to first create:
1. A file called scrip.json containing an empty dictionary "{}"
2. A folder called cache
3. A file called input.html holding the HTML output from the above URL (use "Save as" from a browser, or curl)

The output contains formulae that rely on the Google Finance API to do the required calculations,
so you must import it into Google Sheets for it to work.

Each HTML page the script fetches for an individual stock is saved in the cache folder to avoid repeated fetches.
'''
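
# A minimal setup and run sequence could look like this (shell commands; "dividends.py" is
# whatever name this script is saved under, and the site may require browser-like headers,
# so saving input.html from a browser also works):
#
#   echo '{}' > scrip.json
#   mkdir -p cache
#   curl -o input.html 'https://www.moneycontrol.com/stocks/marketinfo/dividends_declared/index.php'
#   python3 dividends.py > dividends.tsv
#
# Import the resulting file into Google Sheets so the GOOGLEFINANCE() formulae can evaluate.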
import os.path
import sys
import json
import requests
import re
from datetime import datetime
from bs4 import BeautifulSoup

# Scrip cache: maps each company name to its exchange codes and face value
with open('scrip.json') as f:
    dctScrips = json.load(f)
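
# Illustrative shape of scrip.json once a few companies have been looked up
# (BSE, NSE and FV are the keys this script fills in; the values below are made up):
# {
#     "Some Company Ltd": {"BSE": "500123", "NSE": "SOMECO", "FV": "10.00"}
# }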

# Creates the basic data array with rows containing name, percent and record date
def parseInputHTML(sFile):
    # Open the input file
    with open(sFile) as f:
        sHTML = f.read()

    # Find the table element with class "dvdtbl"
    soup = BeautifulSoup(sHTML, 'html.parser')
    table = soup.find('table', class_='dvdtbl')

    # Initialize an empty list to store table data
    arrData = []

    # Create the basic data
    for row in table.find_all('tr')[2:]:  # Skip the first 2 rows (header)
        columns = row.find_all('td')
        if columns:
            # There are 6 columns in the table:
            # Company name, Dividend type, Percent, Announcement date, Record date, Ex-dividend date
            sName = columns[0].text.strip()
            sPercent = columns[2].text.strip()
            sRecordDate = columns[4].text.strip()

            # Extract the scrip's URL from the first column and save it
            elemLink = columns[0].find('a')
            sURL = 'https://www.moneycontrol.com/india' + elemLink.get('href')

            # If the record date is in the past, ignore the row, else store it in arrData
            dtNow = datetime.today()
            try:
                dtRecord = datetime.strptime(sRecordDate, "%d-%m-%Y")
            except ValueError:
                continue

            if dtRecord >= dtNow and sPercent != '0.00':
                arrData.append(
                    {
                        'name': sName,
                        'percent': sPercent,
                        'recordDate': dtRecord.strftime("%Y-%m-%d"),
                        'url': sURL,
                    }
                )

    return arrData
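
# Each element of the returned array is a dict shaped like this (illustrative values):
# {'name': 'Some Company Ltd', 'percent': '150.00', 'recordDate': '2024-01-15',
#  'url': 'https://www.moneycontrol.com/india/...'}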

# Function to get the HTML for a scrip, from the cached file if possible
def getScripHTML(sName, sURL):
    # Is there a cached file already?
    print(f'Loading {sName}', file=sys.stderr)
    sFile = f'cache/{sName}.html'
    if os.path.exists(sFile):
        with open(sFile) as f:
            sHTML = f.read()
        return sHTML
    else:
        print(f'Fetching {sName}', file=sys.stderr)
        response = requests.get(sURL)
        if response.status_code == 200:
            with open(sFile, 'w') as f:
                f.write(response.text)
            return response.text
        # Note: returns None if the fetch fails

def getCodes(row):
    '''
    Gets the NSE and BSE codes for this row and plugs them into the scrip dictionary
    '''
    dctScrip = dctScrips.get(row['name'])
    if 'BSE' not in dctScrip or 'NSE' not in dctScrip:
        # Fetch the HTML for the scrip
        sHTML = getScripHTML(row['name'], row['url'])
        soup = BeautifulSoup(sHTML, 'html.parser')

        # Find the <ul> element with class "company_slider", which lists the exchange codes
        div = soup.find('ul', class_='company_slider')
        if div:
            elBSE = div.find(string='BSE:')
            sBSE = elBSE.parent.nextSibling.nextSibling.text if elBSE else ''
            elNSE = div.find(string='NSE:')
            sNSE = elNSE.parent.nextSibling.nextSibling.text if elNSE else ''
            dctScrip['BSE'] = sBSE
            dctScrip['NSE'] = sNSE
            return True
    return False

def getFaceValue(row):
    '''
    Gets the face value for this scrip and plugs it into the scrip dictionary
    '''
    dctScrip = dctScrips.get(row['name'])
    if 'FV' not in dctScrip:
        sHTML = getScripHTML(row['name'], row['url'])

        # Look for the string 'var scid = "..."' in the HTML to find the scrip ID used by the price API
        rx = r'var scid = "([A-Z0-9]+)";'
        match = re.search(rx, sHTML)
        if match:
            sSCID = match.group(1)
            sExchange = 'nse' if dctScrip.get('NSE') else 'bse'
            sURL = f'https://priceapi.moneycontrol.com/pricefeed/{sExchange}/equitycash/{sSCID}'
            print(f'Fetch SCID {sSCID} for {row["name"]} - {sURL}', file=sys.stderr)
            response = requests.get(sURL, headers={'User-Agent': 'Mozilla/5.0'})
            print(response.status_code, file=sys.stderr)
            if response.status_code == 200:
                dctResponse = response.json()
                sFaceValue = dctResponse.get('data', {}).get('FV', '1.00')
                if sFaceValue:
                    dctScrip['FV'] = sFaceValue
                    return True
    return False
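
# The price API response is assumed (based only on the lookup above, not an official spec)
# to be JSON shaped roughly like {"data": {"FV": "10.00", ...}}; only data.FV is used here,
# and if it is missing the face value defaults to '1.00'.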

arrData = parseInputHTML('input.html')

# Iterate over the data and fetch the BSE, NSE code and face value
bChanged = False
for row in arrData:
    sName = row['name']
    if sName not in dctScrips:
        dctScrips[sName] = {}

    # Get the BSE and NSE code if needed
    bChanged = getCodes(row) or bChanged

    # Get the face value if needed
    bChanged = getFaceValue(row) or bChanged
    # print(f'{sName} - {dctScrips[sName]}')

# Write the scrip dictionary back to file if changed
if bChanged:
    with open('scrip.json', 'w') as f:
        json.dump(dctScrips, f, indent=4)

# Iterate over the data and create the output rows
# Columns: A=Name, B=Codes, C=Record date, D=Div. %, E=Stock Price, F=Face value, G=Net Earning%
arrOut = [['Name', 'Codes', 'Record date', 'Div. %', 'Stock Price', 'Face value', 'Net Earning%']]

# Data starts from row 2 (row 1 is the header)
nRow = 2


def getCell(col, row):
    return f'{col}{row}'
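

# For example, getCell('D', 2) returns the spreadsheet cell reference 'D2'.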


for row in arrData:
    # Cell references for the current output row
    cellPct = getCell('D', nRow)
    cellPrice = getCell('E', nRow)
    cellFV = getCell('F', nRow)

    # Actual money earned per share is the percent times the face value of the share
    # e.g. a face value of 10 and a 50% dividend means 5 rupees per share
    cellEarningPerShare = f'{cellPct} * {cellFV} / 100'

    # Net percentage earned per rupee invested is (earning per share / price) * 100
    cellEarningPerRupee = f'({cellEarningPerShare}) / {cellPrice} * 100'
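    # Worked example (made-up numbers): with a face value of 10, a 150% dividend and a
    # market price of 200, the payout is 150 * 10 / 100 = 15 rupees per share, so the
    # net earning is 15 / 200 * 100 = 7.5% per rupee invested.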
    # print(row)
    sName = row['name']
    dctScrip = dctScrips.get(sName)
    sGoogleCode = f'NSE:{dctScrip["NSE"]}' if dctScrip.get('NSE') else f'BOM:{dctScrip["BSE"]}'
    cellPriceLookup = f'=GOOGLEFINANCE("{sGoogleCode}", "PRICE")'
    arrOut.append([
        sName,
        dctScrip['BSE'] + '/' + dctScrip['NSE'],
        row['recordDate'],
        row['percent'],
        cellPriceLookup,
        dctScrip['FV'],
        f'=ROUND({cellEarningPerRupee}, 2)'
    ])
    nRow += 1

# Print the output as tab-separated rows
for e in arrOut:
    sRow = '\t'.join(map(str, e))
    print(sRow)
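
# An output data line then looks roughly like this (tab-separated; shown with " | " for
# readability, and the company, codes and values are illustrative):
# Some Company Ltd | 500123/SOMECO | 2024-01-15 | 150.00 | =GOOGLEFINANCE("NSE:SOMECO", "PRICE") | 10.00 | =ROUND((D2 * F2 / 100) / E2 * 100, 2)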