#!/usr/bin/python3
'''
This script parses the HTML page saved from https://www.moneycontrol.com/stocks/marketinfo/dividends_declared/index.php
and writes a tab-separated sheet of the relevant data to stdout.

The sheet contains all upcoming dividends with a record date after the current date whose percentage has been declared.
For each one it calculates the net percentage earned per rupee invested if you buy the stock at the current price and
hold it until the record date.

To run this script, you need to first create:
1. A file called scrip.json containing an empty dictionary "{}"
2. A folder called cache
3. A file called input.html holding the HTML output from the above URL (use "Save as" from a browser, or curl)

The output contains formulae that rely on the Google Finance API to do the required calculations,
so you must import it into Google Sheets for it to work.

Each HTML page the script fetches for an individual stock is saved in the cache folder to avoid repeated fetches.
'''
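
# A minimal setup and run sequence could look like this (shell commands; "dividends.py" is
# whatever name this script is saved under, and the site may require browser-like headers,
# so saving input.html from a browser also works):
#
#   echo '{}' > scrip.json
#   mkdir -p cache
#   curl -o input.html 'https://www.moneycontrol.com/stocks/marketinfo/dividends_declared/index.php'
#   python3 dividends.py > dividends.tsv
#
# Import the resulting file into Google Sheets so the GOOGLEFINANCE() formulae can evaluate.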
import os.path
import sys
import json
import requests
import re
from datetime import datetime
from bs4 import BeautifulSoup

# Scrip cache: maps each company name to its exchange codes and face value
with open('scrip.json') as f:
    dctScrips = json.load(f)
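
# Illustrative shape of scrip.json once a few companies have been looked up
# (BSE, NSE and FV are the keys this script fills in; the values below are made up):
# {
#     "Some Company Ltd": {"BSE": "500123", "NSE": "SOMECO", "FV": "10.00"}
# }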

# Creates the basic data array with rows containing name, percent and record date
def parseInputHTML(sFile):
    # Open the input file
    with open(sFile) as f:
        sHTML = f.read()

    # Find the table element with class "dvdtbl"
    soup = BeautifulSoup(sHTML, 'html.parser')
    table = soup.find('table', class_='dvdtbl')

    # Initialize an empty list to store table data
    arrData = []

    # Create the basic data
    for row in table.find_all('tr')[2:]:  # Skip the first 2 rows (header)
        columns = row.find_all('td')
        if columns:
            # There are 6 columns in the table:
            # Company name, Dividend type, Percent, Announcement date, Record date, Ex-dividend date
            sName = columns[0].text.strip()
            sPercent = columns[2].text.strip()
            sRecordDate = columns[4].text.strip()

            # Extract the scrip's URL from the first column and save it
            elemLink = columns[0].find('a')
            sURL = 'https://www.moneycontrol.com/india' + elemLink.get('href')

            # If the record date is in the past, ignore the row, else store it in arrData
            dtNow = datetime.today()
            try:
                dtRecord = datetime.strptime(sRecordDate, "%d-%m-%Y")
            except ValueError:
                continue

            if dtRecord >= dtNow and sPercent != '0.00':
                arrData.append(
                    {
                        'name': sName,
                        'percent': sPercent,
                        'recordDate': dtRecord.strftime("%Y-%m-%d"),
                        'url': sURL,
                    }
                )

    return arrData
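
# Each element of the returned array is a dict shaped like this (illustrative values):
# {'name': 'Some Company Ltd', 'percent': '150.00', 'recordDate': '2024-01-15',
#  'url': 'https://www.moneycontrol.com/india/...'}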

# Function to get the HTML for a scrip, from the cached file if possible
def getScripHTML(sName, sURL):
    # Is there a cached file already?
    print(f'Loading {sName}', file=sys.stderr)
    sFile = f'cache/{sName}.html'
    if os.path.exists(sFile):
        with open(sFile) as f:
            sHTML = f.read()
        return sHTML
    else:
        print(f'Fetching {sName}', file=sys.stderr)
        response = requests.get(sURL)
        if response.status_code == 200:
            with open(sFile, 'w') as f:
                f.write(response.text)
            return response.text
        # Note: returns None if the fetch fails

def getCodes(row):
    '''
    Gets the NSE and BSE codes for this row and plugs them into the scrip dictionary
    '''
    dctScrip = dctScrips.get(row['name'])
    if 'BSE' not in dctScrip or 'NSE' not in dctScrip:
        # Fetch the HTML for the scrip
        sHTML = getScripHTML(row['name'], row['url'])
        soup = BeautifulSoup(sHTML, 'html.parser')

        # Find the <ul> element with class "company_slider", which lists the exchange codes
        div = soup.find('ul', class_='company_slider')
        if div:
            elBSE = div.find(string='BSE:')
            sBSE = elBSE.parent.nextSibling.nextSibling.text if elBSE else ''
            elNSE = div.find(string='NSE:')
            sNSE = elNSE.parent.nextSibling.nextSibling.text if elNSE else ''
            dctScrip['BSE'] = sBSE
            dctScrip['NSE'] = sNSE
            return True
    return False

def getFaceValue(row):
    '''
    Gets the face value for this scrip and plugs it into the scrip dictionary
    '''
    dctScrip = dctScrips.get(row['name'])
    if 'FV' not in dctScrip:
        sHTML = getScripHTML(row['name'], row['url'])

        # Look for the string 'var scid = "..."' in the HTML to find the scrip ID used by the price API
        rx = r'var scid = "([A-Z0-9]+)";'
        match = re.search(rx, sHTML)
        if match:
            sSCID = match.group(1)
            sExchange = 'nse' if dctScrip.get('NSE') else 'bse'
            sURL = f'https://priceapi.moneycontrol.com/pricefeed/{sExchange}/equitycash/{sSCID}'
            print(f'Fetch SCID {sSCID} for {row["name"]} - {sURL}', file=sys.stderr)
            response = requests.get(sURL, headers={'User-Agent': 'Mozilla/5.0'})
            print(response.status_code, file=sys.stderr)
            if response.status_code == 200:
                dctResponse = response.json()
                sFaceValue = dctResponse.get('data', {}).get('FV', '1.00')
                if sFaceValue:
                    dctScrip['FV'] = sFaceValue
                    return True
    return False
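
# The price API response is assumed (based only on the lookup above, not an official spec)
# to be JSON shaped roughly like {"data": {"FV": "10.00", ...}}; only data.FV is used here,
# and if it is missing the face value defaults to '1.00'.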

arrData = parseInputHTML('input.html')

# Iterate over the data and fetch the BSE, NSE code and face value
bChanged = False
for row in arrData:
    sName = row['name']
    if sName not in dctScrips:
        dctScrips[sName] = {}

    # Get the BSE and NSE code if needed
    bChanged = getCodes(row) or bChanged

    # Get the face value if needed
    bChanged = getFaceValue(row) or bChanged
    # print(f'{sName} - {dctScrips[sName]}')

# Write the scrip dictionary back to file if changed
if bChanged:
    with open('scrip.json', 'w') as f:
        json.dump(dctScrips, f, indent=4)

# Iterate over the data and create the output rows
# Columns: A=Name, B=Codes, C=Record date, D=Div. %, E=Stock Price, F=Face value, G=Net Earning%
arrOut = [['Name', 'Codes', 'Record date', 'Div. %', 'Stock Price', 'Face value', 'Net Earning%']]

# Data starts from row 2 (row 1 is the header)
nRow = 2


def getCell(col, row):
    return f'{col}{row}'
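

# For example, getCell('D', 2) returns the spreadsheet cell reference 'D2'.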


for row in arrData:
    # Cell references for the current output row
    cellPct = getCell('D', nRow)
    cellPrice = getCell('E', nRow)
    cellFV = getCell('F', nRow)

    # Actual money earned per share is the percent times the face value of the share
    # e.g. a face value of 10 and a 50% dividend means 5 rupees per share
    cellEarningPerShare = f'{cellPct} * {cellFV} / 100'

    # Net percentage earned per rupee invested is (earning per share / price) * 100
    cellEarningPerRupee = f'({cellEarningPerShare}) / {cellPrice} * 100'
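    # Worked example (made-up numbers): with a face value of 10, a 150% dividend and a
    # market price of 200, the payout is 150 * 10 / 100 = 15 rupees per share, so the
    # net earning is 15 / 200 * 100 = 7.5% per rupee invested.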
    # print(row)
    sName = row['name']
    dctScrip = dctScrips.get(sName)
    sGoogleCode = f'NSE:{dctScrip["NSE"]}' if dctScrip.get('NSE') else f'BOM:{dctScrip["BSE"]}'
    cellPriceLookup = f'=GOOGLEFINANCE("{sGoogleCode}", "PRICE")'
    arrOut.append([
        sName,
        dctScrip['BSE'] + '/' + dctScrip['NSE'],
        row['recordDate'],
        row['percent'],
        cellPriceLookup,
        dctScrip['FV'],
        f'=ROUND({cellEarningPerRupee}, 2)'
    ])
    nRow += 1

# Print the output as tab-separated rows
for e in arrOut:
    sRow = '\t'.join(map(str, e))
    print(sRow)
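
# An output data line then looks roughly like this (tab-separated; shown with " | " for
# readability, and the company, codes and values are illustrative):
# Some Company Ltd | 500123/SOMECO | 2024-01-15 | 150.00 | =GOOGLEFINANCE("NSE:SOMECO", "PRICE") | 10.00 | =ROUND((D2 * F2 / 100) / E2 * 100, 2)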