Skip to content

Instantly share code, notes, and snippets.

@arora-nikhil
Forked from foulegg/README.md
Last active December 1, 2022 11:23
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 2 You must be signed in to fork a gist
  • Save arora-nikhil/8c5e64254090cf19816a5566803d9b57 to your computer and use it in GitHub Desktop.
Save arora-nikhil/8c5e64254090cf19816a5566803d9b57 to your computer and use it in GitHub Desktop.
Generate a ClearTax capital gains report using the capital gains statement from Kuvera (depends on beautifulsoup4 and openpyxl)

Installing dependencies

This script requires two Python dependencies: beautifulsoup4 and openpyxl. Install them first using pip:

$ pip install beautifulsoup4
$ pip install openpyxl

You might have to use sudo if installing globally.

Running the script

The script takes as input two excel sheets:

  1. The capital gains statement from Kuvera (let's call it gains.xlsx)
  2. The Excel template from ClearTax (say, template.xlsx)

Note that the Excel file from Kuvera is an .xls. You need save the Kuvera file as .xlsx using MS Excel and use it as input into this script.

For generating the capital gains report for ClearTax, we run the script as follows:

$ python cleartax_capital_gains_report.py gains.xlsx template.xlsx output.xlsx

This will write a file called output.xlsx to the current directory. You can then verify if the information is correct, and then directly upload it to your ITR on ClearTax.

Known issues

import re
import sys
import openpyxl
from datetime import datetime
from bs4 import BeautifulSoup
from openpyxl import load_workbook
from openpyxl.styles.numbers import FORMAT_DATE_DDMMYY, FORMAT_NUMBER_00
class Transaction:
def __init__(self, fund_name, fund_type, isin, folio, txn_data):
self.fund_name = fund_name
self.fund_type = fund_type
self.isin = isin
self.folio = folio
serial_no, units, purchase_date, purchase_value, purchase_nav, acquisition_value, jan31_value, jan31_nav, redemption_date, redemption_value, redemption_nav, stcg, ltcg = txn_data
self.serial_no = int(serial_no)
self.units = float(units)
self.purchase_date = datetime.strptime(purchase_date, '%b %d, %Y').strftime('%d/%m/%Y')
self.purchase_value = float(purchase_value)
self.purchase_nav = float(purchase_nav)
self.acquisition_value = float(acquisition_value)
try:
self.jan31_value = float(jan31_value)
except:
self.jan31_value = None
self.redemption_date = datetime.strptime(redemption_date, '%b %d, %Y').strftime('%d/%m/%Y')
self.redemption_value = float(redemption_value)
try:
self.stcg = float(stcg)
except:
self.stcg = None
try:
self.ltcg = float(ltcg)
except:
self.ltcg = None
def get_data(self):
return (
self.fund_type,
self.isin,
self.fund_name,
self.units,
self.purchase_date,
self.purchase_value,
self.redemption_date,
self.redemption_value / self.units if self.redemption_value else None,
self.jan31_value / self.units if self.jan31_value else None,
0.0, # Set "Transfer expenses (stamp duty, brokerage, etc.) column to 0
self.purchase_nav,
self.acquisition_value
)
# Reads all transactions from the capital gains report from Kuvera and returns the parsed data
# in a tuple of (transactions, total_stcg, total_ltcg)
def read_transactions(capital_gains_xls_file):
fund_name_pattern = re.compile('(.*)\[ISIN')
isin_pattern = re.compile('\[ISIN: (.*)\]')
folio_pattern = re.compile('Folio No: (.*)')
# Open the workbook and the sheet in the ClearTax Capital Gains Excel
wb = load_workbook(capital_gains_xls_file)
cg_sheet = wb.worksheets[0]
# Initialize all variables
current_fund_name = None
current_fund_type = None
current_isin = None
current_folio = None
equity_subtotal = None
debt_subtotal = None
total_stcg = None
total_ltcg = None
all_transactions = []
fund_types = {
'Equity': 'MF (Equity)',
'Hybrid': 'MF (Equity)', # Hybrid funds are also Equity funds from a Capital Gains perspective
'Others': 'MF (Equity)', # Index funds are marked "Others" in Kuvera's report, for some reason
'Debt': 'MF (Other than Equity)'
}
for row in cg_sheet.iter_rows():
if isinstance(row[0].value, str):
# This row contains either Fund Name & ISIN or Folio Number or the (Sub)Total
column_data = row[0].value
isin_match = isin_pattern.search(column_data)
folio_match = folio_pattern.search(column_data)
fund_name_match = fund_name_pattern.search(column_data)
if isin_match:
# This row contains the ISIN
current_isin = isin_match.group(1)
# Extract fund name
fund_name_match = fund_name_pattern.search(column_data)
current_fund_name = fund_name_match.group(1)
elif folio_match:
# This row contains the Folio Number
current_folio = folio_match.group(1)
elif row[0].value == 'Total':
# This row contains the Total Capital Gains
total_stcg = float(row[11].value)
total_ltcg = float(row[12].value)
for fund_type in fund_types:
# This assigns the fund type
if fund_type in column_data:
current_fund_type = fund_types[fund_type]
break
elif isinstance(row[0].value, int):
# This row contains a transaction
txn_data = [cell.value for cell in row]
transaction = Transaction(current_fund_name, current_fund_type, current_isin, current_folio, txn_data)
all_transactions.append(transaction)
return (all_transactions, total_stcg, total_ltcg)
def write_capital_gains_report(all_transactions, output_xlsx_file, cleartax_template_xlsx_file):
wb = load_workbook(cleartax_template_xlsx_file)
# We only populate data in the Mutual Fund Sheet, which is the second one in the template
mf_sheet = wb.worksheets[2]
# Columns 9, 11 and 13 are computed from the other columns
columns_to_update = [1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 15]
column_types = [
'string',
'string',
'string',
'float',
'date',
'float',
'date',
'float',
'float',
'string',
'float',
'float'
]
for i in range(len(all_transactions)):
txn_data = all_transactions[i].get_data()
for (val, j, col_type) in zip(txn_data, columns_to_update, column_types):
cell = mf_sheet.cell(row=i+2, column=j)
if val is not None:
cell.value = val
if col_type == 'float':
cell.number_format = FORMAT_NUMBER_00
wb.save(output_xlsx_file)
def prepare(capital_gains_xls_file, cleartax_template_xlsx_file, output_xlsx_file):
all_transactions, total_stcg, total_ltcg = read_transactions(capital_gains_xls_file)
stcg_sum = round(sum([txn.stcg for txn in all_transactions if txn.stcg is not None]), 2)
ltcg_sum = round(sum([txn.ltcg for txn in all_transactions if txn.ltcg is not None]), 2)
print('Sum of all STCG across all transactions:', stcg_sum)
print('Total STCG from report:', total_stcg)
print('Sum of all LTCG across all transactions:', ltcg_sum)
print('Total LTCG from report:', total_ltcg)
write_capital_gains_report(all_transactions, output_xlsx_file, cleartax_template_xlsx_file)
if __name__ == '__main__':
prepare(sys.argv[1], sys.argv[2], sys.argv[3])
@zxcv32
Copy link

zxcv32 commented Jul 9, 2022

Hey @arora-nikhil , it looks like clear tax has updated their template, which breaks this script.

New template file name: Cleartax-Capital-Gain-Template-V5.xlsx

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment