# rohitsuratekar/BankAnalysis.py

## BankAnalysis.py
"""
Rohit Suratekar
Data file is downloaded from https://github.com/razorpay/ifsc/releases
Data Compilation is done by RazorPay
File has following columns
BANK,IFSC,BRANCH,ADDRESS,CONTACT,CITY,DISTRICT,STATE,RTGS
"""
import csv
from collections import Counter, defaultdict

import matplotlib.pylab as plt
import numpy as np


class Bank:
    """One branch record built from a single row of the IFSC data file.

    Columns are read positionally in the order
    BANK, IFSC, BRANCH, ADDRESS, CONTACT, CITY, DISTRICT, STATE, RTGS.

    Attributes:
        data: the row exactly as it was passed to the constructor.
        name, ifsc, branch, address, contact, city, district, state:
            the first eight columns of the row.
        rtgs: the ninth column, or ``False`` when the row has no ninth
            column.
    """

    def __init__(self, data):
        """Split *data* into named fields.

        Args:
            data: either a sequence of column values (for example a row
                yielded by ``csv.reader``) or a raw comma-separated string.

        Raises:
            IndexError: if fewer than eight columns are present.
        """
        self.data = data
        # Only a plain string needs splitting; lists, tuples and any other
        # iterable of columns are taken as they are.
        if isinstance(data, str):
            ds = data.split(",")
        else:
            ds = list(data)
        self.name = ds[0]
        self.ifsc = ds[1]
        self.branch = ds[2]
        self.address = ds[3]
        self.contact = ds[4]
        self.city = ds[5]
        self.district = ds[6]
        self.state = ds[7]
        # The RTGS column is optional; rows without it get False.
        self.rtgs = ds[8] if len(ds) > 8 else False
def get_data(path="IFSC.csv"):
    """Load the rows of the IFSC data file as ``Bank`` records.

    Args:
        path: location of the CSV file; defaults to ``IFSC.csv`` in the
            current working directory.

    Returns:
        list of ``Bank`` objects in file order. Blank lines are skipped;
        every other row is converted, so a header line (if the file has
        one) is included as a record too.
    """
    # The csv module asks for files opened with newline="" so that line
    # breaks inside quoted fields reach the parser untouched.
    with open(path, newline="") as f:
        # csv.reader yields [] for a blank line, which Bank cannot index.
        return [Bank(row) for row in csv.reader(f) if row]


def plot_branch_wise():
    """Draw a horizontal bar chart of the ten banks with the most branches.

    Every record returned by ``get_data()`` counts as one branch of the
    bank named in it. The combined branch count of the ten plotted banks is
    printed before the figure is shown.
    """
    tally = Counter(record.name for record in get_data())
    top_ten = tally.most_common(10)

    labels = [bank.lower() for bank, _ in top_ten]
    totals = [count for _, count in top_ten]
    print(sum(totals))

    fig = plt.figure()
    axes = fig.add_subplot(111)
    positions = np.arange(len(labels))
    axes.barh(positions, totals, color="#57d785")
    axes.set_yticks(positions)
    axes.set_yticklabels(labels)
    axes.set_xlabel("Number of branches")
    axes.set_title(
        "Top 10 banks with most number of branches in India\n"
        "(as of 7 July 2018)"
    )
    plt.show()


def plot_city_wise():
    """Draw a horizontal bar chart of the ten cities with the most branches.

    Every record returned by ``get_data()`` counts as one branch in the
    city named in it. The combined branch count of the ten plotted cities
    is printed before the figure is shown.
    """
    tally = Counter(record.city for record in get_data())
    top_ten = tally.most_common(10)

    labels = [city.lower() for city, _ in top_ten]
    totals = [count for _, count in top_ten]
    print(sum(totals))

    fig = plt.figure()
    axes = fig.add_subplot(111)
    positions = np.arange(len(labels))
    axes.barh(positions, totals)
    axes.set_yticks(positions)
    axes.set_yticklabels(labels)
    axes.set_xlabel("Number of branches")
    axes.set_title(
        "Top 10 cities with most number of bank branches India\n"
        "(as of 7 July 2018)"
    )
    plt.show()


def plot_state_wise():
    """Draw a horizontal bar chart of the ten states with the most branches.

    State names are stripped of surrounding whitespace and a handful of
    known misspellings and abbreviations are folded into the proper name
    before counting. The full per-state tally is printed before the figure
    is shown.
    """
    # The state column is not perfectly clean. Only the variants listed
    # here are repaired; they are a very small fraction of the rows.
    corrections = {
        "GREATER MUMBAI": "MAHARASHTRA",
        "MH": "MAHARASHTRA",
        "GUJRAT": "GUJARAT",
        "KARANATAKA": "KARNATAKA",
        "NEW DELHI": "DELHI",
        "MADHYA  PRADESH": "MADHYA PRADESH",
        "RJ": "RAJASTHAN",
        "TN": "TAMIL NADU",
        "CG": "CHHATTISGARH",
    }

    tally = Counter()
    for record in get_data():
        raw = record.state.strip()
        tally[corrections.get(raw, raw)] += 1

    top_ten = tally.most_common(10)
    labels = [state.lower() for state, _ in top_ten]
    totals = [count for _, count in top_ten]

    print(tally)

    fig = plt.figure()
    axes = fig.add_subplot(111)
    positions = np.arange(len(labels))
    axes.barh(positions, totals, color="#e3bc13")
    axes.set_yticks(positions)
    axes.set_yticklabels(labels)
    axes.set_xlabel("Number of branches")
    axes.set_title(
        "Top 10 states with most number of bank branches India\n"
        "(as of 7 July 2018)"
    )
    plt.show()


def get_population(path="population.csv"):
    """Read state populations from a two-column CSV file.

    Each row is read as ``name, population``. Commas used as thousands
    separators inside the population figure are removed before conversion.

    Args:
        path: location of the CSV file; defaults to ``population.csv`` in
            the current working directory.

    Returns:
        dict mapping the first column (unchanged) to the population as an
        int. Rows with fewer than two columns, and rows whose second column
        is not an integer (for instance a header line), are left out.
    """
    all_states = {}
    # The csv module asks for files opened with newline="".
    with open(path, newline="") as f:
        for row in csv.reader(f):
            if len(row) < 2:
                # Blank or truncated line: there is no figure to convert.
                continue
            try:
                all_states[row[0]] = int(row[1].replace(",", ""))
            except ValueError:
                # Non-numeric population, e.g. a header row; skip it.
                continue
    return all_states


def plot_population_normalized():
    """Chart the ten states with the most branches per 100,000 residents.

    Branches are counted per state (after repairing a few known
    misspellings and abbreviations), the raw tally is printed, and each
    count is then divided by the population from ``get_population()``.
    States that have no population entry are dropped from the ranking.
    """
    census = get_population()

    # The state column is not perfectly clean. Only the variants listed
    # here are repaired; they are a very small fraction of the rows.
    corrections = {
        "GREATER MUMBAI": "MAHARASHTRA",
        "MH": "MAHARASHTRA",
        "GUJRAT": "GUJARAT",
        "KARANATAKA": "KARNATAKA",
        "NEW DELHI": "DELHI",
        "MADHYA  PRADESH": "MADHYA PRADESH",
        "RJ": "RAJASTHAN",
        "TN": "TAMIL NADU",
        "CG": "CHHATTISGARH",
    }

    tally = Counter()
    for record in get_data():
        raw = record.state.strip()
        tally[corrections.get(raw, raw)] += 1
    print(tally)

    # Branches per 100,000 people, keeping only states we have a
    # population figure for.
    per_capita = Counter({
        state: count * 100000 / census[state]
        for state, count in tally.items()
        if state in census
    })

    leaders = per_capita.most_common()[:10]
    labels = [state.lower() for state, _ in leaders]
    ratios = [ratio for _, ratio in leaders]

    fig = plt.figure()
    axes = fig.add_subplot(111)
    positions = np.arange(len(labels))
    axes.barh(positions, ratios, color="#e3bc13")
    axes.set_yticks(positions)
    axes.set_yticklabels(labels)
    axes.set_xlabel("Number of bank branches/Population (x $10^{-5}$)")
    axes.set_title(
        "Top 10 states with highest branch to citizen ratio\n"
        "(as of 7 July 2018)"
    )
    plt.show()


def plot_complex():
    """
    Plot the ten cities with the most branches as stacked horizontal bars.

    Each bar is split into one segment per bank for the ten banks with the
    most branches overall, plus a final "other" segment that collects every
    remaining bank.
    """
    records = get_data()
    bank_totals = Counter(record.name for record in records)
    city_totals = Counter(record.city for record in records)

    top_banks = [bank for bank, _ in bank_totals.most_common(10)]
    city_names = [city for city, _ in city_totals.most_common(10)]

    # One pass over the records: tally branches per bank inside each top
    # city, lumping banks outside the top ten under "other".
    leading = set(top_banks)
    city_stats = {city: Counter() for city in city_names}
    for record in records:
        if record.city in city_stats:
            segment_key = record.name if record.name in leading else "other"
            city_stats[record.city][segment_key] += 1

    top_banks.append("other")
    # Rows are cities, columns are banks (in top_banks order).
    data_array = np.asanyarray(
        [[city_stats[city][bank] for bank in top_banks]
         for city in city_names]
    )

    # Running left edge of the next segment, one entry per city.
    base = data_array[:, 0] * 0
    fig = plt.figure()
    axes = fig.add_subplot(111)
    axes.set_prop_cycle('color',
                        plt.cm.Spectral(np.linspace(0, 1, len(top_banks))))

    positions = np.arange(len(city_names))
    for column, bank in enumerate(top_banks):
        segment = data_array[:, column]
        axes.barh(positions, segment, left=base, label=bank.lower())
        base += segment

    axes.set_yticks(positions)
    axes.set_yticklabels([city.lower() for city in city_names])
    axes.set_xlabel("Number of branches")
    axes.set_ylabel("City")
    axes.set_title("Top 10 cities with most number of branches in India")
    plt.legend(loc='center left', bbox_to_anchor=(1, 0.5),
               title="Top Banks of India\n(branch wise)\n")
    plt.show()


# Script entry point: running this file directly draws the state-wise chart.
if __name__ == "__main__":
    plot_state_wise()
	"""
	Rohit Suratekar
	Data file is downloaded from https://github.com/razorpay/ifsc/releases
	Data Compilation is done by RazorPay
	File has following columns
	BANK,IFSC,BRANCH,ADDRESS,CONTACT,CITY,DISTRICT,STATE,RTGS
	"""
	import csv
	from collections import Counter, defaultdict

	import matplotlib.pylab as plt
	import numpy as np


	class Bank:
	    """One branch record built from a single row of the IFSC data file.

	    Columns are read positionally in the order
	    BANK, IFSC, BRANCH, ADDRESS, CONTACT, CITY, DISTRICT, STATE, RTGS.

	    Attributes:
	        data: the row exactly as it was passed to the constructor.
	        name, ifsc, branch, address, contact, city, district, state:
	            the first eight columns of the row.
	        rtgs: the ninth column, or ``False`` when the row has no ninth
	            column.
	    """

	    def __init__(self, data):
	        """Split *data* into named fields.

	        Args:
	            data: either a sequence of column values (for example a row
	                yielded by ``csv.reader``) or a raw comma-separated
	                string.

	        Raises:
	            IndexError: if fewer than eight columns are present.
	        """
	        self.data = data
	        # Only a plain string needs splitting; lists, tuples and any
	        # other iterable of columns are taken as they are.
	        if isinstance(data, str):
	            ds = data.split(",")
	        else:
	            ds = list(data)
	        self.name = ds[0]
	        self.ifsc = ds[1]
	        self.branch = ds[2]
	        self.address = ds[3]
	        self.contact = ds[4]
	        self.city = ds[5]
	        self.district = ds[6]
	        self.state = ds[7]
	        # The RTGS column is optional; rows without it get False.
	        self.rtgs = ds[8] if len(ds) > 8 else False
	def get_data(path="IFSC.csv"):
	    """Load the rows of the IFSC data file as ``Bank`` records.

	    Args:
	        path: location of the CSV file; defaults to ``IFSC.csv`` in the
	            current working directory.

	    Returns:
	        list of ``Bank`` objects in file order. Blank lines are skipped;
	        every other row is converted, so a header line (if the file has
	        one) is included as a record too.
	    """
	    # The csv module asks for files opened with newline="" so that line
	    # breaks inside quoted fields reach the parser untouched.
	    with open(path, newline="") as f:
	        # csv.reader yields [] for a blank line, which Bank cannot index.
	        return [Bank(row) for row in csv.reader(f) if row]


	def plot_branch_wise():
	    """Draw a horizontal bar chart of the ten banks with the most branches.

	    Every record returned by ``get_data()`` counts as one branch of the
	    bank named in it. The combined branch count of the ten plotted banks
	    is printed before the figure is shown.
	    """
	    tally = Counter(record.name for record in get_data())
	    top_ten = tally.most_common(10)

	    labels = [bank.lower() for bank, _ in top_ten]
	    totals = [count for _, count in top_ten]
	    print(sum(totals))

	    fig = plt.figure()
	    axes = fig.add_subplot(111)
	    positions = np.arange(len(labels))
	    axes.barh(positions, totals, color="#57d785")
	    axes.set_yticks(positions)
	    axes.set_yticklabels(labels)
	    axes.set_xlabel("Number of branches")
	    axes.set_title(
	        "Top 10 banks with most number of branches in India\n"
	        "(as of 7 July 2018)"
	    )
	    plt.show()


	def plot_city_wise():
	    """Draw a horizontal bar chart of the ten cities with the most branches.

	    Every record returned by ``get_data()`` counts as one branch in the
	    city named in it. The combined branch count of the ten plotted cities
	    is printed before the figure is shown.
	    """
	    tally = Counter(record.city for record in get_data())
	    top_ten = tally.most_common(10)

	    labels = [city.lower() for city, _ in top_ten]
	    totals = [count for _, count in top_ten]
	    print(sum(totals))

	    fig = plt.figure()
	    axes = fig.add_subplot(111)
	    positions = np.arange(len(labels))
	    axes.barh(positions, totals)
	    axes.set_yticks(positions)
	    axes.set_yticklabels(labels)
	    axes.set_xlabel("Number of branches")
	    axes.set_title(
	        "Top 10 cities with most number of bank branches India\n"
	        "(as of 7 July 2018)"
	    )
	    plt.show()


	def plot_state_wise():
	    """Draw a horizontal bar chart of the ten states with the most branches.

	    State names are stripped of surrounding whitespace and a handful of
	    known misspellings and abbreviations are folded into the proper name
	    before counting. The full per-state tally is printed before the
	    figure is shown.
	    """
	    # The state column is not perfectly clean. Only the variants listed
	    # here are repaired; they are a very small fraction of the rows.
	    corrections = {
	        "GREATER MUMBAI": "MAHARASHTRA",
	        "MH": "MAHARASHTRA",
	        "GUJRAT": "GUJARAT",
	        "KARANATAKA": "KARNATAKA",
	        "NEW DELHI": "DELHI",
	        # The misspelled variant has two spaces between the words; with a
	        # single space this entry would map the name onto itself.
	        "MADHYA  PRADESH": "MADHYA PRADESH",
	        "RJ": "RAJASTHAN",
	        "TN": "TAMIL NADU",
	        "CG": "CHHATTISGARH",
	    }

	    tally = Counter()
	    for record in get_data():
	        raw = record.state.strip()
	        tally[corrections.get(raw, raw)] += 1

	    top_ten = tally.most_common(10)
	    labels = [state.lower() for state, _ in top_ten]
	    totals = [count for _, count in top_ten]

	    print(tally)

	    fig = plt.figure()
	    axes = fig.add_subplot(111)
	    positions = np.arange(len(labels))
	    axes.barh(positions, totals, color="#e3bc13")
	    axes.set_yticks(positions)
	    axes.set_yticklabels(labels)
	    axes.set_xlabel("Number of branches")
	    axes.set_title(
	        "Top 10 states with most number of bank branches India\n"
	        "(as of 7 July 2018)"
	    )
	    plt.show()


	def get_population(path="population.csv"):
	    """Read state populations from a two-column CSV file.

	    Each row is read as ``name, population``. Commas used as thousands
	    separators inside the population figure are removed before
	    conversion.

	    Args:
	        path: location of the CSV file; defaults to ``population.csv`` in
	            the current working directory.

	    Returns:
	        dict mapping the first column (unchanged) to the population as an
	        int. Rows with fewer than two columns, and rows whose second
	        column is not an integer (for instance a header line), are left
	        out.
	    """
	    all_states = {}
	    # The csv module asks for files opened with newline="".
	    with open(path, newline="") as f:
	        for row in csv.reader(f):
	            if len(row) < 2:
	                # Blank or truncated line: there is no figure to convert.
	                continue
	            try:
	                all_states[row[0]] = int(row[1].replace(",", ""))
	            except ValueError:
	                # Non-numeric population, e.g. a header row; skip it.
	                continue
	    return all_states


	def plot_population_normalized():
	    """Chart the ten states with the most branches per 100,000 residents.

	    Branches are counted per state (after repairing a few known
	    misspellings and abbreviations), the raw tally is printed, and each
	    count is then divided by the population from ``get_population()``.
	    States that have no population entry are dropped from the ranking.
	    """
	    census = get_population()

	    # The state column is not perfectly clean. Only the variants listed
	    # here are repaired; they are a very small fraction of the rows.
	    corrections = {
	        "GREATER MUMBAI": "MAHARASHTRA",
	        "MH": "MAHARASHTRA",
	        "GUJRAT": "GUJARAT",
	        "KARANATAKA": "KARNATAKA",
	        "NEW DELHI": "DELHI",
	        # The misspelled variant has two spaces between the words; with a
	        # single space this entry would map the name onto itself.
	        "MADHYA  PRADESH": "MADHYA PRADESH",
	        "RJ": "RAJASTHAN",
	        "TN": "TAMIL NADU",
	        "CG": "CHHATTISGARH",
	    }

	    tally = Counter()
	    for record in get_data():
	        raw = record.state.strip()
	        tally[corrections.get(raw, raw)] += 1
	    print(tally)

	    # Branches per 100,000 people, keeping only states we have a
	    # population figure for.
	    per_capita = Counter({
	        state: count * 100000 / census[state]
	        for state, count in tally.items()
	        if state in census
	    })

	    leaders = per_capita.most_common()[:10]
	    labels = [state.lower() for state, _ in leaders]
	    ratios = [ratio for _, ratio in leaders]

	    fig = plt.figure()
	    axes = fig.add_subplot(111)
	    positions = np.arange(len(labels))
	    axes.barh(positions, ratios, color="#e3bc13")
	    axes.set_yticks(positions)
	    axes.set_yticklabels(labels)
	    axes.set_xlabel("Number of bank branches/Population (x $10^{-5}$)")
	    axes.set_title(
	        "Top 10 states with highest branch to citizen ratio\n"
	        "(as of 7 July 2018)"
	    )
	    plt.show()


	def plot_complex():
	    """
	    Plot the ten cities with the most branches as stacked horizontal bars.

	    Each bar is split into one segment per bank for the ten banks with
	    the most branches overall, plus a final "other" segment that collects
	    every remaining bank.
	    """
	    records = get_data()
	    bank_totals = Counter(record.name for record in records)
	    city_totals = Counter(record.city for record in records)

	    top_banks = [bank for bank, _ in bank_totals.most_common(10)]
	    city_names = [city for city, _ in city_totals.most_common(10)]

	    # One pass over the records: tally branches per bank inside each top
	    # city, lumping banks outside the top ten under "other".
	    leading = set(top_banks)
	    city_stats = {city: Counter() for city in city_names}
	    for record in records:
	        if record.city in city_stats:
	            segment_key = record.name if record.name in leading else "other"
	            city_stats[record.city][segment_key] += 1

	    top_banks.append("other")
	    # Rows are cities, columns are banks (in top_banks order).
	    data_array = np.asanyarray(
	        [[city_stats[city][bank] for bank in top_banks]
	         for city in city_names]
	    )

	    # Running left edge of the next segment, one entry per city.
	    base = data_array[:, 0] * 0
	    fig = plt.figure()
	    axes = fig.add_subplot(111)
	    axes.set_prop_cycle('color',
	                        plt.cm.Spectral(np.linspace(0, 1, len(top_banks))))

	    positions = np.arange(len(city_names))
	    for column, bank in enumerate(top_banks):
	        segment = data_array[:, column]
	        axes.barh(positions, segment, left=base, label=bank.lower())
	        base += segment

	    axes.set_yticks(positions)
	    axes.set_yticklabels([city.lower() for city in city_names])
	    axes.set_xlabel("Number of branches")
	    axes.set_ylabel("City")
	    axes.set_title("Top 10 cities with most number of branches in India")
	    plt.legend(loc='center left', bbox_to_anchor=(1, 0.5),
	               title="Top Banks of India\n(branch wise)\n")
	    plt.show()


	# Script entry point: running this file directly draws the state-wise chart.
	if __name__ == "__main__":
	    plot_state_wise()