Skip to content

Instantly share code, notes, and snippets.

@rohitsuratekar
Last active July 16, 2018 14:19
Show Gist options
  • Save rohitsuratekar/15b3ea7a97e0f6a86aa91bb54dc9fed4 to your computer and use it in GitHub Desktop.
Save rohitsuratekar/15b3ea7a97e0f6a86aa91bb54dc9fed4 to your computer and use it in GitHub Desktop.
General statistical analysis of Banks in India
"""
Rohit Suratekar
Data file is downloaded from https://github.com/razorpay/ifsc/releases
Data compilation was done by RazorPay.
The file has the following columns:
BANK,IFSC,BRANCH,ADDRESS,CONTACT,CITY,DISTRICT,STATE,RTGS
"""
import csv
from collections import Counter, defaultdict
import matplotlib.pylab as plt
import numpy as np
class Bank:
    """Simple record describing one bank branch.

    The fields are taken positionally from a row laid out as
    ``BANK, IFSC, BRANCH, ADDRESS, CONTACT, CITY, DISTRICT, STATE, RTGS``.

    :param data: either a sequence of field values (list, tuple, ...) or a
        single CSV-formatted line of text.
    :raises IndexError: if fewer than eight fields are present.
    """

    def __init__(self, data):
        # Keep the raw input around exactly as it was supplied.
        self.data = data
        if isinstance(data, str):
            # Parse with the csv module so that quoted fields containing
            # commas (addresses, typically) stay in one piece.
            fields = next(csv.reader([data]))
        else:
            # Any non-string sequence (list, tuple, list subclass) is used
            # as the already-split row.
            fields = list(data)

        self.name = fields[0]
        self.ifsc = fields[1]
        self.branch = fields[2]
        self.address = fields[3]
        self.contact = fields[4]
        self.city = fields[5]
        self.district = fields[6]
        self.state = fields[7]
        # The RTGS column is optional; rows without it get False.
        self.rtgs = fields[8] if len(fields) > 8 else False
def get_data(path="IFSC.csv"):
    """Load the IFSC CSV file and return one ``Bank`` per non-empty row.

    :param path: location of the CSV file; defaults to ``IFSC.csv`` in the
        current working directory.
    :return: list of ``Bank`` objects in file order.
    :raises IndexError: if a non-empty row has fewer than eight columns.
    """
    # NOTE(review): every non-empty row is turned into a Bank, so if the file
    # starts with a header line it is loaded as a record too - confirm
    # against the actual data file.
    # newline="" is what the csv module asks for, so that newlines embedded
    # in quoted fields are interpreted correctly.
    with open(path, newline="") as f:
        # csv.reader yields [] for blank lines; skip those instead of
        # letting the Bank constructor fail on them.
        return [Bank(row) for row in csv.reader(f) if row]
def plot_branch_wise():
    """Show a horizontal bar chart of the ten bank names that occur most
    often in the data, and print the combined count of those ten."""
    tally = Counter()
    for record in get_data():
        tally[record.name] += 1

    top_ten = tally.most_common(10)
    labels = [name.lower() for name, _ in top_ten]
    counts = [count for _, count in top_ten]
    print(sum(counts))

    fig = plt.figure()
    axes = fig.add_subplot(111)
    positions = np.arange(len(labels))
    axes.barh(positions, counts, color="#57d785")
    axes.set_yticks(positions)
    axes.set_xlabel("Number of branches")
    axes.set_title(
        "Top 10 banks with most number of branches in India\n"
        "(as of 7 July 2018)"
    )
    axes.set_yticklabels(labels)
    plt.show()
def plot_city_wise():
    """Show a horizontal bar chart of the ten city values that occur most
    often in the data, and print the combined count of those ten."""
    tally = Counter()
    for record in get_data():
        tally[record.city] += 1

    top_ten = tally.most_common(10)
    labels = [city.lower() for city, _ in top_ten]
    counts = [count for _, count in top_ten]
    print(sum(counts))

    fig = plt.figure()
    axes = fig.add_subplot(111)
    positions = np.arange(len(labels))
    axes.barh(positions, counts)
    axes.set_yticks(positions)
    axes.set_xlabel("Number of branches")
    axes.set_title(
        "Top 10 cities with most number of bank branches India\n"
        "(as of 7 July 2018)"
    )
    axes.set_yticklabels(labels)
    plt.show()
def plot_state_wise():
    """Show a horizontal bar chart of the ten states with the most rows in
    the data, and print the full per-state tally."""
    # The state column is not perfectly labelled; these are the known
    # misspellings/abbreviations and the name each one is counted under.
    # They affect only a small fraction of the rows.
    corrections = {
        "GREATER MUMBAI": "MAHARASHTRA",
        "MH": "MAHARASHTRA",
        "GUJRAT": "GUJARAT",
        "KARANATAKA": "KARNATAKA",
        "NEW DELHI": "DELHI",
        "MADHYA PRADESH": "MADHYA PRADESH",
        "RJ": "RAJASTHAN",
        "TN": "TAMIL NADU",
        "CG": "CHHATTISGARH",
    }

    tally = Counter()
    for record in get_data():
        state = record.state.strip()
        tally[corrections.get(state, state)] += 1

    top_ten = tally.most_common(10)
    labels = [state.lower() for state, _ in top_ten]
    counts = [count for _, count in top_ten]
    print(tally)

    fig = plt.figure()
    axes = fig.add_subplot(111)
    positions = np.arange(len(labels))
    axes.barh(positions, counts, color="#e3bc13")
    axes.set_yticks(positions)
    axes.set_xlabel("Number of branches")
    axes.set_title(
        "Top 10 states with most number of bank branches India\n"
        "(as of 7 July 2018)"
    )
    axes.set_yticklabels(labels)
    plt.show()
def get_population(path="population.csv"):
    """Read a two-column CSV of ``name, population`` into a dict.

    The second column may use thousands separators (``"1,234,567"``); they
    are removed before conversion.  Rows that cannot be used - blank lines,
    rows with a single column, or rows whose second column is not an
    integer (e.g. a header line) - are skipped.

    :param path: location of the CSV file; defaults to ``population.csv``
        in the current working directory.
    :return: dict mapping the first column (unchanged) to an ``int``.
    """
    all_states = {}
    # newline="" is what the csv module asks for when handed a file object.
    with open(path, newline="") as f:
        for row in csv.reader(f):
            # csv.reader yields [] for blank lines; such rows (and any with
            # only one column) have no population value to read.
            if len(row) < 2:
                continue
            try:
                all_states[row[0]] = int(row[1].replace(",", ""))
            except ValueError:
                # Deliberate best-effort: non-numeric rows are ignored.
                continue
    return all_states
def plot_population_normalized():
    """Show a horizontal bar chart of the ten states with the highest
    number of rows per 100000 inhabitants.

    The raw per-state tally is printed before it is normalised.  States
    that have no entry in the population table are dropped.
    """
    population = get_population()

    # The state column is not perfectly labelled; these are the known
    # misspellings/abbreviations and the name each one is counted under.
    # They affect only a small fraction of the rows.
    corrections = {
        "GREATER MUMBAI": "MAHARASHTRA",
        "MH": "MAHARASHTRA",
        "GUJRAT": "GUJARAT",
        "KARANATAKA": "KARNATAKA",
        "NEW DELHI": "DELHI",
        "MADHYA PRADESH": "MADHYA PRADESH",
        "RJ": "RAJASTHAN",
        "TN": "TAMIL NADU",
        "CG": "CHHATTISGARH",
    }

    tally = Counter()
    for record in get_data():
        state = record.state.strip()
        tally[corrections.get(state, state)] += 1
    print(tally)

    # Iterate over a snapshot of the keys because entries may be deleted.
    for state in list(tally):
        if state in population:
            tally[state] = tally[state] * 100000 / population[state]
        else:
            del tally[state]

    ranked = tally.most_common()[:10]
    labels = [state.lower() for state, _ in ranked]
    ratios = [ratio for _, ratio in ranked]

    fig = plt.figure()
    axes = fig.add_subplot(111)
    positions = np.arange(len(labels))
    axes.barh(positions, ratios, color="#e3bc13")
    axes.set_yticks(positions)
    axes.set_xlabel("Number of bank branches/Population (x $10^{-5}$)")
    axes.set_title(
        "Top 10 states with highest branch to citizen ratio\n"
        "(as of 7 July 2018)"
    )
    axes.set_yticklabels(labels)
    plt.show()
def plot_complex():
    """Show a stacked horizontal bar chart for the ten most frequent cities.

    Each city's bar is split by bank: one segment per bank among the ten
    most frequent bank names, plus a final "other" segment for every
    remaining bank.
    """
    records = get_data()

    per_bank = Counter()
    per_city = Counter()
    for record in records:
        per_bank[record.name] += 1
        per_city[record.city] += 1

    top_banks = [bank for bank, _ in per_bank.most_common(10)]

    # For each top city, count its rows per top bank; all other banks are
    # pooled under the "other" key.
    city_stats = defaultdict(Counter)
    for city, _ in per_city.most_common(10):
        for record in records:
            if record.city != city:
                continue
            key = record.name if record.name in top_banks else "other"
            city_stats[city][key] += 1

    city_names = list(city_stats)
    top_banks.append("other")

    # Rows are cities, columns are banks (in top_banks order).
    data_array = np.asanyarray(
        [[city_stats[city][bank] for bank in top_banks] for city in city_names]
    )
    # Running left edge of the stacked bars, starting at zero for every city.
    base = data_array[:, 0] * 0

    fig = plt.figure()
    axes = fig.add_subplot(111)
    axes.set_prop_cycle(
        'color', plt.cm.Spectral(np.linspace(0, 1, len(top_banks)))
    )
    positions = np.arange(len(city_names))
    for column, bank in enumerate(top_banks):
        segment = data_array[:, column]
        axes.barh(positions, segment, left=base, label=bank.lower())
        base += segment

    axes.set_yticks(positions)
    axes.set_xlabel("Number of branches")
    axes.set_ylabel("City")
    axes.set_title("Top 10 cities with most number of branches in India")
    axes.set_yticklabels([city.lower() for city in city_names])
    plt.legend(
        loc='center left',
        bbox_to_anchor=(1, 0.5),
        title="Top Banks of India\n(branch wise)\n",
    )
    plt.show()
if __name__ == "__main__":
    # Running the file as a script shows the state-wise chart.
    plot_state_wise()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment