Last active
July 16, 2018 14:19
-
-
Save rohitsuratekar/15b3ea7a97e0f6a86aa91bb54dc9fed4 to your computer and use it in GitHub Desktop.
General statistical analysis of Banks in India
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""
Rohit Suratekar
The data file is downloaded from https://github.com/razorpay/ifsc/releases
Data compilation is done by RazorPay.
The file has the following columns:
BANK,IFSC,BRANCH,ADDRESS,CONTACT,CITY,DISTRICT,STATE,RTGS
"""
import csv | |
from collections import Counter, defaultdict | |
import matplotlib.pylab as plt | |
import numpy as np | |
class Bank:
    """Simple record holding one row of the IFSC data file.

    Columns are read by position, in the order
    BANK, IFSC, BRANCH, ADDRESS, CONTACT, CITY, DISTRICT, STATE, RTGS.
    """

    def __init__(self, data):
        """Fill the record from one row of the data file.

        :param data: either an already split row (list or tuple of column
            values, e.g. as produced by ``csv.reader``) or a single
            comma-separated line of text.
        :raises IndexError: if the row has fewer than eight columns.
        """
        self.data = data
        # isinstance (rather than ``type(data) is list``) also accepts
        # tuples and list subclasses, which otherwise fell through to the
        # string branch and failed on ``.split``.
        if isinstance(data, (list, tuple)):
            ds = data
        else:
            # Parse text with the csv module so that quoted fields which
            # contain commas (addresses, for instance) stay in one column.
            ds = next(csv.reader([data]), [])
        self.name = ds[0]
        self.ifsc = ds[1]
        self.branch = ds[2]
        self.address = ds[3]
        self.contact = ds[4]
        self.city = ds[5]
        self.district = ds[6]
        self.state = ds[7]
        # The ninth column is optional; rows without it get ``False``.
        self.rtgs = ds[8] if len(ds) > 8 else False
def get_data():
    """Read ``IFSC.csv`` from the working directory into ``Bank`` records.

    Every non-empty CSV row becomes one ``Bank``; nothing else is filtered,
    so a header row, if the file has one, is converted as well.

    :return: list of ``Bank`` objects in file order.
    """
    # newline="" leaves newline handling to the csv module, which it needs
    # in order to parse quoted fields that contain line breaks.
    with open("IFSC.csv", newline="") as f:
        # csv.reader yields an empty list for a blank line; skip those
        # because Bank indexes its columns by position.
        return [Bank(row) for row in csv.reader(f) if row]
def plot_branch_wise():
    """Show a horizontal bar chart of the ten banks with the most branches.

    Also prints the combined branch count of those ten banks.
    """
    branch_counts = Counter(bank.name for bank in get_data())
    top_ten = branch_counts.most_common(10)
    labels = [name.lower() for name, _ in top_ten]
    totals = [count for _, count in top_ten]
    print(sum(totals))

    positions = np.arange(len(labels))
    axes = plt.figure().add_subplot(111)
    axes.barh(positions, totals, color="#57d785")
    axes.set_yticks(positions)
    axes.set_xlabel("Number of branches")
    axes.set_title(
        "Top 10 banks with most number of branches in India\n"
        "(as of 7 July 2018)"
    )
    axes.set_yticklabels(labels)
    plt.show()
def plot_city_wise():
    """Show a horizontal bar chart of the ten cities with the most branches.

    Also prints the combined branch count of those ten cities.
    """
    city_counts = Counter(bank.city for bank in get_data())
    top_ten = city_counts.most_common(10)
    labels = [city.lower() for city, _ in top_ten]
    totals = [count for _, count in top_ten]
    print(sum(totals))

    positions = np.arange(len(labels))
    axes = plt.figure().add_subplot(111)
    axes.barh(positions, totals)
    axes.set_yticks(positions)
    axes.set_xlabel("Number of branches")
    axes.set_title(
        "Top 10 cities with most number of bank branches India\n"
        "(as of 7 July 2018)"
    )
    axes.set_yticklabels(labels)
    plt.show()
def plot_state_wise():
    """Show a horizontal bar chart of the ten states with the most branches.

    Also prints the complete per-state counter.
    """
    # The state labels in the data are not perfect; these are the known
    # misspellings and abbreviations, mapped to the proper state name.
    # They affect only a very small fraction of the rows.
    corrections = {
        "GREATER MUMBAI": "MAHARASHTRA",
        "MH": "MAHARASHTRA",
        "GUJRAT": "GUJARAT",
        "KARANATAKA": "KARNATAKA",
        "NEW DELHI": "DELHI",
        "RJ": "RAJASTHAN",
        "TN": "TAMIL NADU",
        "CG": "CHHATTISGARH",
    }
    state_counts = Counter()
    for bank in get_data():
        state = bank.state.strip()
        # Labels without a correction are counted under their own name.
        state_counts[corrections.get(state, state)] += 1

    top_ten = state_counts.most_common(10)
    labels = [state.lower() for state, _ in top_ten]
    totals = [count for _, count in top_ten]
    print(state_counts)

    positions = np.arange(len(labels))
    axes = plt.figure().add_subplot(111)
    axes.barh(positions, totals, color="#e3bc13")
    axes.set_yticks(positions)
    axes.set_xlabel("Number of branches")
    axes.set_title(
        "Top 10 states with most number of bank branches India\n"
        "(as of 7 July 2018)"
    )
    axes.set_yticklabels(labels)
    plt.show()
def get_population():
    """Read ``population.csv`` from the working directory.

    The first column of each row is used as the key and the second column,
    with thousands separators removed, as the integer value.

    :return: dict mapping first-column text to the parsed integer.
    """
    all_states = {}
    # newline="" leaves newline handling to the csv module, as its
    # documentation recommends for files passed to csv.reader.
    with open("population.csv", newline="") as f:
        for row in csv.reader(f):
            # Blank lines come back as [] and would fail on row[1]; rows
            # with fewer than two columns carry no number to read.
            if len(row) < 2:
                continue
            num = row[1].replace(",", "")
            try:
                all_states[row[0]] = int(num)
            except ValueError:
                # Rows whose second column is not a number (a header line,
                # for example) are deliberately left out.
                continue
    return all_states
def plot_population_normalized():
    """Show the ten states with the most branches per 100000 inhabitants.

    States that have no entry in the population table are left out of the
    chart. The raw per-state branch counter is printed before normalizing.
    """
    population = get_population()

    # The state labels in the data are not perfect; these are the known
    # misspellings and abbreviations, mapped to the proper state name.
    # They affect only a very small fraction of the rows.
    corrections = {
        "GREATER MUMBAI": "MAHARASHTRA",
        "MH": "MAHARASHTRA",
        "GUJRAT": "GUJARAT",
        "KARANATAKA": "KARNATAKA",
        "NEW DELHI": "DELHI",
        "RJ": "RAJASTHAN",
        "TN": "TAMIL NADU",
        "CG": "CHHATTISGARH",
    }
    branch_counts = Counter()
    for bank in get_data():
        state = bank.state.strip()
        # Labels without a correction are counted under their own name.
        branch_counts[corrections.get(state, state)] += 1
    print(branch_counts)

    # Scale each count to branches per 100000 people, keeping only the
    # states whose population is known.
    per_capita = Counter()
    for state, branches in branch_counts.items():
        if state in population:
            per_capita[state] = branches * 100000 / population[state]

    top_ten = per_capita.most_common()[:10]
    labels = [state.lower() for state, _ in top_ten]
    ratios = [ratio for _, ratio in top_ten]

    positions = np.arange(len(labels))
    axes = plt.figure().add_subplot(111)
    axes.barh(positions, ratios, color="#e3bc13")
    axes.set_yticks(positions)
    axes.set_xlabel("Number of bank branches/Population (x $10^{-5}$)")
    axes.set_title(
        "Top 10 states with highest branch to citizen ratio\n"
        "(as of 7 July 2018)"
    )
    axes.set_yticklabels(labels)
    plt.show()
def plot_complex():
    """Show a stacked bar chart of the top cities split by the top banks.

    Each of the ten cities with the most branches gets one horizontal bar,
    divided into one segment per bank for the ten banks with the most
    branches overall, plus a final "other" segment for all remaining banks.
    """
    records = get_data()
    bank_totals = Counter(record.name for record in records)
    city_totals = Counter(record.city for record in records)
    segments = [bank for bank, _ in bank_totals.most_common(10)]
    city_names = [city for city, _ in city_totals.most_common(10)]

    # Count branches per (city, bank) for the selected cities in one pass.
    breakdown = {city: Counter() for city in city_names}
    for record in records:
        per_city = breakdown.get(record.city)
        if per_city is None:
            continue
        if record.name in segments:
            per_city[record.name] += 1
        else:
            per_city["other"] += 1
    segments.append("other")

    # One row per city, one column per segment.
    data_array = np.asanyarray(
        [[breakdown[city][bank] for bank in segments] for city in city_names]
    )
    # Running left edge of the stacked bars, starting at zero for each city.
    base = data_array[:, 0] * 0

    axes = plt.figure().add_subplot(111)
    axes.set_prop_cycle(
        'color', plt.cm.Spectral(np.linspace(0, 1, len(segments)))
    )
    positions = np.arange(len(city_names))
    for column, bank in enumerate(segments):
        current_bank = data_array[:, column]
        axes.barh(positions, current_bank, left=base, label=bank.lower())
        base += current_bank

    axes.set_yticks(positions)
    axes.set_xlabel("Number of branches")
    axes.set_ylabel("City")
    axes.set_title("Top 10 cities with most number of branches in India")
    axes.set_yticklabels([city.lower() for city in city_names])
    plt.legend(
        loc='center left',
        bbox_to_anchor=(1, 0.5),
        title="Top Banks of India\n(branch wise)\n",
    )
    plt.show()
if __name__ == "__main__":
    # When run as a script, draw the state-wise chart.
    plot_state_wise()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment