Skip to content

Instantly share code, notes, and snippets.

@primaryobjects
Last active December 10, 2023 03:22
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save primaryobjects/d79841fb869116dbae00b6ab7c62cd26 to your computer and use it in GitHub Desktop.
Save primaryobjects/d79841fb869116dbae00b6ab7c62cd26 to your computer and use it in GitHub Desktop.
FDIC Bank Failures by Year in Python using Flask, pandas, requests, sqlite3 https://pandas-numpy-practice.primaryobjects.repl.co

FDIC Bank Failures by Year

An example web app in Python using Flask, pandas, requests, sqlite3.

Why

Just practicing Python programming. This app demonstrates the following:

  • Making an HTTP request to download an open-source government CSV dataset.
  • Saving the data to an sqlite db.
  • Querying the data to retrieve table names, column names, and a summary of the data.
  • Plotting a chart of bank failures grouped by year.
  • Rendering a web page endpoint on '/' to display the chart, along with a table of the bank names sorted alphabetically.

Screenshot

cap

<!DOCTYPE html>
<html>
<head>
<title>Bank Failures by Year</title>
<!-- Include Bulma CSS -->
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/bulma/0.9.3/css/bulma.min.css">
</head>
<body>
<section class="section">
<div class="container">
<h1 class="title">Bank Failures by Year</h1>
<figure class="image is-4by3">
<img src="{{ url_for('static', filename='bank_failures.png') }}" alt="Plot of bank failures by year">
</figure>
<h2 class="subtitle">Failed Banks in {{data.year}}</h2>
<ul>
{% for bank in data.failed_banks|sort %}
<li>{{ bank }}</li>
{% endfor %}
</ul>
</div>
</section>
</body>
</html>
import sqlite3
from io import StringIO
import matplotlib.pyplot as plt
import pandas as pd
import requests
from flask import Flask, render_template
import base64
import io
app = Flask(__name__)
def create_db(file_name = 'banklist.db'):
# Download dataset.
response = requests.get('https://www.fdic.gov/bank/individual/failed/banklist.csv')
# Verify successful download.
assert response.status_code == 200, 'Failed to download data'
# Get the response content.
data = response.content.decode('latin1')
# Convert the string to a dataframe.
df = pd.read_csv(StringIO(data))
# Save to a database.
conn = sqlite3.connect(file_name)
df.to_sql(name=file_name.replace('.db', ''), con=conn, if_exists='replace')
conn.commit()
return conn
def load_db(file_name = 'banklist.db'):
# Check if the database file banklist.db exists.
conn = sqlite3.connect(file_name)
if not conn:
print('Initializing database.')
conn = create_db()
return conn
def summary(conn):
# Get the names of all tables.
cur = conn.cursor()
cur.execute('SELECT name FROM sqlite_master WHERE type="table"')
tables = cur.fetchall()
for table_name in tables:
table_name = table_name[0]
table = pd.read_sql_query('SELECT * FROM {}'.format(table_name), conn)
print(f'Table: {table_name}\n')
print(f'Rows: {table.shape[0]}\n')
print('Columns:\n')
for col in table.columns:
print(f'- {col} ({table[col].dtype}) Unique values: {table[col].nunique()}')
print('\nSample:\n')
df = pd.read_sql_query(f'SELECT * FROM {table_name} LIMIT 5', conn)
print(df)
print('\n---\n')
def bank_failures_by_year(conn):
# Read the data.
df = pd.read_sql_query('SELECT * FROM banklist', conn)
# Remove trailing spaces from column names.
df.columns = df.columns.str.strip()
# Summarize the table.
print(df)
# Convert the data string to a datetime.
df['Closing Date'] = pd.to_datetime(df['Closing Date'])
# Create a new column for the year.
df['Year'] = df['Closing Date'].dt.year
# Group by year and count the number of failures.
df_grouped = df.groupby('Year').size()
print(df_grouped)
# Display a chart of the result.
plt.figure(figsize=(10,6))
plt.plot(df_grouped.index, df_grouped.values, marker='o')
plt.xlabel('Year')
plt.ylabel('Number of Bank Failures')
plt.title('Bank Failures by Year')
plt.grid(True)
#plt.show()
plt.savefig('static/bank_failures.png')
# Get the year with the maximum count.
max_year = df_grouped.idxmax()
# Get the names of all banks that failed in this year.
failed_banks = df[df['Year'] == max_year]['Bank Name'].tolist()
return { "failed_banks": failed_banks, "year": max_year }
@app.route('/')
def index():
conn = load_db()
data = bank_failures_by_year(conn)
conn.close()
return render_template('index.html', data = data)
def main():
conn = load_db()
# Print a summary of the tables and data.
summary(conn)
bank_failures_by_year(conn)
conn.close()
app.run(host='0.0.0.0', debug=True)
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment