|
import sqlite3 |
|
from io import StringIO |
|
import matplotlib.pyplot as plt |
|
import pandas as pd |
|
import requests |
|
from flask import Flask, render_template |
|
import base64 |
|
import io |
|
|
|
app = Flask(__name__) |
|
|
|
def create_db(file_name = 'banklist.db'): |
|
# Download dataset. |
|
response = requests.get('https://www.fdic.gov/bank/individual/failed/banklist.csv') |
|
|
|
# Verify successful download. |
|
assert response.status_code == 200, 'Failed to download data' |
|
|
|
# Get the response content. |
|
data = response.content.decode('latin1') |
|
|
|
# Convert the string to a dataframe. |
|
df = pd.read_csv(StringIO(data)) |
|
|
|
# Save to a database. |
|
conn = sqlite3.connect(file_name) |
|
|
|
df.to_sql(name=file_name.replace('.db', ''), con=conn, if_exists='replace') |
|
|
|
conn.commit() |
|
return conn |
|
|
|
def load_db(file_name = 'banklist.db'): |
|
# Check if the database file banklist.db exists. |
|
conn = sqlite3.connect(file_name) |
|
if not conn: |
|
print('Initializing database.') |
|
conn = create_db() |
|
|
|
return conn |
|
|
|
def summary(conn): |
|
# Get the names of all tables. |
|
cur = conn.cursor() |
|
|
|
cur.execute('SELECT name FROM sqlite_master WHERE type="table"') |
|
tables = cur.fetchall() |
|
|
|
for table_name in tables: |
|
table_name = table_name[0] |
|
table = pd.read_sql_query('SELECT * FROM {}'.format(table_name), conn) |
|
|
|
print(f'Table: {table_name}\n') |
|
print(f'Rows: {table.shape[0]}\n') |
|
print('Columns:\n') |
|
for col in table.columns: |
|
print(f'- {col} ({table[col].dtype}) Unique values: {table[col].nunique()}') |
|
|
|
print('\nSample:\n') |
|
df = pd.read_sql_query(f'SELECT * FROM {table_name} LIMIT 5', conn) |
|
print(df) |
|
|
|
print('\n---\n') |
|
|
|
def bank_failures_by_year(conn): |
|
# Read the data. |
|
df = pd.read_sql_query('SELECT * FROM banklist', conn) |
|
|
|
# Remove trailing spaces from column names. |
|
df.columns = df.columns.str.strip() |
|
|
|
# Summarize the table. |
|
print(df) |
|
|
|
# Convert the data string to a datetime. |
|
df['Closing Date'] = pd.to_datetime(df['Closing Date']) |
|
|
|
# Create a new column for the year. |
|
df['Year'] = df['Closing Date'].dt.year |
|
|
|
# Group by year and count the number of failures. |
|
df_grouped = df.groupby('Year').size() |
|
|
|
print(df_grouped) |
|
|
|
# Display a chart of the result. |
|
plt.figure(figsize=(10,6)) |
|
plt.plot(df_grouped.index, df_grouped.values, marker='o') |
|
plt.xlabel('Year') |
|
plt.ylabel('Number of Bank Failures') |
|
plt.title('Bank Failures by Year') |
|
plt.grid(True) |
|
|
|
#plt.show() |
|
|
|
plt.savefig('static/bank_failures.png') |
|
|
|
# Get the year with the maximum count. |
|
max_year = df_grouped.idxmax() |
|
|
|
# Get the names of all banks that failed in this year. |
|
failed_banks = df[df['Year'] == max_year]['Bank Name'].tolist() |
|
|
|
return { "failed_banks": failed_banks, "year": max_year } |
|
|
|
@app.route('/') |
|
def index(): |
|
conn = load_db() |
|
data = bank_failures_by_year(conn) |
|
conn.close() |
|
|
|
return render_template('index.html', data = data) |
|
|
|
def main(): |
|
conn = load_db() |
|
|
|
# Print a summary of the tables and data. |
|
summary(conn) |
|
|
|
bank_failures_by_year(conn) |
|
|
|
conn.close() |
|
|
|
app.run(host='0.0.0.0', debug=True) |
|
|
|
main() |
|
|