Skip to content

Instantly share code, notes, and snippets.

@joeleonjr
Created December 3, 2016 22:05
Show Gist options
  • Save joeleonjr/c2a9108214111f142f4ad0ee9c7fe2e4 to your computer and use it in GitHub Desktop.
Save joeleonjr/c2a9108214111f142f4ad0ee9c7fe2e4 to your computer and use it in GitHub Desktop.
import matplotlib
matplotlib.use('TkAgg')
import matplotlib.pyplot as plt
import pandas as pd
import scipy.stats as stats
# Exploratory look at two columns of the loans data set: the amount funded by
# investors and the amount requested. For each column the script shows, in
# order, a boxplot, a histogram and a normal probability (Q-Q) plot. Every
# plt.show() call blocks until the plot window is closed.

# Load the data and drop every row containing a missing value so the summary
# statistics and the plots below only use complete records.
loansData = pd.read_csv('https://github.com/Thinkful-Ed/curric-data-001-data-sets/raw/master/loans/loansData.csv')
loansData.dropna(inplace=True)
print(loansData.describe())

# --- Amount funded by investors -------------------------------------------

# Boxplot for Amount Funded (DataFrame.boxplot draws on the current axes).
plt.figure(1)
loansData.boxplot(column='Amount.Funded.By.Investors')
plt.show()
# Observation: 75% of loans are below $16,000; most fall between $6,000 and $16,000.

# Histogram for Amount Funded.
# DataFrame.hist opens a brand-new figure unless it is handed an axes object,
# which would leave the figure created here empty and pop up an extra blank
# window. Passing the current axes keeps the histogram on this figure.
plt.figure(2)
loansData.hist(column='Amount.Funded.By.Investors', ax=plt.gca())
plt.show()
# Observation: $7,500-$12,500 was the most frequently loaned amount, closely
# followed by $2,500-$7,500. The frequency falls off at $20,000.

# Probability plot for Amount Funded against a normal distribution.
plt.figure(3)
prob_plot_funded = stats.probplot(loansData['Amount.Funded.By.Investors'], dist='norm', plot=plt)
plt.show()
# Observation: not normally distributed.

# --- Amount requested -----------------------------------------------------

# Boxplot for Amount Requested.
plt.figure(4)
loansData.boxplot(column='Amount.Requested')
plt.show()
# Observation: 75% of requests are below $17,000; most fall between $6,000 and $17,000.

# Histogram for Amount Requested (axes passed explicitly for the same reason
# as the Amount Funded histogram: avoid a stray empty figure).
plt.figure(5)
loansData.hist(column='Amount.Requested', ax=plt.gca())
plt.show()
# Observation: the most requested range was $4,000-$7,500, followed by
# $7,500-$12,000. Interestingly, many borrowers only needed $2,000-$4,000.

# Probability plot for Amount Requested against a normal distribution.
# Stored under its own name so it no longer overwrites the funded result.
plt.figure(6)
prob_plot_requested = stats.probplot(loansData['Amount.Requested'], dist='norm', plot=plt)
plt.show()
# Observation: not normally distributed.

# Overall, Amount Funded and Amount Requested were quite similar.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment