Skip to content

Instantly share code, notes, and snippets.

@jmsword
jmsword / logistic_regression.py
Created January 3, 2017 18:30
Logistic Regression
import pandas as pd
import statsmodels.api as sm
import numpy as np
#read in clean loan data
df = pd.read_csv('loansData_clean.csv')
#create column flagging interest rates above 12% (True when Interest.Rate > .12)
df['IR_TF'] = df['Interest.Rate'] > .12
@jmsword
jmsword / logistic_regression.py
Created December 29, 2016 19:27
logistic regression version 2
import pandas as pd
import statsmodels.api as sm
import numpy as np
#read in clean loan data
df = pd.read_csv('loansData_clean.csv')
#create column flagging interest rates above 12% (True when Interest.Rate > .12)
df['IR_TF'] = df['Interest.Rate'] > .12
@jmsword
jmsword / logistic_regression.py
Last active December 29, 2016 14:01
Logistic Regression
import pandas as pd
import statsmodels.api as sm
df = pd.read_csv('loansData_clean.csv')
# Binary target: 1 when the interest rate is strictly above 12%, otherwise 0.
# The boolean comparison is cast directly instead of mapping a per-row lambda.
df['IR_TF'] = (df['Interest.Rate'] > .12).astype(int)
# Constant column of 1s, broadcast to every row.
# NOTE(review): presumably the intercept term for the statsmodels fit -- confirm.
df['Statsmodel.Intercept'] = 1
@jmsword
jmsword / linear_regression.py
Created December 20, 2016 15:47
Linear Regression
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import statsmodels.api as sm
loansData = pd.read_csv('https://github.com/Thinkful-Ed/curric-data-001-data-sets/raw/master/loans/loansData.csv')
#Remove '%' from 'Interest.Rate' column and convert to a decimal fraction rounded to 4 places (e.g. '8.90%' -> 0.089)
loansData['Interest.Rate']=loansData['Interest.Rate'].map(lambda x: round(float(x.rstrip('%')) / 100, 4))
#Remove 'months' from the 'Loan.Length' column
@jmsword
jmsword / chi_squared.py
Created December 12, 2016 19:38
Chi Squared Test
from scipy import stats
import collections
import pandas as pd
import matplotlib.pyplot as plt
loansData = pd.read_csv('https://github.com/Thinkful-Ed/curric-data-001-data-sets/raw/master/loans/loansData.csv')
loansData.dropna(inplace=True)
freq = collections.Counter(loansData['Open.CREDIT.Lines'])
@jmsword
jmsword / prob_lending_club.py
Created December 11, 2016 18:27
Lending Club Data Project
import matplotlib.pyplot as plt
import pandas as pd
import scipy.stats as stats
loansData = pd.read_csv('https://github.com/Thinkful-Ed/curric-data-001-data-sets/raw/master/loans/loansData.csv')
loansData.dropna(inplace=True)
loansData.boxplot(column='Amount.Requested')
plt.savefig('Amount Requested Boxplot.png')
import numpy as np
import scipy.stats as stats
import collections
import matplotlib.pyplot as plt
x = [1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 3, 4, 4, 4, 4, 5, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 9, 9]
c = collections.Counter(x)
count_sum = sum(c.values())
@jmsword
jmsword / tbay.py
Created November 21, 2016 21:47
Jeff's Tbay project
from sqlalchemy import create_engine
from sqlalchemy import Column, Integer, String, ForeignKey, DateTime
from sqlalchemy.orm import sessionmaker
from sqlalchemy.orm import relationship
from sqlalchemy.ext.declarative import declarative_base
engine = create_engine('postgresql://ubuntu:thinkful@localhost:5432/tbay')
Session = sessionmaker(bind=engine)
session = Session()
Base = declarative_base()
#modeling the bicycle industry
#classes
class bicycle(object):
    """A bicycle described by its model name, weight, and cost to produce.

    Attributes:
        model: Model name of the bike.
        weight: Weight of the bike (units are not specified here).
        cost: Cost to produce the bike.
    """

    def __init__(self, model, weight, cost):
        """Store the model name, weight, and production cost on the instance."""
        self.model = model
        self.weight = weight
        self.cost = cost
#modeling the bicycle industry
#create the customer class
class customer(object):
    """A customer identified by a name and holding a fund.

    Attributes:
        name: The customer's name.
        fund: The customer's fund.
            NOTE(review): presumably the money available for buying a
            bike -- confirm against the rest of the script.
    """

    def __init__(self, name, fund):
        """Store the customer's name and fund on the instance."""
        self.name = name
        self.fund = fund
#create the bicycle class so that each bike created has a model name, weight, and cost to produce
class bicycle(object):