Skip to content

Instantly share code, notes, and snippets.

@bakineggs
Created January 3, 2012 20:05
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save bakineggs/1556651 to your computer and use it in GitHub Desktop.
Save bakineggs/1556651 to your computer and use it in GitHub Desktop.
Figures out the creditworthiness of borrowers.
require 'fastercsv'
require 'mrscake'
require 'open-uri'
# Downloads the LoanStats.csv export from lendingclub.com and saves it as
# 'LoanStats.csv' relative to the current working directory.
#
# The URL is opened through URI#open (mixed into HTTP(S) URIs by open-uri)
# instead of Kernel#open: Kernel#open stopped accepting URLs in Ruby 3.0,
# whereas URI#open is provided by open-uri on old and new Rubies alike.
#
# @return [String] the name of the local file that was written
def download_loans
  destination_path = 'LoanStats.csv'
  source_uri = URI.parse('https://www.lendingclub.com/info/download.action?file=LoanStats.csv')

  source_uri.open do |source|
    # Binary mode keeps the downloaded bytes untouched (no newline translation).
    File.open(destination_path, 'wb') do |destination|
      destination.write(source.read)
    end
  end

  destination_path
end
# Converts a two-character credit grade such as "A1" or "G5" into an Integer.
#
# The letter contributes 5 points for every step it is ahead of "G", and the
# digit contributes its offset from "1"; so "G1" => 0, "G5" => 4, "A1" => 30.
#
# Character codes are taken with +ord+ so the arithmetic works both on Ruby 1.8
# (where String#[] yields an Integer, and Integer#ord returns itself) and on
# Ruby 1.9+ (where String#[] yields a one-character String).
#
# NOTE(review): a later letter lowers the score while a higher digit raises it,
# so the result is not monotonic across sub-grades -- confirm this is intended.
#
# @param grade [String] letter at index 0, digit at index 1
# @return [Integer]
def numeric_grade(grade)
  letter_steps = 'G'[0].ord - grade[0].ord
  digit_steps = grade[1].ord - '1'[0].ord
  letter_steps * 5 + digit_steps
end
# Loan statuses, grouped by how the import loop treats them. The arrays are
# frozen so these shared constants cannot be modified by accident.

# Loans in these statuses are skipped and never added to the training set.
IGNORED_STATUSES = [:Issued, :"In Review", :"Partially Funded"].freeze
# Rated :good.
GOOD_STATUSES = [:"Fully Paid", :Current].freeze
# Rated :ok.
OK_STATUSES = [:"In Grace Period", :"Performing Payment Plan"].freeze
# Rated :bad.
BAD_STATUSES = [:"Late (16-30 days)", :"Late (31-120 days)"].freeze
# Rated :very_bad.
VERY_BAD_STATUSES = [:"Charged Off", :Default].freeze
# Loan attributes handed to the learner, in feature order: the position of a
# name in this list is the index of that feature in every training example,
# and is also the index substituted back into the generated code.
# Frozen so the ordering cannot be changed by accident after loading.
PREDICTION_ATTRIBUTES = [
  :requested,
  :rate,
  :length,
  :credit_grade,
  :purpose,
  :payment,
  :dti_ratio,
  :home_ownership,
  :income,
  :fico_lower_bound,
  :earliest_credit_line,
  :open_credit_lines,
  :total_credit_lines,
  :credit_balance,
  :credit_utilization,
  :inquiries,
  :now_delinquent,
  :delinquent_amount,
  :total_delinquencies,
  :last_delinquency,
  :public_records,
  :last_public_record,
  :employment_years,
].freeze
# Build the training set from the loan CSV named by ARGV[0], or from a freshly
# downloaded copy when no argument is given.
column_positions = {}
data = MrsCake::DataSet.new
csv_path = ARGV[0] || download_loans

FasterCSV.foreach(csv_path) do |row|
  # Rows with two or fewer fields come before the real header row; skip them.
  next unless row.length > 2

  # The first wide row is the header: remember where each column title sits.
  if column_positions.empty?
    row.each_with_index { |title, position| column_positions[title] = position }
    next
  end

  # Fetches this row's cell by column title; an unknown title is a hard error.
  cell = lambda do |wanted|
    found_at = column_positions[wanted]
    raise "Couldn't find column: #{wanted}" unless found_at
    row[found_at]
  end

  # Numeric cells go through to_i / to_f; categorical cells become symbols.
  # NOTE(review): sub / to_sym are called directly on the cell value, so a nil
  # cell in those columns would raise NoMethodError -- confirm blanks cannot occur.
  loan = {
    :requested             => cell['Amount Requested'].to_i,
    :funded                => cell['Amount Funded By Investors'].to_i,
    :rate                  => cell['Interest Rate'].to_f,
    :length                => cell['Loan Length'].to_i,
    :credit_grade          => numeric_grade(cell['CREDIT Grade']),
    :purpose               => cell['Loan Purpose'].to_sym,
    :payment               => cell['Monthly PAYMENT'].to_f,
    # Strip the policy prefix so such loans share the plain status names.
    :status                => cell['Status'].sub('Does not meet the current credit policy Status: ', '').to_sym,
    :dti_ratio             => cell['Debt-To-Income Ratio'].to_f,
    :outstanding_principle => cell['Remaining Principal Funded by Investors'].to_f,
    :payments_made         => cell['Payments To Date (Funded by investors)'].to_f,
    :home_ownership        => cell['Home Ownership'].to_sym,
    :income                => cell['Monthly Income'].to_f,
    :fico_lower_bound      => cell['FICO Range'].to_i,
    :earliest_credit_line  => cell['Earliest CREDIT Line'].to_i,
    :open_credit_lines     => cell['Open CREDIT Lines'].to_i,
    :total_credit_lines    => cell['Total CREDIT Lines'].to_i,
    :credit_balance        => cell['Revolving CREDIT Balance'].to_f,
    :credit_utilization    => cell['Revolving Line Utilization'].to_f,
    :inquiries             => cell['Inquiries in the Last 6 Months'].to_i,
    :now_delinquent        => cell['Accounts Now Delinquent'].to_f,
    :delinquent_amount     => cell['Delinquent Amount'].to_f,
    :total_delinquencies   => cell['Delinquencies (Last 2 yrs)'].to_i,
    :last_delinquency      => cell['Months Since Last Delinquency'].to_i,
    :public_records        => cell['Public Records On File'].to_i,
    :last_public_record    => cell['Months Since Last Record'].to_i,
    :employment_years      => cell['Employment Length'].to_i,
  }

  # Loans in an ignored status are left out of the training set entirely.
  status = loan[:status]
  next if IGNORED_STATUSES.include?(status)

  # Every remaining status must map to a rating; anything else aborts the run.
  loan_rating =
    case status
    when *GOOD_STATUSES     then :good
    when *OK_STATUSES       then :ok
    when *BAD_STATUSES      then :bad
    when *VERY_BAD_STATUSES then :very_bad
    else raise "Unknown loan status: #{loan[:status]}"
    end

  # Feature values are listed in PREDICTION_ATTRIBUTES order.
  features = PREDICTION_ATTRIBUTES.map { |name| loan[name] }
  data.add(features, loan_rating)
end
# Train on the collected examples and print the generated Ruby source, with
# each positional reference "data[<index>]" replaced by the attribute name
# found at that index in PREDICTION_ATTRIBUTES.
generated = data.train.generate_code('ruby')
PREDICTION_ATTRIBUTES.each_with_index do |attribute, position|
  generated.gsub!("data[#{position}]", attribute.to_s)
end
puts(generated)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment