Skip to content

Instantly share code, notes, and snippets.

@jonathanedelman
Created January 18, 2019 23:00
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jonathanedelman/d0f3dc7515299c3e963c68e2f7dc22cd to your computer and use it in GitHub Desktop.
Save jonathanedelman/d0f3dc7515299c3e963c68e2f7dc22cd to your computer and use it in GitHub Desktop.
import calendar
from datetime import datetime
from collections import namedtuple
import re
import sys
import time
import numpy as np
import pandas as pd
from demyst.df.df2 import df2
import os
import pdb
@df2
def data_function(df):
    """Fetch provider data for the request and score it with the model.

    Args:
        df: Request object supplied through the ``df2`` decorator. Only
            ``df.inputs`` and ``df.connectors`` (``fetch`` / ``get``) are
            used here.

    Returns:
        dict: ``{"result": <first element of the model prediction>}``.
    """
    # Each provider is listed exactly once (the original list repeated
    # "utilityscore_savings"); order follows first appearance.
    providers = [
        "infutor_property_append",
        "utilityscore_savings",
        "info_connect_company",
        "acxiom_place",
        "housecanary_property_mortgage_lien",
        "google_latlon",
    ]
    # The return value is not needed here: individual fields are read back
    # through df.connectors.get() inside get_model_inputs().
    df.connectors.fetch(providers, df.inputs)

    model_inputs = get_model_inputs(df)

    # Build a one-row frame: every model input becomes a column and the
    # single row is labelled "0". A separate name is used so the `df`
    # request object is not shadowed.
    frame = pd.DataFrame.from_dict(
        {name: {"0": value} for name, value in model_inputs.items()}
    )

    # run_dataframe is defined elsewhere in this file (not visible here).
    prediction = run_dataframe(frame)
    return {"result": prediction[0]}
# Model input table: (provider, ((model input name, dotted path), ...)).
# The input name is the dictionary key handed to the model; the dotted path
# is passed verbatim to df.connectors.get() for that provider.
_MODEL_INPUT_SOURCES = (
    ("infutor_property_append", (
        ("infutor_property_append_property[0]_parking_type", "property.0.parking_type"),
        ("infutor_property_append_mortgage[0]_loan_code", "mortgage.0.loan_code"),
        ("infutor_property_append_property[0]_land_use", "property.0.land_use"),
        ("infutor_property_append_address[0]_state", "address.0.state"),
        ("infutor_property_append_property[0]_building_code", "property.0.building_code"),
        ("infutor_property_append_mortgage[0]_term", "mortgage.0.term"),
        ("infutor_property_append_property[0]_full_baths", "property.0.full_baths"),
        ("infutor_property_append_property[0]_stories_number", "property.0.stories_number"),
        ("infutor_property_append_property[0]_bedrooms", "property.0.bedrooms"),
        ("infutor_property_append_property[0]_residential_code", "property.0.residential_code"),
        ("infutor_property_append_mortgage[0]_date", "mortgage.0.date"),
        ("infutor_property_append_mortgage[0]_loan_to_value", "mortgage.0.loan_to_value"),
        ("infutor_property_append_property[0]_heat", "property.0.heat"),
        ("infutor_property_append_property[0]_stories_code", "property.0.stories_code"),
        ("infutor_property_append_mortgage[0]_deed_code", "mortgage.0.deed_code"),
        ("infutor_property_append_property[0]_baths_calculated", "property.0.baths_calculated"),
        ("infutor_property_append_property[0]_sales_date", "property.0.sales_date"),
        ("infutor_property_append_property[0]_air_conditioned", "property.0.air_conditioned"),
        ("infutor_property_append_property[0]_year_built", "property.0.year_built"),
        ("infutor_property_append_property[0]_property_indicator", "property.0.property_indicator"),
        ("infutor_property_append_mortgage[0]_due_date", "mortgage.0.due_date"),
        ("infutor_property_append_property[0]_parking_sqft", "property.0.parking_sqft"),
        ("infutor_property_append_property[0]_latitude", "property.0.latitude"),
        ("infutor_property_append_property[0]_garage", "property.0.garage"),
        ("infutor_property_append_property[0]_exterior_walls_type", "property.0.exterior_walls_type"),
        ("infutor_property_append_property[0]_baths", "property.0.baths"),
        ("infutor_property_append_property[0]_building_sqft_indicator", "property.0.building_sqft_indicator"),
    )),
    ("utilityscore_savings", (
        ("utilityscore_savings_data_ht_heatpump_scorechange", "data.ht_heatpump_scorechange"),
        # Fixed: this natgas feature used to read the heat-pump field
        # ("data.ht_heatpump_billsavings_annual").
        ("utilityscore_savings_data_ht_natgas_billsavings_annual", "data.ht_natgas_billsavings_annual"),
        # NOTE(review): the three paths below omit the "data." prefix used
        # by the two entries above, and the first contains a literal "." in
        # "1.28" that may be treated as a path separator. The last feature
        # is named after "utilityscore_bill" but is read from
        # "utilityscore_savings". Kept as-is -- confirm against the provider
        # response schema.
        ("utilityscore_savings_data_tl_1_28_billsavings_annual", "tl_1.28_billsavings_annual"),
        ("utilityscore_savings_data_ht_natgas_scorechange", "ht_natgas_scorechange"),
        ("utilityscore_bill_data_natural_gas_bill_current_month", "natural_gas_bill_current_month"),
    )),
    ("info_connect_company", (
        ("info_connect_company_results[0]_sic_list[0]_year_first_appeared", "results.0.sic_list.0.year_first_appeared"),
        ("info_connect_company_results[0]_location_longitude", "results.0.location.longitude"),
        ("info_connect_company_results[0]_location_employee_size_actual", "results.0.location_employee_size_actual"),
    )),
    ("acxiom_place", (
        ("acxiom_place_property_description_home_square_footage", "property_description.home_square_footage"),
        ("acxiom_place_property_value_assessed_value", "property_value.assessed_value"),
        ("acxiom_place_property_description_property_type_detail", "property_description.property_type_detail"),
        ("acxiom_place_property_value_market_value_quality_indicator", "property_value.market_value_quality_indicator"),
        ("acxiom_place_property_value_market_value", "property_value.market_value"),
        ("acxiom_place_property_description_year_built", "property_description.year_built"),
        ("acxiom_place_property_value_market_value_decile", "property_value.market_value_decile"),
    )),
    ("housecanary_property_mortgage_lien", (
        ("housecanary_property_mortgage_lien_mortgage_lien[0]_lien_type", "mortgage_lien.0.lien_type"),
        ("housecanary_property_mortgage_lien_address_info_city", "address_info.city"),
        ("housecanary_property_mortgage_lien_mortgage_lien[1]_lien_type", "mortgage_lien.1.lien_type"),
        ("housecanary_property_mortgage_lien_address_info_state", "address_info.state"),
        ("housecanary_property_mortgage_lien_mortgage_lien[0]_due_date", "mortgage_lien.0.due_date"),
        ("housecanary_property_mortgage_lien_address_info_latitude", "address_info.latitude"),
    )),
    ("google_latlon", (
        ("google_latlon_city", "city"),
    )),
)


def get_model_inputs(df):
    """Collect the raw model inputs from the provider connectors.

    For every entry of ``_MODEL_INPUT_SOURCES`` this calls
    ``df.connectors.get(provider, path)`` and stores the result under the
    model input name.

    Args:
        df: Object exposing ``connectors.get(provider, path)``.

    Returns:
        dict: Model input name -> value returned by the connector, in the
        order the inputs are listed in the table.
    """
    model_inputs = {}
    for provider, fields in _MODEL_INPUT_SOURCES:
        for name, path in fields:
            model_inputs[name] = df.connectors.get(provider, path)
    return model_inputs
# Python 2/3 compatibility aliases: pick the string / text / long-integer
# types that exist on the running interpreter.
PY3 = sys.version_info[0] == 3
if not PY3:
    # These names only exist on Python 2; this branch is never executed
    # on Python 3.
    string_types = (basestring,)  # noqa: F821
    text_type = unicode  # noqa: F821
    long_type = long  # noqa: F821
else:
    string_types = (str,)
    text_type = str
    long_type = int
def predict(row):
round_infutor_property_append_mortgage_0__date = np.float32(row[u'infutor_property_append_mortgage[0]_date'])
round_infutor_property_append_property_0__bedrooms = np.float32(row[u'infutor_property_append_property[0]_bedrooms'])
infutor_property_append_property_0__exterior_walls_type = row[u'infutor_property_append_property[0]_exterior_walls_type']
round_infutor_property_append_mortgage_0__due_date = np.float32(row[u'infutor_property_append_mortgage[0]_due_date'])
round_infutor_property_append_property_0__property_indicator = np.float32(row[u'infutor_property_append_property[0]_property_indicator'])
round_utilityscore_savings_data_ht_natgas_billsavings_annual = np.float32(row[u'utilityscore_savings_data_ht_natgas_billsavings_annual'])
round_infutor_property_append_property_0__stories_number_mi = np.float32(row[u'infutor_property_append_property[0]_stories_number-mi'])
round_info_connect_company_results_0__sic_list_0__year_first_appeared_mi = np.float32(row[u'info_connect_company_results[0]_sic_list[0]_year_first_appeared-mi'])
infutor_property_append_mortgage_0__loan_code = row[u'infutor_property_append_mortgage[0]_loan_code']
round_infutor_property_append_property_0__latitude = np.float32(row[u'infutor_property_append_property[0]_latitude'])
round_infutor_property_append_property_0__year_built_mi = np.float32(row[u'infutor_property_append_property[0]_year_built-mi'])
infutor_property_append_property_0__building_sqft_indicator = row[u'infutor_property_append_property[0]_building_sqft_indicator']
round_infutor_property_append_property_0__full_baths = np.float32(row[u'infutor_property_append_property[0]_full_baths'])
acxiom_place_property_value_market_value_quality_indicator = row[u'acxiom_place_property_value_market_value_quality_indicator']
round_infutor_property_append_mortgage_0__due_date_mi = np.float32(row[u'infutor_property_append_mortgage[0]_due_date-mi'])
round_acxiom_place_property_value_market_value = np.float32(row[u'acxiom_place_property_value_market_value'])
round_info_connect_company_results_0__location_longitude = np.float32(row[u'info_connect_company_results[0]_location_longitude'])
round_utilityscore_bill_data_natural_gas_bill_current_month = np.float32(row[u'utilityscore_bill_data_natural_gas_bill_current_month'])
round_infutor_property_append_property_0__year_built = np.float32(row[u'infutor_property_append_property[0]_year_built'])
round_utilityscore_savings_data_tl_1_28_billsavings_annual = np.float32(row[u'utilityscore_savings_data_tl_1_28_billsavings_annual'])
round_acxiom_place_property_description_home_square_footage = np.float32(row[u'acxiom_place_property_description_home_square_footage'])
round_info_connect_company_results_0__location_longitude_mi = np.float32(row[u'info_connect_company_results[0]_location_longitude-mi'])
round_infutor_property_append_mortgage_0__term_mi = np.float32(row[u'infutor_property_append_mortgage[0]_term-mi'])
round_info_connect_company_results_0__location_employee_size_actual_mi = np.float32(row[u'info_connect_company_results[0]_location_employee_size_actual-mi'])
housecanary_property_mortgage_lien_address_info_city = row[u'housecanary_property_mortgage_lien_address_info_city']
infutor_property_append_property_0__heat = row[u'infutor_property_append_property[0]_heat']
round_acxiom_place_property_value_assessed_value = np.float32(row[u'acxiom_place_property_value_assessed_value'])
infutor_property_append_property_0__building_code = row[u'infutor_property_append_property[0]_building_code']
round_housecanary_property_mortgage_lien_address_info_latitude = np.float32(row[u'housecanary_property_mortgage_lien_address_info_latitude'])
round_acxiom_place_property_value_market_value_decile = np.float32(row[u'acxiom_place_property_value_market_value_decile'])
round_infutor_property_append_property_0__baths = np.float32(row[u'infutor_property_append_property[0]_baths'])
round_infutor_property_append_property_0__sales_date = np.float32(row[u'infutor_property_append_property[0]_sales_date'])
round_utilityscore_savings_data_ht_natgas_scorechange = np.float32(row[u'utilityscore_savings_data_ht_natgas_scorechange'])
round_infutor_property_append_property_0__parking_sqft_mi = np.float32(row[u'infutor_property_append_property[0]_parking_sqft-mi'])
housecanary_property_mortgage_lien_mortgage_lien_0__lien_type = row[u'housecanary_property_mortgage_lien_mortgage_lien[0]_lien_type']
round_acxiom_place_property_description_year_built = np.float32(row[u'acxiom_place_property_description_year_built'])
acxiom_place_property_description_property_type_detail = row[u'acxiom_place_property_description_property_type_detail']
housecanary_property_mortgage_lien_address_info_state = row[u'housecanary_property_mortgage_lien_address_info_state']
infutor_property_append_property_0__residential_code = row[u'infutor_property_append_property[0]_residential_code']
round_infutor_property_append_mortgage_0__loan_to_value_mi = np.float32(row[u'infutor_property_append_mortgage[0]_loan_to_value-mi'])
round_infutor_property_append_property_0__baths_calculated = np.float32(row[u'infutor_property_append_property[0]_baths_calculated'])
infutor_property_append_address_0__state = row[u'infutor_property_append_address[0]_state']
infutor_property_append_mortgage_0__deed_code = row[u'infutor_property_append_mortgage[0]_deed_code']
housecanary_property_mortgage_lien_mortgage_lien_1__lien_type = row[u'housecanary_property_mortgage_lien_mortgage_lien[1]_lien_type']
round_infutor_property_append_property_0__parking_sqft = np.float32(row[u'infutor_property_append_property[0]_parking_sqft'])
google_latlon_city = row[u'google_latlon_city']
round_infutor_property_append_mortgage_0__term = np.float32(row[u'infutor_property_append_mortgage[0]_term'])
infutor_property_append_property_0__air_conditioned = row[u'infutor_property_append_property[0]_air_conditioned']
round_infutor_property_append_property_0__bedrooms_mi = np.float32(row[u'infutor_property_append_property[0]_bedrooms-mi'])
infutor_property_append_property_0__garage = row[u'infutor_property_append_property[0]_garage']
round_infutor_property_append_property_0__land_use = np.float32(row[u'infutor_property_append_property[0]_land_use'])
infutor_property_append_property_0__parking_type = row[u'infutor_property_append_property[0]_parking_type']
round_info_connect_company_results_0__sic_list_0__year_first_appeared = np.float32(row[u'info_connect_company_results[0]_sic_list[0]_year_first_appeared'])
infutor_property_append_property_0__stories_code = row[u'infutor_property_append_property[0]_stories_code']
round_info_connect_company_results_0__location_employee_size_actual = np.float32(row[u'info_connect_company_results[0]_location_employee_size_actual'])
round_housecanary_property_mortgage_lien_mortgage_lien_0__due_date = np.float32(row[u'housecanary_property_mortgage_lien_mortgage_lien[0]_due_date'])
round_utilityscore_savings_data_ht_heatpump_scorechange = np.float32(row[u'utilityscore_savings_data_ht_heatpump_scorechange'])
return sum([
-1.2817281,
0.023195254141447758239 * (round_info_connect_company_results_0__location_employee_size_actual_mi),
-0.092069137965156117032 * (round_infutor_property_append_property_0__sales_date > 1007294400.0 and
round_utilityscore_bill_data_natural_gas_bill_current_month <= 261.0 and
round_acxiom_place_property_value_assessed_value > 9822.0 and
round_housecanary_property_mortgage_lien_address_info_latitude > 25.973400115966797),
0.049301193001907848978 * (housecanary_property_mortgage_lien_address_info_city == u'San Jose'),
0.018409753655599749023 * (infutor_property_append_property_0__exterior_walls_type == u'STU'),
0.067001006767696144606 * (infutor_property_append_mortgage_0__deed_code == u'EQ'),
-0.00093551103108941910651 * (infutor_property_append_mortgage_0__deed_code == u'MG'),
-0.37760931500094174762 * (infutor_property_append_property_0__exterior_walls_type == u'STO'),
-0.11247661921688331677 * (not infutor_property_append_mortgage_0__loan_code == u'Federal housing administration' and
round_infutor_property_append_property_0__latitude > 41.70152282714844 and
round_infutor_property_append_property_0__parking_sqft > 481.5),
-0.00057966072725506515972 * (not infutor_property_append_property_0__air_conditioned == u'APF' and
round_info_connect_company_results_0__location_longitude > -117.21580505371094 and
round_infutor_property_append_mortgage_0__due_date <= 2460110336.0),
-0.0014601920943518574038 * (round_info_connect_company_results_0__location_longitude > -86.60734558105469 and
round_housecanary_property_mortgage_lien_mortgage_lien_0__due_date <= 2398291200.0),
0.061974872690480381499 * (round_info_connect_company_results_0__location_employee_size_actual <= 8.5 and
round_infutor_property_append_property_0__land_use <= 182.5 and
round_infutor_property_append_property_0__latitude <= 35.021629333496094 and
round_acxiom_place_property_value_market_value <= 790500.0),
0.014397574335188532968 * (not infutor_property_append_property_0__air_conditioned == u'APF' and
round_infutor_property_append_mortgage_0__date <= 1477051136.0 and
round_infutor_property_append_property_0__property_indicator <= 25.5 and
round_acxiom_place_property_description_home_square_footage <= 4106.0),
0.25892782480643650178 * (housecanary_property_mortgage_lien_mortgage_lien_1__lien_type == u'stand_alone_second'),
-0.034044581440941676376 * (not infutor_property_append_mortgage_0__loan_code == u'Federal housing administration' and
round_infutor_property_append_property_0__parking_sqft > 481.5 and
round_housecanary_property_mortgage_lien_address_info_latitude > 41.700897216796875),
0.022892893248813078744 * (not google_latlon_city == u'Wisconsin' and
not housecanary_property_mortgage_lien_mortgage_lien_1__lien_type == u'revolving_credit_line' and
round_info_connect_company_results_0__location_longitude <= -77.73294067382812 and
round_utilityscore_savings_data_ht_heatpump_scorechange <= 20.5),
0.14623070702035836921 * (infutor_property_append_property_0__building_sqft_indicator == u'Living' and
round_infutor_property_append_property_0__parking_sqft_mi <= 0.5 and
round_housecanary_property_mortgage_lien_address_info_latitude <= 38.788536071777344),
-0.081089077728497549469 * (round_infutor_property_append_property_0__bedrooms_mi),
-0.0029483748328035619228 * (round_infutor_property_append_mortgage_0__due_date_mi),
0.0041081785949446773809 * (infutor_property_append_property_0__residential_code == u'true' and
round_infutor_property_append_property_0__latitude <= 36.97774124145508),
0.11067889953820891913 * (not infutor_property_append_address_0__state == u'FL' and
round_info_connect_company_results_0__sic_list_0__year_first_appeared > 2011.5 and
round_infutor_property_append_property_0__land_use <= 182.5 and
round_acxiom_place_property_value_assessed_value <= 174337.5),
0.067876977958606268815 * (infutor_property_append_property_0__parking_type == u'small_count'),
0.00049273856028515480333 * (infutor_property_append_property_0__building_sqft_indicator == u'Living' and
round_infutor_property_append_property_0__latitude <= 38.83824920654297 and
round_infutor_property_append_property_0__parking_sqft_mi <= 0.5),
-0.10524023912460792052 * (infutor_property_append_property_0__parking_type == u'10'),
-0.0089108279749572326389 * (not housecanary_property_mortgage_lien_mortgage_lien_0__lien_type == u'arm' and
13.5 < round_utilityscore_savings_data_tl_1_28_billsavings_annual <= 64.5 and
round_acxiom_place_property_value_assessed_value <= 173590.75),
0.10210013027132765207 * (infutor_property_append_mortgage_0__deed_code == u'SE'),
-0.005192225125273015017 * (not housecanary_property_mortgage_lien_mortgage_lien_1__lien_type == u'arm' and
not infutor_property_append_address_0__state == u'AL' and
not infutor_property_append_address_0__state == u'TX' and
round_infutor_property_append_property_0__property_indicator > 25.5),
0.21260417980706441954 * (round_info_connect_company_results_0__location_longitude <= -86.60734558105469 and
round_infutor_property_append_mortgage_0__date <= 1429185536.0 and
round_infutor_property_append_property_0__latitude <= 42.265342712402344),
0.00082349943731364514624 * (round_info_connect_company_results_0__sic_list_0__year_first_appeared_mi),
0.05709733286115848544 * (infutor_property_append_property_0__garage == u'900'),
-0.0068840063547447507414 * (infutor_property_append_property_0__garage == u'nan' and
not infutor_property_append_property_0__residential_code == u'true' and
round_utilityscore_savings_data_ht_natgas_billsavings_annual > 67.5),
0.033919707504219104299 * (not infutor_property_append_address_0__state == u'AL' and
round_infutor_property_append_mortgage_0__date <= 1478347264.0 and
round_infutor_property_append_property_0__property_indicator <= 25.5 and
round_acxiom_place_property_description_home_square_footage <= 4106.0),
-0.045784478398248784625 * (housecanary_property_mortgage_lien_address_info_state == u'VT'),
-0.0090722532439276373784 * (round_infutor_property_append_property_0__baths <= 850.0 and
29.551132202148438 < round_infutor_property_append_property_0__latitude <= 32.48159408569336 and
round_infutor_property_append_property_0__property_indicator <= 25.5),
-0.06240244506303161548 * (round_infutor_property_append_property_0__land_use > 113.5 and
round_infutor_property_append_property_0__property_indicator > 10.5 and
round_utilityscore_savings_data_ht_heatpump_scorechange <= 19.5 and
round_housecanary_property_mortgage_lien_address_info_latitude > 26.149200439453125),
-0.035882271689557244942 * (google_latlon_city == u'Washington'),
-0.0070213632527199578912 * (google_latlon_city == u'Wisconsin'),
0.058621277088209855499 * (infutor_property_append_address_0__state == u'CA'),
0.025140573694474359356 * (not housecanary_property_mortgage_lien_mortgage_lien_0__lien_type == u'commercial' and
not housecanary_property_mortgage_lien_mortgage_lien_0__lien_type == u'va' and
round_info_connect_company_results_0__location_longitude <= -77.73294067382812 and
round_utilityscore_savings_data_ht_natgas_scorechange <= 9.5),
-0.031139254740244363961 * (not infutor_property_append_property_0__heat == u'ST0' and
round_utilityscore_bill_data_natural_gas_bill_current_month <= 234.5 and
round_utilityscore_savings_data_ht_heatpump_scorechange > 20.5 and
round_housecanary_property_mortgage_lien_mortgage_lien_0__due_date <= 2319364608.0),
-0.02016035121696956034 * (infutor_property_append_property_0__garage == u'460'),
-0.0012913543125716946385 * (acxiom_place_property_value_market_value_quality_indicator == u'Assessor provided HMV'),
-0.064376088532432318434 * (round_info_connect_company_results_0__location_employee_size_actual_mi <= 0.5 and
round_housecanary_property_mortgage_lien_address_info_latitude > 35.07745361328125),
0.10875634433391008493 * (infutor_property_append_property_0__parking_type == u'OTP'),
0.048117080679030581336 * (housecanary_property_mortgage_lien_mortgage_lien_0__lien_type == u'nan' and
not infutor_property_append_property_0__building_sqft_indicator == u'Living' and
round_info_connect_company_results_0__location_longitude <= -77.78125 and
round_utilityscore_savings_data_ht_heatpump_scorechange <= 20.5),
0.035747745948532617943 * (not infutor_property_append_address_0__state == u'MS' and
round_info_connect_company_results_0__sic_list_0__year_first_appeared > 2011.5 and
round_acxiom_place_property_value_market_value_decile > 2.5 and
round_housecanary_property_mortgage_lien_address_info_latitude > 26.149200439453125),
-0.0046859879670745974503 * (round_infutor_property_append_property_0__parking_sqft > 481.5 and
round_utilityscore_savings_data_ht_natgas_scorechange > 6.5 and
round_acxiom_place_property_value_assessed_value > 173572.25),
0.001123016180305815969 * (not infutor_property_append_property_0__garage == u'910' and
infutor_property_append_property_0__stories_code == u'20' and
round_info_connect_company_results_0__location_longitude <= -86.60734558105469 and
round_housecanary_property_mortgage_lien_address_info_latitude > 32.6228141784668),
-0.023367056946218711655 * (not housecanary_property_mortgage_lien_mortgage_lien_0__lien_type == u'arm' and
not housecanary_property_mortgage_lien_mortgage_lien_0__lien_type == u'open_end' and
round_infutor_property_append_mortgage_0__term > 11.0 and
round_utilityscore_savings_data_tl_1_28_billsavings_annual > 13.5),
-0.058780826771765971872 * (not housecanary_property_mortgage_lien_mortgage_lien_0__lien_type == u'va' and
not infutor_property_append_mortgage_0__deed_code == u'SE' and
round_infutor_property_append_property_0__sales_date > 1007294400.0 and
round_acxiom_place_property_value_assessed_value > 9822.0),
-0.26414878820012022143 * (infutor_property_append_property_0__heat == u'WA0'),
0.060152537289799107301 * (housecanary_property_mortgage_lien_address_info_state == u'small_count'),
0.39978229038756629699 * (housecanary_property_mortgage_lien_mortgage_lien_0__lien_type == u'va'),
0.0054936792191972727561 * (not infutor_property_append_property_0__stories_code == u'20' and
round_info_connect_company_results_0__location_longitude <= -77.73294067382812 and
round_infutor_property_append_mortgage_0__date <= 1430870400.0 and
round_infutor_property_append_property_0__latitude <= 46.322837829589844),
0.079003524843442626824 * (not infutor_property_append_mortgage_0__loan_code == u'Conventional' and
round_infutor_property_append_property_0__parking_sqft > 417.0 and
round_infutor_property_append_property_0__parking_sqft_mi <= 0.5),
0.0892078696820238215 * (housecanary_property_mortgage_lien_address_info_state == u'NV'),
-0.2201020087224584143 * (housecanary_property_mortgage_lien_mortgage_lien_0__lien_type == u'stand_alone_refi'),
0.17766584714344402229 * (infutor_property_append_mortgage_0__loan_code == u'Federal housing administration'),
3.2891806633716988548E-05 * (round_infutor_property_append_property_0__baths_calculated),
-0.0037757989371318429611 * (not housecanary_property_mortgage_lien_mortgage_lien_0__lien_type == u'stand_alone_refi' and
not infutor_property_append_address_0__state == u'AL' and
round_utilityscore_savings_data_ht_heatpump_scorechange > 1.5 and
round_utilityscore_savings_data_tl_1_28_billsavings_annual <= 96.5),
-0.19837047460053552905 * (housecanary_property_mortgage_lien_mortgage_lien_0__lien_type == u'arm'),
-0.047207146909264079138 * (29.551132202148438 < round_infutor_property_append_property_0__latitude <= 33.250518798828125 and
round_infutor_property_append_property_0__property_indicator <= 25.5),
-0.029469681859407039631 * (round_infutor_property_append_property_0__latitude <= 46.322837829589844 and
round_utilityscore_savings_data_tl_1_28_billsavings_annual > 9.5 and
round_acxiom_place_property_value_assessed_value > 173572.25 and
round_housecanary_property_mortgage_lien_address_info_latitude > 26.149200439453125),
0.0013068472855566413537 * (infutor_property_append_property_0__residential_code == u'true' and
round_housecanary_property_mortgage_lien_address_info_latitude <= 37.38833999633789),
0.026421255539679267682 * (housecanary_property_mortgage_lien_address_info_state == u'TN'),
-0.031087349940688759758 * (infutor_property_append_address_0__state == u'HI'),
0.0066014236440385468119 * (infutor_property_append_property_0__parking_type == u'900'),
-0.056200138283587858234 * (not infutor_property_append_property_0__garage == u'110' and
round_utilityscore_bill_data_natural_gas_bill_current_month <= 169.0 and
round_utilityscore_savings_data_ht_heatpump_scorechange > 1.5 and
round_utilityscore_savings_data_tl_1_28_billsavings_annual <= 96.5),
-0.00013490740783744678511 * (not infutor_property_append_mortgage_0__loan_code == u'Federal housing administration' and
round_utilityscore_savings_data_tl_1_28_billsavings_annual <= 96.5),
-0.0024875155928570230612 * (housecanary_property_mortgage_lien_address_info_state == u'OK'),
0.020004685347299131642 * (infutor_property_append_property_0__parking_type == u'120'),
-0.10346560941511488696 * (housecanary_property_mortgage_lien_mortgage_lien_0__lien_type == u'small_count'),
0.21129243303116562736 * (housecanary_property_mortgage_lien_mortgage_lien_1__lien_type == u'fha'),
0.00025316564425088302183 * (round_info_connect_company_results_0__sic_list_0__year_first_appeared > 2011.5 and
round_infutor_property_append_property_0__land_use <= 182.5 and
round_infutor_property_append_property_0__latitude > 35.021629333496094 and
round_acxiom_place_property_value_market_value_decile > 2.5),
-0.094351831214349091148 * (round_info_connect_company_results_0__location_employee_size_actual_mi <= 0.5 and
round_housecanary_property_mortgage_lien_mortgage_lien_0__due_date <= 2344377600.0),
0.068992624418633310968 * (not housecanary_property_mortgage_lien_mortgage_lien_1__lien_type == u'revolving_credit_line' and
not infutor_property_append_property_0__stories_code == u'20' and
round_info_connect_company_results_0__location_employee_size_actual <= 8.5 and
round_infutor_property_append_property_0__sales_date <= 1430006400.0),
0.036947894544292848862 * (housecanary_property_mortgage_lien_mortgage_lien_0__lien_type == u'conventional'),
0.0023078475380004250493 * (not infutor_property_append_property_0__heat == u'WA0' and
round_infutor_property_append_property_0__latitude <= 46.322837829589844 and
round_infutor_property_append_property_0__property_indicator <= 26.5 and
round_infutor_property_append_property_0__sales_date > 1007294400.0),
-0.07615649345706404505 * (round_infutor_property_append_mortgage_0__date > 1037534400.0 and
round_infutor_property_append_mortgage_0__term > 12.5 and
round_utilityscore_bill_data_natural_gas_bill_current_month <= 298.0 and
round_housecanary_property_mortgage_lien_mortgage_lien_0__due_date <= 2065564800.0),
0.48804826697903586075 * (infutor_property_append_address_0__state == u'AL'),
0.041219353380197276682 * (round_infutor_property_append_property_0__land_use <= 182.5 and
round_infutor_property_append_property_0__latitude <= 36.31702423095703 and
round_infutor_property_append_property_0__parking_sqft > 481.5 and
round_utilityscore_savings_data_ht_heatpump_scorechange <= 12.5),
-3.9695644752190851362E-06 * (round_infutor_property_append_property_0__year_built),
0.00043225999743165029058 * (infutor_property_append_property_0__exterior_walls_type == u'CBS'),
-0.024499969412636758009 * (round_infutor_property_append_mortgage_0__date > 1429185536.0 and
round_infutor_property_append_property_0__parking_sqft > 487.5 and
round_utilityscore_savings_data_ht_natgas_scorechange > 5.5 and
round_acxiom_place_property_value_assessed_value > 157875.0),
0.0054561435174921126337 * (round_infutor_property_append_property_0__bedrooms_mi <= 0.5 and
round_infutor_property_append_property_0__latitude <= 38.78849411010742),
-0.079709804516415502795 * (infutor_property_append_property_0__air_conditioned == u'0'),
-0.042583513431250032133 * (infutor_property_append_address_0__state == u'ID'),
0.3837381654966293687 * (infutor_property_append_property_0__exterior_walls_type == u'FWD'),
-0.052102154565908706529 * (not infutor_property_append_address_0__state == u'AL' and
25.5 < round_infutor_property_append_property_0__property_indicator <= 85.0),
0.096604371398342797628 * (acxiom_place_property_value_market_value_quality_indicator == u'AVM used to create HMV' and
round_infutor_property_append_property_0__property_indicator <= 25.5 and
round_acxiom_place_property_value_assessed_value <= 858625.5 and
round_housecanary_property_mortgage_lien_address_info_latitude <= 35.07745361328125),
0.14114048740825299588 * (round_infutor_property_append_mortgage_0__date <= 1478347264.0 and
round_infutor_property_append_property_0__latitude <= 46.322837829589844 and
356.0 < round_infutor_property_append_property_0__parking_sqft <= 481.5),
0.027941705700137785262 * (not infutor_property_append_address_0__state == u'TX' and
round_info_connect_company_results_0__location_employee_size_actual <= 8.5 and
round_infutor_property_append_mortgage_0__date <= 1196510464.0 and
round_acxiom_place_property_description_year_built > 1966.5),
-0.071671042345031243936 * (infutor_property_append_property_0__heat == u'ST0'),
-6.6432894328848670799E-12 * (round_infutor_property_append_mortgage_0__date),
0.013398939224272536641 * (infutor_property_append_property_0__heat == u'FA0'),
-0.056949320279225362773 * (not google_latlon_city == u'Wisconsin' and
not housecanary_property_mortgage_lien_mortgage_lien_1__lien_type == u'revolving_credit_line' and
round_utilityscore_savings_data_ht_heatpump_scorechange > 20.5 and
round_utilityscore_savings_data_ht_natgas_billsavings_annual <= 158.5),
0.015201564349282211536 * (not acxiom_place_property_value_market_value_quality_indicator == u'Assessor provided HMV' and
not housecanary_property_mortgage_lien_mortgage_lien_0__lien_type == u'arm' and
not infutor_property_append_property_0__parking_type == u'140' and
round_infutor_property_append_property_0__property_indicator <= 25.5),
0.041875774901891961444 * (infutor_property_append_property_0__building_code == u'CX0'),
0.0054950232752226961172 * (infutor_property_append_property_0__building_sqft_indicator == u'Living'),
0.051027861607374554176 * (infutor_property_append_property_0__residential_code == u'true'),
0.002842963143241221402 * (not google_latlon_city == u'Wisconsin' and
round_infutor_property_append_property_0__year_built > 1905.5 and
round_utilityscore_bill_data_natural_gas_bill_current_month <= 234.5 and
round_utilityscore_savings_data_ht_natgas_billsavings_annual <= 98.5),
-0.012311617873986558866 * (round_infutor_property_append_mortgage_0__date > 901152000.0 and
round_infutor_property_append_mortgage_0__due_date > 1668513536.0 and
round_infutor_property_append_property_0__sales_date > 695563200.0 and
round_housecanary_property_mortgage_lien_mortgage_lien_0__due_date <= 2031393536.0),
0.0081224959532529509426 * (infutor_property_append_property_0__exterior_walls_type == u'WOS'),
6.3732936410792523683E-06 * (round_info_connect_company_results_0__sic_list_0__year_first_appeared),
0.052964780766471243612 * (infutor_property_append_address_0__state == u'NJ'),
-0.054770987529025670637 * (round_infutor_property_append_mortgage_0__date <= 1478347264.0 and
round_utilityscore_savings_data_tl_1_28_billsavings_annual > 3.5 and
round_acxiom_place_property_value_assessed_value > 158175.0 and
round_housecanary_property_mortgage_lien_address_info_latitude > 26.149200439453125),
0.20990642949433990783 * (google_latlon_city == u'Alaska'),
0.0016883355175206257552 * (round_infutor_property_append_property_0__bedrooms),
0.0731751409830084798 * (housecanary_property_mortgage_lien_mortgage_lien_0__lien_type == u'fha'),
0.04677214740453635583 * (acxiom_place_property_description_property_type_detail == u'Condo'),
-0.013720910837640537885 * (housecanary_property_mortgage_lien_mortgage_lien_0__lien_type == u'commercial' and
round_utilityscore_savings_data_ht_natgas_billsavings_annual <= 288.5 and
round_acxiom_place_property_value_assessed_value > 173572.25 and
round_housecanary_property_mortgage_lien_address_info_latitude > 26.149200439453125),
-0.0041471775409361397863 * (round_info_connect_company_results_0__location_employee_size_actual),
-0.024782974857746269126 * (housecanary_property_mortgage_lien_address_info_state == u'MS'),
0.065463102675698908728 * (not housecanary_property_mortgage_lien_mortgage_lien_0__lien_type == u'open_end' and
not infutor_property_append_address_0__state == u'AL' and
round_infutor_property_append_property_0__property_indicator <= 25.5 and
round_acxiom_place_property_value_assessed_value <= 174337.5),
0.057445685419087365309 * (round_infutor_property_append_mortgage_0__date <= 1460332800.0 and
round_infutor_property_append_mortgage_0__loan_to_value_mi <= 0.5 and
round_infutor_property_append_property_0__parking_sqft > 481.5 and
round_utilityscore_savings_data_ht_natgas_scorechange <= 7.5),
-0.024573653046536428346 * (round_info_connect_company_results_0__location_longitude <= -79.74142456054688 and
round_infutor_property_append_property_0__parking_sqft > 481.5 and
round_acxiom_place_property_value_assessed_value > 173572.25 and
round_housecanary_property_mortgage_lien_address_info_latitude > 26.149200439453125),
-0.081086516685212925437 * (round_infutor_property_append_property_0__parking_sqft > 481.5 and
round_utilityscore_savings_data_ht_natgas_scorechange > 7.5 and
round_acxiom_place_property_value_assessed_value > 157875.0),
0.078103359725012505566 * (housecanary_property_mortgage_lien_mortgage_lien_1__lien_type == u'nan' and
round_info_connect_company_results_0__location_longitude <= -79.74142456054688 and
round_utilityscore_savings_data_ht_heatpump_scorechange <= 19.5 and
round_acxiom_place_property_value_assessed_value > 157875.0),
0.040335383676739750003 * (not infutor_property_append_property_0__garage == u'nan' and
round_infutor_property_append_property_0__latitude <= 38.64501190185547 and
round_acxiom_place_property_value_market_value_decile > 1.5),
-0.00066914816245097156339 * (round_infutor_property_append_mortgage_0__term_mi > 0.5 and
round_infutor_property_append_property_0__parking_sqft_mi > 0.5 and
round_utilityscore_savings_data_ht_natgas_scorechange > 7.5),
0.033761203097387601502 * (round_info_connect_company_results_0__location_longitude_mi),
-0.13261534622741569245 * (housecanary_property_mortgage_lien_address_info_city == u'Richmond'),
-0.034164887540077147621 * (round_infutor_property_append_mortgage_0__date > 1429185536.0 and
round_infutor_property_append_property_0__land_use <= 182.5 and
round_infutor_property_append_property_0__stories_number_mi > 0.5 and
round_acxiom_place_property_value_assessed_value > 9822.0),
0.04610668254685500872 * (not housecanary_property_mortgage_lien_mortgage_lien_0__lien_type == u'commercial' and
not infutor_property_append_address_0__state == u'AL' and
round_utilityscore_bill_data_natural_gas_bill_current_month <= 261.0 and
round_utilityscore_savings_data_ht_natgas_billsavings_annual <= 98.5),
0.030615821431097797745 * (infutor_property_append_property_0__parking_type == u'780'),
-0.16571466196852752129 * (housecanary_property_mortgage_lien_mortgage_lien_1__lien_type == u'arm'),
0.052090102801425224033 * (infutor_property_append_property_0__heat == u'small_count'),
-0.00125419227685314035 * (infutor_property_append_property_0__parking_type == u'460'),
-0.10483327402118035321 * (not housecanary_property_mortgage_lien_mortgage_lien_0__lien_type == u'nan' and
not infutor_property_append_property_0__residential_code == u'true'),
-0.017889484101711244229 * (not housecanary_property_mortgage_lien_address_info_state == u'nan' and
round_info_connect_company_results_0__sic_list_0__year_first_appeared > 2011.5 and
round_infutor_property_append_mortgage_0__due_date > 1553990400.0 and
round_infutor_property_append_property_0__land_use > 182.5),
0.16418096184793673498 * (not housecanary_property_mortgage_lien_mortgage_lien_0__lien_type == u'commercial' and
round_infutor_property_append_property_0__land_use <= 182.5 and
round_acxiom_place_property_value_assessed_value <= 858625.5 and
round_housecanary_property_mortgage_lien_address_info_latitude <= 35.07745361328125),
-0.030032803171152588906 * (round_infutor_property_append_mortgage_0__date <= 1477051136.0 and
round_infutor_property_append_mortgage_0__due_date <= 2553336064.0 and
round_infutor_property_append_property_0__parking_sqft_mi > 0.5 and
round_acxiom_place_property_value_assessed_value > 19476.5),
0.017960900963541127223 * (not infutor_property_append_address_0__state == u'MS' and
round_infutor_property_append_property_0__property_indicator <= 15.5 and
round_housecanary_property_mortgage_lien_address_info_latitude <= 35.07745361328125),
-0.017537077932073118214 * (not housecanary_property_mortgage_lien_mortgage_lien_0__lien_type == u'va' and
round_infutor_property_append_property_0__parking_sqft_mi > 0.5 and
round_acxiom_place_property_value_assessed_value > 9822.0 and
round_acxiom_place_property_value_market_value_decile > 2.5),
0.042725586816626061337 * (not housecanary_property_mortgage_lien_mortgage_lien_0__lien_type == u'nan' and
infutor_property_append_property_0__building_sqft_indicator == u'Living' and
round_infutor_property_append_mortgage_0__date <= 1429466368.0 and
round_housecanary_property_mortgage_lien_address_info_latitude <= 38.788536071777344),
0.0010050520210167785806 * (not infutor_property_append_address_0__state == u'TX' and
round_info_connect_company_results_0__sic_list_0__year_first_appeared > 2011.5 and
round_infutor_property_append_mortgage_0__date <= 1196510464.0 and
round_utilityscore_savings_data_ht_heatpump_scorechange > 0.5),
-0.098564306244529470424 * (not housecanary_property_mortgage_lien_mortgage_lien_1__lien_type == u'fha' and
round_infutor_property_append_mortgage_0__date > 1477051136.0 and
round_infutor_property_append_mortgage_0__due_date <= 2460110336.0 and
round_infutor_property_append_property_0__land_use > 113.5),
0.032696001788155619472 * (not google_latlon_city == u'Georgia' and
not infutor_property_append_property_0__heat == u'WA0' and
round_infutor_property_append_mortgage_0__loan_to_value_mi <= 0.5 and
round_housecanary_property_mortgage_lien_address_info_latitude <= 41.71910095214844),
0.043317689651104607229 * (infutor_property_append_property_0__stories_code == u'20' and
round_info_connect_company_results_0__location_longitude <= -86.60734558105469 and
round_infutor_property_append_property_0__latitude > 32.62242126464844 and
round_housecanary_property_mortgage_lien_mortgage_lien_0__due_date > 1855224064.0),
-0.00012570053266100668496 * (round_infutor_property_append_mortgage_0__date > 1196510464.0 and
round_infutor_property_append_property_0__baths_calculated > 150.0 and
round_infutor_property_append_property_0__latitude > 25.772335052490234 and
round_housecanary_property_mortgage_lien_mortgage_lien_0__due_date > 2048760064.0),
-0.004375814916617885332 * (round_infutor_property_append_property_0__year_built_mi),
-0.0055559070562017061692 * (not housecanary_property_mortgage_lien_mortgage_lien_1__lien_type == u'arm' and
not infutor_property_append_address_0__state == u'AL' and
round_utilityscore_savings_data_tl_1_28_billsavings_annual > 9.5 and
round_housecanary_property_mortgage_lien_address_info_latitude > 26.149200439453125),
-0.049643481129169261812 * (not infutor_property_append_address_0__state == u'AL' and
round_infutor_property_append_property_0__property_indicator <= 25.5 and
round_acxiom_place_property_value_market_value_decile <= 2.5 and
round_housecanary_property_mortgage_lien_mortgage_lien_0__due_date <= 2302343936.0),
-0.10439466147047891942 * (not infutor_property_append_property_0__heat == u'000' and
round_infutor_property_append_property_0__parking_sqft > 478.5 and
round_utilityscore_bill_data_natural_gas_bill_current_month <= 298.0 and
round_acxiom_place_property_value_market_value > 271500.0),
0.63443595739491698282 * (housecanary_property_mortgage_lien_address_info_city == u'Miami'),
0.017961558440306214096 * (google_latlon_city == u'Tennessee'),
0.026495692107433851253 * (not housecanary_property_mortgage_lien_address_info_state == u'IN' and
round_info_connect_company_results_0__location_employee_size_actual <= 10.5 and
round_infutor_property_append_property_0__land_use <= 182.5 and
round_utilityscore_savings_data_ht_heatpump_scorechange <= 6.5),
0.19850461451300194238 * (not infutor_property_append_address_0__state == u'IN' and
round_infutor_property_append_mortgage_0__date <= 1430157568.0 and
round_infutor_property_append_property_0__parking_sqft <= 481.5 and
round_acxiom_place_property_value_market_value_decile > 2.5),
-0.040589024368467646608 * (not infutor_property_append_property_0__stories_code == u'20' and
round_info_connect_company_results_0__location_longitude <= -86.60734558105469 and
round_infutor_property_append_mortgage_0__date > 1429401600.0 and
round_utilityscore_savings_data_tl_1_28_billsavings_annual <= 84.5),
-0.00098255748475251135712 * (round_infutor_property_append_property_0__parking_sqft > 481.5 and
round_acxiom_place_property_value_assessed_value > 173572.25 and
round_housecanary_property_mortgage_lien_address_info_latitude > 42.43675231933594),
0.0028358494938966298526 * (not housecanary_property_mortgage_lien_mortgage_lien_0__lien_type == u'va' and
round_info_connect_company_results_0__location_employee_size_actual <= 8.5 and
round_acxiom_place_property_value_market_value_decile > 2.5 and
round_housecanary_property_mortgage_lien_mortgage_lien_0__due_date > 1573732864.0),
-0.11125731383684984988 * (infutor_property_append_property_0__heat == u'000'),
0.01840129918681300894 * (not housecanary_property_mortgage_lien_mortgage_lien_1__lien_type == u'fha' and
housecanary_property_mortgage_lien_mortgage_lien_1__lien_type == u'nan' and
not infutor_property_append_property_0__heat == u'HA0' and
round_utilityscore_savings_data_ht_natgas_scorechange <= 9.5),
0.022988718243437667593 * (not housecanary_property_mortgage_lien_mortgage_lien_0__lien_type == u'commercial' and
round_info_connect_company_results_0__location_longitude <= -77.69873046875 and
round_infutor_property_append_property_0__baths_calculated <= 1150.0 and
round_utilityscore_savings_data_ht_heatpump_scorechange <= 19.5),
0.0066143950936835560483 * (infutor_property_append_mortgage_0__loan_code == u'Private party lender'),
-0.00075417061862730169937 * (not infutor_property_append_property_0__residential_code == u'true' and
round_info_connect_company_results_0__location_employee_size_actual <= 10.5 and
round_utilityscore_bill_data_natural_gas_bill_current_month <= 98.5 and
round_housecanary_property_mortgage_lien_address_info_latitude > 26.238903045654297),
-0.0047142359823650692788 * (round_infutor_property_append_mortgage_0__term_mi),
-0.18941933660613796686 * (infutor_property_append_address_0__state == u'MS'),
-0.026549570003581851596 * (not infutor_property_append_property_0__stories_code == u'20' and
round_infutor_property_append_property_0__year_built > 1920.5 and
round_utilityscore_savings_data_tl_1_28_billsavings_annual <= 79.5),
0.082891745101753233627 * (not housecanary_property_mortgage_lien_address_info_state == u'IN' and
not housecanary_property_mortgage_lien_mortgage_lien_0__lien_type == u'arm' and
round_utilityscore_savings_data_ht_heatpump_scorechange <= 6.5 and
round_utilityscore_savings_data_ht_natgas_scorechange <= 1.5),
-0.024960636750697019759 * (round_infutor_property_append_property_0__property_indicator <= 25.5 and
round_acxiom_place_property_value_assessed_value > 34424.5 and
round_acxiom_place_property_value_market_value_decile <= 2.5 and
round_housecanary_property_mortgage_lien_address_info_latitude > 29.578269958496094),
0.11174522271667881868 * (housecanary_property_mortgage_lien_address_info_city == u'Las Vegas'),
-0.091111510522722594096 * (not housecanary_property_mortgage_lien_mortgage_lien_0__lien_type == u'commercial' and
round_infutor_property_append_mortgage_0__date > 1429185536.0 and
round_infutor_property_append_property_0__latitude > 36.392608642578125 and
round_infutor_property_append_property_0__parking_sqft > 481.5),
-0.021516703839645094642 * (round_infutor_property_append_property_0__parking_sqft > 481.5 and
round_acxiom_place_property_value_assessed_value > 173572.25 and
round_housecanary_property_mortgage_lien_address_info_latitude > 41.28672409057617),
-0.024972065931734000604 * (infutor_property_append_property_0__exterior_walls_type == u'BRI'),
-0.030007288717990986127 * (not infutor_property_append_address_0__state == u'CA' and
round_infutor_property_append_property_0__property_indicator <= 25.5 and
round_acxiom_place_property_value_market_value > 49000.0 and
round_housecanary_property_mortgage_lien_address_info_latitude > 35.07745361328125),
-0.026123465520471858548 * (infutor_property_append_property_0__heat == u'HA0'),
-0.0011160608603480034421 * (infutor_property_append_address_0__state == u'KY'),
-0.029644789799809240921 * (not acxiom_place_property_value_market_value_quality_indicator == u'AVM used to create HMV' and
round_infutor_property_append_property_0__latitude <= 33.96094512939453 and
round_infutor_property_append_property_0__property_indicator <= 25.5 and
round_housecanary_property_mortgage_lien_address_info_latitude <= 35.07745361328125),
-0.01448908975275075596 * (housecanary_property_mortgage_lien_address_info_state == u'KY'),
0.16553754683016277949 * (infutor_property_append_property_0__exterior_walls_type == u'XXX'),
0.0077119566714770326765 * (round_infutor_property_append_property_0__full_baths),
0.019229881739981097982 * (round_infutor_property_append_mortgage_0__date <= 1477051136.0 and
round_infutor_property_append_mortgage_0__due_date <= 2553336064.0 and
round_infutor_property_append_property_0__parking_sqft_mi <= 0.5 and
round_utilityscore_savings_data_ht_heatpump_scorechange <= 34.5),
0.0093787265593843552325 * (not housecanary_property_mortgage_lien_mortgage_lien_0__lien_type == u'commercial' and
round_info_connect_company_results_0__location_longitude <= -77.73294067382812 and
round_infutor_property_append_property_0__baths_calculated <= 1150.0 and
round_utilityscore_savings_data_ht_natgas_scorechange <= 9.5),
0.004175000301684980708 * (infutor_property_append_property_0__garage == u'120'),
0.063334120434741944528 * (not housecanary_property_mortgage_lien_mortgage_lien_0__lien_type == u'commercial' and
infutor_property_append_property_0__residential_code == u'true' and
round_infutor_property_append_property_0__bedrooms <= 10.0 and
round_utilityscore_savings_data_ht_natgas_scorechange <= 7.5),
-0.051287244520322586294 * (round_info_connect_company_results_0__location_longitude > -86.60734558105469 and
round_utilityscore_savings_data_ht_heatpump_scorechange <= 29.5 and
round_housecanary_property_mortgage_lien_address_info_latitude > 27.95344352722168),
0.084777552840039779869 * (infutor_property_append_property_0__exterior_walls_type == u'FSD'),
-0.060354241451994382339 * (infutor_property_append_address_0__state == u'VT'),
-0.025097867952441850481 * (infutor_property_append_property_0__exterior_walls_type == u'SDS'),
-7.1444728048161489039E-14 * (round_infutor_property_append_mortgage_0__due_date),
0.019499215285723854596 * (housecanary_property_mortgage_lien_address_info_city == u'Los Angeles'),
-7.3522681830185315899E-07 * (round_acxiom_place_property_description_year_built),
0.10122444813161098065 * (not infutor_property_append_property_0__heat == u'WA0' and
infutor_property_append_property_0__stories_code == u'20' and
round_info_connect_company_results_0__location_employee_size_actual <= 8.5 and
round_housecanary_property_mortgage_lien_mortgage_lien_0__due_date > 1948622336.0),
0.0020249745728749173404 * (not infutor_property_append_property_0__parking_type == u'nan' and
round_infutor_property_append_property_0__property_indicator <= 10.5 and
round_housecanary_property_mortgage_lien_address_info_latitude <= 37.13014221191406) ])
def get_type_conversion():
    """Return the per-column conversion table consumed by ``parse_numeric_types``.

    Each key is a raw column name; each value is a dict with two entries:
    ``convert_func`` (the callable applied to every cell of the column) and
    ``convert_args`` (a tuple of extra positional arguments for that callable).
    All four columns listed here are converted by ``parse_date`` using the
    same timestamp format string.
    """
    timestamp_format = '%Y-%m-%dT%H:%M:%SZ'
    date_columns = (
        u'housecanary_property_mortgage_lien_mortgage_lien[0]_due_date',
        u'infutor_property_append_mortgage[0]_date',
        u'infutor_property_append_property[0]_sales_date',
        u'infutor_property_append_mortgage[0]_due_date',
    )
    conversions = {}
    for column in date_columns:
        # A fresh spec dict per column, exactly as in a hand-written literal.
        conversions[column] = {
            'convert_func': parse_date,
            'convert_args': (timestamp_format,),
        }
    return conversions
# Columns for which add_missing_indicators() creates a companion
# "<column>-mi" column holding 1 where the value is null and 0 otherwise.
INDICATOR_COLS = [
    u'infutor_property_append_mortgage[0]_term',
    u'info_connect_company_results[0]_location_employee_size_actual',
    u'info_connect_company_results[0]_sic_list[0]_year_first_appeared',
    u'infutor_property_append_property[0]_year_built',
    u'infutor_property_append_property[0]_bedrooms',
    u'infutor_property_append_mortgage[0]_due_date',
    u'info_connect_company_results[0]_location_longitude',
    u'infutor_property_append_property[0]_parking_sqft',
    u'infutor_property_append_property[0]_stories_number',
    u'infutor_property_append_mortgage[0]_loan_to_value',
]
# Replacement value per numeric column; impute_values() writes it into every
# null cell of the matching column.
IMPUTE_VALUES = {
    u'utilityscore_savings_data_ht_heatpump_scorechange': 11.0,
    u'utilityscore_savings_data_ht_natgas_billsavings_annual': 72.0,
    u'info_connect_company_results[0]_sic_list[0]_year_first_appeared': 2015.0,
    u'infutor_property_append_property[0]_land_use': 163.0,
    u'acxiom_place_property_value_assessed_value': 173581.5,
    u'utilityscore_savings_data_tl_1_28_billsavings_annual': 44.0,
    u'infutor_property_append_property[0]_stories_number': 100.0,
    u'infutor_property_append_mortgage[0]_term': 30.0,
    u'infutor_property_append_property[0]_full_baths': 2.0,
    u'acxiom_place_property_description_home_square_footage': 2000.0,
    u'infutor_property_append_property[0]_bedrooms': 3.0,
    u'infutor_property_append_mortgage[0]_date': 1430049600.0,
    u'infutor_property_append_property[0]_baths_calculated': 200.0,
    u'infutor_property_append_mortgage[0]_loan_to_value': 76.5,
    u'acxiom_place_property_description_year_built': 1984.0,
    u'infutor_property_append_property[0]_property_indicator': 11.0,
    u'utilityscore_bill_data_natural_gas_bill_current_month': 90.0,
    u'infutor_property_append_mortgage[0]_due_date': 2307571200.0,
    u'infutor_property_append_property[0]_parking_sqft': 504.0,
    u'utilityscore_savings_data_ht_natgas_scorechange': 5.0,
    u'infutor_property_append_property[0]_latitude': 39.262570,
    u'infutor_property_append_property[0]_baths': 200.0,
    u'info_connect_company_results[0]_location_employee_size_actual': 4.0,
    u'housecanary_property_mortgage_lien_mortgage_lien[0]_due_date': 2122156800.0,
    u'housecanary_property_mortgage_lien_address_info_latitude': 39.528283,
    u'acxiom_place_property_value_market_value_decile': 4.0,
    u'infutor_property_append_property[0]_year_built': 1983.0,
    u'acxiom_place_property_value_market_value': 286000.0,
    u'info_connect_company_results[0]_location_longitude': -87.994023,
    u'infutor_property_append_property[0]_sales_date': 1355961600.0,
}
def bag_of_words(text):
    """Return the set of lower-cased whole words found in ``text``.

    Words are maximal runs of ``\\w`` characters (Unicode-aware).

    A float argument yields an empty set; presumably this is how a missing
    cell (NaN) arrives here. The check uses ``isinstance`` so that float
    subclasses such as ``numpy.float64`` are covered as well, and ``None``
    is treated the same way instead of failing inside ``re.findall``.
    """
    if text is None or isinstance(text, float):
        return set()
    return {
        word.lower()
        for word in re.findall(r'\w+', text, re.UNICODE | re.IGNORECASE)
    }
def parse_date(x, date_format):
    """Convert a date value to a number; return NaN when it cannot be parsed.

    The kind of number depends on ``date_format``:

    * starts with ``v2`` and contains ``%f``: the ``v2`` marker is dropped
      from the format and the result is milliseconds since the Unix epoch;
    * otherwise, contains ``%M``: the result is whole seconds since the
      Unix epoch;
    * otherwise: the result is the date's proleptic Gregorian ordinal.

    In the first two cases a trailing sign-plus-digits suffix (for example a
    ``+0000`` offset) is removed from the text before parsing, and the parsed
    time tuple is passed to ``calendar.timegm``.

    Float inputs are first converted with ``long_type`` (expected to be
    defined elsewhere in this module), presumably so that ``str(x)`` carries
    no fractional part.
    """
    # Raw string: the previous non-raw literal relied on the invalid escape
    # sequence "\+", which newer Python versions warn about.
    offset_suffix = r'[+-][0-9]+$'
    try:
        # float values no longer pass isinstance(x, np.float64)
        if isinstance(x, (np.float64, float)):
            x = long_type(x)
        if '%f' in date_format and date_format.startswith('v2'):
            text = re.sub(offset_suffix, '', str(x))
            parsed = datetime.strptime(text, date_format[2:])
            seconds = calendar.timegm(parsed.timetuple())
            return seconds * 1000 + parsed.microsecond // 1000
        if '%M' in date_format:
            text = re.sub(offset_suffix, '', str(x))
            parsed = datetime.strptime(text, date_format)
            return calendar.timegm(parsed.timetuple())
        return datetime.strptime(str(x), date_format).toordinal()
    except Exception:
        # Best effort: any parsing problem maps to NaN, but interpreter-level
        # signals (KeyboardInterrupt, SystemExit) are no longer swallowed.
        return float('nan')
def parse_percentage(s):
    """Convert a percentage value to a float.

    Floats are returned unchanged and ints are converted to float. Anything
    else is treated as text: every ``%`` character is removed and the rest
    is parsed with ``float``. Values that cannot be handled that way (no
    ``replace`` method, non-numeric text, ...) yield NaN.
    """
    if isinstance(s, float):
        return s
    if isinstance(s, int):
        return float(s)
    try:
        return float(s.replace('%', ''))
    except Exception:
        # Best effort for bad cells, without trapping KeyboardInterrupt or
        # SystemExit the way a bare ``except:`` would.
        return float('nan')
def parse_nonstandard_na(s):
    """Convert ``s`` to a float, mapping anything non-numeric to NaN.

    If a column contains numbers and a unique non-numeric value, that
    non-numeric value is considered to be N/A. Infinite results are also
    reported as NaN.
    """
    try:
        value = float(s)
    except Exception:
        # Not convertible: treat as N/A. ``Exception`` (rather than a bare
        # ``except:``) lets KeyboardInterrupt and SystemExit propagate.
        return float('nan')
    if np.isinf(value):
        return float('nan')
    return value
def parse_length(s):
    """Convert a feet-and-inches string to a number of inches.

    * both ``'`` and ``"`` present (``5'10"``): feet * 12 + inches;
    * only ``'`` present (``6'``): feet * 12;
    * otherwise: the value with any ``"`` removed, taken as inches.

    Inputs that cannot be handled (non-strings, non-numeric text) yield NaN.
    """
    try:
        if '"' in s and "'" in s:
            feet, inches = s.split("'")[:2]
            return float(feet) * 12 + float(inches.replace('"', ''))
        if "'" in s:
            return float(s.replace("'", '')) * 12
        return float(s.replace('"', ''))
    except Exception:
        # Best effort for bad cells; unlike a bare ``except:`` this does not
        # trap KeyboardInterrupt or SystemExit.
        return float('nan')
def parse_currency(s):
    """Strip currency markers and commas from ``s`` and return it as a float.

    Values that are not instances of ``text_type`` (defined elsewhere in this
    module) yield NaN, as does text that is still not numeric after the
    currency symbols ($, euro, pound, fullwidth pound, yen, fullwidth yen),
    the literal ``EUR`` and all commas have been removed.
    """
    if not isinstance(s, text_type):
        return float('nan')
    # "$" needs no backslash inside a character class; the previous "\$" was
    # an invalid escape sequence in a non-raw string literal.
    s = re.sub(u'[$\u20AC\u00A3\uFFE1\u00A5\uFFE5]|(EUR)', '', s)
    s = s.replace(',', '')
    try:
        return float(s)
    except ValueError:
        # float() on text only fails with ValueError.
        return float('nan')
def parse_currency_replace_cents_period(val, currency_symbol):
    """Parse a currency string that uses ``.`` as the decimal mark.

    NaN is passed through unchanged. Otherwise ``val`` must be an instance of
    ``string_types`` (defined elsewhere in this module); the first occurrence
    of ``currency_symbol``, all spaces and all commas are removed and the
    remainder is parsed with ``float``. Text that still is not numeric
    yields NaN.

    Raises:
        ValueError: if ``val`` is neither NaN nor a string.
    """
    try:
        is_missing = bool(np.isnan(val))
    except TypeError:
        # np.isnan rejects non-numeric input such as strings.
        is_missing = False
    if is_missing:
        return val

    if not isinstance(val, string_types):
        raise ValueError('Found wrong value for currency: {}'.format(val))

    try:
        cleaned = val.replace(currency_symbol, "", 1).replace(" ", "").replace(",", "")
        return float(cleaned)
    except ValueError:
        return float('nan')
def parse_currency_replace_cents_comma(val, currency_symbol):
    """Parse a currency string that uses ``,`` as the decimal mark.

    NaN is passed through unchanged. Otherwise ``val`` must be an instance of
    ``string_types`` (defined elsewhere in this module); the first occurrence
    of ``currency_symbol``, all spaces and all periods are removed, commas
    are turned into periods, and the remainder is parsed with ``float``.
    Text that still is not numeric yields NaN.

    Raises:
        ValueError: if ``val`` is neither NaN nor a string.
    """
    try:
        is_missing = bool(np.isnan(val))
    except TypeError:
        # np.isnan rejects non-numeric input such as strings.
        is_missing = False
    if is_missing:
        return val

    if not isinstance(val, string_types):
        raise ValueError('Found wrong value for currency: {}'.format(val))

    try:
        cleaned = val.replace(currency_symbol, "", 1).replace(" ", "")
        # Drop periods first, then promote the comma to the decimal mark.
        cleaned = cleaned.replace(".", "").replace(",", ".")
        return float(cleaned)
    except ValueError:
        return float('nan')
def parse_currency_replace_no_cents(val, currency_symbol):
    """Parse a currency string in which neither ``,`` nor ``.`` is a decimal mark.

    NaN is passed through unchanged. Otherwise ``val`` must be an instance of
    ``string_types`` (defined elsewhere in this module); the first occurrence
    of ``currency_symbol``, all spaces, all commas and all periods are
    removed and the remainder is parsed with ``float``. Text that still is
    not numeric yields NaN.

    Raises:
        ValueError: if ``val`` is neither NaN nor a string.
    """
    try:
        is_missing = bool(np.isnan(val))
    except TypeError:
        # np.isnan rejects non-numeric input such as strings.
        is_missing = False
    if is_missing:
        return val

    if not isinstance(val, string_types):
        raise ValueError('Found wrong value for currency: {}'.format(val))

    try:
        cleaned = val.replace(currency_symbol, "", 1).replace(" ", "")
        cleaned = cleaned.replace(",", "").replace(".", "")
        return float(cleaned)
    except ValueError:
        return float('nan')
def parse_numeric_types(ds):
    """Convert string-encoded columns (dates, currency, etc.) to numbers.

    Every column of ``ds`` that has an entry in the table returned by
    ``get_type_conversion()`` is replaced by the result of applying that
    entry's ``convert_func`` (with ``convert_args``) to each cell. ``ds`` is
    modified in place and also returned.
    """
    conversions = get_type_conversion()
    for column in ds.columns:
        spec = conversions.get(column)
        if spec is None:
            continue
        ds[column] = ds[column].apply(spec['convert_func'],
                                      args=spec['convert_args'])
    return ds
def sanitize_name(name):
    """Return ``name`` stripped of surrounding whitespace, with each of the
    characters ``- $ . { } "`` replaced by an underscore."""
    cleaned = name.strip()
    for unsafe_char in ('-', '$', '.', '{', '}', '"'):
        cleaned = cleaned.replace(unsafe_char, '_')
    return cleaned
def rename_columns(ds):
    """Rename the columns of ``ds`` in place to their sanitized names.

    Each column name goes through ``sanitize_name``. A name that comes out
    empty becomes ``'Unnamed: <k>'`` (k counts such columns from 0). A name
    that has already been produced for an earlier column gets ``_1``,
    ``_2``, ... appended, counted separately per colliding name. The renamed
    frame is returned.
    """
    mapping = {}
    taken = set()
    next_suffix = {}
    unnamed_seen = 0
    for original in ds.columns:
        candidate = sanitize_name(original)
        if candidate == '':
            candidate = 'Unnamed: %d' % unnamed_seen
            unnamed_seen += 1
        if candidate in taken:
            number = next_suffix.get(candidate, 1)
            next_suffix[candidate] = number + 1
            candidate = candidate + '_%d' % number
        taken.add(candidate)
        mapping[original] = candidate
    ds.rename(columns=mapping, inplace=True)
    return ds
def add_missing_indicators(ds):
    """Add a ``<column>-mi`` flag column for every entry of ``INDICATOR_COLS``.

    The flag is 1 where the source column is null and 0 elsewhere. ``ds`` is
    modified in place and returned.
    """
    for source in INDICATOR_COLS:
        flag_column = source + '-mi'
        ds[flag_column] = ds[source].isnull().astype(int)
    return ds
def impute_values(ds):
    """Fill null cells with the per-column value from ``IMPUTE_VALUES``.

    Columns without an entry in ``IMPUTE_VALUES`` are left untouched. ``ds``
    is modified in place and returned.
    """
    for column in ds:
        if column not in IMPUTE_VALUES:
            continue
        missing = ds[column].isnull()
        ds.loc[missing, column] = IMPUTE_VALUES[column]
    return ds
# Known levels per categorical column. combine_small_levels() replaces any
# non-null value that is not listed for its column with 'small_count'.
BIG_LEVELS = {
    u'infutor_property_append_property[0]_garage': [
        u'1', u'10', u'110', u'120', u'2', u'420', u'450', u'460',
        u'670', u'780', u'810', u'900', u'910', u'920', u'950', u'A00',
    ],
    u'housecanary_property_mortgage_lien_address_info_state': [
        u'AK', u'AL', u'AR', u'AZ', u'CA', u'CO', u'CT', u'FL', u'GA', u'HI',
        u'IA', u'ID', u'IL', u'IN', u'KS', u'KY', u'LA', u'MA', u'MD', u'ME',
        u'MI', u'MN', u'MO', u'MS', u'MT', u'NC', u'ND', u'NE', u'NH', u'NJ',
        u'NM', u'NV', u'NY', u'OH', u'OK', u'OR', u'PA', u'RI', u'SC', u'TN',
        u'TX', u'UT', u'VA', u'VT', u'WA', u'WI',
    ],
    u'infutor_property_append_property[0]_parking_type': [
        u'10', u'110', u'120', u'140', u'2', u'420', u'450', u'460',
        u'670', u'780', u'810', u'900', u'910', u'920', u'950', u'A00',
        u'OFP', u'OOP', u'OTP', u'PAP',
    ],
    u'infutor_property_append_property[0]_heat': [
        u'000', u'00E', u'00G', u'ACE', u'BB0', u'BBE', u'CF0', u'CL0',
        u'FA0', u'FAG', u'FAH', u'FN0', u'HA0', u'HP0', u'HS0', u'HW0',
        u'PK0', u'RD0', u'SP0', u'ST0', u'UN0', u'WA0',
    ],
    u'infutor_property_append_property[0]_stories_code': [
        u'10', u'15', u'20', u'25', u'30', u'40', u'50', u'X10', u'X20',
    ],
    u'acxiom_place_property_description_property_type_detail': [
        u'2-4 unit (duplex, triplex, quad)',
        u'Apartment (5+ units)',
        u'Condo',
        u'Miscellaneous residence (combo store/flat)',
        u'Mobile home',
        u'Single family dwelling unit',
    ],
    u'infutor_property_append_property[0]_building_code': [
        u'A0B', u'C00', u'C0R', u'CN0', u'CNR', u'CT4', u'CV0', u'CX0',
        u'ES0', u'MA0', u'R00', u'R30', u'RM0', u'RS0', u'U00', u'UML',
        u'UW0', u'UWD', u'UWZ',
    ],
    u'google_latlon_city': [
        u'Alabama', u'Alaska', u'Arizona', u'Arkansas', u'California',
        u'Colorado', u'Connecticut', u'District Of Columbia', u'Florida',
        u'Georgia', u'Hawaii', u'Idaho', u'Illinois', u'Indiana', u'Iowa',
        u'Kansas', u'Kentucky', u'Louisiana', u'Maine', u'Maryland',
        u'Massachusetts', u'Michigan', u'Minnesota', u'Mississippi',
        u'Missouri', u'Montana', u'Nebraska', u'Nevada', u'New Hampshire',
        u'New Jersey', u'New Mexico', u'New York', u'North Carolina',
        u'North Dakota', u'Ohio', u'Oklahoma', u'Ontario', u'Oregon',
        u'Pennsylvania', u'Rhode Island', u'South Carolina', u'Tennessee',
        u'Texas', u'Utah', u'Vermont', u'Virginia', u'Washington',
        u'Wisconsin',
    ],
    u'housecanary_property_mortgage_lien_address_info_city': [
        u'Anaheim', u'Arlington', u'Aurora', u'Austin', u'Billings',
        u'Brooklyn', u'Chicago', u'Cincinnati', u'Colorado Springs',
        u'Columbus', u'Dallas', u'Denver', u'Grand Rapids', u'Greenwood',
        u'Houston', u'Indianapolis', u'Jacksonville', u'Kansas City',
        u'Las Vegas', u'Los Angeles', u'Louisville', u'Madison', u'Medford',
        u'Miami', u'Minneapolis', u'New York', u'Oklahoma City', u'Phoenix',
        u'Plymouth', u'Portland', u'Richmond', u'Riverside', u'Rochester',
        u'Saint Louis', u'San Antonio', u'San Diego', u'San Francisco',
        u'San Jose', u'Seattle', u'Springfield', u'Troy',
    ],
    u'infutor_property_append_mortgage[0]_deed_code': [
        u'EQ', u'MG', u'MO', u'SE', u'TR',
    ],
    u'infutor_property_append_mortgage[0]_loan_code': [
        u'Conventional',
        u'Federal housing administration',
        u'Private party lender',
        u'Small business administration',
        u'Veterans affairs',
    ],
    u'acxiom_place_property_value_market_value_quality_indicator': [
        u'AVM used to create HMV',
        u'Assessed value',
        u'Assessor provided HMV',
    ],
    u'infutor_property_append_property[0]_residential_code': [
        u'false', u'true',
    ],
    u'infutor_property_append_address[0]_state': [
        u'AL', u'AR', u'AZ', u'CA', u'CO', u'CT', u'FL', u'GA', u'HI', u'IA',
        u'ID', u'IL', u'IN', u'KS', u'KY', u'LA', u'MA', u'MD', u'ME', u'MI',
        u'MN', u'MO', u'MS', u'MT', u'NC', u'NE', u'NH', u'NJ', u'NV', u'NY',
        u'OH', u'OK', u'OR', u'PA', u'RI', u'SC', u'TN', u'TX', u'UT', u'VA',
        u'VT', u'WA', u'WI',
    ],
    u'housecanary_property_mortgage_lien_mortgage_lien[0]_lien_type': [
        u'arm', u'commercial', u'construction', u'conventional',
        u'fannie_mae_freddie_mac', u'fha', u'open_end',
        u'revolving_credit_line', u'seller_take_back', u'stand_alone_refi',
        u'stand_alone_second', u'va',
    ],
    u'infutor_property_append_property[0]_air_conditioned': [
        u'0', u'ACE', u'AFA', u'AHT', u'APF', u'APK', u'APR', u'ASP',
    ],
    u'infutor_property_append_property[0]_building_sqft_indicator': [
        u'Building', u'Gross', u'Living',
    ],
    u'housecanary_property_mortgage_lien_mortgage_lien[1]_lien_type': [
        u'arm', u'commercial', u'construction', u'conventional', u'fha',
        u'open_end', u'revolving_credit_line', u'stand_alone_first',
        u'stand_alone_refi', u'stand_alone_second', u'va',
    ],
    u'infutor_property_append_property[0]_exterior_walls_type': [
        u'ALV', u'BRI', u'BRS', u'BRV', u'BRW', u'CBS', u'CNB', u'CON',
        u'FCB', u'FMM', u'FMV', u'FRA', u'FRV', u'FSD', u'FST', u'FWD',
        u'LPS', u'MET', u'MSN', u'PCP', u'PLY', u'SDS', u'STO', u'STU',
        u'TLU', u'TUU', u'VIN', u'WDH', u'WOO', u'WOS', u'XXX',
    ],
}
# Columns whose null cells combine_small_levels() rewrites to 'small_count'.
# The mapping is empty, so that step never fires.
SMALL_NULLS = {}
# Type code per input column. combine_small_levels() rewrites null cells to
# the string 'nan' for columns coded 'C' (or 'T'); every column listed here
# is coded either 'N' or 'C'.
VAR_TYPES = {
    u'housecanary_property_mortgage_lien_address_info_latitude': 'N',
    u'infutor_property_append_property[0]_parking_type': 'C',
    u'utilityscore_savings_data_ht_heatpump_scorechange': 'N',
    u'utilityscore_savings_data_ht_natgas_billsavings_annual': 'N',
    u'infutor_property_append_mortgage[0]_loan_code': 'C',
    u'info_connect_company_results[0]_sic_list[0]_year_first_appeared': 'N',
    u'housecanary_property_mortgage_lien_mortgage_lien[0]_lien_type': 'C',
    u'infutor_property_append_property[0]_land_use': 'N',
    u'acxiom_place_property_value_assessed_value': 'N',
    u'utilityscore_savings_data_tl_1_28_billsavings_annual': 'N',
    u'infutor_property_append_address[0]_state': 'C',
    u'infutor_property_append_property[0]_building_code': 'C',
    u'infutor_property_append_mortgage[0]_term': 'N',
    u'housecanary_property_mortgage_lien_address_info_state': 'C',
    u'infutor_property_append_property[0]_full_baths': 'N',
    u'infutor_property_append_property[0]_stories_number': 'N',
    u'acxiom_place_property_description_home_square_footage': 'N',
    u'acxiom_place_property_value_market_value_quality_indicator': 'C',
    u'infutor_property_append_property[0]_bedrooms': 'N',
    u'infutor_property_append_property[0]_residential_code': 'C',
    u'infutor_property_append_mortgage[0]_date': 'N',
    u'housecanary_property_mortgage_lien_address_info_city': 'C',
    u'infutor_property_append_mortgage[0]_loan_to_value': 'N',
    u'infutor_property_append_property[0]_heat': 'C',
    u'infutor_property_append_property[0]_stories_code': 'C',
    u'infutor_property_append_mortgage[0]_deed_code': 'C',
    u'acxiom_place_property_description_year_built': 'N',
    u'infutor_property_append_property[0]_property_indicator': 'N',
    u'utilityscore_bill_data_natural_gas_bill_current_month': 'N',
    u'infutor_property_append_mortgage[0]_due_date': 'N',
    u'infutor_property_append_property[0]_parking_sqft': 'N',
    u'utilityscore_savings_data_ht_natgas_scorechange': 'N',
    u'infutor_property_append_property[0]_latitude': 'N',
    u'infutor_property_append_property[0]_garage': 'C',
    u'infutor_property_append_property[0]_exterior_walls_type': 'C',
    u'infutor_property_append_property[0]_baths': 'N',
    u'acxiom_place_property_description_property_type_detail': 'C',
    u'housecanary_property_mortgage_lien_mortgage_lien[0]_due_date': 'N',
    u'google_latlon_city': 'C',
    u'info_connect_company_results[0]_location_employee_size_actual': 'N',
    u'infutor_property_append_property[0]_building_sqft_indicator': 'C',
    u'acxiom_place_property_value_market_value_decile': 'N',
    u'infutor_property_append_property[0]_year_built': 'N',
    u'acxiom_place_property_value_market_value': 'N',
    u'info_connect_company_results[0]_location_longitude': 'N',
    u'infutor_property_append_property[0]_sales_date': 'N',
    u'infutor_property_append_property[0]_air_conditioned': 'C',
    u'housecanary_property_mortgage_lien_mortgage_lien[1]_lien_type': 'C',
    u'infutor_property_append_property[0]_baths_calculated': 'N',
}
def combine_small_levels(ds):
    """Collapse rare levels and fill nulls, column by column.

    For every column of ``ds``:

    * if ``BIG_LEVELS`` has an entry for the column, every non-null value
      that is not listed in that entry is replaced by ``'small_count'``;
    * if ``SMALL_NULLS`` holds a truthy entry for the column, null values
      are replaced by ``'small_count'``;
    * if ``VAR_TYPES`` marks the column as ``'C'`` or ``'T'``, any nulls
      still present are replaced by the string ``'nan'`` (a float column
      is cast to ``object`` first so it can hold the string).

    The frame is modified in place and also returned.
    """
    for column in ds:
        frequent_levels = BIG_LEVELS.get(column, None)
        if frequent_levels is not None:
            is_rare = np.logical_and(~ds[column].isin(frequent_levels),
                                     ds[column].notnull())
            if is_rare.any():
                ds.loc[is_rare, column] = 'small_count'

        if SMALL_NULLS.get(column):
            is_null = ds[column].isnull()
            if is_null.any():
                ds.loc[is_null, column] = 'small_count'

        if VAR_TYPES.get(column) in ('C', 'T'):
            # Recomputed on purpose: the steps above may already have
            # filled some (or all) of the nulls.
            is_null = ds[column].isnull()
            if is_null.any():
                if ds[column].dtype == float:
                    ds[column] = ds[column].astype(object)
                ds.loc[is_null, column] = 'nan'
    return ds
# Strings treated as missing values, passed to pandas.read_csv() via
# ``na_values`` on top of the markers pandas recognises itself.
NA_VALUES = [
    'null', 'na', 'n/a', '#N/A', 'N/A', '?', '.', '',
    'Inf', 'INF', 'inf', '-inf', '-Inf', '-INF',
    ' ', 'None', 'NaN', '-nan', 'NULL', 'NA',
    '-1.#IND', '1.#IND', '-1.#QNAN', '1.#QNAN',
    '#NA', '#N/A N/A', '-NaN', 'nan',
]

# Spellings of boolean literals that are mapped to 1 / 0 in numeric columns.
TRUE_VALUES = [
    'TRUE',
    'True',
    'true',
]
FALSE_VALUES = [
    'FALSE',
    'False',
    'false',
]

# Encoding used for the input CSV when the caller does not supply one.
DEFAULT_ENCODING = 'utf8'
# Columns that must be present in the input frame; validate_columns()
# raises if any of them is missing. One entry per line so that no name
# can be broken across a physical line (a line break inside a string
# literal is a syntax error).
REQUIRED_COLUMNS = [
    u"infutor_property_append_property[0]_parking_type",
    u"utilityscore_savings_data_ht_heatpump_scorechange",
    u"utilityscore_savings_data_ht_natgas_billsavings_annual",
    u"infutor_property_append_mortgage[0]_loan_code",
    u"info_connect_company_results[0]_sic_list[0]_year_first_appeared",
    u"acxiom_place_property_description_home_square_footage",
    u"infutor_property_append_property[0]_land_use",
    u"acxiom_place_property_value_assessed_value",
    u"acxiom_place_property_description_property_type_detail",
    u"utilityscore_savings_data_tl_1_28_billsavings_annual",
    u"infutor_property_append_address[0]_state",
    u"infutor_property_append_property[0]_building_code",
    u"infutor_property_append_mortgage[0]_term",
    u"utilityscore_savings_data_ht_natgas_scorechange",
    u"infutor_property_append_property[0]_full_baths",
    u"infutor_property_append_property[0]_stories_number",
    u"housecanary_property_mortgage_lien_mortgage_lien[0]_lien_type",
    u"acxiom_place_property_value_market_value_quality_indicator",
    u"infutor_property_append_property[0]_bedrooms",
    u"infutor_property_append_property[0]_residential_code",
    u"infutor_property_append_mortgage[0]_date",
    u"housecanary_property_mortgage_lien_address_info_city",
    u"infutor_property_append_mortgage[0]_loan_to_value",
    u"infutor_property_append_property[0]_heat",
    u"infutor_property_append_property[0]_stories_code",
    u"infutor_property_append_mortgage[0]_deed_code",
    u"acxiom_place_property_description_year_built",
    u"infutor_property_append_property[0]_property_indicator",
    u"utilityscore_bill_data_natural_gas_bill_current_month",
    u"infutor_property_append_mortgage[0]_due_date",
    u"infutor_property_append_property[0]_parking_sqft",
    u"housecanary_property_mortgage_lien_address_info_state",
    u"infutor_property_append_property[0]_latitude",
    u"infutor_property_append_property[0]_garage",
    u"infutor_property_append_property[0]_exterior_walls_type",
    u"infutor_property_append_property[0]_baths",
    u"info_connect_company_results[0]_location_employee_size_actual",
    u"housecanary_property_mortgage_lien_mortgage_lien[0]_due_date",
    u"google_latlon_city",
    u"housecanary_property_mortgage_lien_address_info_latitude",
    u"infutor_property_append_property[0]_building_sqft_indicator",
    u"acxiom_place_property_value_market_value_decile",
    u"infutor_property_append_property[0]_year_built",
    u"acxiom_place_property_value_market_value",
    u"info_connect_company_results[0]_location_longitude",
    u"infutor_property_append_property[0]_sales_date",
    u"infutor_property_append_property[0]_air_conditioned",
    u"housecanary_property_mortgage_lien_mortgage_lien[1]_lien_type",
    u"infutor_property_append_property[0]_baths_calculated",
]
def validate_columns(column_list):
    """Check that every name in ``REQUIRED_COLUMNS`` is in ``column_list``.

    Returns ``True`` when nothing is missing; otherwise raises
    ``ValueError`` whose message lists the set of missing column names.
    """
    required = set(REQUIRED_COLUMNS)
    if required <= set(column_list):
        return True
    raise ValueError("Required columns missing: %s" %
                     (required - set(column_list)))
def convert_bool(ds):
    """Align column dtypes with the types declared in ``VAR_TYPES``.

    Per column:

    * declared ``'C'`` but stored as int/float: cast to ``object`` and
      turn the non-null values into text;
    * stored as bool: cast to float when declared ``'N'``, to text
      otherwise;
    * stored as ``object``:
        - declared ``'N'`` and not listed in the mapping returned by
          ``get_type_conversion()``: replace the ``TRUE_VALUES`` /
          ``FALSE_VALUES`` spellings by 1 / 0, then cast to float;
        - otherwise, if the mapping has no entry for the column, turn
          the non-null values into text.

    The frame is modified in place and also returned.
    """
    type_conversion = get_type_conversion()
    for name in ds.columns:
        declared = VAR_TYPES.get(name)
        stored = ds[name].dtype

        if declared == 'C' and stored in (int, float):
            present = ds[name].notnull()
            ds[name] = ds[name].astype(object)
            ds.loc[present, name] = ds.loc[present, name].astype(text_type)
            continue

        if stored == bool:
            ds[name] = ds[name].astype(float if declared == 'N' else text_type)
            continue

        if stored == object:
            if declared == 'N' and name not in type_conversion:
                # TRUE spellings are rewritten before FALSE ones are
                # looked up, same as two consecutive passes.
                for spellings, number in ((TRUE_VALUES, 1), (FALSE_VALUES, 0)):
                    hits = ds[name].apply(lambda cell: cell in spellings)
                    if np.any(hits):
                        ds.loc[hits, name] = number
                ds[name] = ds[name].astype(float)
            elif type_conversion.get(name) is None:
                present = ds[name].notnull()
                ds.loc[present, name] = ds.loc[present, name].astype(text_type)
    return ds
def get_dtypes():
    """Return ``{column: object}`` for every ``VAR_TYPES`` entry equal to 'C'.

    ``run()`` passes the result to ``pandas.read_csv`` as ``dtype``.
    """
    categorical = {}
    for name, kind in VAR_TYPES.items():
        if kind == 'C':
            categorical[name] = object
    return categorical
def predict_dataframe(ds):
    """Apply the module-level ``predict`` to each row of ``ds`` (axis=1).

    Returns whatever ``DataFrame.apply`` produces for those per-row
    results; ``run_dataframe`` passes it through a logistic transform.
    """
    per_row = ds.apply(predict, axis=1)
    return per_row
def run_dataframe(ds):
    """Run the full scoring pipeline on a frame and return the predictions.

    Steps, in order: rename columns, convert boolean/text dtypes, check
    that the required columns are present (raises ``ValueError`` if not),
    parse numeric types, add missing-value indicators, impute values,
    combine small levels, then score each row. The row scores are mapped
    through ``1 / (1 + exp(-x))`` before being returned.
    """
    frame = convert_bool(rename_columns(ds))
    validate_columns(frame.columns)

    preprocessing = (
        parse_numeric_types,
        add_missing_indicators,
        impute_values,
        combine_small_levels,
    )
    for step in preprocessing:
        frame = step(frame)

    raw_scores = predict_dataframe(frame)
    return 1/(1 + np.exp(-raw_scores))
def run(dataset_path, output_path, encoding=None):
    """Score a CSV file and write the predictions to another CSV file.

    ``dataset_path`` is read with ``pandas.read_csv`` (extra NA markers
    from ``NA_VALUES``, dtypes from ``get_dtypes()``); ``encoding`` falls
    back to ``DEFAULT_ENCODING`` when ``None``. The result of
    ``run_dataframe`` is written to ``output_path`` with a header, under
    the column name 'Prediction' and the index label 'Index'.
    """
    csv_encoding = DEFAULT_ENCODING if encoding is None else encoding
    frame = pd.read_csv(
        dataset_path,
        na_values=NA_VALUES,
        low_memory=False,
        dtype=get_dtypes(),
        encoding=csv_encoding,
    )
    scores = run_dataframe(frame)
    scores.name = 'Prediction'
    scores.to_csv(output_path, header=True, index_label='Index')
def _construct_parser():
import argparse
parser = argparse.ArgumentParser(description='Make offline predictions with DataRobot Prime')
parser.add_argument(
'--encoding',
type=str,
help=('the encoding of the dataset you are going to make predictions with. '
'DataRobot Prime defaults to UTF-8 if not otherwise specified. See the '
'"Codecs" column of the Python-supported standards chart '
'(https://docs.python.org/2/library/codecs.html#standard-encodings) '
'for possible alternative entries.'),
metavar='<encoding>'
)
parser.add_argument(
'input_path',
type=str,
help=('a .csv file (your dataset); columns must correspond to the '
'feature set used to generate the DataRobot Prime model.'),
metavar='<data_file>'
)
parser.add_argument(
'output_path',
type=str,
help='the filename where DataRobot writes the results.',
metavar='<output_file>'
)
return parser
def _parse_command(args):
    """Parse an argv-style list (program name first) into a namespace.

    ``args[0]`` is skipped. When ``--encoding`` was not given, a warning
    is written to stderr and ``encoding`` is set to ``DEFAULT_ENCODING``.
    """
    parsed_args = _construct_parser().parse_args(args[1:])
    if parsed_args.encoding is not None:
        return parsed_args

    sys.stderr.write('Warning: For input data encodings other than UTF-8, '
                     'search "Prime examples" in the DataRobot Users Guide at https://app.datarobot.com/docs/users-guide/index.html')
    parsed_args.encoding = DEFAULT_ENCODING
    return parsed_args
if __name__ == '__main__':
    # Command-line entry point: parse argv, then score the input CSV and
    # write the predictions to the requested output file.
    args = _parse_command(sys.argv)
    run(dataset_path=args.input_path,
        output_path=args.output_path,
        encoding=args.encoding)
def required_inputs():
    """Return the list of required inputs (currently empty)."""
    return list()
def optional_inputs():
    """Return the list of optional inputs (currently empty)."""
    return list()
def output():
    """Return the list of declared outputs (currently empty).

    DemystType is one of
    ["Blob", "Boolean", "BusinessName", "City", "Country", "Date", "DateTime",
    "Dictionary", "Domain", "EmailAddress", "FirstName", "FullName", "Gender",
    "Ip4", "LastName", "Latitude", "List", "Longitude", "MaritalStatus",
    "MiddleName", "Number", "Percentage", "Phone", "PostCode", "Range",
    "SicCode", "State", "Street", "String", "Url", "UsEin", "UsSsn", "UsSsn4",
    "Year", "YearMonth"]
    """
    return list()
def metadata():
    """Return a fresh dict of descriptive metadata for this data product."""
    fields = (
        ("create_provider", False),
        ("category", "Finance"),
        ("data_provider_name", "Blog Post"),
        ("data_product_name", "Commercial Credit Risk"),
        ("data_product_description", "Predicts commercial credit risk"),
        ("beta", True),
        ("price_final", True),
        ("fcra", False),
        ("region", "us"),
        ("data_provider_website", "www.demyst.com"),
    )
    return dict(fields)
def tile_data():
    """Return the metadata dict extended with the input/output declarations.

    The result holds every key from ``metadata()`` plus
    ``'optional_inputs'``, ``'required_inputs'`` and ``'output'``, set
    from the functions of the same purpose.
    """
    required = required_inputs()
    declared_outputs = output()
    optional = optional_inputs()

    tile = dict(metadata())
    tile['optional_inputs'] = optional
    tile['required_inputs'] = required
    tile['output'] = declared_outputs
    return tile
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment