Skip to content

Instantly share code, notes, and snippets.

@cheesinglee
Last active Aug 29, 2015
Embed
What would you like to do?
# URL of Kiva's full historical data snapshot (JSON dump, zipped).
KIVA_SNAPSHOT_URL = 'http://s3.kiva.org/snapshots/kiva_ds_json.zip'

# Loan-record fields stripped out before the data is uploaded to BigML.
PRUNE_FIELDS = [
    'terms', 'payments', 'basket_amount', 'description', 'name',
    'borrowers', 'translator', 'video', 'image',
    'funded_date', 'paid_date', 'paid_amount', 'funded_amount',
    'planned_expiration_date', 'bonus_credit_eligibility',
    'partner_id',
]

# Loan-record keys retained for the dataset (dotted paths reach into
# nested objects, e.g. 'location.country').
KEYS = [
    'sector', 'use', 'posted_date', 'location.country',
    'journal_totals.entries', 'activity',
    'loan_amount', 'status', 'lender_count',
]

# BigML field ids used as model inputs.  The '000002-N' ids are the
# sub-fields BigML derives from posted_date:
# year, month, day-of-month, day-of-week, hour.
INPUT_FIELD_IDS = [
    '000000',    # sector
    '000001',    # use
    '000003',    # location.country
    '000004',    # journal_totals.entries
    '000005',    # activity
    '000006',    # loan_amount
    '000008',    # lender_count
    '000002-0',  # posted_date.year
    '000002-1',  # posted_date.month
    '000002-2',  # posted_date.day-of-month
    '000002-3',  # posted_date.day-of-week
    '000002-4',  # posted_date.hour
]

# Field ids excluded from the model entirely (raw posted_date plus the
# two remaining derived date sub-fields).
EXCLUDED_FIELD_IDS = ['000002', '000002-5', '000002-6']

# Model objective: the loan's status field.
OBJECTIVE_FIELD_ID = '000007'
if __name__ == '__main__':
    api = BigML()
    # Existing BigML datasets named 'kiva-data'.
    datasets = api.list_datasets("name=kiva-data;order_by=created")['objects']
    if not datasets:
        # No pre-existing data: bootstrap a dataset from the full Kiva
        # snapshot zip (downloaded to a temp file).
        make_ds_from_snapshot()
    else:
        # Incremental update: find the creation time of the most recent
        # dataset and pull only newer loans via the Kiva API.
        # NOTE(review): this assumes datasets[0] is the NEWEST entry;
        # confirm whether 'order_by=created' sorts ascending or
        # descending in the BigML API.
        last_date = dateutil.parser.parse(
            datasets[0]['created']).replace(tzinfo=dateutil.tz.tzutc())
        make_ds_from_api(last_date)
    # print() with a single argument is valid in both Python 2 and 3.
    print('building model')
    # Re-list so any dataset created above is included in the model build.
    datasets = api.list_datasets("name=kiva-data;order_by=created")['objects']
    # Build one model over every kiva-data dataset, predicting loan
    # status from the selected input fields.
    model = api.create_model([obj['resource'] for obj in datasets],
                             {'name': 'kiva-model',
                              'objective_field': OBJECTIVE_FIELD_ID,
                              'input_fields': INPUT_FIELD_IDS,
                              'excluded_fields': EXCLUDED_FIELD_IDS,
                              'balance_objective': True})
    print('Done!')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment