Skip to content

Instantly share code, notes, and snippets.

@edouardklein
Last active August 29, 2015 14:15
Show Gist options
  • Save edouardklein/f541b8ead5e124d48124 to your computer and use it in GitHub Desktop.
Save edouardklein/f541b8ead5e124d48124 to your computer and use it in GitHub Desktop.
HMM2
{
"metadata": {
"name": "",
"signature": "sha256:e03d52a26f4dccc6cc9bde5805be986ec05a75df60c1c7dd98c7d4478ecb1993"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Data import\n",
"This is just boilerplate code to import the data from the csv. The only interesting things happen at the beginning of the second cell, where we drop the states that bear too much information."
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import csv\n",
"import itertools\n",
"import pandas as pd\n",
"\n",
"#First pass let us create the set of all observed states\n",
"with open('Mike/10-21-2014 approve to not fund.csv','r') as f_not_approved,\\\n",
" open('Mike/approve to fund.csv','r') as f_approved:\n",
" approved = csv.reader(f_approved)\n",
" not_approved = csv.reader(f_not_approved)\n",
" #Discarding first three lines\n",
" [next(approved) for i in range(0,3)]\n",
" [next(not_approved) for i in range(0,3)]\n",
" observed_states = set()\n",
" for row in [x for x in not_approved][:14368]: # Ignoring lines after 14368 because of text overflow error in file\n",
" states = row[3].split('->')\n",
" observed_states.update(states)\n",
" for row in approved:\n",
" states = row[6].split('->')\n",
" observed_states.update(states)\n",
"observed_states = list(observed_states)\n",
"print(pd.DataFrame(observed_states)) "
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
" 0\n",
"0 ct.resl_general_prod_eng\n",
"1 ct.web_resl_main_pages\n",
"2 lists.mortgage renewal reminder\n",
"3 lists.new to chequing welcome dme\n",
"4 lists.rsi onboard program\n",
"5 ct_phoneoutbound.sell mortgage\n",
"6 ct_ivr_billpay\n",
"7 hist.close.busines visa\n",
"8 dme.mortgage renewal reminder\n",
"9 dme.uloc pa frao and champion\n",
"10 ecom.cause of concern - system limitations\n",
"11 ct.chq_info_eng\n",
"12 dme.boat 2.9 percent for a period - gen\n",
"13 lists.atm cheque imaging dm offer\n",
"14 ct_retail.branch.create investment appt\n",
"15 ct_retail.branch.attend small bus bank appt\n",
"16 apps.recommend exception denied\n",
"17 exit.lifecycle\n",
"18 ct_retail.branch.attend mortgage appt\n",
"19 hist.open.line of credit account\n",
"20 ecom.cause of concern - pricing\n",
"21 ct_phoneoutbound.sell chequing :: 11\n",
"22 dme.edb music access awareness\n",
"23 ct.chq_info_fre\n",
"24 ecom.alert - free format\n",
"25 apps.decline\n",
"26 lists.chequing accounts - right plan program\n",
"27 txn.tc.bill payment.no source available\n",
"28 dme.platinum aeroplan visa preapproved\n",
"29 exit.inbound\n",
".. ...\n",
"220 hist.open.mutual fund account\n",
"221 hist.open.busines visa\n",
"222 ct_retail.c3.loan details\n",
"223 hist.open.tdaf non prime loan\n",
"224 txn.tc.bill payment.call centre\n",
"225 dme.tdw active trader tech launch\n",
"226 exit.transaction\n",
"227 ct_ivr.disconnect :: 11\n",
"228 lead.completed/closed\n",
"229 hist.open.investment advice account\n",
"230 ct.web_resl_mortcalc\n",
"231 ct.web_resl.n2c\n",
"232 ct_retail.branch.attend specialty appt\n",
"233 ct.web_resl_lending\n",
"234 lists.direct deposit no offer\n",
"235 hist.close.business chequing account\n",
"236 ecom.cause of concern - other\n",
"237 entry.branch\n",
"238 lists.heloc retention - mid term attritors\n",
"239 exit.apps\n",
"240 dme.new to chequing welcome dme\n",
"241 lists.mortgage elsewhere ofi leads\n",
"242 hist.open.term deposit account\n",
"243 ct.web_resl.productsandservices\n",
"244 dme.direct deposit no offer\n",
"245 apps.rec. approve\n",
"246 ct.chq_mobile_eng\n",
"247 txn.in-branch\n",
"248 hist.close.line of credit account\n",
"249 ct_ivr.direct.marketing\n",
"\n",
"[250 rows x 1 columns]\n"
]
}
],
"prompt_number": 1
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"\n",
"#Removing TMI features\n",
"for f in ['apps.approved', 'entry.apps', 'exit.lifecycle', 'apps.approved with conditions', 'entry.lifecycle', 'apps.pending', \n",
" 'apps.rec. approve', 'exit.apps', 'ecom.general comment', 'apps.conditional waiver', 'lists.direct deposit no offer',\n",
" 'apps.appl.home equity line of credit', 'apps.rec. decline', 'apps.decline', 'apps.appl.mortgage', \n",
" 'lists.facebook custom audiences test', 'apps.exception denied', 'entry.outbound','exit.outbound', 'exit.marketing',\n",
" 'entry.marketing']:\n",
" observed_states.remove(f)\n",
" "
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Second pass: encode each row's sequence of observed states as a list of state indices. Some sequences are now empty because we removed a lot of states; those are dropped."
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"counts_sequences = [] # [[count, [sequence], [outcome]], ...]\n",
"with open('Mike/10-21-2014 approve to not fund.csv','r') as f_not_approved,\\\n",
" open('Mike/approve to fund.csv','r') as f_approved:\n",
" approved = csv.reader(f_approved)\n",
" not_approved = csv.reader(f_not_approved)\n",
" #Discarding first three lines\n",
" [next(approved) for i in range(0,3)]\n",
" [next(not_approved) for i in range(0,3)]\n",
" for row in [x for x in not_approved][:14368]: # Ignoring lines after 14368 because of text overflow error in file\n",
" sequence = []\n",
" for state in row[3].split('->'):\n",
" try:\n",
" sequence.append(observed_states.index(state)) \n",
" except ValueError:\n",
" #Silently drop removed states\n",
" pass\n",
" count = int(row[1])\n",
" if sequence != []: # We have some empty sequences now...\n",
" counts_sequences.append([count, sequence, [0]])\n",
" for row in approved:\n",
" sequence = []\n",
" for state in row[6].split('->'):\n",
" try:\n",
" sequence.append(observed_states.index(state)) \n",
" except ValueError:\n",
" #Silently drop removed states\n",
" pass\n",
" #sequence.append(final_state) #Remnant of our tries on HMMs\n",
" count = int(row[1])\n",
" if sequence != []:\n",
" counts_sequences.append([count, sequence, [1]])\n",
"nb_sequences = sum([x[0] for x in counts_sequences])\n",
"weights_sequences = [[x[0]/nb_sequences,x[1],x[2]] for x in counts_sequences]"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 15
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Linear SVC\n",
"\n",
"We will run the linear SVC from scikit-learn: http://scikit-learn.org/stable/modules/svm.html#classification\n",
"\n",
"The features are as we discussed: a vector with one component per retained state (the 250 observed states minus those removed above), holding the frequency of occupancy of each state."
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"def obs_to_density(obs):\n",
" answer,_ = histogram(obs, range=[0, len(observed_states)], bins=len(observed_states), density=True)\n",
" return answer\n",
"X = array([obs_to_density(x[1]) for x in weights_sequences])\n",
"Y = array([x[2] for x in weights_sequences]).reshape(-1)\n",
"weights = [x[0] for x in weights_sequences]"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 16
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We try this without sample weights until the performance gets bad; then we'll try different techniques to get it back up.\n",
"\n",
"The features are displayed in decreasing order of importance (absolute value of their coefficient in the fitted SVC)."
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"from sklearn.svm import LinearSVC\n",
"from sklearn import cross_validation\n",
"from sklearn import metrics\n",
"classifier = LinearSVC(loss='l1')\n",
"classifier.fit(X,Y)\n",
"sorted_coeffs = sorted(abs(classifier.coef_[0]), reverse=True)\n",
"[(observed_states[list(abs(classifier.coef_[0])).index(score)], score) for score in sorted(abs(classifier.coef_[0]), reverse=True)]"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 18,
"text": [
"[('entry.branch', 7.2779725824358135),\n",
" ('exit.branch', 7.1875881309742766),\n",
" ('exit.web', 5.8610834719100895),\n",
" ('entry.web', 5.6834188951075832),\n",
" ('entry.transaction', 5.3991012280414621),\n",
" ('exit.transaction', 5.2909997159362474),\n",
" ('ct_web.chequing_admin', 3.6811929965302266),\n",
" ('ct_web.investment', 2.1419622243256122),\n",
" ('apps.recommend exception denied', 1.9547647184529477),\n",
" ('ct.web_resl_easy_web_mortgage', 1.356770941983289),\n",
" ('ct_web.contact us', 0.93350860122031909),\n",
" ('ct.resl_mort_calc_eng', 0.77292356875334278),\n",
" ('ct.resl_mort_calc_eng', 0.77292356875334278),\n",
" ('lists.mortgage offer', 0.5750954787026108),\n",
" ('ct_web.chequing-information', 0.56211053770312502),\n",
" ('hist.open.personal demand chequing account', 0.52802862249424165),\n",
" ('ct.web_resl.productsandservices', 0.46755276255014167),\n",
" ('ct.web_french', 0.42393633672708969),\n",
" ('ct.chq_info_eng', 0.35941219870318608),\n",
" ('ct.web_resl_secure_personal_credit', 0.31744989767773713),\n",
" ('lists.new to chequing email', 0.28797252744535579),\n",
" ('lists.vmt_fee_rebate', 0.2537909222080344),\n",
" ('txn.tc.pre-authorized debit.no source available', 0.21821598683348745),\n",
" ('ct.web_resl.mobile.mortgage', 0.21803071243912223),\n",
" ('ct.web_resl.mobile.mortgage', 0.21803071243912223),\n",
" ('ct_retail.branch.create mortgage appt', 0.20725789619049645),\n",
" ('ct_web_mortgage', 0.2070697473872942),\n",
" ('ct.resl_mortgage_calc_landing', 0.19980880240053403),\n",
" ('lists.uloc champion and frao offer', 0.18423959581150562),\n",
" ('ct.resl_mobile_mortgage_fre', 0.1762114537444934),\n",
" ('ct.resl_mort_first_time_home_buyer_eng', 0.17137623691162288),\n",
" ('txn.agent', 0.16721942016244881),\n",
" ('entry.cei', 0.14980610915073136),\n",
" ('entry.cei', 0.14980610915073136),\n",
" ('lists.marketing no offer 2010', 0.14923879979661542),\n",
" ('lists.uloc leads', 0.14459951667078588),\n",
" ('hist.open.personal demand savings account', 0.14078754341914509),\n",
" ('lists.rsp tfsa bonus rate offer', 0.13652947721045905),\n",
" ('cei.4 - very likely', 0.13087841381885656),\n",
" ('ct_retail.branch.create other appt', 0.12549162472067582),\n",
" ('ct_web.insurance :: 21', 0.12307006263440406),\n",
" ('ct_retail.branch.attend mortgage appt', 0.11684467887808744),\n",
" ('ct.web_resl_intelliresponse', 0.11536930167965742),\n",
" ('ct.resl_asktd_mort_eng ', 0.11222979652664038),\n",
" ('ct_retail.branch.create debt appt', 0.10747367451913566),\n",
" ('ct_ivr.disconnect :: 11', 0.10667392420674809),\n",
" ('ct.chq_info_fre', 0.10423076923076924),\n",
" ('hist.open.retail mortgage', 0.10310705496308481),\n",
" ('ct_ivr.transfer_call', 0.10041019265912308),\n",
" ('lists.rsp tfsa bonus rate xsell', 0.099495792606397959),\n",
" ('txn.tc.bill payment.td abm', 0.095683678533475772),\n",
" ('ct.resl_general_prod_eng', 0.089707027378087015),\n",
" ('ct.resl_mortgage_renewer_eng', 0.084867016679630064),\n",
" ('ct_web.creditcard', 0.072029617938147536),\n",
" ('ct_ivr.nonmembers', 0.068014820682883878),\n",
" ('ct.resl_mortgage_refinancer_eng', 0.067769204733460986),\n",
" ('lists.new to chequing welcome dm', 0.063660381901633104),\n",
" ('hist.open.line of credit account', 0.063221125334546574),\n",
" ('ecom.alert - free format', 0.061044672237277782),\n",
" ('txn.no source available', 0.060525207498192894),\n",
" ('lists.mortgage renewal leads', 0.058003678729378699),\n",
" ('txn.electronic', 0.056607148065339813),\n",
" ('entry.inbound', 0.056237734453240519),\n",
" ('hist.close.proprietary visa card', 0.055068953005671467),\n",
" ('hist.open.mutual fund account', 0.054385254469632473),\n",
" ('ct_retail.branch.attend chequing appt', 0.053297598353334454),\n",
" ('ct_ivr.transfers', 0.052405850867069954),\n",
" ('ct_ivr.account.info', 0.051813542919334273),\n",
" ('ct_phoneoutbound.call back', 0.051183662514666141),\n",
" ('lists.resl retention - mid term attritors', 0.051141219934998129),\n",
" ('ct_ivr_billpay', 0.050864641540466757),\n",
" ('txn.telephone banking (ivr)', 0.050453871114353233),\n",
" ('ct_retail.c3.cheque order', 0.050323118291501948),\n",
" ('hist.close.line of credit account', 0.048183861691264429),\n",
" ('lists.mortgage renewal reminder', 0.047983096757468389),\n",
" ('ct_ivr.direct.marketing', 0.04748628607140605),\n",
" ('ct_retail.branch.attend investment appt', 0.045637552878784106),\n",
" ('ct_retail.branch.attend small bus bank appt', 0.045532684798485316),\n",
" ('ct_retail.branch.create small bus bank appt', 0.045532684798485303),\n",
" ('ct_retail.branch.create appt credit card', 0.04544539555928024),\n",
" ('ct_retail.branch.create appt credit card', 0.04544539555928024),\n",
" ('dme.mortgage estatement communication', 0.04538271015232475),\n",
" ('lists.mortgage renewal ebank', 0.045379957172566654),\n",
" ('ct_phoneoutbound.self general :: 11', 0.04537513029104058),\n",
" ('hist.close.personal demand chequing account', 0.045372640532450957),\n",
" ('ecom.package - td private banking', 0.045350068254356411),\n",
" ('ct_retail.c3.general acct info', 0.045312982013815055),\n",
" ('ct_phoneoutbound.did not reach', 0.045308085340096317),\n",
" ('ct_retail.c3.mortgage activity', 0.04530391096462421),\n",
" ('hist.close.tdaf prime loan', 0.045298705993054703),\n",
" ('lists.prime plus 0.5 percent', 0.045289245625105091),\n",
" ('lead.completed/closed', 0.045287265476244579),\n",
" ('apps.rlse.home equity line of credit', 0.045287124102934471),\n",
" ('lists.new to mortgage contact', 0.045283520003454106),\n",
" ('ct_retail.branch.create investment appt', 0.045269678298928813),\n",
" ('ct_phoneoutbound.no sell', 0.045266452249032349),\n",
" ('lists.mortgage estatement communication', 0.045265471747689072),\n",
" ('hist.open.tdaf non prime loan', 0.045265073641956366),\n",
" ('ecom.cause of concern - other', 0.045264653372150701),\n",
" ('hist.close.tdaf non prime loan', 0.045261768491075827),\n",
" ('ecom.cause of concern - customer service issue', 0.045261396605551674),\n",
" ('txn.tc.bill payment.in-branch', 0.045250150951271863),\n",
" ('ecom.alert - o/s third party req to pay', 0.045246919573343307),\n",
" ('hist.close.retail mortgage', 0.04524453629923491),\n",
" ('lists.uloc pa frao and champion', 0.045242765925644447),\n",
" ('ecom.alert - insufficient cif id - update moci', 0.045235621589127806),\n",
" ('ct_retail.c3.visa activity', 0.045234751842117664),\n",
" ('lists.rsi onboard program', 0.045234409070765202),\n",
" ('ecom.cause of concern - do not call', 0.045229103611467647),\n",
" ('hist.open.proprietary visa card', 0.04522128605245497),\n",
" ('ecom.alert - counterfeit cheques', 0.045198060138309903),\n",
" ('ct_retail.c3.general acct changes', 0.045190624804124176),\n",
" ('ecom.cause of concern - pricing', 0.045186310308914124),\n",
" ('hist.open.tdaf prime loan', 0.045186218151242283),\n",
" ('hist.open.discount brokerage', 0.045183203198494046),\n",
" ('ecom.cause of concern - systems or equipment failure',\n",
" 0.045176623612960494),\n",
" ('ecom.history comment', 0.045173073219106213),\n",
" ('ecom.cause of concern - policy and procedure', 0.045164808738698124),\n",
" ('hist.close.personal demand savings account', 0.045160060724136489),\n",
" ('hist.open.term deposit account', 0.044868983344949175),\n",
" ('lists.mortgage elsewhere ofi leads', 0.044601694568393122),\n",
" ('hist.open.combined term deposit and savings account', 0.044147264708998213),\n",
" ('txn.tc.bill payment.web', 0.043468685860535747),\n",
" ('txn.tc.bill payment.call centre', 0.042849786728170705),\n",
" ('ct_ivr.main_entry', 0.042575273773064561),\n",
" ('hist.close.mutual fund account', 0.04227741916799619),\n",
" ('txn.tc.bill payment.telephone banking (ivr)', 0.04004123098162643),\n",
" ('lists.new to chequing obtm', 0.039549784629087578),\n",
" ('txn.call centre', 0.039487217293878184),\n",
" ('ct_retail.branch.create chequing appt', 0.03743009005249559),\n",
" ('hist.open.personal loan', 0.03654437474053697),\n",
" ('lists.frao renewal leads', 0.035223977892926556),\n",
" ('ct_retail.branch.attend other appt :: 11', 0.034271364694783084),\n",
" ('txn.td abm', 0.033599245528515544),\n",
" ('lists.mortgage x-sell offer', 0.032622974332723612),\n",
" ('lead.active', 0.031842565260593939),\n",
" ('txn.web', 0.031779305923660968),\n",
" ('ct.web_resl_other', 0.030989424568182048),\n",
" ('hist.open.financial planner', 0.02870516312357433),\n",
" ('txn.in-branch', 0.027648307719310122),\n",
" ('exit.inbound', 0.027095598880093195),\n",
" ('hist.close.proprietary master card', 0.02704166270359052),\n",
" ('lists.fef target banner 2', 0.027031454783748424),\n",
" ('lists.new to chequing welcome dme', 0.026557270600261532),\n",
" ('lists.seniors_rebate', 0.02518312885637029),\n",
" ('dme.edb music access awareness', 0.02230706280007733),\n",
" ('ct_web.chequing application', 0.020863297308921062),\n",
" ('hist.open.proprietary master card', 0.02078978397936123),\n",
" ('lists.fef target banner 1', 0.020547434707699985),\n",
" ('lists.mtg anniversary dme', 0.020127229401916837),\n",
" ('cei.5 - extremely likely', 0.01892769533187437),\n",
" ('lists.chequing accounts - right plan program', 0.018255936882020124),\n",
" ('dme.odp pay-as-you-go', 0.01785714285714286),\n",
" ('dme.rsp tfsa bonus rate offer', 0.017513461791632986),\n",
" ('ct_retail.branch.attend debt appt', 0.016923494429088144),\n",
" ('lists.heloc retention - mid term attritors', 0.015070478260662276),\n",
" ('ct_ivr.investments', 0.014964338720211755),\n",
" ('ct_phoneoutbound.sell chequing :: 11', 0.013986013986013996),\n",
" ('ecom.alert - customer is being impersonated', 0.013806750138602537),\n",
" ('dme.infinite aeroplan visa preapproved', 0.012896398583411756),\n",
" ('lists.aeroplan miles offer', 0.012251650457924857),\n",
" ('dme.authorized user offer', 0.0080000000000000002),\n",
" ('hist.open.tdfs prime loan', 0.0077058221744924502),\n",
" ('dme.new to chequing welcome dme', 0.0075187969924812035),\n",
" ('ct.resl_specialist', 0.0062833424491470259),\n",
" ('dme.mortgage renewal reminder', 0.0057730582111387091),\n",
" ('dme.vmt_fee_rebate', 0.0056151858477439887),\n",
" ('hist.open.investment advice account', 0.0040081471107128765),\n",
" ('txn.tc.bill payment.no source available', 0.0032543768670792055),\n",
" ('ct_retail.c3.loan details', 0.002725667984886963),\n",
" ('hist.close.tdfs prime loan', 0.001977517296578244),\n",
" ('lists.atm cheque imaging dm offer', 0.0018665291250283568),\n",
" ('ct.web_resl.td_lifeguide', 0.0017985611510791368),\n",
" ('ct.web_resl.td_lifeguide', 0.0017985611510791368),\n",
" ('ct.web_resl.td_lifeguide', 0.0017985611510791368),\n",
" ('hist.open.tdfs mortgage', 0.0017241379310344827),\n",
" ('lists.edb acct dormancy offer', 6.8270855548696607e-18),\n",
" ('lists.edb acct dormancy offer', 6.8270855548696607e-18),\n",
" ('lists.n2mortgage', 1.943093581001365e-18),\n",
" ('cei.3 - somewhat likely', 8.6736173798840355e-19),\n",
" ('lists.youth_right_plan', 7.453889935837843e-19),\n",
" ('apps.rlse.mortgage', 5.2854855908668341e-19),\n",
" ('dme.boat 1.9 percent for a period - gen', 4.9403196648607067e-19),\n",
" ('dme.infinite aeroplan visa ita', 4.3368086899420177e-19),\n",
" ('txn.tc.payroll deposit.in-branch', 3.1170812458958252e-19),\n",
" ('ct_ivr.branch.info', 2.3208702754767829e-19),\n",
" ('hist.close.busines visa', 2.0328790734103208e-19),\n",
" ('lists.alberta flood impact', 1.6299759406479921e-19),\n",
" ('dme.ezweb marketing no offer', 5.4210108624275222e-20),\n",
" ('dme.ezweb marketing no offer', 5.4210108624275222e-20),\n",
" ('dme.uloc pa frao and champion', 4.6586812098986519e-20),\n",
" ('ct_phoneoutbound.no sell mortgage', 4.5210383559698281e-20),\n",
" ('ct_phoneoutbound.sell mortgage', 4.0657581468206416e-20),\n",
" ('ct.web_resl.n2c', 2.7105054312137611e-20),\n",
" ('ct_retail.branch.create specialty appt', 2.4484546131569619e-20),\n",
" ('lists.n2heloc', 1.6940658945086007e-20),\n",
" ('ct_retail.branch.attend specialty appt', 1.4320150764518015e-20),\n",
" ('ct.resl_mort_affordability_calc_eng_fr', 1.3552527156068805e-20),\n",
" ('lists.open a savings account', 3.3881317890172014e-21),\n",
" ('lists.open a savings account', 3.3881317890172014e-21),\n",
" ('hist.open.busines visa', 1.6940658945086007e-21),\n",
" ('ecom.cause of concern - system limitations', 5.2939559203393771e-23),\n",
" ('ct.web_resl_main_pages', 0.0),\n",
" ('ct.web_resl_main_pages', 0.0),\n",
" ('ct.web_resl_main_pages', 0.0),\n",
" ('ct.web_resl_main_pages', 0.0),\n",
" ('ct.web_resl_main_pages', 0.0),\n",
" ('ct.web_resl_main_pages', 0.0),\n",
" ('ct.web_resl_main_pages', 0.0),\n",
" ('ct.web_resl_main_pages', 0.0),\n",
" ('ct.web_resl_main_pages', 0.0),\n",
" ('ct.web_resl_main_pages', 0.0),\n",
" ('ct.web_resl_main_pages', 0.0),\n",
" ('ct.web_resl_main_pages', 0.0),\n",
" ('ct.web_resl_main_pages', 0.0),\n",
" ('ct.web_resl_main_pages', 0.0),\n",
" ('ct.web_resl_main_pages', 0.0),\n",
" ('ct.web_resl_main_pages', 0.0),\n",
" ('ct.web_resl_main_pages', 0.0),\n",
" ('ct.web_resl_main_pages', 0.0),\n",
" ('ct.web_resl_main_pages', 0.0),\n",
" ('ct.web_resl_main_pages', 0.0),\n",
" ('ct.web_resl_main_pages', 0.0),\n",
" ('ct.web_resl_main_pages', 0.0),\n",
" ('ct.web_resl_main_pages', 0.0),\n",
" ('ct.web_resl_main_pages', 0.0),\n",
" ('ct.web_resl_main_pages', 0.0),\n",
" ('ct.web_resl_main_pages', 0.0),\n",
" ('ct.web_resl_main_pages', 0.0)]"
]
}
],
"prompt_number": 18
},
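{
"cell_type": "markdown",
"metadata": {},
"source": [
"We cross-validate with 10 folds and print the mean score, then draw the ROC curve and display the area under it. Note that the ROC curve is computed by scoring the classifier fitted above on its own training data, so it is an optimistic estimate rather than a cross-validated one."
]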
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import seaborn as sns\n",
"print(cross_validation.cross_val_score(classifier, X, Y, cv=10).mean())\n",
"y_score = classifier.decision_function(X)\n",
"fpr, tpr, _= metrics.roc_curve(Y, y_score, sample_weight = weights)\n",
"auc = metrics.auc(fpr, tpr, reorder=True)\n",
"plt.plot(fpr, tpr, label='ROC curve (area = %0.6f)' % auc)\n",
"#xscale('log')\n",
"legend()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"0.968669615255\n"
]
},
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 20,
"text": [
"<matplotlib.legend.Legend at 0x113ce8f98>"
]
},
{
"metadata": {},
"output_type": "display_data",
"png": "iVBORw0KGgoAAAANSUhEUgAAAecAAAFVCAYAAADVDycqAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3Xl8XHW9//HXZE/aLF1SaEvpBpyytCwti2UToYpKZb38\nQAVFQAFZRISrcEW4oCC3bJdVoLIJV0VEvSBcdpAqIEIp66EtlC5Am6Zp9j3z+2PSkG6TpnRmTpLX\n8/HgwZz5nsz55NvJvOd7lu+JxeNxJElSdGRlugBJkrQ2w1mSpIgxnCVJihjDWZKkiDGcJUmKGMNZ\nkqSI2aRwDoJg7yAIntnA8zODIHg5CIK/B0FwypYvT5KkgafHcA6C4ALgdiB/nedzgWuAGcCBwHeD\nIBiRiiIlSRpINmXkvAA4Coit8/yOwIIwDKvDMGwFXgAO2ML1SZI04PQYzmEY/hFo20BTCVDdbbkW\nKN1CdUmSNGDlfIafrQaKuy0XA1XJfiAej8djsXUH4JJS5c/PL2TRRzVJ12lpa+f515YxrLRgvd1j\nW9rK6qYUb2HgmDR2SKZL0CbIyorxyzP37/Wf1mcJ53eB7YMgGALUk9il/V/JfiAWi1FRUfsZNqlN\nUV5ebD+nWF/o48bmNu7485ubvH5ldRPlZQUprAjKywqorG7mgF1HQizGzuOGbnTd0tICqnsZ5iWD\nchkzYvBnLTPy8nKzydoCA52+8D4eqHoTznGAIAiOBwaHYXh7EAQ/BP6PxO7x2WEYfpyCGiVthsbm\nxNGo3bYbznGHbJ903exYjKEl+URpz5bBoYFsk8I5DMNFwPTOx//T7fmHgYdTUpmkz6SppR2A0sF5\njCgrzHA1knrDSUikfqq5NRHOBXnZGa5EUm99lmPOkjKota2DjyvrN9q+ZEUdAAV5/plLfY1/tVIf\n88xry/jzCx9QU9+ySesX5vtnLvU1/tVKfUBVbXPXSPhvr39ETX0LI4cVUVXbzH6TR27053Jzs9hn\np63SVaakLcRwliKqIx5n6Yo6Xn5nBX998cO12oryc/j5qftkqDJJqWY4S2kWj8eprm+hoyO+Xltl\nTRNPvrKUWAxefmfFeu3HfH4iAONHlqS8TkmZYzhLW0hbe8d6z7W2dfDektXMW1hJc2s7r4QraGld\nf71kPr/7aEYOLeLgqduQlRWd65AHqldffYWLL/4J48dPIBaLUV9fz6hRo/nZzy4nJyeHqqoqbrrp\nOpYv/4SOjg5GjNiKs846l6FDhwHw+uuvcdddd9DW1kZTUyNf+crXOPLIYzL6O1VXr+a2227m/PMv\nzGgdzc1N/Od//pTVq1dTVFTERRddSllZ2Vrr/Pa3v+Hxxx8lLy+fo48+lhkzDqWxsZFLL72Iuro6\ncnJy+I//uJThw8tZunQJs2ZdQVtbG3l5+Vxyyc8pKSnhr3/9X/70pweJxzs44IAvcMIJ3+56/dde\n+xeXXXYxf/zjIwC88MLz3H33HWRn5/DVr36NmTOPYNWqSu6+ezbnnntByvrCcJZ6YVVNE2+8X0k7\nMeobEidkNTa38dhLi3v9WtMmjSAne/2wzc6Kcdj0cRTkZjO4KJfsLK943JjfP72Af767/h6Gz2LP\nSSM49gvbbbQ9FosxbdpeXHLJz7ueu/TS/+CFF57jwAO/wEUXnc/Xv34i++2XuA/QK6+8zAUXnMtt\nt93Fxx9/xPXXz+Lqq29kyJAhNDc3c/bZpzF69DbstVfmDlPcfvstHH30sRnb/hoPPfQHtttuB046\n6VSeeupx7r57Nuecc15X+/vvL+Cxx/7K7bffTTwe5+STv8nUqXvy5JOPM2nSTnz726fw6KMPc999\n93DOOedx1VU/57TTzmSnnXbhueeeZvHiRQwZMpQ//elBbrzxNnJycrjjjltpa2sjJyeH5cs/4Xe/\nu4/29sRliG1tbdx447Xccce9FBQUcPrp32G//Q5g6N
BhFBUNYu7cV9lttz1S0heGswSsWN3IH55Z\nsNZzH69qYFlFPYX52UCMppY24uvviV7LzuPWn++4qq6FvSaNINi2jJJBeWw9tChSM3Gpd+LxOPFu\nb4TW1lYqK1dSUlJKGL7D4MHFXcEMMG3aXowePZq5c19l7txXOfTQwxgyJPE+yc/P59prb6SgYO1J\nYpYsWcwvf3k5bW1t5OcXcOmlv+Cmm67jkEO+xN57f44XX/w7Tz/9BBde+DOOPvowxo4dz/jx45kz\n52/cddf/UFBQwP3330tOTjYHHvgF/uu/fkFzczP5+flccMFFjBjx6UmC9fV1vPvuO0yYkPhC8uCD\nv+P555+lsbGRsrIyfvGLWTz++KM88shfOgPxe1RXV/P7399PVlYWU6bsxmmnncmKFcu5+uoraWlp\nobJyJaeeejr77//5ru0sW7aUK6+8bK3f84tf/DIzZx7RtfzGG6/zjW98C4C9957OXXfdsdb6ixYt\nYvfdp5KbmwvAhAkTeeutNzj22OPp6Ejskfrkk48pLi6mubmZ1aureOGF57nllhuYNGknTj/9LP7y\nl4eYNGlHLr/8Z1RWruTEE79DTk4Ozc3NXH31lVxwwUWcfPI3O7f3AaNHj2Hw4MSUsFOm7Mbcua9y\n0EGHMGPGl5g9+1eGs7SlvTa/grsfCynIzWbF6saNrtfY3N41X3N7R5y8nCym7rgV47rN4Zybk8XE\n0SWOctPs2C9sl3SUmyqvvvoKZ531PaqqqsjKinH44Uexxx7TeOqpJxg9epv11h81ahuWL/+EysqV\n7LBDsFZbUdGg9da/6abrOPHE77DXXvvwwgvPM3/+u8RisQ1+qauoWMGdd95PSUkJOTm5PPvsUxx6\n6Fd58sn/47rrbmLWrCs55pjj2Gef6bzyysvceuuNXHzxpyH51ltvsu22Y4HEF4+amhquu+5mYrEY\nP/zhWbzzzlvEYjFKSkq44oqrqamp5owzTmX27HvJz8/nsssu5p//fIlYLMZxx32T3XefyptvzmP2\n7F+tFc6jR2/DDTf8Kmm/1tfXdwVhUVER9fVrX8c/ceJ2/OY3d9LQ0EBrawtvvDGP/fY7EICsrCzO\nOed03n9/IddccyM1NdV88MH7nHvuBXz3u2dw5ZWX8eijD1NTU83rr7/GrbfeSVNTE2eccTK3334P\nN954LccffwLDh5evU8+n/z5FRYOor09cNTF27HjmzXs96e/zWRjO6tc6OuIsXlFLW3ucJctr+XB5\nHeGS1Sxf1dC1Tg2JKS6L8nP40XG7k91tV3NudtYGrxN23ueBbY89pnHppb+gpqaaH/zg+2y99SgA\nRowYwSeffLTe+kuWLGbPPfdm5coKli9fvlbb/PnvAXG23z5Ya/1ddpkM0DUKf+KJ/+tq7z5yLy0t\no6QkcYLgzJlHMGvWFYwdO46xY8dRUlLK++8v4N577+S++xK7gteMOteorl7NkCGJG5DEYjFycnK4\n5JILKSwsoqJiOW1tiTnax4xJBPjSpUtYvbqKH/3obAAaGhr46KNlTJ68K/fc82sefvjPxGKxrp9b\nY0Mj5xkzDuVrXzuya3nQoEFdgdzQ0NAV1GuMHTuOo446lvPOO4utttqanXbaZa1j0tdffwuLFy/i\n/PN/wD33/I6ioiJ2330qANOn788///kSEydux+67T6WwsJDCwkLGjRvPwoXzmTdvLsuWLQWgpqaG\nSy65iBNOOImGhk8/Kxoa6ikuTvR1dnY2OTmpi1DDWf3ak68s4bdPL9ho+5SJw/je13Z2og5tlpKS\nUi6++DLOPvs07rzzPiZP3pXKykrmzPkb++67PwAvvvh3PvpoKbvvPpVRo0bzk5+cx8EHf5GysjIa\nGhqYNesKTjrpVLbvdm+SsWPH8/bbbzFt2l488cRj1NTUkJeXx8qVFQC89967Xet2P0lwm23GEI/D\n/fff23WS2dix4z
j++BPYZZcpvP/+At5+e+07lQ0ZMpS6usQXzQUL5vO3vz3HbbfdRVNTE6ecckLX\nF4Gszr1CI0eOZsSIrbjuupvJzs7m4Yf/zKRJOzF79q3MnHkk++wznUce+QuPPrr2bRc2ZeQ8efKu\n/OMfc9hxx5158cU57Lrr2ruMV69eTUNDPbfcMpu6ujpOO+077LTTLtx7752Ul4/g0EO/SkFBIdnZ\n2eTn5zNmzFhef30uu+66G3Pn/osJEyYyZcquPPTQA7S0tNDe3s4HH3zAhAnbcf/9D3Zt5/DDv8Ql\nl/yctrY2lixZQk1NDYWFhcyd+xrHH38ikPiClJ2duqlx/URSn9bc2s7Lby/nrUWrKOoM2Djw/NyP\nGF5WQG1DKwAHT92GgrxshpUWsO2IYsaNLN4it9zTwLPu7uVx48ZzzDH/j+uum8Vll13JL395Lf/9\n31dz7713ArDVVltx1VXXE4vF2HrrkZxxxtlcdNH5ZGVl0dDQwMyZR7DPPtPX2sb3v38OV131C+6+\nezaFhYX89KeXsWzZUq644j95/PFHu0axnRWt9bOHHfY1Zs++jT32mNb5Wj9g1qwraWlpprm5mR/8\n4Py11t9558nccssNAIwZM4bCwkK+//1TKS0tY4cdJrFy5cqu3xtgyJAhHHfcNzjzzFNpb+9g5MhR\nzJjxJQ466BBuuuk6Hnjgt+y88y7U1ia/j/iGHHnkMVx++SWcccYp5ObmcckllwPwu9/dx+jRY9hv\nvwNYvPhDTj31RGKxLM4442wGDRrMYYcdzuWXX8Ijj/yFjo4OLrzwZwD8+Mc/5Zprfkl7ezujRo3m\njDPOIScnh69+9XBOP/1k4vE4J510CsXFxetUkvhdc3JyOOuscznvvDPp6Ihz2GGHM3z4cAAWLlzA\nLrtM6fXvuKli8Z7OcNmy4u4KTL3+usu1oamVpRX1vL5wJfE4/OOtT6iuSz6F5ZDifMaMGMzZx0zZ\nomHcX/s4Suzj1FvTx7NmXcHhhx+11q51JXfzzdez//6fZ/LkXXtct7y8uNcfPo6cFQnvfljFspX1\nrKhqZMmK2vVu1tDa1s5bi6o2+vNHHTCBKROHkZvz6QlZWw0p8rpgaROcfPJp3Hbbzfz7v1+U6VL6\nhFWrKmloaNikYN5cjpz7oSiOOJavamDxijri8ThvL1oFwIJlNTS3tFFV20JHL96Hh+69LaOGDWLr\nYUWMH1mckTOko9jH/Y19nHr2cXo4clakNDS18ciLi3j0xeQTdAwvLaClrYPxWxez7+SR5OZksd02\npcTWOZaWnRUj33sTSxoADGcl1dzaTmV1EwuXVdPU0r5W2/sf19DY3Maij2uoaWglKxaj+2Hd9nXm\njt514jB2mZCYwnDcyGIGF+RSVpxPfq6BK0ndGc5K6qr7X+WDjzdtt1dhfjZbDyta67mODvjCHqOZ\nNmmEISxJm8hw1kbF43GWVtRTMiiP3bcfTlF+DhNGla61zoghhZSXFZCTnUVOtrNjSdKWYDhro5pa\n2mlt62DHscV869BJmS5HkgYMhzraqOr6xDXEJYPyMlyJJA0shrM2qqYznEsNZ0lKK8NZG1XjyFmS\nMsJw1kZVO3KWpIwwnLVRXceciwxnSUonw1kb1XXMebDhLEnpZDhrozzmLEmZYThro2oaWsjJjnXd\nJ1mSlB5+6motdY2t3PnXd1hd18yyztnBYlvwPsiSpJ4ZzgNcTX0LNQ0tPPjsQrKyYrw2f+Va7TuP\nG5qhyiRp4DKcB4iK1Y1c8/vXodt9k5dXNW50/R8dtxs7GcySlBGG8wDwwDMLePSlT++pvObs69JB\nedQ1tjJ+ZAlDS/I5eOo2jBw2iMGFuZkqVZKE4dzvtba1dwVzXk4Wvzztc5QOzs9wVZKkZAznfm5V\nbTMA++6yNScftlOGq5EkbQovpernVtUkwnlISUGGK5EkbSrDuZ9bVdMEwNASd2VL
Ul9hOPdza3Zr\nDy125CxJfYXh3M9VdY6chzlylqQ+wxPC+pG6xlaq61u47g/zIB6nrb2DtxdVATDUY86S1GcYzn1U\na1sHdY2tzF+6mrb2Dl6Y9zHvLl69wXV3GjeEQufHlqQ+w0/sPmJZRR1Vtc28tmAl9Y2tvPzOig2u\nN7Qkn713HsnOY8uYOKqUWAzycrPTXK0k6bMwnCOuta2dK+97jQ8+rtlg+w5jypg8YSgFeTkE25Yx\nevggRowooaKiNs2VSpK2FMM5YhYvr+UPzy6kuCgxxeY/3vqkq23HsUOYMKqEiaNKGV5awOjyQd4x\nSpL6IcM5gxqaWnnobx+Qkx1j+apGPlxeS1XnpU/r+sG/7cqUicPSXKEkKRMM5y2suaWd5VUNANQ2\ntPL+xzXkZCdGt4uX1/HS28uT/vyIskKGlRbw9Rk7UNB5rLi4KNfjxpI0gBjOn0FTSxvzFlaytKKe\nZRV1fLi8tmu6zJ4EY8oAqK5v4Yj9x1NeVkjZ4HyGFHs9siQNdIbzZmjv6GBFVSMX3f7SBtuHleSz\n+w7lADQ2t7HHDuVkdR4bLi7KY8KokrTVKknqewznXojH45x/y9/XGx1/+8uTyM3OIti2jML8HK8p\nliR9JqZIL6xY3dgVzDtsU0pWVoyzjp5iGEuStihTpRfmL6kG4OuHbM8h08ZkuBpJUn/ljS96YcGy\nxPSY229TluFKJEn9meHcC/OXVlOQl802IwZluhRJUj9mOG+i2oYWPq5sYOKoErKz7DZJUuokPeYc\nBEEWcDMwBWgGTgnDcGG39iOBC4E48OswDG9NYa0ZtWBZ4nizu7QlSanW0xDwCCAvDMPpwI+Bq9dp\nvwaYAewLnBcEQemWLzEaFixNhPN22/TbX1GSFBE9hfO+wGMAYRi+BExbp70VKAMKgRiJEXS/NH9p\nNVmxmBOISJJSrqdLqUqA7vcqbA+CICsMw47O5auBfwH1wINhGG74vobdlJcXb1ahmVBV28TjL35I\na3sHiz6pZcLoEsaMHpLpsjZJX+rnvso+Tj37OPXs42jqKZxrgO7/cl3BHATBtsCZwFigAfhNEATH\nhGH4h2Qv2JfuM/zgcwt55B8fdi3vsE1Zn6i/vLy4T9TZl9nHqWcfp559nB6b8wWop3CeA8wEHgiC\nYB9gXre2AqAdaA7DsCMIghUkdnH3G0tW1AGJ2zUW5ecwbqTfMCVJqddTOD8EzAiCYE7n8klBEBwP\nDA7D8PYgCO4G/h4EQROwALgrdaWmV2NzG28srKR0UJ73UZYkpVXScA7DMA6cvs7T73Vrvxa4NgV1\nZczyqgYeeGYhr75XAcD2Y/rVzgBJUh/gbBrreG7uR13BDHD8wdtnsBpJ0kDkjS+6icfjPDd3GQDX\nnrkvpYPzM1yRJGkgMpw7NTa3ccEtf6exuZ3xI4sNZklSxhjOwLKKOn46++Wu5ePclS1JyiDDGXjz\ng1Vdjy/9zl6MGTE4g9VIkgY6TwgDVqxuBOCSk/Y0mCVJGWc4AyuqEuFcXlaY4UokSRrAu7Xb2jt4\n9MUPqWts48NPaikpyqUwf8B2hyQpQgZsGj3xzyU89LcPupYnT3AWMElSNAzIcK6qbeYvcxYxuDCX\nc/5tCjlZWYwcVpTpsiRJAgZoOP/+mQU0t7Zz/CHbM3FUaabLkSRpLQPuhLBwcRUvvb2c8SOL2W/K\nyEyXI0nSegZUOLd3dPCbJ94jBnzziwFZsVimS5IkaT0DKpyffnUZyyrq2X/XkYwfWZLpciRJ2qB+\nf8y5rb2DZ19bRl1jK0+8spSi/ByOOnBipsuSJGmj+n04v/thFfc/Ob9r+RszdqCkKC+DFUmSlFy/\nDOeW1nba2juYv7Sa6/8wD4Av770tewTlTHB3tiQp4vpVONc0tHDrn94kXLya+DptX95nLIMLczNS\nlyRJvdEvwrmjI85bi1bxm8dDKlY3AbDbdsNp
aG5j3NbF7DlphMEsSeoz+kU4//ap+Tz5r6UA7LPz\nVnz70Enk5WZnuCpJkjZPnw/nT1Y18PSry8jOinHhCVO9REqS1Of1+euc//jcQjricb73tZ0NZklS\nv9Cnw3nhR9W8ElYwfmQJU4PyTJcjSdIWEend2h98XENjcxtxoLGpjer6FlbXNVNdl/j/mx+sAuDY\ngyYScypOSVI/EclwXr6qgf95aj7zFlYmXS8vN4u9Jm1FsO2QNFUmSVLqRS6cF35Uzc/v+VfX8t47\nbcXIYUUU5OVQNjiP0kF5lA3Op3RwHgV5kStfkqTPLHLpdvej73Y9/vev7+6oWJI04EQqnJeuqGNp\nRT0AN517AIX5kSpPkqS0iMzZ2h9X1nPxr18GoHRwnsEsSRqwIhHOLa3tXHT7S13LV373cxmsRpKk\nzIpEOK+5cxTAf5+zP/l5Tr0pSRq4IhHO73xYBcCFJ0z1BhWSpAEv4+G8qqap6/F2o0szWIkkSdGQ\n8XD+0c1/B2DfXbbOcCWSJEVDxsN5zaSbR+w/IaN1SJIUFRm/XikWizFxdAnDSgsyXYokSZGQ0ZHz\nOx9W0RGPZ7IESZIiJ6Ph/PzrHwEwoqwwk2VIkhQpGQ3nl95eDsBxB2+fyTIkSYqUjIVzQ1Nb12Ov\nbZYk6VMZC+df/eUtAMaPLM5UCZIkRVJGwrm2oYU33q8EYO+dvL5ZkqTuMhLOl939StfjL+45JhMl\nSJIUWWkP5+bWdlZWJ6bs/P6Rk9O9eUmSIi/t4fxm5+5sgKlBebo3L0lS5KU9nO974j0AZk4fl+5N\nS5LUJ6Q1nNs74qyuawFg3ykj07lpSZL6jLSG8x+fmd/12FnBJEnasLSGc1YscQ+qGdM8Q1uSpI1J\n+25tgMkThqZzs5Ik9SlpDeeq2sQlVNlZsR7WlCRp4EprOFdUNQKQnZ3R+21IkhRpaU3J1bXNAGw9\ntCidm5UkqU/JSdYYBEEWcDMwBWgGTgnDcGG39j2Bq4EYsAw4MQzDlo29XpzEMeeigqSblSRpQOtp\n5HwEkBeG4XTgxySCGIAgCGLAbcC3wzDcH3gKGJ/sxbKzEpvLcbe2JEkb1VNK7gs8BhCG4UvAtG5t\nOwCVwA+DIHgWKAvDMEz2Yu0dHeTmGMySJCXTU1KWADXdlts7d3UDDAemAzcAhwAHB0FwULIXa2uP\nk+WZ2pIkJdXTwd8aoLjbclYYhh2djyuBBWtGy0EQPEZiZP3Mxl6svb2D3OwsysuLN7aKthD7OPXs\n49Szj1PPPo6mnsJ5DjATeCAIgn2Aed3a3gcGB0EwsfMksf2BO5K92Ief1FJclEtFRe1nqVk9KC8v\nto9TzD5OPfs49ezj9NicL0A9hfNDwIwgCOZ0Lp8UBMHxwOAwDG8PguBk4P7Ok8PmhGH4aLIXKyvO\n77qcSpIkbVjScA7DMA6cvs7T73VrfwbYe1M3trq2mW23GtyrAiVJGmjSfuq0I2dJkpJLeziPLnfk\nLElSMmkPZ69zliQpubQn5Zp7OkuSpA1Lfzg7CYkkSUllYOSc7i1KktS3OHKWJCli0h7OMY85S5KU\nVNrDubrO65wlSUom7eE8avigdG9SkqQ+xWPOkiRFTNrDOdtwliQpKSchkSQpYtytLUlSxDhyliQp\nYtIezg3NbenepCRJfUraw3nroUXp3qQkSX2Kx5wlSYoYb3whSVLEOLe2JEkR49nakiRFTAZGzune\noiRJfYsnhEmSFDHu1pYkKWLSHs5NLU5CIklSMmkP55JBeenepCRJfYqXUkmSFDFpD2dJkpSc4SxJ\nUsQYzpIkRUz6jzmne4OSJPUxjpwlSYoYw1mSpIgxnCVJihjDWZKkiPGuVJIkRYwjZ0mSIsZwliQp\nYgxnSZIixnCWJCliMhDOnhEmSVIyjpwlSYoYw1mSpIgxnCVJihjDWZKkiDGcJUmKGKfvlCQpYhw5\nS5IUMYaz
JEkRYzhLkhQxhrMkSRGT/hPC0r1BSZL6GEfOkiRFTE6yxiAIsoCbgSlAM3BKGIYLN7De\nbUBlGIY/SUmVkiQNID2NnI8A8sIwnA78GLh63RWCIPgesAsQ3/LlSZI08PQUzvsCjwGEYfgSMK17\nYxAE04G9gF/h4WRJkraInsK5BKjpttzeuaubIAhGAhcDZ9KbYDbCJUlKKukxZxLBXNxtOSsMw47O\nx8cAw4G/AlsDRUEQvBOG4T3JXrC0tIjy8uJkq2gLsI9Tzz5OPfs49ezjaOopnOcAM4EHgiDYB5i3\npiEMwxuAGwCCIPgWMKmnYAaorm6goqJ28ytWj8rLi+3jFLOPU88+Tj37OD025wtQT+H8EDAjCII5\nncsnBUFwPDA4DMPb11nXE8IkSdoCkoZzGIZx4PR1nn5vA+vdvSWLkiRpIMvADGGeESZJUjLOECZJ\nUsQYzpIkRYzhLElSxBjOkiRFTPrD2fPBJElKypGzJEkRYzhLkhQxhrMkSRFjOEuSFDEZmCFMkiQl\n48hZkqSIMZwlSYoYw1mSpIgxnCVJihhPCJMkKWIcOUuSFDGGsyRJEWM4S5IUMYazJEkRYzhLkhQx\nGbifs+drS5KUjCNnSZIixnCWJCliDGdJkiLGcJYkKWKcvlOSpIhx5CxJUsQYzpIkRYzhLElSxBjO\nkiRFTPpPCPOMMEmSknLkLElSxBjOkiRFjOEsSVLEGM6SJEWM4SxJUsQYzpIkRYzhLElSxBjOkiRF\njOEsSVLEZGCGMKcIkyQpGUfOkiRFjOEsSVLEGM6SJEWM4SxJUsQYzpIkRYzhLElSxBjOkiRFjOEs\nSVLEGM6SJEVMBmYIS/cWJUnqWxw5S5IUMYazJEkRk5OsMQiCLOBmYArQDJwShuHCbu3HA+cAbcAb\nwBlhGMZTV64kSf1fTyPnI4C8MAynAz8Grl7TEARBIXAZ8PkwDPcDSoHDUlWoJEkDRU/hvC/wGEAY\nhi8B07q1NQGfC8OwqXM5B2jc4hVKkjTAJN2tDZQANd2W24MgyArDsKNz93UFQBAEZwGDwjB8sqcN\nDikbRHl58WYXrE1jH6eefZx69nHq2cfR1FM41wDd/+WywjDsWLPQeUz6KmA74OhN2eDq1Q1UVOT2\ntk71Qnl5MRUVtZkuo1+zj1PPPk49+zg9NucLUE+7tecAXwEIgmAfYN467b8C8oEju+3eliRJn0FP\nI+eHgBlBEMzpXD6p8wztwcArwHeA54GngyAAuD4Mwz+lqlhJkgaCpOHceVz59HWefq/b4+wtXpEk\nSQNc+idypGD/AAAFGklEQVQhcfpOSZKScoYwSZIixnCWJCliDGdJkiLGcJYkKWLSfz/ndG9QkqQ+\nxpGzJEkRYzhLkhQxhrMkSRFjOEuSFDHOECZJUsQ4cpYkKWIMZ0mSIsZwliQpYgxnSZIiJgMzhHlG\nmCRJyThyliQpYgxnSZIixnCWJCliDGdJkiIm/SeEeT6YJElJOXKWJCliDGdJkiLGcJYkKWIMZ0mS\nIsZwliQpYgxnSZIixnCWJCliDGdJkiLGcJYkKWIMZ0mSIsbpOyVJihhHzpIkRYzhLElSxBjOkiRF\nTNrDubgwL92blCSpT8lJ58bu/OkXibe2pXOTkiT1OWkdOQ8vK0zn5iRJ6pM85ixJUsQYzpIkRYzh\nLElSxBjOkiRFjOEsSVLEGM6SJEWM4SxJUsQYzpIkRYzhLElSxBjOkiRFjOEsSVLEGM6SJEWM4SxJ\nUsQYzpIkRYzhLElSxOQkawyCIAu4GZgCNAOnhGG4sFv7TOCnQBvw6zAM70hhrZIkDQg9jZyPAPLC\nMJwO/Bi4ek1DEAS5wDXADOBA4LtBEIxIVaGSJA0UPYXzvsBjAGEYvgRM69a2I7AgDMPqMAxbgReA\nA1JSpSRJA0hP4VwC1HRbbu/c1b2mrbpbWy1QugVrkyRpQEp6zJlEMBd3W8
4Kw7Cj83H1Om3FQFUP\nrxcrLy/uYRVtCfZz6tnHqWcfp559HE09jZznAF8BCIJgH2Bet7Z3ge2DIBgSBEEeiV3a/0hJlZIk\nDSCxeDy+0cYgCGJ8erY2wEnAVGBwGIa3B0FwGHAxiZCfHYbhLSmuV5Kkfi9pOEuSpPRzEhJJkiLG\ncJYkKWIMZ0mSIsZwliQpYnq6znmzOCd36m1CHx8PnEOij98AzgjD0LP/eqGnPu623m1AZRiGP0lz\nif3CJryX9yQxdXAMWAacGIZhSyZq7as2oY+PBC4E4iQ+k2/NSKH9QBAEewNXhmF40DrP9yr3UjVy\ndk7u1EvWx4XAZcDnwzDcj8TMbYdlpMq+baN9vEYQBN8DdiHxoabNk+y9HANuA74dhuH+wFPA+IxU\n2bf19F5e85m8L3BeEATO9rgZgiC4ALgdyF/n+V7nXqrC2Tm5Uy9ZHzcBnwvDsKlzOQdoTG95/UKy\nPiYIgunAXsCvSIzqtHmS9fMOQCXwwyAIngXKwjAM015h35f0vQy0AmVAIYn3sl82N88C4CjW/zzo\nde6lKpydkzv1NtrHYRjGwzCsAAiC4CxgUBiGT2agxr5uo30cBMFIEhPwnInB/Fkl+7wYDkwHbgAO\nAQ4OguAg1FvJ+hgSI+l/AW8C/xuGYfd1tYnCMPwjid3W6+p17qUqnLf0nNxaX7I+JgiCrCAIZgEH\nA0enu7h+IlkfH0MiOP4K/Dvw9SAITkxzff1Fsn6uJDHiCMMwbCMx+lt31KeebbSPgyDYlsSXzLHA\nOGCrIAiOSXuF/Vuvcy9V4eyc3KmXrI8hsas1Hziy2+5t9c5G+zgMwxvCMJzWedLHlcD9YRjek5ky\n+7xk7+X3gcFBEEzsXN6fxOhOvZOsjwuAdqC5M7BXkNjFrS2n17mXkuk7nZM79ZL1MfBK53/Pd/uR\n68Mw/FNai+zjenofd1vvW0AQhuGF6a+y79uEz4s1X4BiwJwwDM/NTKV91yb08bnA10mcr7IAOLVz\nT4V6KQiCcSS+rE/vvGpms3LPubUlSYoYJyGRJCliDGdJkiLGcJYkKWIMZ0mSIsZwliQpYgxnSZIi\nxnCWJCli/j+D9uNU0X3FCQAAAABJRU5ErkJggg==\n",
"text": [
"<matplotlib.figure.Figure at 0x113ce8588>"
]
}
],
"prompt_number": 20
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# HMM"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"training_data = random.choice([x[1] for x in weights_sequences], 1000, replace=False, p=[x[0] for x in weights_sequences])\n",
"NB_HIDDEN_STATES = 5\n",
"training_data = [x for x in training_data if len(x) >= NB_HIDDEN_STATES] #Removing too short sequences\n",
"len(training_data)"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# There is a silly un-feature in sklearn.hmm.MultinomialHMM where observations must span a continuous ranfe of integers.\n",
"# Because training_data is a subset of all the data, not every state is in it\n",
"# So we translate state numbers to other, continuous state numbers\n",
"translation = list(set([x for obs in training_data for x in obs]))\n",
"for i,obs in enumerate(training_data):\n",
" training_data[i] = [translation.index(x) for x in obs]"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 4
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"from hmmlearn.hmm import MultinomialHMM\n",
"hmm = MultinomialHMM(NB_HIDDEN_STATES)\n",
"hmm.fit(training_data)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 7,
"text": [
"MultinomialHMM(algorithm='viterbi',\n",
" init_params='abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ',\n",
" n_components=5, n_iter=10,\n",
" params='abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ',\n",
" random_state=<mtrand.RandomState object at 0x112468978>,\n",
" startprob=None, startprob_prior=1.0, thresh=0.01, transmat=None,\n",
" transmat_prior=1.0)"
]
}
],
"prompt_number": 7
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"test_data = random.choice([x[1] for x in weights_sequences], 200, replace=False, p=[x[0] for x in weights_sequences])\n",
"for i,obs in enumerate(test_data):\n",
" def translate_or_None(x):\n",
" try:\n",
" return translation.index(x)\n",
" except ValueError:\n",
" return None\n",
" test_data[i] = [translate_or_None(x) for x in obs if translate_or_None(x) != None]\n"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 10
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"confusion_matrix = zeros((2,2))\n",
"for obs in test_data:\n",
" if obs[-1] == len(translation)-1: # BUY\n",
" actual = 1\n",
" obs = obs[:-1]\n",
" else:\n",
" actual = 0\n",
" hidden_states = hmm.predict(obs)\n",
" for i in range(0, NB_HIDDEN_STATES):\n",
" hmm.startprob_[i] = 1 if i == hidden_states[-1] else 0\n",
" next_states, foo = hmm.sample(100)\n",
" if len(translation)-1 in next_states:\n",
" predicted = 1\n",
" else:\n",
" predicted = 0\n",
" confusion_matrix[actual,predicted]+=1\n",
"confusion_matrix\n",
"#test_seq = weights_sequences[-1100][1][:-1] #ends in 250\n",
"#array(test_seq).reshape(-1,1)\n",
"#training_data[11][-1]\n",
"#hmm.predict(training_data[15])#[:-1])\n",
"#hmm.eval(array(test_seq).reshape(-1,1))\n",
"#hmm.predict_proba?\n",
"#hmm._generate_sample_from_state(3)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 11,
"text": [
"array([[ 162., 14.],\n",
" [ 22., 2.]])"
]
}
],
"prompt_number": 11
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"not_buy = [x[1] for x in counts_sequences if x[1][-1] != final_state]\n",
"buy = [x[1] for x in counts_sequences if x[1][-1] == final_state]"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 3
},
{
"cell_type": "code",
"collapsed": true,
"input": [
"import seaborn as sns\n",
"buy_hist = hist([x for obs in buy for x in obs],final_state, color=\"#6495ED\", normed=True, alpha=.5, histtype=\"stepfilled\")[0]\n",
"not_buy_hist = hist([x for obs in not_buy for x in obs],final_state, color='#F08080', normed=True, alpha=.5, histtype=\"stepfilled\")[0]\n",
"legend(['Buy', 'Not buy'])\n",
"savefig('Buy_not_buy_distribs.pdf')"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "display_data",
"png": "iVBORw0KGgoAAAANSUhEUgAAAe4AAAFVCAYAAAApGgzgAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3XmYXXd95/n3Xapu7Ztc8i55/4EhdtsYMDZxcLCBBhzc\nzUMmPD2EsSFhSQiZIU8PHaZp8pBtwgPdk04MHQfIpAlhcIgJdrCNAYOxjOVFtuVNP222JZW2Uqn2\n7a7zR5XkkihVSVW36uqU3q/n8WOd5XfOt373VH3u+d1zz0lVKhUkSVIypGtdgCRJOn4GtyRJCWJw\nS5KUIAa3JEkJYnBLkpQgBrckSQmSnWthCCEN3AZcBkwCH44xbpux/L3A/wlUgH+IMf7lfG0kSdLC\nzXfGfTNQH2O8Bvg08MVDC0IIGeDPgLcCbwI+HkJYNd0mN1sbSZK0OPMF97XAvQAxxvXAVYcWxBhL\nwKtijMNAN5AB8tNt7pmtjSRJWpz5grsNGJoxXZoeCgcgxlgOIfx74EngAWB0vjaSJGnh5vyMm6kA\nbp0xnY4xlmeuEGP85xDCncDfAb95PG2OVqlUKqlU6riLliQp4RYcevMF9zrgJuCOEMLVwMZDC0II\nbcBdwI0xxnwIYRQozdXmWFKpFL29wwv8EXQ8urtb7eNlYD8vvWr1cT6fByCbzZJOOyg4k8fx0uvu\nbp1/pWOYL7jvBG4MIaybnr4lhPB+oCXGeHsI4RvAgyGEAvA08I3p9Y5os+DqJGmJPHPv3XSmU9S9\n5nLOPf+CWpcjHbc5gzvGWAE+dtTszTOW3w7cPkvTo9tI0kmls7mZc3M59ta6EOkEOT4kSVKCGNyS\nJCWIwS1JUoIY3JIkJch8V5VLknTCSqUSg4MDVd1me3sHmUymqttMIoNbklR1g4MDfP/xgzS3dFRl\ne6MjA7zzKujqWlWV7SWZwS1JWhLNLR20tnct2/42bHicz372P3H++RdQqVQoFAr8wR98mosvDstW\nw3IwuCVJK0IqleKqq97A5z73JwA89tgj3H77V/iLv/ivNa6surw4TZK0IlQqFSqVyuHpoaEhOjs7\n+cQnPsKOHS8B8N3v/hNf+9rf8L3v3cltt/0/wNTn8R/84G9QKBRqUfYJM7glSSvGhg2P84lPfISP\nfvRW/vzPP88NN7xteknq8P9TqRQ33PB2Hnzwp5TLZdav/zlXXvl66urqalX2CXGoXJK0Ylx55VX8\n0R/9KQA7drzMRz5yC2vWrD28/NBZeVNTE1dccSXr1/+c73//Lm699bdqVfIJM7glSUtidKR6Xweb\n2taJXejW2dlFKpWipaWVAwd6WbNmLZs3b6K7ezUAN910M9/4xv/L0NAgF1xwUdVqXWoGtySp6trb\nO3jnVdXcYhft7XN/tSyVSh0eKk+nM4yNjfKJT/zvdHR08qUv/d+sXn0G3d3dpFJTw+aXXvpaenp2\n8d73/no1C11yBrckqeoymcyyf+f6iitex113/WDWZW9607W/MK9cLtPU1MgNN7x9qUurKi9OkySd\ncnbv7uFDH/pfeetb30ZTU1OtyzkhnnFLkk45Z511Nl//+jdrXcaCeMYtSVKCGNySJCWIQ+WSpKrz\n6WBLx+CWJFXd4OAAww8+QEdLS1W2NzAyAtdd79PBcKhckrREOlpa6Gprq8p/x/MGYMOGx3nHO97C\n/v37Ds/78pf/O/fcc/cx2wwNDXH//ff+wvzf/M3/ZWE/9DIwuCVJK0ZdXT1/+qd/dHj60M1WjmXr\n1s089NCDS11WVRnckqQVIZVKceWVV9He3s53vvPtX1j+j//4DX7rt36Tj370Vr785f8OwN///dfY\nsOFx7rrru0esWy6X+fznP8vv/u5v87nPfYbJyUm+//27+MpX/gqAyclJ3ve+X2N0dIRf//X3HH4q\n2W23/SU//vEPl/TnNLglSSvCofD81Kc+zbe//U16en
YdXrZt21YeeOCHfOUrX+crX/kau3bt4OGH\nH+KDH/wQV155FTfddPMR2yoUCnzwg7fyV3/1N6xefTp33XXnrGfvzc0tXH75FTzyyMOUSiXWr/85\n1133liX9OQ1uSdKK0tbWzu/93qf44z/+L5TLZQB27HiJ17zmlw5flX755Vfw4ovbjrmNzs4u1qw5\nD4DXvvYydux4+ag1Xnnu90033cw999zN+vU/5/WvfyPZ7NJe921wS5KWxMDICAeHhqry38DIyAnt\n+9prf5k1a9YevjBt7drzeP75ZymVSlQqFZ566knWrFlLOp0+fKZ+RO0DA4fP2J966gkuuugS6uvr\n6es7AECMmw6ve9ll/4aenl3cffe/8O53v2eh3XXc/DqYJKnq2ts74LrrKVVpe62HtjmHVCp1xHD2\nJz/5KZ544jEALrjgIn71V2/gYx/7EJVKmcsuu4Jf/uW30Nu7n+3bt3LHHd/ife/7jVf219rC3/7t\nV9i/fx9nn30O73rXrzE+Ps6dd/4TH//4hwnh1TQ3v3Kl+9ve9g5+8pMfcd5551fpJ57j55ztnUYN\nVHp7h2tdw4rW3d2Kfbz07OelV60+3v6j+zg3l2Pv2Ws49/wLqlDZyuFxfOK++c3/SUdHB+98503H\ntX53d+vcl7vPwaFySZIW4U/+5HM8/vijvO1t/3ZZ9udQuSRJi/CZz3xuWffnGbckSQlicEuSlCAG\ntyRJCWJwS5KUIAa3JEkJYnBLkpQgBrckSQlicEuSlCAGtyRJCWJwS5KUIAa3JEkJYnBLkpQgBrck\nSQky59PBQghp4DbgMmAS+HCMcduM5e8HPgkUgWeAj8cYKyGEDcDg9GrbY4wfWoriJUk61cz3WM+b\ngfoY4zUhhDcCX5yeRwihEfg88NoY40QI4ZvAu0MI9wPEGK9fwrolSTolzTdUfi1wL0CMcT1w1Yxl\nE8CbYowT09NZYBy4HGgKIdwXQvjRdOBLkqQqmC+424ChGdOl6eFzYoyVGGMvQAjhE0BzjPGHwCjw\nhRjj24GPAv9wqI0kSVqc+YbKh4DWGdPpGGP50MR0IP8FcBHw3unZm4GtADHGLSGEPuBMoGeuHXV3\nt861WFVgHy8P+3npVaOP9zXnaMrl6Ops8jWbhX1y8povuNcBNwF3hBCuBjYetfx/MDVk/u9ijJXp\nebcwdTHb74QQzmLqrH3PfIX09g6fSN06Qd3drfbxMrCfl161+nh0dJKxIhzsH6PZ1+wIHsdLbzFv\njOYL7juBG0MI66anb5m+krwFeBy4FXgQ+HEIAeC/AV8Fvh5CePBQm5ln6ZIkaeHmDO7ps+iPHTV7\n84x/Z47R9AOLKUqSJM3Oi8YkSUoQg1uSpAQxuCVJShCDW5KkBDG4JUlKEINbkqQEMbglSUoQg1uS\npAQxuCVJShCDW5KkBDG4JUlKEINbkqQEMbglSUoQg1uSpAQxuCVJShCDW5KkBDG4JUlKEINbkqQE\nMbglSUoQg1uSpAQxuCVJShCDW5KkBDG4JUlKEINbkqQEMbglSUoQg1uSpAQxuCVJShCDW5KkBDG4\nJUlKEINbkqQEMbglSUoQg1uSpAQxuCVJShCDW5KkBDG4JUlKEINbkqQEMbglSUoQg1uSpAQxuCVJ\nShCDW5KkBDG4JUlKEINbkqQEMbglSUqQ7FwLQwhp4DbgMmAS+HCMcduM5e8HPgkUgWeAjwOpudpI\nkqSFm++M+2agPsZ4DfBp4IuHFoQQGoHPA2+JMb4ZaAfePd0mN1sbSZK0OPMF97XAvQAxxvXAVTOW\nTQBvijFOTE9np+ddC9xzjDaSJGkR5gvuNmBoxnRpevicGGMlxtgLEEL4BNAcY7x/rjaSJGlx5vyM\nm6kAbp0xnY4xlg9NTAfyXwAXAe89njbH0t3dOt8qWiT7eHnYz0uvGn28rzlHUy5HV2eTr9ks7JOT\n13zBvQ64CbgjhH
A1sPGo5f+DqeHxfxdjrBxnm1n19g4fd9E6cd3drfbxMrCfl161+nh0dJKxIhzs\nH6PZ1+wIHsdLbzFvjOYL7juBG0MI66anb5m+krwFeBy4FXgQ+HEIAeC/zdZmwdVJkqQjzBnc02fR\nHztq9uYZ/84co+nRbSRJUhV40ZgkSQlicEuSlCAGtyRJCWJwS5KUIAa3JEkJYnBLkpQgBrckSQli\ncEuSlCAGtyRJCWJwS5KUIAa3JEkJYnBLkpQgBrckSQlicEuSlCAGtyRJCWJwS5KUIAa3JEkJYnBL\nkpQgBrckSQlicEuSlCAGtyRJCWJwS5KUIAa3JEkJYnBLkpQgBrckSQlicEuSlCAGtyRJCWJwS5KU\nIAa3JEkJYnBLkpQgBrckSQlicEuSlCAGtyRJCWJwS5KUIAa3JEkJYnBLkpQgBrckSQlicEuSlCAG\ntyRJCWJwS5KUIAa3JEkJYnBLkpQgBrckSQlicEuSlCDZuRaGENLAbcBlwCTw4RjjtqPWaQLuB26N\nMcbpeRuAwelVtscYP1TtwiVJOhXNGdzAzUB9jPGaEMIbgS9OzwMghHAV8BXgLKAyPa8BIMZ4/ZJU\nLEnSKWy+ofJrgXsBYozrgauOWl7PVJDHGfMuB5pCCPeFEH40HfiSJKkK5jvjbgOGZkyXQgjpGGMZ\nIMb4MEAIYWabUeALMcavhhAuBu4JIVxyqM2xdHe3nnDxOjH28fKwn5deNfp4X3OOplyOrs4mX7NZ\n2Ccnr/mCewiY+eql5wtgYDOwFSDGuCWE0AecCfTM1ai3d3iezWoxurtb7eNlYD8vvWr18ejoJGNF\nONg/RrOv2RE8jpfeYt4YzTdUvg54J0AI4Wpg43Fs8xamPgsnhHAWU2ftexZcoSRJOmy+M+47gRtD\nCOump28JIbwfaIkx3n6MNl8Fvh5CePBQm+M4S5ckScdhzuCOMVaAjx01e/Ms610/499F4ANVqU6S\nJB3BG7BIkpQgBrckSQlicEuSlCAGtyRJCWJwS5KUIAa3JEkJYnBLkpQgBrckSQlicEuSlCAGtyRJ\nCWJwS5KUIAa3JEkJYnBLkpQgBrckSQlicEuSlCAGtyRJCWJwS5KUIAa3JEkJYnBLkpQgBrckSQli\ncEuSlCAGtyRJCWJwS5KUIAa3JEkJYnBLkpQgBrckSQlicEuSlCAGtyRJCWJwS5KUIAa3JEkJYnBL\nkpQgBrckSQlicEuSlCAGtyRJCWJwS5KUIAa3JEkJYnBLkpQgBrckSQlicEuSlCAGtyRJCZKtdQHS\nyWjLhsfJFPK0rj2P7jPOqnU50opULpfZ+uzTAFz4msvIZDI1rigZPOPWcfnn+x7h2/c/x+at22td\nyrLIDA1ycQoGD/TVuhRpxSqXy+R6dtK0u4disVjrchLD4NZxaWjuov30i6jUuhBJOsXNOVQeQkgD\ntwGXAZPAh2OM245apwm4H7g1xhiPp40kSVqY+c64bwbqY4zXAJ8GvjhzYQjhKuBB4Hw4fDI2ZxtJ\nkrRw8wX3tcC9ADHG9cBVRy2vZyqo4wm0kSRJCzRfcLcBQzOmS9ND4QDEGB+OMe46kTaSJGnh5vs6\n2BDQOmM6HWMsL0Eburtb51tFi7SYPm5uyZFprKezo/mUeK32t+RoaszR0dF0wj/vqdA/tVaNPt7X\nnKMpl6Or88Rf41PBcvRJsVikv7GeTDpNd3cruVxuyfe5EswX3OuAm4A7QghXAxuPY5sLaUNv7/Dx\nrKYF6u5uXVQfj45Mkknn6U+NnhKv1cjIJGPlNAOVsRP6eRfbz5pftfp4dHSSsSIc7B+j2dfsCMt1\nHBeLRcbG82RSaXp7h8nl8ku+z5PFYt4YzRfcdwI3hhDWTU/fEkJ4P9ASY7z9eNssuDpJknSEOYM7\nxlgBPnbU7M2zrHf9PG0kSVIVeNGYJEkJYnBLkpQgBrckSQlicEuSlCAGtyRJCWJw
S5KUIAa3JEkJ\nYnBLkpQgBrckSQlicEuSlCAGtyRJCWJwS5KUIAa3JEkJYnBLkpQgBrckSQlicEuSlCAGtyRJCWJw\nS5KUIAa3JKmmDvT188Cjm9nwzJZal5IIBrckqaYmi2UqLRcyPFGudSmJYHBLkpQgBrckSQlicEuz\nKBaL7N53gM3bXq51KZJ0BINbmkWxWGBwMkdPf60rkaQjGdySJCWIwS1JUoIY3JIkJYjBLUlSghjc\nkiQliMEtSVKCGNySJCWIwS1JUoIY3JIkJYjBLUlSghjckiQliMEtSVKCGNySJCWIwS1JUoJka12A\nJEkz3f2jxyinc1x7+dms6lpV63JOOga3JOmkUsq2Q8NpFAqFWpdyUnKoXNIpK5/PMzQ0yOTkZK1L\nkY6bwS3plLVjbz+Pbi3wzKYXa12KdNwcKpd0yspksjS3dZJKVWpdik7Q9mc3Up6cpPv8C2nv6qp1\nOctqzuAOIaSB24DLgEngwzHGbTOW3wT8Z6AIfC3G+LfT8zcAg9OrbY8xfmgJapcknaIq+/dyUWMj\n2/fvNbiPcjNQH2O8JoTwRuCL0/MIIdQBXwKuAsaAdSGEfwGGAWKM1y9Z1ZJ0kikWizy84QVSwDWv\nu5RMJlPrkrRCzfcZ97XAvQAxxvVMhfQhrwa2xhgHY4wF4CHgV4DLgaYQwn0hhB9NB74krWjlcpn9\no83sG22iXC7XuhytYPMFdxswNGO6ND18fmjZ4Ixlw0A7MAp8Icb4duCjwD/MaCNJkhZhvqHyIaB1\nxnQ6xnjoreTgUctagX5gM7AVIMa4JYTQB5wJ9My1o+7u1rkWqwoW08fNLTkyjfV0djSfEq9VS0sD\npUnINZ54v50K/VNr1ejjfc05JoYnaGzI0tGRXfQ28/k8zc31VCoVurtbqaurW3SNtbQcx3GxWKS/\nsZ6GXB3NTfW0Vhro7m6luTlHpb6OVatyx6xjX0sDTQ05ujpPjb9JM80X3OuAm4A7QghXAxtnLNsE\nXBxC6GTqLPs64AvALUxdzPY7IYSzmDoz3zNfIb29wydevY5bd3frovp4dGSSTDpPf2r0lHitRkYm\nyBbqmBwvnNDPu9h+1vyq1cejo5MUJ4uMTxQZGDix13k2+Xye0dE8UKG3dzjRwb1cx3GxWGRsPM/E\nZIHRsTyV4gS9vcOMjk5CqUBfX4G67Ox1jIxMMFZKcbA/mX+TFvNmY77gvhO4MYSwbnr6lhDC+4GW\nGOPtIYT/A7iPqSH3r8YY94QQvgp8PYTw4KE2M87SJUnSIswZ3DHGCvCxo2ZvnrH8buDuo9oUgQ9U\nq0BJkvQKLxqTJClBDG5JkhLEW57WwNanNpAaH6P57HM5Y83aWpejJbD5iUfJ5PO0rDmP088+p9bl\nSFpBPOOugdTQIJdk0owO9Ne6FC2RzPAQl2TSDPcfrHUpklYYg1uSpAQxuGtgf18/L+/uY9vLc96T\nRglWLpXY+vIe1j8VKRaLtS5H0gpicNfA6HiBUl0XQ+N+vX2lqlQgX2lgrNzmfaslVZXBLUlSghjc\nkqREGhkb44FHt/K9Hz5a61KWlV8Hk6RlUCgUAMhms6RSqRpXszKUK2UyzadTqWuvdSnzGh4Zpmf3\nfpqaGllzzlmL2pbBLbZsf5nRsUnWnN1NV2dnrcuRVqQ77n2MSraFX71iNWeecUaty9Ey2/bSHvaV\n11LZv33Rwe1Qudi+d4IDqQvZubu31qVIK1Zz6ypau7wZz6ksk8mSTi8+dg1uSZISxOCWJClBTorP\nuP/m2z9ncmSA37jpzbUu5YT8/J++RVMKzr3+Rrq6VtW6HC2zA319rHt6F+lKnpve+vpal3PSGh4e\nAqC5uaUqw4TSqe6kCO6WrjWkUnW1LuOErW5pYVUmg7fXODUVi0VoPpfyuNcGzOW7D2yCbCPvefMa\n2tpO/qt/daShoUF2PPwz8qS48h3vqnU54iQJ
bi3MoTtypVIpv16ik1ZTazupbGuty9ACFYtFzslk\n6CuVal2KphncCfbjh5+mP9/MWa2TXPv6X6p1OZKkZeAHTglWl2ui8/QLyWRztS5FkrRMDG5JkhLE\n4JYkKUEMbkmSEsSL0yRJqoLePbsZ3LeXpo4OzjrvgiXbj2fcCbZ/83McfOERJsbHFryNSqVCoZCn\nkJ+sYmWSdOoZ3N1DyE8wvrtnSffjGXeCdU4MsTrbxJ6JzIK3MTg4QOrpnzGYfYazfuWqKlYn1c6z\nD/6ExmKe7DlrWXtJqHU5K0LP3gPsfDSyZnUzF5znw1JqyTNucXpbO6uaW2pdhlQ1jZUyl7S0UJgY\nr3UpJ7VyuczmjU+yeeOTh2/odCwTBSg2X0T/0MJH+FQdBrekFWlv70E2vLCTuO3lWpdy0iqVSjT0\n7KKhZxcl74xWNTt69rHuic3s6NmzJNs3uCWtSBOFCuWGcxgcyde6FJ1ihsbLjDdcSG/f8JJs38+4\nV5hyuUylUiGVSvkkJklagQzuFea5h35K68gwo52reM2brq11OZK0Ih3o66NYLNLZ0UEut7y3nfaU\nbNq+fb388OfP89NHNta6lEVJV6A9nWGwf6DWpUjSivXTDTtZt7nMziX6HHsuBve0kfEx8g1rGc4n\n+4EdAwODHJxo4qV9Xk0rSUsll2ukqaU2z5c3uFegVDpNKuVLK0krkX/dJUlKEINb0pIZHBxgbHR4\n3pt7JN3enTvY/rOfMLj5iVqXsiADQ8M8+ewW9uzrrXUpOg4Gt6Ql8+L999K17XEmJ1b23bbGRke4\nOFdPB8Val7IgQ6N5DqYvYMee/lqXkijlcplt6x7k6fu+z+Tk8j3vweCeVi6XKRYLlErJ/MWTTkat\nTc20eztdrWDdaeimvKyjSn6Pe9quzS/QumeCXWODcP1ral2OFuHO+x4hlannhqsvpqWltdblSKec\nb931EKlMA++6Lvg7uAQM7mnpVJpzV62iv+TtERdi1/ZtTO7cwWR9PZfW+MYv5boOUvXt3ntZqpG6\n5m5S9S1UKpVal7IiOVSuqpgcHeGSxhy5U+i53n29+xns3U2xUKh1KdKK0LdjG9t++mMGXnq+1qWc\n1DzjXoTtO/YyUN9EqfNl3tC1qtblaJmNPvs0F43n2Np+Rq1LOWGbtrzI6HiBteecxmldXbUuRwu0\ne+9+9uwfoKu9ifPXJv8Z2en8BJc0dvBE+WCtSzmpecY9h1KpxOann2Tz07M/q7acypJt6GBi0gva\nlkKlUuGR7/x/PHHntxkfP/nuBJerr6cp11DrMo5bsVhkfHycfD7Py715+jMX0rPnQFX3USgUGB8f\np+AoxLxe3PQCW59+kpHhoQVvY8feAfozF7Jj/2gVK9PJzjPuOZRKJXY/8jjlSprm7jM4+6wza11S\nTeze8RKlZx7iYN1znPeWy5d13x3pFKliiYP9/Zzd2Lis+15ptj61gba+/fTV5aBhzZLsY8tjj9Ax\nPMhASxuXXnvdkuxjpaj07CQ3OMJdW/u45OI1vO6yS2pdkqokPzrC2LaNjBZ30pHNc84FF1V1+3MG\ndwghDdwGXAZMAh+OMW6bsfwm4D8DReBrMca/na9N0pSph/om8vlT96K1UqnERR3tvFisX/Z99x4c\noZJpZ/vmvZx15pmHH1maSqWWvZbjsXXjRrY+9TzZkSFytC3rvh976gV6BtK0109w/TW/+AYrm8mQ\nniyzpecgmYsvoXkJasjV1ZGeLPPcnv2MNm8iSYPw5XKZoaFBUqkU7e0dy7LPYhnSrecy4QDFgmUP\n7KB/f6Tn4lZaW5ppa6vN/cNnesPp3ZAZIwc07NrBj3YdpJiup+nMC6uy/fnOuG8G6mOM14QQ3gh8\ncXoeIYQ64EvAVcAYsC6E8D3gzUButjYrUSqVpn/Hi2xpqePiy6+odTlVNzIywuDAAKlK9b6juPHh\nn7Frew+V
jk7e9e63z7luKp0ik60nk83ywmPr6X/mOXan6nj7+26mrfXYwTjw9E/o6aunL1zKBa96\nddVqP5bBgwfp3fkyDPfRNpQnVX86mWwdTIxRLBZZ9+hT7B3KckZHZtZQrYYyGVq6LyQ9tvWY65Qq\nKVbnWhh96VkOHOyj+6K5v2O9+eknoVzmvNf8EvX1x/fGrVRJkW05g3wpWZ/EjYwMc/fPd1MpjPKB\nX3tD1bZbyOfp3bubwQN76Ti3u2rb3XDvvzKyu5eDazKcvsCsKhQKFAqFxN7ZLpPOcPXqVeweGuTh\nrRVaeZl3XHcZLz7/LJNDgzyzZ4TWrm5uuPYystnlH2AeGRtncKRAb9s5nH1+9b5mPN9v1rXAvQAx\nxvVMhfQhrwa2xhgHY4wF4CHguuk29xyjzaz6dm5lYN/OEy7+2U1b2fBMZGS0+p/vVCqV4/oqQ3tL\nK69Jwd4nNy7rnXOWy/qNL/LY9hKp+lf+MvQdPMhDj2/i0adeWNA2C/39XNzcTV/fJKOjozz34/vZ\n+MAP521Xn8lwettqqFvN8Mjcr3lXUxONhTSPP7VpQTWeqAO7d3HuwEHWVlJUSFFXnyOdnvr16t27\nh9atz5Pb/SLpuiM/E8/n8zxy/w+481vf4ennjx24M+3YvIl//sa3+c73fsqu3Sf+SMFzOzp4VVcH\nmcor12b07dvHtsfWs23jU0esm+nZSeueHgYGBpblqz2FQoGBgX4OHuxjYmLiuNvl83n27dvHpriZ\nvfv2HZ5fObiT/c89Oe81Ent3bGfXD+9lcssGGluqO1IyODhI5tmNtO09vtf3eLVns1zS0Q7FXxwN\nLBaLDAwOsmPnLvb3zn4dw5anNvCT7/0ru3onyVeaqlLTrm1b2PbzhxjeNTXIuvEH9/Dsv/4LA/1L\ne7FZOjMVygf7+3n2hS0MvvwSoVJmeLDESLn9mMdupVJhyzMb2fLM08f99dHZsiGfz/N83MruvfuO\nmF/Xeg7FutW0tFX34uXUXL+MIYTbge/EGO+dnn4ZOD/GWA4hvBn43Rjjb0wv+yNgB3D1sdocaz9/\n/Z/+tPLyvj0UW3JMjg7T0dzM/qFRMrkm6scGIAXpzi6KxQK5unrGBodpbmhkfLRAtq6BofIQnZ2d\nwNRwVzqdplwukyJFIT9JNpsllU5TLldIZ9IUCnnq63OUS6XDT9Hq3d3DW151FU9sjzStbmfvjh46\nWlZxfmcXdXX17DrwMmOFIumWdkbGRuhc1Un64CCXnncpAJt372C0Lk8mU0c6m6G3b4BsQxuV/DCd\n7W2kUlB8AdaPAAAIZElEQVSuVEinM4zv3cfrL7mSBzY9yarVnZTKZSBFJpMmPzFOc30Dw5MTNDY3\nM9LbR66hmXKqQqFcprmlEVIp0ukMqQMHWXvGBTy8czOr2ppoSKVhYoJXr7mUh7c+R9fqDiYKUzU1\nNGYZH8uTTqeYHBunrr6JiclxcvUZzqGe0UwTLw3tpauj7fAvQaVSZnD3PrpPW8PFp53GtgO97Bs/\nSLlcJtd1ASP9Paxuz5ECxocG+TerTqNnZIyRCvSNjLBqdRflUglSKVKkKBfyjAyN05bJ8po1F/Gj\n+DyrTmvm7GKJ4VKKfZUira3NU69fqUQ6nSbXP0Bn1xqeP7iHtkyJV3WdybP9IxTT49TX11FfV0el\nMlVrqVyGVJqh8Syn12V51erTuf+Fjaxe3UapUp4aKs5kKRWLQIVSuTx1bJCiVMzTnMlSLBYpFkq0\npVKctepcfrJ9K6evbqE4vW46naFcKlIuV2geHeb0rjU8vKeHzlyJy9pPo6XrDIqlCqnp98T3xefo\naE7xqsYmeitN9IyP0NaUgXKJTDZDqVxm9dgYxabVbB8bZlXr9D6KRTLZDMViiVR6qv8qlTLpTIb0\n8DAXtHfzRP845dIAHR0d5CfGSY/nKTR2M5Yfoqu1nnQmS2VslPq6LJPFIh
SKXNKxioaW0wF4ePdu\nJid66WprZWJkhDevOYfn9+1nNNdAOp0mnU7TMTZKKZ/mufE83a1ZsqSYqKRIZ1M01k/1ff34CNlM\nlsliiQx1nN+5im1jRYbH+7mwuZ5SuY5N+QyNqSGy9fVkS0Vy6TRj5TLpuiyZdJZKpUylUiY/OER3\npo6XJlIUsxW62xpJpVOUgUq5TF19PXWVAoXRSYZKKTLZNA0N9QwPHOTS5hb2jGcYpcIZDbC6pZ2G\n5tPYt28bW0tlclloaGg8/DeiZXyMVfXNPD2RZbRvO2+/+AKe2T/BwVSK9sYSpFOkU2mmeh/K0/1f\nLpXIZKZem0w2MzXE3n+Q684+h00HRhnJ5CjkB0lnMlQyKSbGx3hT9yo29w4x0HoW6YkeWltbp/ff\nyIaJeopjvZy5qo3xiQmyDQ1QqZDJZCmXiof/Rk29/lPzoELzxARnNLTw5EQd44O7CV0t7OofoK6z\nG4YO0JWt58VyC5Dn9FXNVCqVV363Mhmax0Y5s7GZhvZz2N3fx65sMyMHtrJ6VQcNuQwTk1N/sicm\nxnltYwMvHRhidPVFDO7bxGldHZTyeTLpNMVUGipl6uvqGO8/yDVrzuWBbXspt69mTbGX0xob2The\ngHKRjtYWxicnqKvPUSoWyWaznFkqMjI6zr6OCxjreZ53hAtY99JeKu1nMjGym2xdlpamRipM1V+c\nnKShro66sVHOa++EhlVUSDM2OcbOgUEm27qp7N3E69ecww9f7iXd1EpnY4EUKbKZNOPDQ3Q2NHJg\nfIzGtnZW5yepz2bYUZn6+K0uk2F8cpL6hgaK+TzZbN1UP4yN0JlrYM+BfrJNXaTqGqhraGL1yE5S\nFdjRupbxvVt55yVrGRzN09A69Xs2NjHOhuFx2ju7Cb/0BnKVYd791tfT3d264M/75hs7GAJm3vYm\nPSOAB49a1goMzNNmVr/zZ3940nxg+eFl3NetVdrO780ybzl/joVajhoX28cfOY51fn8J97+UTuba\nlGwfWGT7/1CVKuB/q9J2TjbzDZWvA94JEEK4Gtg4Y9km4OIQQmcIoZ6pYfKH52kjSZIWYb6h8hSv\nXCEOcAvwOqAlxnh7COHdwGeZegPw1Rjjl2drE2PcvFQ/gCRJp5I5g1uSJJ1ckvV9DUmSTnEGtyRJ\nCWJwS5KUIAa3JEkJUtOHjKy0+5qfTEIIG5j6rj3AduDPgL8DysCzwO/EGL0ycQGmb+X75zHG60MI\nFzFLv4YQfgv4babu4//HMcZ/rVnBCXRUH18B3AVsmV58W4zxDvt44aZvWf01YC2QA/4YeAGP5ao5\nRh/vAu4GDn3TakHHcq3PuA/fCx34NFP3NdcihRAaAGKM10//9yGm7iv/hzHG64AU8J5a1phUIYT/\nCNzO1C8izNKvIYQzgE8A1wBvB/5s+l4HOg6z9PHrgC/NOJ7vsI8X7T8AvdPH7TuAv2bq76/HcvXM\n1sdXAl9c7LFc68d6HnEv9BDCvPc113G5HGgKIdzH1Gv8GeDKGOOD08vvAd4GfLdG9SXZVuDfA/9z\nenq2fi0B66bv4V8IIWxlalTp8eUuNqGO7uPXAZeEEN7D1Fn37wNvwD5ejDuAf5r+dxoo4LFcbbP1\n8euAsNhjudZn3G1M3SL1kNL08LkWZxT4Qozx7cBHgX84avkIUPtn3yVQjPGfmRrOOmTm7XqHmerX\nNl75mGLmfB2HWfp4PfAHMcZfYepjn//C1G2V7eMFijGOxhhHQgitTAXM/8WReeCxvEiz9PFngEep\nwrFc65A84fua67hsZjqsY4xbgD7g9BnLD91XXos383htY/b79bcC/ctZ1ApzZ4zxyUP/Bq7APl60\nEMK5wI+Bv48x/iMey1V3VB9/iyody7UObu9rvjRuYfp6gRDCWUwdCD8IIfzK9PJ/Czx4jLY6MU/O\n0q+PAr8cQsiFENqZegTus7UqcAW4N4
Tw+ul/38DUEKJ9vAghhNOBHwD/Mcb4d9OzPZar6Bh9XJVj\nudafcd8J3BhCWDc9fUsti1lBvgp8PYRwKJxvYeqs+/bpix6e55XPXrQwh67I/xRH9ev0lbh/CfyM\nqTfHfxhj/MWHJms+h/r4o8BfhxAKwB7gt6eHIO3jhftDpoZjPxtC+Oz0vE8Cf+mxXDWz9fHvA/91\nscey9yqXJClBaj1ULkmSToDBLUlSghjckiQliMEtSVKCGNySJCWIwS1JUoIY3JIkJcj/DzkTKbkZ\nG6MnAAAAAElFTkSuQmCC\n",
"text": [
"<matplotlib.figure.Figure at 0x10b334198>"
]
}
],
"prompt_number": 4
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"#Buyer specific states\n",
"[observed_states[i] for i in range(0, len(observed_states)) if buy_hist[i] > not_buy_hist[i] and \n",
" not_buy_hist[i]<0.02 and buy_hist[i] > 0.05]"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 6,
"text": [
"['apps.appl.mortgage', 'entry.apps', 'exit.apps']"
]
}
],
"prompt_number": 6
}
],
"metadata": {}
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment