Skip to content

Instantly share code, notes, and snippets.

@edouardklein
Last active August 29, 2015 14:15
Show Gist options
  • Save edouardklein/33f4be025f4d0b65c323 to your computer and use it in GitHub Desktop.
Save edouardklein/33f4be025f4d0b65c323 to your computer and use it in GitHub Desktop.
HMM
{
"metadata": {
"name": "",
"signature": "sha256:4d4b09fc52007a513388c088a50125d674ffd05d5be7e9d643dcf582d8e4d3dd"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Data import\n",
"This is just boilerplate code to import the data from the csv. The only interesting things happen at the beginning of the second cell, where we drop the states that bear too much information."
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import csv\n",
"import itertools\n",
"import pandas as pd\n",
"\n",
"#First pass let us create the set of all observed states\n",
"with open('Mike/10-21-2014 approve to not fund.csv','r') as f_not_approved,\\\n",
" open('Mike/approve to fund.csv','r') as f_approved:\n",
" approved = csv.reader(f_approved)\n",
" not_approved = csv.reader(f_not_approved)\n",
" #Discarding first three lines\n",
" [next(approved) for i in range(0,3)]\n",
" [next(not_approved) for i in range(0,3)]\n",
" observed_states = set()\n",
" for row in [x for x in not_approved][:14368]: # Ignoring lines after 14368 because of text overflow error in file\n",
" states = row[3].split('->')\n",
" observed_states.update(states)\n",
" for row in approved:\n",
" states = row[6].split('->')\n",
" observed_states.update(states)\n",
"observed_states = list(observed_states)\n",
"print(pd.DataFrame(observed_states)) "
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
" 0\n",
"0 txn.electronic\n",
"1 lists.mortgage renewal reminder\n",
"2 dme.boat 1.9 percent for a period - gen\n",
"3 ecom.alert - insufficient cif id - update moci\n",
"4 dme.rsp tfsa bonus rate offer\n",
"5 lists.new to mortgage contact\n",
"6 ct.resl_asktd_mort_eng \n",
"7 lists.chq_right_plan\n",
"8 dme.tdw active trader tech launch\n",
"9 entry.transaction\n",
"10 hist.open.business chequing account\n",
"11 ecom.alert - customer is being impersonated\n",
"12 ecom.cause of concern - service delays\n",
"13 apps.appl.mortgage\n",
"14 dme.mortgage renewal reminder\n",
"15 dme.new to chequing welcome dme\n",
"16 ecom.alert - counterfeit cheques\n",
"17 lists.fef target banner 2\n",
"18 lists.chq_early_tenure_engagement\n",
"19 ct.resl_specialist\n",
"20 ct_ivr.disconnect :: 11\n",
"21 ct.resl_mort_tool_eng\n",
"22 exit.transaction\n",
"23 ct_retail.c3.visa activity\n",
"24 txn.tc.bill payment.in-branch\n",
"25 ct_retail.c3.general acct info\n",
"26 lists.mortgage x-sell offer\n",
"27 ct_phoneoutbound.did not reach\n",
"28 ct.web_french\n",
"29 ct_ivr.direct.marketing\n",
".. ...\n",
"220 txn.tc.bill payment.no source available\n",
"221 ct_ivr.branch.info\n",
"222 ct_retail.c3.loan details\n",
"223 hist.close.business chequing account\n",
"224 exit.lifecycle\n",
"225 ct_phoneoutbound.self general :: 11\n",
"226 apps.exception denied\n",
"227 lists.uloc leads\n",
"228 ct.resl_mortgage_refinancer_eng\n",
"229 ct_retail.branch.create investment appt\n",
"230 lists.frao renewal leads\n",
"231 lists.mortgage offer\n",
"232 ct_retail.branch.attend other appt :: 11\n",
"233 lists.rsi onboard program\n",
"234 ct.resl_mortgage_renewer_eng\n",
"235 lists.mortgage renewal ebank\n",
"236 txn.tc.pre-authorized debit.no source available\n",
"237 ct.web_resl.td_lifeguide\n",
"238 hist.open.personal loan\n",
"239 dme.edb acct dormancy offer\n",
"240 entry.web\n",
"241 hist.close.tdfs prime loan\n",
"242 ct_retail.branch.attend investment appt\n",
"243 lists.new 2 resl heloc welcome call\n",
"244 ecom.cause of concern - other\n",
"245 ct_web.creditcard\n",
"246 ct_ivr_billpay\n",
"247 hist.open.proprietary master card\n",
"248 apps.recommend exception denied\n",
"249 hist.close.proprietary master card\n",
"\n",
"[250 rows x 1 columns]\n"
]
}
],
"prompt_number": 1
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"#Second pass to put the sequences observed states in a list\n",
"#Additional state 'approved'\n",
"final_state = len(observed_states)\n",
"#Removing \n",
"observed_states.remove('apps.approved')\n",
"counts_sequences = [] # [[count, [sequence]], ...]\n",
"with open('Mike/10-21-2014 approve to not fund.csv','r') as f_not_approved,\\\n",
" open('Mike/approve to fund.csv','r') as f_approved:\n",
" approved = csv.reader(f_approved)\n",
" not_approved = csv.reader(f_not_approved)\n",
" #Discarding first three lines\n",
" [next(approved) for i in range(0,3)]\n",
" [next(not_approved) for i in range(0,3)]\n",
" for row in [x for x in not_approved][:14368]: # Ignoring lines after 14368 because of text overflow error in file\n",
" sequence = []\n",
" for state in row[3].split('->'):\n",
" try:\n",
" sequence.append(observed_states.index(state)) \n",
" except ValueError:\n",
" #Silently drop removed states\n",
" pass\n",
" count = int(row[1])\n",
" counts_sequences.append([count, sequence])\n",
" for row in approved:\n",
" sequence = []\n",
" for state in row[6].split('->'):\n",
" try:\n",
" sequence.append(observed_states.index(state)) \n",
" except ValueError:\n",
" #Silently drop removed states\n",
" pass\n",
" sequence.append(final_state)\n",
" count = int(row[1])\n",
" counts_sequences.append([count, sequence])\n",
"nb_sequences = sum([x[0] for x in counts_sequences])\n",
"nb_sequences\n",
"weights_sequences = [[x[0]/nb_sequences,x[1]] for x in counts_sequences]"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 2
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Linear SVC\n",
"\n",
"We will run the linear SVC from scikit learn: http://scikit-learn.org/stable/modules/svm.html#classification\n",
"\n",
"The features are as we discussed : a vector of 250 components with the frequency of occupancy of each state."
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"def obs_to_density(obs):\n",
" answer,_ = histogram(obs, range=[0, final_state-1], bins=final_state, density=True)\n",
" return answer\n",
"X = array([obs_to_density(x[1]) for x in weights_sequences])\n",
"Y = array([1 if x[1][-1] == final_state else 0 for x in weights_sequences])\n",
"weights = [x[0] for x in weights_sequences]"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 3
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"I switched to l1-penalty to have sparse features to eliminate those that have too much information. We can switch back to l2 if needed. We try this for non weighted until the performance gets bad, then we'll try different techniques to get it back up.\n",
"\n",
"I display the features by order of importance"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"from sklearn.svm import LinearSVC\n",
"from sklearn import cross_validation\n",
"from sklearn import metrics\n",
"classifier = LinearSVC(loss='l1')\n",
"classifier.fit(X,Y)\n",
"sorted_coeffs = sorted(abs(classifier.coef_[0]), reverse=True)\n",
"[(observed_states[list(abs(classifier.coef_[0])).index(score)], score) for score in sorted(abs(classifier.coef_[0]), reverse=True)]"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 8,
"text": [
"[('entry.apps', 7.9141618063555059),\n",
" ('entry.lifecycle', 7.274091131277979),\n",
" ('apps.pending', 6.6197249279621682),\n",
" ('apps.approved with conditions', 5.3334831535216418),\n",
" ('exit.lifecycle', 5.1903168529208648),\n",
" ('apps.appl.mortgage', 4.6393011827982171),\n",
" ('exit.web', 4.5191442606366223),\n",
" ('exit.branch', 4.4713258905694104),\n",
" ('entry.web', 4.4147841679362188),\n",
" ('entry.transaction', 4.3376345002466206),\n",
" ('entry.branch', 4.3375464416278824),\n",
" ('ct_web.chequing_admin', 3.5156643526364637),\n",
" ('ecom.general comment', 3.052682850148384),\n",
" ('apps.rec. approve', 2.6926319761555804),\n",
" ('apps.conditional waiver', 2.5473697917340674),\n",
" ('ct_web.investment', 2.3812129719142479),\n",
" ('exit.transaction', 2.2608900390010955),\n",
" ('ct_ivr.account.info', 1.279775813201715),\n",
" ('exit.marketing', 1.0567176656407871),\n",
" ('exit.marketing', 1.0567176656407871),\n",
" ('ct_retail.branch.attend mortgage appt', 0.85667396120969841),\n",
" ('ct_retail.c3.general acct info', 0.76800508776539533),\n",
" ('lists.facebook custom audiences test', 0.74178171932626202),\n",
" ('ct.web_resl_mortcalc', 0.7310752021240563),\n",
" ('ct.resl_mort_calc_eng', 0.7310752021240513),\n",
" ('ct.web_resl_easy_web_mortgage', 0.71138815096387131),\n",
" ('ct_retail.c3.general acct changes', 0.69506504317848239),\n",
" ('ct_retail.c3.mortgage activity', 0.67481310829872188),\n",
" ('hist.open.personal demand chequing account', 0.65325643530514288),\n",
" ('ct_ivr_billpay', 0.60487650169330831),\n",
" ('ct_retail.c3.visa activity', 0.59684553007543162),\n",
" ('ct_retail.branch.attend other appt :: 11', 0.57552018326444576),\n",
" ('apps.appl.home equity line of credit', 0.52281653099664172),\n",
" ('txn.tc.bill payment.in-branch', 0.52037010183453047),\n",
" ('apps.exception denied', 0.51193330702676121),\n",
" ('exit.apps', 0.51047655967554184),\n",
" ('txn.web', 0.48316273543652205),\n",
" ('lists.direct deposit no offer', 0.41853396305196316),\n",
" ('entry.outbound', 0.39134321704965064),\n",
" ('entry.outbound', 0.39134321704965064),\n",
" ('ct_retail.branch.attend chequing appt', 0.39069078506107197),\n",
" ('ct.web_resl.productsandservices', 0.38873857146333968),\n",
" ('ct_web.contact us', 0.38221307957909739),\n",
" ('ct_retail.branch.create investment appt', 0.36305206036310689),\n",
" ('hist.open.personal demand savings account', 0.35256685984160091),\n",
" ('ct_retail.branch.create other appt', 0.34938300045148768),\n",
" ('ct.web_french', 0.3244153247939589),\n",
" ('ct_retail.branch.create mortgage appt', 0.31066998416938002),\n",
" ('txn.no source available', 0.28470316066159052),\n",
" ('ct_phoneoutbound.no sell', 0.25886920401156849),\n",
" ('ct_ivr.nonmembers', 0.23925006586216954),\n",
" ('txn.tc.bill payment.td abm', 0.2174586625822664),\n",
" ('txn.td abm', 0.19860034066941801),\n",
" ('apps.decline', 0.19350577585847523),\n",
" ('ct.web_resl_secure_personal_credit', 0.18959295083389496),\n",
" ('ct.resl_mortgage_calc_landing', 0.18256767427729523),\n",
" ('ct.resl_mobile_mort', 0.18228327779507636),\n",
" ('ct.resl_mobile_mort', 0.18228327779507636),\n",
" ('ct.resl_mobile_mortgage_fre', 0.17236327283382297),\n",
" ('ct_retail.branch.attend investment appt', 0.17171861972227861),\n",
" ('ct.resl_mort_first_time_home_buyer_eng', 0.16572298399421398),\n",
" ('ct_web_mortgage', 0.15922123520248282),\n",
" ('ct_web.chequing-information', 0.14659279981297596),\n",
" ('ct.resl_mortgage_refinancer_eng', 0.14445103462265446),\n",
" ('ct_retail.branch.create chequing appt', 0.14281729191453613),\n",
" ('txn.call centre', 0.14078733073710062),\n",
" ('lists.mortgage offer', 0.13404603791976602),\n",
" ('txn.tc.bill payment.telephone banking (ivr)', 0.12612119490320345),\n",
" ('hist.open.mutual fund account', 0.12481673996673705),\n",
" ('lists.uloc pa frao and champion', 0.12381645817790009),\n",
" ('ct_web.insurance :: 21', 0.12071805699992093),\n",
" ('hist.open.line of credit account', 0.11813380414353668),\n",
" ('lists.mortgage renewal leads', 0.11316252193703555),\n",
" ('txn.agent', 0.11284343483340681),\n",
" ('hist.open.proprietary visa card', 0.10748244307409137),\n",
" ('txn.electronic', 0.10728826087782084),\n",
" ('lists.uloc champion and frao offer', 0.106481116859269),\n",
" ('lists.new to chequing email', 0.10057815249797282),\n",
" ('ct_retail.branch.create debt appt', 0.098704036931606814),\n",
" ('txn.tc.bill payment.call centre', 0.098067837783999259),\n",
" ('apps.rec. decline', 0.097864248104138393),\n",
" ('lists.vmt_fee_rebate', 0.093129730588205298),\n",
" ('exit.inbound', 0.092755479245991229),\n",
" ('hist.close.line of credit account', 0.090192953042464427),\n",
" ('hist.open.retail mortgage', 0.089948853885927327),\n",
" ('ct_retail.branch.attend debt appt', 0.088119158003869061),\n",
" ('ct.chq_info_fre', 0.087107706366170753),\n",
" ('txn.tc.pre-authorized debit.no source available', 0.086644929133744664),\n",
" ('lists.resl retention - mid term attritors', 0.08375009600518879),\n",
" ('ct_ivr.transfers', 0.082438796100364739),\n",
" ('lists.rsp tfsa bonus rate offer', 0.081624723709582639),\n",
" ('exit.cei', 0.077867759624134072),\n",
" ('exit.cei', 0.077867759624134072),\n",
" ('hist.open.investment advice account', 0.071006896171371806),\n",
" ('ct_ivr.investments', 0.069668546861202496),\n",
" ('hist.close.personal demand savings account', 0.068984672081077447),\n",
" ('apps.recommend exception denied', 0.064956946587947609),\n",
" ('lists.marketing no offer 2010', 0.064488492818801146),\n",
" ('ecom.alert - insufficient cif id - update moci', 0.064410534700786642),\n",
" ('hist.close.mutual fund account', 0.063377420504047793),\n",
" ('ecom.history comment', 0.061306714481957215),\n",
" ('hist.close.personal demand chequing account', 0.057684723507677407),\n",
" ('entry.inbound', 0.054139476774566341),\n",
" ('lists.rsp tfsa bonus rate xsell', 0.053218118214119439),\n",
" ('lists.new to chequing welcome dm', 0.052987744002481629),\n",
" ('txn.in-branch', 0.052441661925992096),\n",
" ('hist.open.personal loan', 0.051752662015851277),\n",
" ('ct_retail.c3.cheque order', 0.049489311711317534),\n",
" ('lists.mortgage renewal ebank', 0.04944207981611172),\n",
" ('lists.mortgage renewal reminder', 0.048463995403741446),\n",
" ('dme.edb music access awareness', 0.04831936573550772),\n",
" ('ct_ivr.main_entry', 0.046641400186690322),\n",
" ('apps.rlse.home equity line of credit', 0.046340831212045019),\n",
" ('ct_retail.branch.create appt credit card', 0.046043104596160615),\n",
" ('ct_retail.branch.attend appt credit card', 0.046043104596159332),\n",
" ('lists.uloc leads', 0.041603399296745426),\n",
" ('hist.close.proprietary visa card', 0.038940000660960046),\n",
" ('ct.resl_mortgage_renewer_eng', 0.037323775024893258),\n",
" ('cei.4 - very likely', 0.036473047323627397),\n",
" ('lists.new to chequing obtm', 0.032493090603185823),\n",
" ('ct.web_resl_other', 0.032341218838421074),\n",
" ('ct_phoneoutbound.did not reach', 0.031904372110534047),\n",
" ('ct_ivr.transfer_call', 0.0318604794443441),\n",
" ('lists.new to mortgage contact', 0.029947452203866302),\n",
" ('ct.chq_info_eng', 0.029392939775459686),\n",
" ('lists.new to chequing welcome dme', 0.02929797147360258),\n",
" ('ct_phoneoutbound.call back', 0.029013120080317444),\n",
" ('txn.telephone banking (ivr)', 0.028833723834489559),\n",
" ('lists.frao renewal leads', 0.028686173264486248),\n",
" ('ecom.alert - free format', 0.028618748498266712),\n",
" ('ct_web.creditcard', 0.027545421660196506),\n",
" ('ecom.cause of concern - systems or equipment failure',\n",
" 0.027135569304244128),\n",
" ('cei.3 - somewhat likely', 0.026421475375185081),\n",
" ('hist.open.combined term deposit and savings account', 0.02530976433554943),\n",
" ('lists.mortgage x-sell offer', 0.024371342329440097),\n",
" ('hist.open.discount brokerage', 0.024287793066255999),\n",
" ('dme.infinite aeroplan visa preapproved', 0.022790560501943932),\n",
" ('lead.completed/closed', 0.022584693472517601),\n",
" ('ct_ivr.disconnect :: 11', 0.021042370525470554),\n",
" ('dme.rsp tfsa bonus rate offer', 0.016880544237347757),\n",
" ('hist.close.retail mortgage', 0.01661224511171569),\n",
" ('cei.5 - extremely likely', 0.014973236925320897),\n",
" ('txn.tc.bill payment.web', 0.014870579260589311),\n",
" ('dme.mortgage estatement communication', 0.01363415876732307),\n",
" ('ecom.cause of concern - customer service issue', 0.012642143666240018),\n",
" ('ct_web.chequing application', 0.012445547439463269),\n",
" ('lists.chequing accounts - right plan program', 0.011941593562744162),\n",
" ('hist.open.proprietary master card', 0.011757764175435901),\n",
" ('ct.web_resl_intelliresponse', 0.011221575624748514),\n",
" ('lists.mortgage estatement communication', 0.010559238204577801),\n",
" ('lead.active', 0.009756109637433839),\n",
" ('hist.close.tdaf prime loan', 0.0097189222275955826),\n",
" ('ct_retail.branch.create small bus bank appt', 0.0086732767455660678),\n",
" ('ct_retail.branch.attend small bus bank appt', 0.008673276745565825),\n",
" ('lists.prime plus 0.5 percent', 0.0085813338825387435),\n",
" ('ct_phoneoutbound.self general :: 11', 0.0082976534236117189),\n",
" ('ct.resl_asktd_mort_eng ', 0.0081466969407018586),\n",
" ('ct_ivr.direct.marketing', 0.0079573097863902507),\n",
" ('lists.rsi onboard program', 0.0071715433161215628),\n",
" ('hist.open.term deposit account', 0.0068768223579247813),\n",
" ('ct.resl_general_prod_eng', 0.0068673448504618963),\n",
" ('ct.resl_specialist', 0.0061793961617130589),\n",
" ('lists.heloc retention - mid term attritors', 0.0056405644988307829),\n",
" ('lists.mtg anniversary dme', 0.0050965282449595573),\n",
" ('hist.open.financial planner', 0.004502314189493423),\n",
" ('lists.new 2 resl heloc welcome call', 0.0039528191506182945),\n",
" ('lists.n2heloc', 0.0039528191506181835),\n",
" ('lists.fef target banner 1', 0.0036912355303566789),\n",
" ('dme.vmt_fee_rebate', 0.0032245889965944862),\n",
" ('ct_retail.branch.attend specialty appt', 0.0029704617285710826),\n",
" ('ct_retail.branch.create specialty appt', 0.0029704617285710613),\n",
" ('dme.mortgage renewal reminder', 0.002288716413817068),\n",
" ('hist.open.tdfs mortgage', 0.0017046113145280577),\n",
" ('ct.web_resl.td_lifeguide', 0.0017017221428085546),\n",
" ('ct.resl_mort_tool_eng', 0.0017017221428085247),\n",
" ('ct.wbe_resl.mortgagetool', 0.0017017221428085063),\n",
" ('dme.new to chequing welcome dme', 0.0012306198589794521),\n",
" ('hist.open.tdaf prime loan', 0.0011881846914284389),\n",
" ('txn.tc.bill payment.no source available', 0.00094273808850424314),\n",
" ('ct.web_resl.n2c', 0.00010935584619130081),\n",
" ('ecom.package - td private banking', 1.0079692072326174e-18),\n",
" ('dme.uloc pa frao and champion', 9.0801931945660996e-19),\n",
" ('hist.close.busines visa', 8.4703294725430034e-19),\n",
" ('ct_retail.c3.loan details', 5.1499603193061461e-19),\n",
" ('lists.mortgage elsewhere ofi leads', 4.5739779151732218e-19),\n",
" ('ct_ivr.branch.info', 4.5062152793928778e-19),\n",
" ('ct_phoneoutbound.sell mortgage', 2.1684043449710089e-19),\n",
" ('ct_phoneoutbound.sell mortgage', 2.1684043449710089e-19),\n",
" ('hist.open.tdfs prime loan', 1.8973538018496328e-19),\n",
" ('hist.close.tdfs prime loan', 1.3552527156068805e-19),\n",
" ('hist.open.busines visa', 1.1519648082658485e-19),\n",
" ('lists.fef target banner 2', 1.0842021724855044e-19),\n",
" ('lists.fef target banner 2', 1.0842021724855044e-19),\n",
" ('lists.fef target banner 2', 1.0842021724855044e-19),\n",
" ('lists.fef target banner 2', 1.0842021724855044e-19),\n",
" ('lists.chq_right_plan', 1.0164395367051604e-19),\n",
" ('ct_phoneoutbound.no sell mortgage', 6.9456701674852628e-20),\n",
" ('ecom.alert - o/s third party req to pay', 6.0986372202309624e-20),\n",
" ('ct.chq_small_biz_chq', 2.7105054312137611e-20),\n",
" ('lists.open a savings account', 1.3552527156068805e-20),\n",
" ('ct.resl_mort_affordability_calc_eng_fr', 6.7762635780344027e-21),\n",
" ('dme.boat 1.9 percent for a period - gen', 0.0),\n",
" ('dme.boat 1.9 percent for a period - gen', 0.0),\n",
" ('dme.boat 1.9 percent for a period - gen', 0.0),\n",
" ('dme.boat 1.9 percent for a period - gen', 0.0),\n",
" ('dme.boat 1.9 percent for a period - gen', 0.0),\n",
" ('dme.boat 1.9 percent for a period - gen', 0.0),\n",
" ('dme.boat 1.9 percent for a period - gen', 0.0),\n",
" ('dme.boat 1.9 percent for a period - gen', 0.0),\n",
" ('dme.boat 1.9 percent for a period - gen', 0.0),\n",
" ('dme.boat 1.9 percent for a period - gen', 0.0),\n",
" ('dme.boat 1.9 percent for a period - gen', 0.0),\n",
" ('dme.boat 1.9 percent for a period - gen', 0.0),\n",
" ('dme.boat 1.9 percent for a period - gen', 0.0),\n",
" ('dme.boat 1.9 percent for a period - gen', 0.0),\n",
" ('dme.boat 1.9 percent for a period - gen', 0.0),\n",
" ('dme.boat 1.9 percent for a period - gen', 0.0),\n",
" ('dme.boat 1.9 percent for a period - gen', 0.0),\n",
" ('dme.boat 1.9 percent for a period - gen', 0.0),\n",
" ('dme.boat 1.9 percent for a period - gen', 0.0),\n",
" ('dme.boat 1.9 percent for a period - gen', 0.0),\n",
" ('dme.boat 1.9 percent for a period - gen', 0.0),\n",
" ('dme.boat 1.9 percent for a period - gen', 0.0),\n",
" ('dme.boat 1.9 percent for a period - gen', 0.0),\n",
" ('dme.boat 1.9 percent for a period - gen', 0.0),\n",
" ('dme.boat 1.9 percent for a period - gen', 0.0),\n",
" ('dme.boat 1.9 percent for a period - gen', 0.0),\n",
" ('dme.boat 1.9 percent for a period - gen', 0.0),\n",
" ('dme.boat 1.9 percent for a period - gen', 0.0),\n",
" ('dme.boat 1.9 percent for a period - gen', 0.0),\n",
" ('dme.boat 1.9 percent for a period - gen', 0.0),\n",
" ('dme.boat 1.9 percent for a period - gen', 0.0),\n",
" ('dme.boat 1.9 percent for a period - gen', 0.0),\n",
" ('dme.boat 1.9 percent for a period - gen', 0.0),\n",
" ('dme.boat 1.9 percent for a period - gen', 0.0),\n",
" ('dme.boat 1.9 percent for a period - gen', 0.0),\n",
" ('dme.boat 1.9 percent for a period - gen', 0.0),\n",
" ('dme.boat 1.9 percent for a period - gen', 0.0),\n",
" ('dme.boat 1.9 percent for a period - gen', 0.0),\n",
" ('dme.boat 1.9 percent for a period - gen', 0.0),\n",
" ('dme.boat 1.9 percent for a period - gen', 0.0),\n",
" ('dme.boat 1.9 percent for a period - gen', 0.0),\n",
" ('dme.boat 1.9 percent for a period - gen', 0.0),\n",
" ('dme.boat 1.9 percent for a period - gen', 0.0),\n",
" ('dme.boat 1.9 percent for a period - gen', 0.0),\n",
" ('dme.boat 1.9 percent for a period - gen', 0.0),\n",
" ('dme.boat 1.9 percent for a period - gen', 0.0),\n",
" ('dme.boat 1.9 percent for a period - gen', 0.0),\n",
" ('dme.boat 1.9 percent for a period - gen', 0.0),\n",
" ('dme.boat 1.9 percent for a period - gen', 0.0)]"
]
}
],
"prompt_number": 8
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We cross validate with a 10 fold and draw the ROC and display the area under it."
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import seaborn as sns\n",
"print(cross_validation.cross_val_score(classifier, X, Y, cv=10).mean())\n",
"y_score = classifier.decision_function(X)\n",
"fpr, tpr, _= metrics.roc_curve(Y, y_score, sample_weight = weights)\n",
"auc = metrics.auc(fpr, tpr, reorder=True)\n",
"plt.plot(fpr, tpr, label='ROC curve (area = %0.6f)' % auc)\n",
"xscale('log')\n",
"legend()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"0.96818730086\n"
]
},
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 15,
"text": [
"<matplotlib.legend.Legend at 0x118168978>"
]
},
{
"metadata": {},
"output_type": "display_data",
"png": "iVBORw0KGgoAAAANSUhEUgAAAeoAAAFbCAYAAAAa+83qAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3XmclXXd//HXmRVGZlhkBGVRE7pScUkE11yzH1rdaqUJ\ndYtW6l1pWZp3au6SkankmlukZaGVpZm5hBreZgtpq/gVRBCRUVCWgWHWc35/nOE4wzIzwJyZLzOv\n5+NRnOuc73Wdz3zg4ft8r3PN90plMhkkSVKcCrq7AEmStGkGtSRJETOoJUmKmEEtSVLEDGpJkiJm\nUEuSFLGi9gYkSfJD4KPA2yGEvTYx5kbgWKAGOC2E8GKnVilJUi/VkRn1dGDCpl5MkuQ4YFQIYTRw\nJnBbJ9UmSVKv125QhxCeBZa3MeS/gHuax/4ZGJAkyZDOKU+SpN6tM76jHgYsarH9BjC8E44rSVKv\n11kXk6XW23ZdUkmSOkG7F5N1wGJgRIvt4c3PbVImk8mkUutnuyRpa2QyGZYuX0tjOg2Z7Iwpk8mw\n7pYOmUym+bnsY1o8zkDzPtnxby5bQ3FhQXY7nX0+3bxzy+Ou+zOdgaUraujXt5i6hiZW1zSQzmRI\np7Ovz3tjBYP7980915TOkM5keL2qmpraBkqKC1lRXdclfdp9l0G5x6kUtMyjdQ9TzfPPllGVSr33\nPKn3ZqipVBvPNe936ecP3OLQ64ygfhg4G5iRJMmBwIoQwltt7ZBKpVi6tLoT3lqbUllZbo+7gH3O\nv9h6nMlkqG9Is3ptA02ZzHtBlc4+TmegrqGJuoYm3l1VS1FhQS6Y6uqbWFVTT9/SItLNQZX9E9bU\nNvD6W9Vs16eYNWsbaGzKkGlO21y4Nm+8W13L2romSksKW53OrK1v6q62dEho43KnxsY0O25fRn1D\nE8cduHMuIVO5/9tECLZ8AWhqyjCwvDQXwOteT5GiIAW77lRBn5LOiL6u05Ffz/oZcDgwOEmSRcBl\nQDFACOH2EMKjSZIclyTJPGANcHo+C5akrZXJZFhT28hby2uoq2+isSnDu9W1ZNIZ5ry+guLCAlat\nqWPlmmyozn1jJRVlxdQ1pqnrojAsLS7MzdDWzeTW5VNhQQGFBWkAdhjYt9V+S1esZd9RlRQVrhuf\nat4/e6B1j1PNB08BZWUlrF3b8N5sMgX1DWnKy4opLyvZIPQK1oVoi7rWPa5taGL7ilLKSospLiog\n1Tw+lYKyPtnnCpqPV1CQoiCVorAg+1gbl+qm21xmYvqE3BPFNgvpqexz/nVmj+974hVmvvDGZu+X\nSmVntMMr+5FKQU1tI2V9ihhe2a85ELOztVRBigJSpApgRXUdA8pLqRzQlz4lhbkwqm9IU1FWkt2v\nOagKUlBQkKKwoIAdty+juKiAosKuW4/Kf8f5V1lZ3q2nviVpm9AypAeWl1LZvw8DK/owYod+FBWk\nqKlrZMigMspKi9h1xwpKiwspKS7Aa2rUnQxqST3Cqpp6Fr21mqZ0hhWr61i6Yi1LV6xlxep66uqb\nWFPbAMD7Rwzgm5/Zr5urlTrOoJYUvXQmw/P/rmLJOzU0NqVZ9PZqllfX0a9vMWvrGlm8bE2b+5eW\nFFJcWMBOg7fjkDFDu6hqqXMY1JKis7y6jt/PXsSTsxfR2NT2dTQlxQUUFxXQ0Jhmj10GssOAvuww\nsIzysmJGDe/PoPJSiosKu6hyqfMZ1JK63YrVdTz3ryX88g/z6VtaxNq6xg3GDCwvZb/3V3LIXkMp\nKiygrLSIQRV9uqFaqWsZ1JLyIp3JUL2mnner63KLXqRb/N5xJpMhDWTSGa5/4B+5/dbWNTJ0UBn9\n+hZz7IEjGb/XMBpq6/NS42GHjWe33UaRTqcZNmwE3/rWFZSVlQEwf/6rTJt2LcuWLSWdzjBhwnGc\ndtoXcvs+//xz3H337dTW1lJSUsx++43j7LPPzUudW+rVV+dx//33cdFFl3V3KRtVX1/P1Vdfxiuv\nvExFRX+uvPIahg7dcYNxM2c+wb33TiedbuLggz/EF794DgBVVUu45porWbFiBRUVFVx66VVUVu7A\nCy/M5qabrs/tv3DhQq688tsceujhueemTbuW3/72Nzz55CwAnnjid9x3371AhrKyMs4770JGjRoN\nwAMP/IxHHvk1mUyGj3/8RE4+eSIAN910PYcddiT77PPBfLUIMKgldYJ0JsPipWu48zf/oaS4kDeX\nrdmixTdOP/YDHLjnUIqL3vvVpAHlpSzNU1CXlvZh+vSfAjBlyuU89NCDTJz4WerqarnwwvM4//wL\nGTfuAOrqarn44gt48MGf84lPnMT8+fOYNu1arr32+4wcuTPpdJqHH36wU2tramqisHDrTtn/9Kf3\n8qlPfbrD4xsbGykq6rpYeOSRh6io6M+MGb9i5swnuO22G7niimtajVm5cgW33nojP/zhT+jffwBT\nplzO3/72V8aOHcfNN0/j2GM/xoQJH+WFF2bzgx/czCWXXMl+++2f+3tdtWoVp5xyIuPGHZg75ssv\nv0R1dXWrq/l32mkYt9xyJ/369eNPf/oj3/3uFO6440fMnz+PRx75NXfeeS9FRUWcd945HHLIhxg2\nbDgnnPApbr75BoNaUjwymQzP/auKlWvqWPT2asr7lvBa1Srmv7lqg7FFhQUUFqZIRgygvG8xA9at\nFtW8QMa6RTBSqRQNjWlGDunH2GSHbvipsvbccwzz5s0D4MknH2Pvvfdl3LgDgGygf+1rF3DOOWfx\niU+cxH333cvkyZ9n5MidASgoKOCEEz61wTFramqYNu1aQpgDpPjc587k8MOP5JhjPsSTTz4LwNNP\n/57nn3+Oiy66jClTLqekpIS5c19hr732Ydasp5k+/af069cPgFNOOZHbbvshANdddw1vvVUFwFe+\nch577bVPq/eur6/nP//5N5dcciUAL730b2688Xrq6+soLS3lwgsvY+TInXn00d/w/POzWLVqNel0\nmmuv/T7XXz+V116bT1NTI5/73JkceujhLFnyJldffRlr164F4Otfv4AxY/beqp7/3//N4vOfPwuA\nww8/ihtu+O4GY958czHDh4+kf/8BAIwdO45nnnmKsWPHsXDha3z1q+cB8MEPjuXCC8/bYP+nn/49\nBx10MKWlpUD2A9Ctt97IZZddzaxZz+TGtfxZ9thjDEuXvg3AggUL2GOPMbn99913P/7wh6eYNOlU\nRowYSVXVEqqrqykvL9+qXrTFoJbUrnQ6w3X3/505Cze9BGRZaRE7Dy3n1AkJQwaWbdbxH3hqHjNm\nZv+3vsLCFE3tXFC2MeM+sAMnHzWqQ2Obmpr461//zNix4wF47bXXSJIPtBozbNhw1q5dS03NGl57\nbT6TJp3a7nF/9KO7KC8v5557ZgBQXb1uUZGW60C3/h3tZcuWcvvt00mlUqTTaWbNeprjjvs4//nP\nv9lxx50YOHAgl19+MSefPIm9996Xqqoqzj//HH7yk5+3Os4rr4TcBwmAXXbZlVtuuZPCwkL++tc/\nc8cdt3D11dlgnDNnDtOn/4zy8nJuv/0W9t9/PBdddBnV1dWceeZk9t//AAYNGsQNN9xCSUkJixa9\nzhVXfIu77rp3g5/5y18+g5qaDa/CP/vsrzF27Lj1fta3GTIke1fkoqIittuuH6tWraSion9uzLBh\nI1i0aCFVVUsYPLiSZ599hqam7NmaUaPezzPPPMVJJ53CrFlPU1NTw6pVq6ioqMjtP3PmE0yc+Nnc\n9i9/+QCHHno4228/eIMa13nkkYc48MBDAHjf+3bjzjtvZdWqlZSUlPL888+x++575saOHp3w73//\nk4MOOmSTx9taBrWkjVpYVc0/X13GfxYs55VFK1q9dtIRu7Hz0HL6lhbRp6SQsj7F9N+upJsq3XL1\n9XWcfvokli5dyo477sgJJ3wy91pnLNr4t7/9lSuvfO9UbnuzrlQqxZFHfjgX3kcffQzTp9/Fccd9\nnJkzH+foo48BYPbsv7Bw4Wu5/WpqaqitraVPn/curnvrrSWtwqi6upqrrrqMxYsXkUqlcmEHcPDB\nB+dq+8tf/sRzz83iZz/7MQANDQ28/XYVgwYN5oYbpjJv3lwKCgpYtOj1jf4Mt9xyZ4d601EVFRWc\nd943ufTSCykoKGDMmL1ZvDi7cM2Xv3wuN9wwld/97jfss89+VFbuQGGLFd2WLVvG/PmvMn78Qc3b\nS3nmmZncdNPtbGpVzhdemM2jjz7MrbfeDWQ/4HzmM5P52tfOpm/fvowenbRa7nTw4EqqqpZ06s+8\nPoNaUk4mk+H3s9/gZzPnbvT1Ew97Hx8/eJdOf9+Tjxq1ydlvPpe3LCkpZfr0n1JXV8vXv34Ozz77\nBw4//Eh23XVX/v73F1uNXbz4Dfr27UtZ2Xbsuuv7ePnll9htt/Zn7BsLhJaz6Lq61neMahm2e+65\nF4sXL2LFihU8++wsTjvtjHVH5Y477qG4uLiNd07R8o7Dd931A/bffxzXXPM9qqqWcM45Z+VeW3cB\n3TpTplzLiBEjWz139923s/32g7nkkqtoamriqKMO3ui7fulLX2Dt2poNnv/yl89l//3Ht3pu8OAd\nqKqqYvDgShobG1mzZnWr2fQ6hxzyIQ455EMAPPTQg7nv7gcPHsyUKdcC2Q8rf/jDU2y3Xb/cfk89\n9SSHH35kbvzcuYHFixdxyiknAlBXV8spp3yCGTOy1xfMmzeXqVOv5rrrbmo1K//Yx47nYx87HoDb\nb78ldxYAsn+/+V64zqCWeoE1tQ3MWbB8ozeKr3q3hkVvr2b2y29vdN8vnziGygF9GTkkf9/BdbfS\n0j6ce+75XHHFtzjssCM45pgJ3HvvdGbP/gv77z+eurpavv/97/GZz0wGYNKkU7n44m+w9977MmLE\nyOaLyX7VakYOMG7cATz44AN85SvZ707XfZc5aNAgFi5cwIgRI5k16+lW4dJSKpXisMOO4KabrmPX\nXXfNhce4cQfy85/PYNKk/wayATR6dNJq36FDd+Sdd97Jba9Zs4bBgysB+O1vH95kL8aPP5Bf/GIG\nX/vaBQC88srLvP/9H6CmZg2VldlrCB577Lek0+mN7n/rrXdt8tjrO/TQw3jssUcYM2Yvnnlm5gan\nxtdZvvxdBg4cxKpVq/j1r3/BVVdNBbIXmpWXV1BQUMCPfzydj370v1rt9/vfP567QhzgoIMO5aGH\nHs9tH3PMYbmQrqqq4uKLv8Ell1zF8OEjWh1n3ftXVVUxa9bT3HHHj3KvvfPOMj74wbEd/pm3hEEt\n9VCvv1XNb55bwN9eWbrZ+x6+7058+qhR29ztADdXy5nt6NEJw4YN56mnnuTooz/Cd75zHTfccC3X\nXz+VdDrNhAkf5ZOfPBmA3XYbxVe+ch6XX34xdXW1QCo342tp8uTPc/31Uzn11E9TUFDI5z53Jocd\ndgT/8z9nc8EF5zJgwEA+8IHdcxdorV8TwFFHfYQzzjiViy++PPfcueeez/XXT2Xy5Ik0NTWx7777\ncf7532y136hRo3n99YW57UmTTmXKlMu45567OeigQ1n3PXkqlWr1nqed9gVuvPE6Jk8+hXQ6zU47\nDWPq1Bs48cSTuPjiC3jssUc54ICD6Nt3865D2JiPfex4rrrqUk455UQqKvpzxRXfzr12+umTcldu\nf//71zFv3tzm58/IBemLL/6NH/zgFlIp2HffsZx33v/m9l+y5E2WLVvaZoi2/Lnvuecuqqurue66\n7FcVRUVF3Hln9jv4b33rf1m5cmXzVd/fbPXBau7cwLnnfmNrW9Em757VQ3k3nK4Ra58f+eMCHpw1\nv9VzQwb25YA9hlBetuF3yUWFKT44upKKCL9njrXH24IpUy7nhBM+xZ57jmlznD3eMq+/vpBbbpnG\n1Kk3tDvWu2dJaqVlSN/41Q9R1qcodw9h9R4TJ36WGTPuazeotWUeeuiXTJo0Oe/vY1BLPVBJUQH1\njWnu/t8jvUVjL/a+942KdlWynuCcc77eJe/TdXcml9R1UrDL0HJDWuoBnFFLPcg7K2v5yROB+oaN\nX5EradtjUEvbsDkLl/PXl9+GTIZn/v5mq9divDBM0uYzqKVtUDqT4a13a3jgqXksfKv11bq77ljB\nh8cO58A9h2xib0nbEoNa2gbMfWMFM2bOpbyshH+++k6r10qKCrj0tHGkUjBkYFmr5Q0lbfsMamkb\n8J37Xthg7elhg7dj1PD+7L7zQHYavF33FCYp7wxqqQu9vWItf/zXElavbaC0pPlew5nmFZkzkGle\n5HNdKGcy8I95y3Lb0845lNLiQoqLCpw5S72EQS3l2eq1Dbz4ylLCohX88d9VW3ycjx60sxeISb2Q\nQS3lwfLqOv7w98XMWbicuW+sbPVaUWGKk44cxa5DK7J33UlBKrfuMrk/Uy3uWTygXwn9+5V2VfmS\nImJQS53kb2EpP34iUFiQYnl13Qavn3TEbgwZVMbeu21PUaFrDUnqGINa2gINjU3U1jdRsrqO6pp6\nVtU0cMuv/pV7vXJAHxoa05x+3O4MLC9l2ODtXCVM0hYxqKXN8MqiFfxs5lwWVm38TkMlRQV894sH\n+12ypE5jUEsdtKqmngeenpcL6cKCFAeMGUpdXSP1DWkKC1Je8CWp0xnUUgc0NqW56PY/UVPXCMCU\nMw5gx+238z6+kvLOoJY6oKExTU1dI4P79+HDY4czdFBZd5ckqZcwqKXNMGzwdnxk/MjuLkNSL+Lv\niEiSFDGDWpKkiHnqW2rHnAXv8uMnXunuMiT1Uga1ojTrH2/y1vKa7i4DgN/96fXc4913HtiNlUjq\njQxqRae6pp4f/e7l7i6jlVQKvvelQxhY7nrbkrqWQa28akqnWVPb2Oq5JcvWMP/NVbCJFTVXrq4H\nYI9dBnLih96X7xI7pP92JYa0pG5hUCuvpt73IvMWr2x/4EYMGVTGbsP6d3JFkrRtMaiVV0veWUPf\n0kL23GXQBq8dsteOm9wvlUrx/hGGtCQZ1Mq77Sv68KUT9+ruMiRpm2RQq8Mamzb8vrk9a2ob/W5X\nkraCQa2cmtoG0plNv3759L/w7qq6zT6u92GWpC1nUG+mt5fXcPdv51BX39TdpbSpqKiQxsaO1/j6\n26s7PHbcB3bYrFo2d7wk6T3dEtQrV9dRXVPfHW+91f7x6jvMfWMlxUUFFBXGO1NMpVJkMm1Mj9fT\np6SQ2vom9t5te4oLN72y7EFjhrLf+ys7o0RJUgd0S1B/9rLHuuNtO9XkCQkHj9n0VcvdzfskS1LP\n0C1BffDeO1JXt3kXJcWkT0khY3bdvrvLkCT1At0S1BdOHu9sT5KkDvA2l5IkRcygliQpYga1JEkR\nM6glSYqYQS1JUsTaveo7SZIJwDSgELgrhDB1vdcHAz8BhjYf73shhB91fqmSJPU+bc6okyQpBG4G\nJgB7ABOTJNl9vWFnAy+GEPYFjgCuS5LEpUklSeoE7Z36Hg/MCyEsCCE0ADOA49cbswSoaH5cAbwT\nQth2VzORJCki7c18hwGLWmy/ARyw3pg7gaeSJHkTKAdO7rzyJEnq3doL6o7c1eEi4O8hhCOSJNkN\neDJJkn1CCG0uPVZZWd7RGrWF7HHXsM/5Z4/zzx7Hq72gXgyMaLE9guysuqWDgSkAIYRXkyR5DUiA\n2W0d2CVE88ubcnQN+5x/9jj/7HH+bc0HofaCejYwOkmSXYA3gU8DE9cb8zLwYeC5JEmGkA3p+Vtc\nkSRJymnzYrLmi8LOBh4HXgLuDyHMSZLkrCRJzmoe9m1g/yRJ/gH8HrgghPBuPouWJKm3SGUyHfka\nutNlPM2SX57K6hr2Of/scf7Z4/yrrCxPbem+rkwmSVLEDGpJkiJmUEuSFDGDWpKkiBnUkiRFzKCW\nJCliBrUkSREzqCVJiphBLUlSxAxqSZIiZlBLkhQxg1qSpIgZ1JIkRcygliQpYga1JEkRM6glSYqY\nQS1JUsQMakmSImZQS5IUMYNakqSIGdSSJEXMoJYkKWIGtSRJETOoJUmKmEEtSVLEDGpJkiJmUEuS\nFDGDWpKkiBnUkiRFzKCWJCliBrUkSREzqCVJiphBLUlSxAxqSZIiZlBLkhQxg1qSpIgZ1JIkRcyg\nliQpYga1JEkRM6glSYqYQS1JUsQMakmSImZQS5IUMYNakqSIGdSSJEXMoJYkKWIGtSRJETOoJUmK\nmEEtSVLEDGpJkiJmUEuSFDGDWpKkiBW1NyBJkgnANKAQuCuEMHUjY44AbgCKgWUhhCM6t0xJknqn\nNmfUSZIUAjcDE4A9gIlJkuy+3pgBwC3Ax0MIY4BP5alWSZJ6nfZOfY8H5oUQFoQQGoAZwPHrjZkE\n/DKE8AZACGFZ55cpSVLv1N6p72HAohbbbwAHrDdmNFCcJMnTQDnw/RDCjzuvREmSeq/2ZtSZDhyj\nGNgPOA74f8AlSZKM3trCJElS+zPqxcCIFtsjyM6qW1pE9gKytcDaJElmAfsAc9s6cGVl+WaWqs1l\nj7uGfc4/e5x/9jhe7QX1bGB0kiS7AG8CnwYmrjfmIeDm5gvPSsmeGr++vTdeurR6s4tVx1VWltvj\nLmCf888e5589zr+t+SDU5qnvEEIjcDbwOPAScH8IYU6SJGclSXJW85iXgceAfwJ/Bu4MIby0xRVJ\nkqScVCbTka+hO13GT2/55SfkrmGf888e5589zr/KyvLUlu7rymSSJEXMoJYkKWIGtSRJETOoJUmK\nmEEtSVLEDGpJkiJmUEuSFDGDWpKkiBnUkiRFzKCWJCliBrUkSREzqCVJiphBLUlSxAxqSZIiZlBL\nkhQxg1qSpIgZ1JIkRcygliQpYga1JEkRM6glSYqYQS1JUsQMakmSImZQS5IUMYNakqSIGdSSJEXM\noJYkKWIGtSRJETOoJUmKmEEtSVLEDGpJkiJmUEuSFDGDWpKkiBnUkiRFzKCWJCliBrUkSREzqCVJ\niphBLUlSxAxqSZIiZlBLkhQxg1qSpIgZ1JIkRcygliQpYga1JEkRM6glSYqYQS1JUsQMakmSImZQ\nS5IUMYNakqSIGdSSJEXMoJYkKWIGtSRJETOoJUmKWFF7A5IkmQBMAwqBu0IIUzcxbhzwPHByCOHB\nTq1SkqReqs0ZdZIkhcDNwARgD2BikiS7b2LcVOAxIJWHOiVJ6pXaO/U9HpgXQlgQQmgAZgDHb2Tc\nOcAvgKWdXJ8kSb1ae0E9DFjUYvuN5udykiQZRja8b2t+KtNp1UmS1Mu1F9QdCd1pwDdDCBmyp709\n9S1JUidp72KyxcCIFtsjyM6qWxoLzEiSBGAwcGySJA0hhIfbOnBlZflmlqrNZY+7hn3OP3ucf/Y4\nXqlMZtOT5iRJioAAHA28CfwFmBhCmLOJ8dOB33Tgqu/M0qXVW1axOqSyshx7nH/2Of/scf7Z4/yr\nrCzf4rPNbZ76DiE0AmcDjwMvAfeHEOYkSXJWkiRnbembSpKkjmlzRp1HzqjzzE/IXcM+5589zj97\nnH95m1FLkqTuZVBLkhQxg1qSpIgZ1JIkRcygliQpYga1JEkRM6glSYqYQS1JUsQMakmSImZQS5IU\nMYNakqSIGdSSJEXMoJYkKWIGtSRJETOoJUmKmEEtSVLEDGpJkiJmUEuSFDGDWpKkiBnUkiRFzKCW\nJCliBrUkSREzqCVJiphBLUlSxAxqSZIiZlBLkhQxg1qSpIgZ1JIkRcygliQpYga1JEkRM6glSYqY\nQS1JUsQMakmSImZQS5IUMYNakqSIGdSSJEXMoJYkKWIGtSRJETOoJUmKmEEtSVLEDGpJkiJmUEuS\nFDGDWpKkiBnUkiRFzKCWJCliBrUkSREzqCVJiphBLUlSxAxqSZIiZlBLkhQxg1qSpIgZ1JIkRcyg\nliQpYkUdGZQkyQRgGlAI3BVCmLre658BLgBSQDXwxRDCPzu5VkmSep12Z9RJkhQCNwMTgD2AiUmS\n7L7esPnAYSGEvYGrgDs6u1BJknqjjsyoxwPzQggLAJIkmQEcD8xZNyCE8HyL8X8GhndijZIk9Vod\n+Y56GLCoxfYbzc9tyueBR7emKEmSlNWRGXWmowdLkuRI4HPAIe2Nraws7+hhtYXscdewz/lnj/PP\nHserI0G9GBjRYnsE2Vl1K0mS7A3cCUwIISxv76BLl1Z3tEZtgcrKcnvcBexz/tnj/LPH+bc1H4Q6\nEtSzgdFJkuwCvAl8GpjYckCSJCOBB4HPhhDmbXE1kiSplXa/ow4hNAJnA48DLwH3hxDmJElyVpIk\nZzUPuxQYCNyWJMmLSZL8JW8VS5LUi6QymQ5/Bd2ZMp5myS9PZXUN+5x/9jj/7HH+VVaWp7Z0X1cm\nkyQpYga1JEkRM6glSYqYQS1JUsQMakmSImZQS5IUMYNakqSIGdSSJEXMoJYkKWIGtSRJETOoJUmK\nmEEtSVLEDGpJkiJmUEuSFDGDWpKkiBnUkiRFzKCWJCliBrUkSREzqCVJiphBLUlSxAxqSZIiZlBL\nkhQxg1qSpIgZ1JIkRcygliQpYga1JEkRM6glSYqYQS1JUsQMakmSImZQS5IUMYNakqSIGdSSJEXM\noJYkKWIGtSRJETOoJUmKmEEtSVLEDGpJkiJmUEuSFDGDWpKkiBnUkiRFzKCWJCliBrUkSREzqCVJ\niphBLUlSxAxqSZIiZlBLkhQxg1qSpIgZ1JIkRcygliQpYga1JEkRM6glSYqYQS1JUsSK2huQJMkE\nYBpQCNwVQpi6kTE3AscCNcBpIYQXO7tQSZJ6ozZn1EmSFAI3AxOAPYCJSZLsvt6Y44BRIYTRwJnA\nbXmqVZKkXqe9U9/jgXkhhAUhhAZgBnD8emP+C7gHIITwZ2BAkiRDOr1SSZJ6ofaCehiwqMX2G83P\ntTdm+NaXJkmS2gvqTAePk9rC/SRJUhvau5hsMTCixfYIsjPmtsYMb36uLanKyvIOFagtZ4+7hn3O\nP3ucf/Y4Xu3NqGcDo5Mk2SVJkhLg08DD6415GDgVIEmSA4EVIYS3Or1SSZJ6oTaDOoTQCJwNPA68\nBNwfQpiTJMlZSZKc1TzmUWB+kiTzgNuBL+W5ZkmSeo1UJuPXyZIkxcqVySRJiphBLUlSxAxqSZIi\n1u5a311A586sAAAC9ElEQVQhSZJ9gJuAV4F7QgjPdG9FPVfzqnGPhBDGdXctPVGSJGPJXoCZAi4I\nIbzdzSX1OEmSHE32N1DKgO+GEP7ZzSX1WEmSHAVMDCGc0d219DRJkhxMdtltgK+GEFZuamwsM+rx\nwBKgEfhPN9fS030DWNDdRfRgpcC5wG+Bg7q5lp6qbwjhTOB7wEe6u5ieKkmS3YB9gT7dXUsPdQbZ\noL6b7AfPTYolqP8P+ALwXeD8bq6lx0qS5IvAT4Da7q6lpwoh/JHsDWzOB/7ezeX0SCGER5Ik2Q74\nCvCjbi6nxwohvBpCuL676+jBCkMI9WQnqTu2NTBvp76TJDkA+E4I4cgkSQqAW4G9gTrgCyGEV5Mk\nuRIYTXbRlCXAinzW1BNtZp93aH5tfJIknwwh/LLbCt+GbGaPrye7UNCxwGXAV7up7G1KB3t8FTCK\nbE+/A1waQljWbUVvgzazz18MIazoxnK3WR3pM1DTvJDYTkBVW8fLSygmSXIB8FlgdfNTJwAlIYSD\nm3+A64ATQgiXNo8/iOx31A3AFfmoqSfa3D632O9eQ7pjtuDf8pHAD4F6sgsAqR2b0eNLmsffAwwG\nrkmS5Nf+W+6Yze2ztkxH+wzcQfa/EUXAWW0dM1+z13nAJ4AfN28fCjwG2VthJkmyf8vBIYTngefz\nVEtPtll9XieEcGrXlNcjbO6/5aeBp7u0wm3f5vZ4cteW12Ns6X8v/rtryusxOtTnEMILwOkdOWBe\nvqMOITxI9sKwdcqBVS22m5pPB2gr2Of8s8f5Z4+7hn3uGvnoc1f9pawiW2zufUMI6S56797EPuef\nPc4/e9w17HPX2Oo+d1VQPwccB7k7bPl7j/lhn/PPHuefPe4a9rlrbHWf832F9bo7fvwKOCZJkuea\ntzt0Xl4dZp/zzx7nnz3uGva5a3Ran717liRJEfPCAUmSImZQS5IUMYNakqSIGdSSJEXMoJYkKWIG\ntSRJETOoJUmKmEEtSVLEDGpJkiJmUEuSFLH/D2GevbkYXhhFAAAAAElFTkSuQmCC\n",
"text": [
"<matplotlib.figure.Figure at 0x11832a6d8>"
]
}
],
"prompt_number": 15
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# HMM"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"training_data = random.choice([x[1] for x in weights_sequences], 1000, replace=False, p=[x[0] for x in weights_sequences])\n",
"NB_HIDDEN_STATES = 5\n",
"training_data = [x for x in training_data if len(x) >= NB_HIDDEN_STATES] #Removing too short sequences\n",
"len(training_data)"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# There is a silly un-feature in sklearn.hmm.MultinomialHMM where observations must span a continuous ranfe of integers.\n",
"# Because training_data is a subset of all the data, not every state is in it\n",
"# So we translate state numbers to other, continuous state numbers\n",
"translation = list(set([x for obs in training_data for x in obs]))\n",
"for i,obs in enumerate(training_data):\n",
" training_data[i] = [translation.index(x) for x in obs]"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 4
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"from hmmlearn.hmm import MultinomialHMM\n",
"hmm = MultinomialHMM(NB_HIDDEN_STATES)\n",
"hmm.fit(training_data)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 7,
"text": [
"MultinomialHMM(algorithm='viterbi',\n",
" init_params='abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ',\n",
" n_components=5, n_iter=10,\n",
" params='abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ',\n",
" random_state=<mtrand.RandomState object at 0x112468978>,\n",
" startprob=None, startprob_prior=1.0, thresh=0.01, transmat=None,\n",
" transmat_prior=1.0)"
]
}
],
"prompt_number": 7
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"test_data = random.choice([x[1] for x in weights_sequences], 200, replace=False, p=[x[0] for x in weights_sequences])\n",
"for i,obs in enumerate(test_data):\n",
" def translate_or_None(x):\n",
" try:\n",
" return translation.index(x)\n",
" except ValueError:\n",
" return None\n",
" test_data[i] = [translate_or_None(x) for x in obs if translate_or_None(x) != None]\n"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 10
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"confusion_matrix = zeros((2,2))\n",
"for obs in test_data:\n",
" if obs[-1] == len(translation)-1: # BUY\n",
" actual = 1\n",
" obs = obs[:-1]\n",
" else:\n",
" actual = 0\n",
" hidden_states = hmm.predict(obs)\n",
" for i in range(0, NB_HIDDEN_STATES):\n",
" hmm.startprob_[i] = 1 if i == hidden_states[-1] else 0\n",
" next_states, foo = hmm.sample(100)\n",
" if len(translation)-1 in next_states:\n",
" predicted = 1\n",
" else:\n",
" predicted = 0\n",
" confusion_matrix[actual,predicted]+=1\n",
"confusion_matrix\n",
"#test_seq = weights_sequences[-1100][1][:-1] #ends in 250\n",
"#array(test_seq).reshape(-1,1)\n",
"#training_data[11][-1]\n",
"#hmm.predict(training_data[15])#[:-1])\n",
"#hmm.eval(array(test_seq).reshape(-1,1))\n",
"#hmm.predict_proba?\n",
"#hmm._generate_sample_from_state(3)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 11,
"text": [
"array([[ 162., 14.],\n",
" [ 22., 2.]])"
]
}
],
"prompt_number": 11
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"not_buy = [x[1] for x in counts_sequences if x[1][-1] != final_state]\n",
"buy = [x[1] for x in counts_sequences if x[1][-1] == final_state]"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 3
},
{
"cell_type": "code",
"collapsed": true,
"input": [
"import seaborn as sns\n",
"buy_hist = hist([x for obs in buy for x in obs],final_state, color=\"#6495ED\", normed=True, alpha=.5, histtype=\"stepfilled\")[0]\n",
"not_buy_hist = hist([x for obs in not_buy for x in obs],final_state, color='#F08080', normed=True, alpha=.5, histtype=\"stepfilled\")[0]\n",
"legend(['Buy', 'Not buy'])\n",
"savefig('Buy_not_buy_distribs.pdf')"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "display_data",
"png": "iVBORw0KGgoAAAANSUhEUgAAAe4AAAFVCAYAAAApGgzgAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3XmYXXd95/n3Xapu7Ztc8i55/4EhdtsYMDZxcLCBBhzc\nzUMmPD2EsSFhSQiZIU8PHaZp8pBtwgPdk04MHQfIpAlhcIgJdrCNAYOxjOVFtuVNP222JZW2Uqn2\n7a7zR5XkkihVSVW36uqU3q/n8WOd5XfOt373VH3u+d1zz0lVKhUkSVIypGtdgCRJOn4GtyRJCWJw\nS5KUIAa3JEkJYnBLkpQgBrckSQmSnWthCCEN3AZcBkwCH44xbpux/L3A/wlUgH+IMf7lfG0kSdLC\nzXfGfTNQH2O8Bvg08MVDC0IIGeDPgLcCbwI+HkJYNd0mN1sbSZK0OPMF97XAvQAxxvXAVYcWxBhL\nwKtijMNAN5AB8tNt7pmtjSRJWpz5grsNGJoxXZoeCgcgxlgOIfx74EngAWB0vjaSJGnh5vyMm6kA\nbp0xnY4xlmeuEGP85xDCncDfAb95PG2OVqlUKqlU6riLliQp4RYcevMF9zrgJuCOEMLVwMZDC0II\nbcBdwI0xxnwIYRQozdXmWFKpFL29wwv8EXQ8urtb7eNlYD8vvWr1cT6fByCbzZJOOyg4k8fx0uvu\nbp1/pWOYL7jvBG4MIaybnr4lhPB+oCXGeHsI4RvAgyGEAvA08I3p9Y5os+DqJGmJPHPv3XSmU9S9\n5nLOPf+CWpcjHbc5gzvGWAE+dtTszTOW3w7cPkvTo9tI0kmls7mZc3M59ta6EOkEOT4kSVKCGNyS\nJCWIwS1JUoIY3JIkJch8V5VLknTCSqUSg4MDVd1me3sHmUymqttMIoNbklR1g4MDfP/xgzS3dFRl\ne6MjA7zzKujqWlWV7SWZwS1JWhLNLR20tnct2/42bHicz372P3H++RdQqVQoFAr8wR98mosvDstW\nw3IwuCVJK0IqleKqq97A5z73JwA89tgj3H77V/iLv/ivNa6surw4TZK0IlQqFSqVyuHpoaEhOjs7\n+cQnPsKOHS8B8N3v/hNf+9rf8L3v3cltt/0/wNTn8R/84G9QKBRqUfYJM7glSSvGhg2P84lPfISP\nfvRW/vzPP88NN7xteknq8P9TqRQ33PB2Hnzwp5TLZdav/zlXXvl66urqalX2CXGoXJK0Ylx55VX8\n0R/9KQA7drzMRz5yC2vWrD28/NBZeVNTE1dccSXr1/+c73//Lm699bdqVfIJM7glSUtidKR6Xweb\n2taJXejW2dlFKpWipaWVAwd6WbNmLZs3b6K7ezUAN910M9/4xv/L0NAgF1xwUdVqXWoGtySp6trb\nO3jnVdXcYhft7XN/tSyVSh0eKk+nM4yNjfKJT/zvdHR08qUv/d+sXn0G3d3dpFJTw+aXXvpaenp2\n8d73/no1C11yBrckqeoymcyyf+f6iitex113/WDWZW9607W/MK9cLtPU1MgNN7x9qUurKi9OkySd\ncnbv7uFDH/pfeetb30ZTU1OtyzkhnnFLkk45Z511Nl//+jdrXcaCeMYtSVKCGNySJCWIQ+WSpKrz\n6WBLx+CWJFXd4OAAww8+QEdLS1W2NzAyAtdd79PBcKhckrREOlpa6Gprq8p/x/MGYMOGx3nHO97C\n/v37Ds/78pf/O/fcc/cx2wwNDXH//ff+wvzf/M3/ZWE/9DIwuCVJK0ZdXT1/+qd/dHj60M1WjmXr\n1s089NCDS11WVRnckqQVIZVKceWVV9He3s53vvPtX1j+j//4DX7rt36Tj370Vr785f8OwN///dfY\nsOFx7rrru0esWy6X+fznP8vv/u5v87nPfYbJyUm+//27+MpX/gqAyclJ3ve+X2N0dIRf//X3HH4q\n2W23/SU//vEPl/TnNLglSSvCofD81Kc+zbe//U16enYdXrZt21YeeOCHfOUrX+crX/kau3bt4OGH\nH+KDH/wQV155FTfddPMR2yoUCnzwg7fyV3/1N6xefTp33XXnrGfvzc0tXH75FTzyyMOUSiXWr/85\n1133liX9OQ1uSdKK0tbWzu/93qf44z/+L5TLZQB27HiJ17zmlw5flX755Vfw4ovbjrmNzs4u1qw5\nD4DXvvYydux4+ag1Xnnu90033cw999zN+vU/5/WvfyPZ7NJe921wS5KWxMDICAeHhqry38DIyAnt\n+9prf5k1a9YevjBt7drzeP75ZymVSlQqFZ566knWrFlLOp0+fKZ+RO0DA4fP2J966gkuuugS6uvr\n6es7AECMmw6ve9ll/4aenl3cffe/8O53v2eh3XXc/DqYJKnq2ts74LrrKVVpe62HtjmHVCp1xHD2\nJz/5KZ544jEALrjgIn71V2/gYx/7EJVKmcsuu4Jf/uW30Nu7n+3bt3LHHd/ife/7jVf219rC3/7t\nV9i/fx9nn30O73rXrzE+Ps6dd/4TH//4hwnh1TQ3v3Kl+9ve9g5+8pMfcd5551fpJ57j55ztnUYN\nVHp7h2tdw4rW3d2Kfbz07OelV60+3v6j+zg3l2Pv2Ws49/wLqlDZyuFxfOK++c3/SUdHB+98503H\ntX53d+vcl7vPwaFySZIW4U/+5HM8/vijvO1t/3ZZ9udQuSRJi/CZz3xuWffnGbckSQlicEuSlCAG\ntyRJCWJwS5KUIAa3JEkJYnBLkpQgBrckSQlicEuSlCAGtyRJCWJwS5KUIAa3JEkJYnBLkpQgBrck\nSQky59PBQghp4DbgMmAS+HCMcduM5e8HPgkUgWeAj8cYKyGEDcDg9GrbY4wfWoriJUk61cz3WM+b\ngfoY4zUhhDcCX5yeRwihEfg88NoY40QI4ZvAu0MI9wPEGK9fwrolSTolzTdUfi1wL0CMcT1w1Yxl\nE8CbYowT09NZYBy4HGgKIdwXQvjRdOBLkqQqmC+424ChGdOl6eFzYoyVGGMvQAjhE0BzjPGHwCjw\nhRjj24GPAv9wqI0kSVqc+YbKh4DWGdPpGGP50MR0IP8FcBHw3unZm4GtADHGLSGEPuBMoGeuHXV3\nt861WFVgHy8P+3npVaOP9zXnaMrl6Ops8jWbhX1y8povuNcBNwF3hBCuBjYetfx/MDVk/u9ijJXp\nebcwdTHb74QQzmLqrH3PfIX09g6fSN06Qd3drfbxMrCfl161+nh0dJKxIhzsH6PZ1+wIHsdLbzFv\njOYL7juBG0MI66anb5m+krwFeBy4FXgQ+HEIAeC/AV8Fvh5CePBQm5ln6ZIkaeHmDO7ps+iPHTV7\n84x/Z47R9AOLKUqSJM3Oi8YkSUoQg1uSpAQxuCVJShCDW5KkBDG4JUlKEINbkqQEMbglSUoQg1uS\npAQxuCVJShCDW5KkBDG4JUlKEINbkqQEMbglSUoQg1uSpAQxuCVJShCDW5KkBDG4JUlKEINbkqQE\nMbglSUoQg1uSpAQxuCVJShCDW5KkBDG4JUlKEINbkqQEMbglSUoQg1uSpAQxuCVJShCDW5KkBDG4\nJUlKEINbkqQEMbglSUoQg1uSpAQxuCVJShCDW5KkBDG4JUlKEINbkqQEMbglSUoQg1uSpAQxuCVJ\nShCDW5KkBDG4JUlKEINbkqQEMbglSUqQ7FwLQwhp4DbgMmAS+HCMcduM5e8HPgkUgWeAjwOpudpI\nkqSFm++M+2agPsZ4DfBp4IuHFoQQGoHPA2+JMb4ZaAfePd0mN1sbSZK0OPMF97XAvQAxxvXAVTOW\nTQBvijFOTE9np+ddC9xzjDaSJGkR5gvuNmBoxnRpevicGGMlxtgLEEL4BNAcY7x/rjaSJGlx5vyM\nm6kAbp0xnY4xlg9NTAfyXwAXAe89njbH0t3dOt8qWiT7eHnYz0uvGn28rzlHUy5HV2eTr9ks7JOT\n13zBvQ64CbgjhHA1sPGo5f+DqeHxfxdjrBxnm1n19g4fd9E6cd3drfbxMrCfl161+nh0dJKxIhzs\nH6PZ1+wIHsdLbzFvjOYL7juBG0MI66anb5m+krwFeBy4FXgQ+HEIAeC/zdZmwdVJkqQjzBnc02fR\nHztq9uYZ/84co+nRbSRJUhV40ZgkSQlicEuSlCAGtyRJCWJwS5KUIAa3JEkJYnBLkpQgBrckSQli\ncEuSlCAGtyRJCWJwS5KUIAa3JEkJYnBLkpQgBrckSQlicEuSlCAGtyRJCWJwS5KUIAa3JEkJYnBL\nkpQgBrckSQlicEuSlCAGtyRJCWJwS5KUIAa3JEkJYnBLkpQgBrckSQlicEuSlCAGtyRJCWJwS5KU\nIAa3JEkJYnBLkpQgBrckSQlicEuSlCAGtyRJCWJwS5KUIAa3JEkJYnBLkpQgBrckSQlicEuSlCAG\ntyRJCWJwS5KUIAa3JEkJYnBLkpQgBrckSQlicEuSlCDZuRaGENLAbcBlwCTw4RjjtqPWaQLuB26N\nMcbpeRuAwelVtscYP1TtwiVJOhXNGdzAzUB9jPGaEMIbgS9OzwMghHAV8BXgLKAyPa8BIMZ4/ZJU\nLEnSKWy+ofJrgXsBYozrgauOWl7PVJDHGfMuB5pCCPeFEH40HfiSJKkK5jvjbgOGZkyXQgjpGGMZ\nIMb4MEAIYWabUeALMcavhhAuBu4JIVxyqM2xdHe3nnDxOjH28fKwn5deNfp4X3OOplyOrs4mX7NZ\n2Ccnr/mCewiY+eql5wtgYDOwFSDGuCWE0AecCfTM1ai3d3iezWoxurtb7eNlYD8vvWr18ejoJGNF\nONg/RrOv2RE8jpfeYt4YzTdUvg54J0AI4Wpg43Fs8xamPgsnhHAWU2ftexZcoSRJOmy+M+47gRtD\nCOump28JIbwfaIkx3n6MNl8Fvh5CePBQm+M4S5ckScdhzuCOMVaAjx01e/Ms610/499F4ANVqU6S\nJB3BG7BIkpQgBrckSQlicEuSlCAGtyRJCWJwS5KUIAa3JEkJYnBLkpQgBrckSQlicEuSlCAGtyRJ\nCWJwS5KUIAa3JEkJYnBLkpQgBrckSQlicEuSlCAGtyRJCWJwS5KUIAa3JEkJYnBLkpQgBrckSQli\ncEuSlCAGtyRJCWJwS5KUIAa3JEkJYnBLkpQgBrckSQlicEuSlCAGtyRJCWJwS5KUIAa3JEkJYnBL\nkpQgBrckSQlicEuSlCAGtyRJCWJwS5KUIAa3JEkJYnBLkpQgBrckSQlicEuSlCAGtyRJCZKtdQHS\nyWjLhsfJFPK0rj2P7jPOqnU50opULpfZ+uzTAFz4msvIZDI1rigZPOPWcfnn+x7h2/c/x+at22td\nyrLIDA1ycQoGD/TVuhRpxSqXy+R6dtK0u4disVjrchLD4NZxaWjuov30i6jUuhBJOsXNOVQeQkgD\ntwGXAZPAh2OM245apwm4H7g1xhiPp40kSVqY+c64bwbqY4zXAJ8GvjhzYQjhKuBB4Hw4fDI2ZxtJ\nkrRw8wX3tcC9ADHG9cBVRy2vZyqo4wm0kSRJCzRfcLcBQzOmS9ND4QDEGB+OMe46kTaSJGnh5vs6\n2BDQOmM6HWMsL0Eburtb51tFi7SYPm5uyZFprKezo/mUeK32t+RoaszR0dF0wj/vqdA/tVaNPt7X\nnKMpl6Or88Rf41PBcvRJsVikv7GeTDpNd3cruVxuyfe5EswX3OuAm4A7QghXAxuPY5sLaUNv7/Dx\nrKYF6u5uXVQfj45Mkknn6U+NnhKv1cjIJGPlNAOVsRP6eRfbz5pftfp4dHSSsSIc7B+j2dfsCMt1\nHBeLRcbG82RSaXp7h8nl8ku+z5PFYt4YzRfcdwI3hhDWTU/fEkJ4P9ASY7z9eNssuDpJknSEOYM7\nxlgBPnbU7M2zrHf9PG0kSVIVeNGYJEkJYnBLkpQgBrckSQlicEuSlCAGtyRJCWJwS5KUIAa3JEkJ\nYnBLkpQgBrckSQlicEuSlCAGtyRJCWJwS5KUIAa3JEkJYnBLkpQgBrckSQlicEuSlCAGtyRJCWJw\nS5KUIAa3JKmmDvT188Cjm9nwzJZal5IIBrckqaYmi2UqLRcyPFGudSmJYHBLkpQgBrckSQlicEuz\nKBaL7N53gM3bXq51KZJ0BINbmkWxWGBwMkdPf60rkaQjGdySJCWIwS1JUoIY3JIkJYjBLUlSghjc\nkiQliMEtSVKCGNySJCWIwS1JUoIY3JIkJYjBLUlSghjckiQliMEtSVKCGNySJCWIwS1JUoJka12A\nJEkz3f2jxyinc1x7+dms6lpV63JOOga3JOmkUsq2Q8NpFAqFWpdyUnKoXNIpK5/PMzQ0yOTkZK1L\nkY6bwS3plLVjbz+Pbi3wzKYXa12KdNwcKpd0yspksjS3dZJKVWpdik7Q9mc3Up6cpPv8C2nv6qp1\nOctqzuAOIaSB24DLgEngwzHGbTOW3wT8Z6AIfC3G+LfT8zcAg9OrbY8xfmgJapcknaIq+/dyUWMj\n2/fvNbiPcjNQH2O8JoTwRuCL0/MIIdQBXwKuAsaAdSGEfwGGAWKM1y9Z1ZJ0kikWizy84QVSwDWv\nu5RMJlPrkrRCzfcZ97XAvQAxxvVMhfQhrwa2xhgHY4wF4CHgV4DLgaYQwn0hhB9NB74krWjlcpn9\no83sG22iXC7XuhytYPMFdxswNGO6ND18fmjZ4Ixlw0A7MAp8Icb4duCjwD/MaCNJkhZhvqHyIaB1\nxnQ6xnjoreTgUctagX5gM7AVIMa4JYTQB5wJ9My1o+7u1rkWqwoW08fNLTkyjfV0djSfEq9VS0sD\npUnINZ54v50K/VNr1ejjfc05JoYnaGzI0tGRXfQ28/k8zc31VCoVurtbqaurW3SNtbQcx3GxWKS/\nsZ6GXB3NTfW0Vhro7m6luTlHpb6OVatyx6xjX0sDTQ05ujpPjb9JM80X3OuAm4A7QghXAxtnLNsE\nXBxC6GTqLPs64AvALUxdzPY7IYSzmDoz3zNfIb29wydevY5bd3frovp4dGSSTDpPf2r0lHitRkYm\nyBbqmBwvnNDPu9h+1vyq1cejo5MUJ4uMTxQZGDix13k2+Xye0dE8UKG3dzjRwb1cx3GxWGRsPM/E\nZIHRsTyV4gS9vcOMjk5CqUBfX4G67Ox1jIxMMFZKcbA/mX+TFvNmY77gvhO4MYSwbnr6lhDC+4GW\nGOPtIYT/A7iPqSH3r8YY94QQvgp8PYTw4KE2M87SJUnSIswZ3DHGCvCxo2ZvnrH8buDuo9oUgQ9U\nq0BJkvQKLxqTJClBDG5JkhLEW57WwNanNpAaH6P57HM5Y83aWpejJbD5iUfJ5PO0rDmP088+p9bl\nSFpBPOOugdTQIJdk0owO9Ne6FC2RzPAQl2TSDPcfrHUpklYYg1uSpAQxuGtgf18/L+/uY9vLc96T\nRglWLpXY+vIe1j8VKRaLtS5H0gpicNfA6HiBUl0XQ+N+vX2lqlQgX2lgrNzmfaslVZXBLUlSghjc\nkqREGhkb44FHt/K9Hz5a61KWlV8Hk6RlUCgUAMhms6RSqRpXszKUK2UyzadTqWuvdSnzGh4Zpmf3\nfpqaGllzzlmL2pbBLbZsf5nRsUnWnN1NV2dnrcuRVqQ77n2MSraFX71iNWeecUaty9Ey2/bSHvaV\n11LZv33Rwe1Qudi+d4IDqQvZubu31qVIK1Zz6ypau7wZz6ksk8mSTi8+dg1uSZISxOCWJClBTorP\nuP/m2z9ncmSA37jpzbUu5YT8/J++RVMKzr3+Rrq6VtW6HC2zA319rHt6F+lKnpve+vpal3PSGh4e\nAqC5uaUqw4TSqe6kCO6WrjWkUnW1LuOErW5pYVUmg7fXODUVi0VoPpfyuNcGzOW7D2yCbCPvefMa\n2tpO/qt/daShoUF2PPwz8qS48h3vqnU54iQJbi3MoTtypVIpv16ik1ZTazupbGuty9ACFYtFzslk\n6CuVal2KphncCfbjh5+mP9/MWa2TXPv6X6p1OZKkZeAHTglWl2ui8/QLyWRztS5FkrRMDG5JkhLE\n4JYkKUEMbkmSEsSL0yRJqoLePbsZ3LeXpo4OzjrvgiXbj2fcCbZ/83McfOERJsbHFryNSqVCoZCn\nkJ+sYmWSdOoZ3N1DyE8wvrtnSffjGXeCdU4MsTrbxJ6JzIK3MTg4QOrpnzGYfYazfuWqKlYn1c6z\nD/6ExmKe7DlrWXtJqHU5K0LP3gPsfDSyZnUzF5znw1JqyTNucXpbO6uaW2pdhlQ1jZUyl7S0UJgY\nr3UpJ7VyuczmjU+yeeOTh2/odCwTBSg2X0T/0MJH+FQdBrekFWlv70E2vLCTuO3lWpdy0iqVSjT0\n7KKhZxcl74xWNTt69rHuic3s6NmzJNs3uCWtSBOFCuWGcxgcyde6FJ1ihsbLjDdcSG/f8JJs38+4\nV5hyuUylUiGVSvkkJklagQzuFea5h35K68gwo52reM2brq11OZK0Ih3o66NYLNLZ0UEut7y3nfaU\nbNq+fb388OfP89NHNta6lEVJV6A9nWGwf6DWpUjSivXTDTtZt7nMziX6HHsuBve0kfEx8g1rGc4n\n+4EdAwODHJxo4qV9Xk0rSUsll2ukqaU2z5c3uFegVDpNKuVLK0krkX/dJUlKEINb0pIZHBxgbHR4\n3pt7JN3enTvY/rOfMLj5iVqXsiADQ8M8+ewW9uzrrXUpOg4Gt6Ql8+L999K17XEmJ1b23bbGRke4\nOFdPB8Val7IgQ6N5DqYvYMee/lqXkijlcplt6x7k6fu+z+Tk8j3vweCeVi6XKRYLlErJ/MWTTkat\nTc20eztdrWDdaeimvKyjSn6Pe9quzS/QumeCXWODcP1ral2OFuHO+x4hlannhqsvpqWltdblSKec\nb931EKlMA++6Lvg7uAQM7mnpVJpzV62iv+TtERdi1/ZtTO7cwWR9PZfW+MYv5boOUvXt3ntZqpG6\n5m5S9S1UKpVal7IiOVSuqpgcHeGSxhy5U+i53n29+xns3U2xUKh1KdKK0LdjG9t++mMGXnq+1qWc\n1DzjXoTtO/YyUN9EqfNl3tC1qtblaJmNPvs0F43n2Np+Rq1LOWGbtrzI6HiBteecxmldXbUuRwu0\ne+9+9uwfoKu9ifPXJv8Z2en8BJc0dvBE+WCtSzmpecY9h1KpxOann2Tz07M/q7acypJt6GBi0gva\nlkKlUuGR7/x/PHHntxkfP/nuBJerr6cp11DrMo5bsVhkfHycfD7Py715+jMX0rPnQFX3USgUGB8f\np+AoxLxe3PQCW59+kpHhoQVvY8feAfozF7Jj/2gVK9PJzjPuOZRKJXY/8jjlSprm7jM4+6wza11S\nTeze8RKlZx7iYN1znPeWy5d13x3pFKliiYP9/Zzd2Lis+15ptj61gba+/fTV5aBhzZLsY8tjj9Ax\nPMhASxuXXnvdkuxjpaj07CQ3OMJdW/u45OI1vO6yS2pdkqokPzrC2LaNjBZ30pHNc84FF1V1+3MG\ndwghDdwGXAZMAh+OMW6bsfwm4D8DReBrMca/na9N0pSph/om8vlT96K1UqnERR3tvFisX/Z99x4c\noZJpZ/vmvZx15pmHH1maSqWWvZbjsXXjRrY+9TzZkSFytC3rvh976gV6BtK0109w/TW/+AYrm8mQ\nniyzpecgmYsvoXkJasjV1ZGeLPPcnv2MNm8iSYPw5XKZoaFBUqkU7e0dy7LPYhnSrecy4QDFgmUP\n7KB/f6Tn4lZaW5ppa6vN/cNnesPp3ZAZIwc07NrBj3YdpJiup+nMC6uy/fnOuG8G6mOM14QQ3gh8\ncXoeIYQ64EvAVcAYsC6E8D3gzUButjYrUSqVpn/Hi2xpqePiy6+odTlVNzIywuDAAKlK9b6juPHh\nn7Frew+Vjk7e9e63z7luKp0ik60nk83ywmPr6X/mOXan6nj7+26mrfXYwTjw9E/o6aunL1zKBa96\nddVqP5bBgwfp3fkyDPfRNpQnVX86mWwdTIxRLBZZ9+hT7B3KckZHZtZQrYYyGVq6LyQ9tvWY65Qq\nKVbnWhh96VkOHOyj+6K5v2O9+eknoVzmvNf8EvX1x/fGrVRJkW05g3wpWZ/EjYwMc/fPd1MpjPKB\nX3tD1bZbyOfp3bubwQN76Ti3u2rb3XDvvzKyu5eDazKcvsCsKhQKFAqFxN7ZLpPOcPXqVeweGuTh\nrRVaeZl3XHcZLz7/LJNDgzyzZ4TWrm5uuPYystnlH2AeGRtncKRAb9s5nH1+9b5mPN9v1rXAvQAx\nxvVMhfQhrwa2xhgHY4wF4CHguuk29xyjzaz6dm5lYN/OEy7+2U1b2fBMZGS0+p/vVCqV4/oqQ3tL\nK69Jwd4nNy7rnXOWy/qNL/LY9hKp+lf+MvQdPMhDj2/i0adeWNA2C/39XNzcTV/fJKOjozz34/vZ\n+MAP521Xn8lwettqqFvN8Mjcr3lXUxONhTSPP7VpQTWeqAO7d3HuwEHWVlJUSFFXnyOdnvr16t27\nh9atz5Pb/SLpuiM/E8/n8zxy/w+481vf4ennjx24M+3YvIl//sa3+c73fsqu3Sf+SMFzOzp4VVcH\nmcor12b07dvHtsfWs23jU0esm+nZSeueHgYGBpblqz2FQoGBgX4OHuxjYmLiuNvl83n27dvHpriZ\nvfv2HZ5fObiT/c89Oe81Ent3bGfXD+9lcssGGluqO1IyODhI5tmNtO09vtf3eLVns1zS0Q7FXxwN\nLBaLDAwOsmPnLvb3zn4dw5anNvCT7/0ru3onyVeaqlLTrm1b2PbzhxjeNTXIuvEH9/Dsv/4LA/1L\ne7FZOjMVygf7+3n2hS0MvvwSoVJmeLDESLn9mMdupVJhyzMb2fLM08f99dHZsiGfz/N83MruvfuO\nmF/Xeg7FutW0tFX34uXUXL+MIYTbge/EGO+dnn4ZOD/GWA4hvBn43Rjjb0wv+yNgB3D1sdocaz9/\n/Z/+tPLyvj0UW3JMjg7T0dzM/qFRMrkm6scGIAXpzi6KxQK5unrGBodpbmhkfLRAtq6BofIQnZ2d\nwNRwVzqdplwukyJFIT9JNpsllU5TLldIZ9IUCnnq63OUS6XDT9Hq3d3DW151FU9sjzStbmfvjh46\nWlZxfmcXdXX17DrwMmOFIumWdkbGRuhc1Un64CCXnncpAJt372C0Lk8mU0c6m6G3b4BsQxuV/DCd\n7W2kUlB8AdaPAAAIZElEQVSuVEinM4zv3cfrL7mSBzY9yarVnZTKZSBFJpMmPzFOc30Dw5MTNDY3\nM9LbR66hmXKqQqFcprmlEVIp0ukMqQMHWXvGBTy8czOr2ppoSKVhYoJXr7mUh7c+R9fqDiYKUzU1\nNGYZH8uTTqeYHBunrr6JiclxcvUZzqGe0UwTLw3tpauj7fAvQaVSZnD3PrpPW8PFp53GtgO97Bs/\nSLlcJtd1ASP9Paxuz5ECxocG+TerTqNnZIyRCvSNjLBqdRflUglSKVKkKBfyjAyN05bJ8po1F/Gj\n+DyrTmvm7GKJ4VKKfZUira3NU69fqUQ6nSbXP0Bn1xqeP7iHtkyJV3WdybP9IxTT49TX11FfV0el\nMlVrqVyGVJqh8Syn12V51erTuf+Fjaxe3UapUp4aKs5kKRWLQIVSuTx1bJCiVMzTnMlSLBYpFkq0\npVKctepcfrJ9K6evbqE4vW46naFcKlIuV2geHeb0rjU8vKeHzlyJy9pPo6XrDIqlCqnp98T3xefo\naE7xqsYmeitN9IyP0NaUgXKJTDZDqVxm9dgYxabVbB8bZlXr9D6KRTLZDMViiVR6qv8qlTLpTIb0\n8DAXtHfzRP845dIAHR0d5CfGSY/nKTR2M5Yfoqu1nnQmS2VslPq6LJPFIhSKXNKxioaW0wF4ePdu\nJid66WprZWJkhDevOYfn9+1nNNdAOp0mnU7TMTZKKZ/mufE83a1ZsqSYqKRIZ1M01k/1ff34CNlM\nlsliiQx1nN+5im1jRYbH+7mwuZ5SuY5N+QyNqSGy9fVkS0Vy6TRj5TLpuiyZdJZKpUylUiY/OER3\npo6XJlIUsxW62xpJpVOUgUq5TF19PXWVAoXRSYZKKTLZNA0N9QwPHOTS5hb2jGcYpcIZDbC6pZ2G\n5tPYt28bW0tlclloaGg8/DeiZXyMVfXNPD2RZbRvO2+/+AKe2T/BwVSK9sYSpFOkU2mmeh/K0/1f\nLpXIZKZem0w2MzXE3n+Q684+h00HRhnJ5CjkB0lnMlQyKSbGx3hT9yo29w4x0HoW6YkeWltbp/ff\nyIaJeopjvZy5qo3xiQmyDQ1QqZDJZCmXiof/Rk29/lPzoELzxARnNLTw5EQd44O7CV0t7OofoK6z\nG4YO0JWt58VyC5Dn9FXNVCqVV363Mhmax0Y5s7GZhvZz2N3fx65sMyMHtrJ6VQcNuQwTk1N/sicm\nxnltYwMvHRhidPVFDO7bxGldHZTyeTLpNMVUGipl6uvqGO8/yDVrzuWBbXspt69mTbGX0xob2The\ngHKRjtYWxicnqKvPUSoWyWaznFkqMjI6zr6OCxjreZ53hAtY99JeKu1nMjGym2xdlpamRipM1V+c\nnKShro66sVHOa++EhlVUSDM2OcbOgUEm27qp7N3E69ecww9f7iXd1EpnY4EUKbKZNOPDQ3Q2NHJg\nfIzGtnZW5yepz2bYUZn6+K0uk2F8cpL6hgaK+TzZbN1UP4yN0JlrYM+BfrJNXaTqGqhraGL1yE5S\nFdjRupbxvVt55yVrGRzN09A69Xs2NjHOhuFx2ju7Cb/0BnKVYd791tfT3d264M/75hs7GAJm3vYm\nPSOAB49a1goMzNNmVr/zZ3940nxg+eFl3NetVdrO780ybzl/joVajhoX28cfOY51fn8J97+UTuba\nlGwfWGT7/1CVKuB/q9J2TjbzDZWvA94JEEK4Gtg4Y9km4OIQQmcIoZ6pYfKH52kjSZIWYb6h8hSv\nXCEOcAvwOqAlxnh7COHdwGeZegPw1Rjjl2drE2PcvFQ/gCRJp5I5g1uSJJ1ckvV9DUmSTnEGtyRJ\nCWJwS5KUIAa3JEkJUtOHjKy0+5qfTEIIG5j6rj3AduDPgL8DysCzwO/EGL0ycQGmb+X75zHG60MI\nFzFLv4YQfgv4babu4//HMcZ/rVnBCXRUH18B3AVsmV58W4zxDvt44aZvWf01YC2QA/4YeAGP5ao5\nRh/vAu4GDn3TakHHcq3PuA/fCx34NFP3NdcihRAaAGKM10//9yGm7iv/hzHG64AU8J5a1phUIYT/\nCNzO1C8izNKvIYQzgE8A1wBvB/5s+l4HOg6z9PHrgC/NOJ7vsI8X7T8AvdPH7TuAv2bq76/HcvXM\n1sdXAl9c7LFc68d6HnEv9BDCvPc113G5HGgKIdzH1Gv8GeDKGOOD08vvAd4GfLdG9SXZVuDfA/9z\nenq2fi0B66bv4V8IIWxlalTp8eUuNqGO7uPXAZeEEN7D1Fn37wNvwD5ejDuAf5r+dxoo4LFcbbP1\n8euAsNhjudZn3G1M3SL1kNL08LkWZxT4Qozx7cBHgX84avkIUPtn3yVQjPGfmRrOOmTm7XqHmerX\nNl75mGLmfB2HWfp4PfAHMcZfYepjn//C1G2V7eMFijGOxhhHQgitTAXM/8WReeCxvEiz9PFngEep\nwrFc65A84fua67hsZjqsY4xbgD7g9BnLD91XXos383htY/b79bcC/ctZ1ApzZ4zxyUP/Bq7APl60\nEMK5wI+Bv48x/iMey1V3VB9/iyody7UObu9rvjRuYfp6gRDCWUwdCD8IIfzK9PJ/Czx4jLY6MU/O\n0q+PAr8cQsiFENqZegTus7UqcAW4N4Tw+ul/38DUEKJ9vAghhNOBHwD/Mcb4d9OzPZar6Bh9XJVj\nudafcd8J3BhCWDc9fUsti1lBvgp8PYRwKJxvYeqs+/bpix6e55XPXrQwh67I/xRH9ev0lbh/CfyM\nqTfHfxhj/MWHJms+h/r4o8BfhxAKwB7gt6eHIO3jhftDpoZjPxtC+Oz0vE8Cf+mxXDWz9fHvA/91\nscey9yqXJClBaj1ULkmSToDBLUlSghjckiQliMEtSVKCGNySJCWIwS1JUoIY3JIkJcj/DzkTKbkZ\nG6MnAAAAAElFTkSuQmCC\n",
"text": [
"<matplotlib.figure.Figure at 0x10b334198>"
]
}
],
"prompt_number": 4
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"#Buyer specific states\n",
"[observed_states[i] for i in range(0, len(observed_states)) if buy_hist[i] > not_buy_hist[i] and \n",
" not_buy_hist[i]<0.02 and buy_hist[i] > 0.05]"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 6,
"text": [
"['apps.appl.mortgage', 'entry.apps', 'exit.apps']"
]
}
],
"prompt_number": 6
}
],
"metadata": {}
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment