Skip to content

Instantly share code, notes, and snippets.

@ledmaster
Last active August 18, 2022 01:16
Show Gist options
  • Save ledmaster/91dac40fded676128ac516989114eeb0 to your computer and use it in GitHub Desktop.
Save ledmaster/91dac40fded676128ac516989114eeb0 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Admissions dataset, read from a path relative to the current working directory.\n",
"DATA_PATH = \"chemical-dependence-treatment-program-admissions-beginning-2007.csv\"\n",
"data = pd.read_csv(DATA_PATH)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Year</th>\n",
" <th>County of Program Location</th>\n",
" <th>Program Category</th>\n",
" <th>Service Type</th>\n",
" <th>Age Group</th>\n",
" <th>Primary Substance Group</th>\n",
" <th>Admissions</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2009</td>\n",
" <td>Suffolk</td>\n",
" <td>Residential</td>\n",
" <td>Intensive Residential</td>\n",
" <td>Under 18</td>\n",
" <td>Cocaine incl Crack</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2008</td>\n",
" <td>Onondaga</td>\n",
" <td>Crisis</td>\n",
" <td>Med Sup Withdrawal - Inpatient</td>\n",
" <td>Under 18</td>\n",
" <td>Alcohol</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2009</td>\n",
" <td>New York</td>\n",
" <td>Outpatient</td>\n",
" <td>Outpat Chem Depend for Youth</td>\n",
" <td>Under 18</td>\n",
" <td>Marijuana incl Hashish</td>\n",
" <td>18</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2007</td>\n",
" <td>Orange</td>\n",
" <td>Crisis</td>\n",
" <td>Medically Monitored Withdrawal</td>\n",
" <td>Under 18</td>\n",
" <td>Cocaine incl Crack</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2007</td>\n",
" <td>Suffolk</td>\n",
" <td>Outpatient</td>\n",
" <td>Outpatient Rehabilitation</td>\n",
" <td>Under 18</td>\n",
" <td>Alcohol</td>\n",
" <td>2</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Year County of Program Location Program Category Service Type Age Group Primary Substance Group Admissions\n",
"0 2009 Suffolk Residential Intensive Residential Under 18 Cocaine incl Crack 1\n",
"1 2008 Onondaga Crisis Med Sup Withdrawal - Inpatient Under 18 Alcohol 5\n",
"2 2009 New York Outpatient Outpat Chem Depend for Youth Under 18 Marijuana incl Hashish 18\n",
"3 2007 Orange Crisis Medically Monitored Withdrawal Under 18 Cocaine incl Crack 2\n",
"4 2007 Suffolk Outpatient Outpatient Rehabilitation Under 18 Alcohol 2"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.head()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"2007 6549\n",
"2008 6519\n",
"2009 6540\n",
"2010 6655\n",
"2011 6623\n",
"2012 6700\n",
"2013 6619\n",
"2014 6507\n",
"2015 6506\n",
"2016 6428\n",
"2017 6817\n",
"Name: Year, dtype: int64"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data['Year'].value_counts().sort_index()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"# Time-based holdout: years up to LAST_TRAIN_YEAR form the training split,\n",
"# all later years form the validation split.\n",
"LAST_TRAIN_YEAR = 2013\n",
"\n",
"# .copy() detaches each split from `data`, so assigning columns on a split later\n",
"# does not raise SettingWithCopyWarning.\n",
"df_train = data[data['Year'] <= LAST_TRAIN_YEAR].copy()\n",
"df_val = data[data['Year'] > LAST_TRAIN_YEAR].copy()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.ensemble import RandomForestRegressor"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# One Hot Encoder"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"OneHotEncoder(cols=['County of Program Location', 'Program Category', 'Service Type', 'Age Group', 'Primary Substance Group'],\n",
" drop_invariant=False, handle_missing='value',\n",
" handle_unknown='value', return_df=True, use_cat_names=True,\n",
" verbose=0)"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from category_encoders.one_hot import OneHotEncoder\n",
"\n",
"# Categorical columns that get expanded into one indicator column per category.\n",
"categorical_cols = [\n",
"    'County of Program Location',\n",
"    'Program Category',\n",
"    'Service Type',\n",
"    'Age Group',\n",
"    'Primary Substance Group',\n",
"]\n",
"\n",
"# use_cat_names=True names each indicator column '<column>_<category>'.\n",
"enc = OneHotEncoder(cols=categorical_cols, use_cat_names=True)\n",
"\n",
"# Fit on the training split only; the fitted encoder is reused for df_val.\n",
"enc.fit(df_train)\n"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Year</th>\n",
" <th>County of Program Location</th>\n",
" <th>Program Category</th>\n",
" <th>Service Type</th>\n",
" <th>Age Group</th>\n",
" <th>Primary Substance Group</th>\n",
" <th>Admissions</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2009</td>\n",
" <td>Suffolk</td>\n",
" <td>Residential</td>\n",
" <td>Intensive Residential</td>\n",
" <td>Under 18</td>\n",
" <td>Cocaine incl Crack</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2008</td>\n",
" <td>Onondaga</td>\n",
" <td>Crisis</td>\n",
" <td>Med Sup Withdrawal - Inpatient</td>\n",
" <td>Under 18</td>\n",
" <td>Alcohol</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2009</td>\n",
" <td>New York</td>\n",
" <td>Outpatient</td>\n",
" <td>Outpat Chem Depend for Youth</td>\n",
" <td>Under 18</td>\n",
" <td>Marijuana incl Hashish</td>\n",
" <td>18</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2007</td>\n",
" <td>Orange</td>\n",
" <td>Crisis</td>\n",
" <td>Medically Monitored Withdrawal</td>\n",
" <td>Under 18</td>\n",
" <td>Cocaine incl Crack</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2007</td>\n",
" <td>Suffolk</td>\n",
" <td>Outpatient</td>\n",
" <td>Outpatient Rehabilitation</td>\n",
" <td>Under 18</td>\n",
" <td>Alcohol</td>\n",
" <td>2</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Year County of Program Location Program Category Service Type Age Group Primary Substance Group Admissions\n",
"0 2009 Suffolk Residential Intensive Residential Under 18 Cocaine incl Crack 1\n",
"1 2008 Onondaga Crisis Med Sup Withdrawal - Inpatient Under 18 Alcohol 5\n",
"2 2009 New York Outpatient Outpat Chem Depend for Youth Under 18 Marijuana incl Hashish 18\n",
"3 2007 Orange Crisis Medically Monitored Withdrawal Under 18 Cocaine incl Crack 2\n",
"4 2007 Suffolk Outpatient Outpatient Rehabilitation Under 18 Alcohol 2"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_train.head()"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"61"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_train['County of Program Location'].nunique()"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
"df_train_ohe = enc.transform(df_train)"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Year</th>\n",
" <th>County of Program Location</th>\n",
" <th>Program Category</th>\n",
" <th>Service Type</th>\n",
" <th>Age Group</th>\n",
" <th>Primary Substance Group</th>\n",
" <th>Admissions</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2009</td>\n",
" <td>Suffolk</td>\n",
" <td>Residential</td>\n",
" <td>Intensive Residential</td>\n",
" <td>Under 18</td>\n",
" <td>Cocaine incl Crack</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2008</td>\n",
" <td>Onondaga</td>\n",
" <td>Crisis</td>\n",
" <td>Med Sup Withdrawal - Inpatient</td>\n",
" <td>Under 18</td>\n",
" <td>Alcohol</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2009</td>\n",
" <td>New York</td>\n",
" <td>Outpatient</td>\n",
" <td>Outpat Chem Depend for Youth</td>\n",
" <td>Under 18</td>\n",
" <td>Marijuana incl Hashish</td>\n",
" <td>18</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2007</td>\n",
" <td>Orange</td>\n",
" <td>Crisis</td>\n",
" <td>Medically Monitored Withdrawal</td>\n",
" <td>Under 18</td>\n",
" <td>Cocaine incl Crack</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2007</td>\n",
" <td>Suffolk</td>\n",
" <td>Outpatient</td>\n",
" <td>Outpatient Rehabilitation</td>\n",
" <td>Under 18</td>\n",
" <td>Alcohol</td>\n",
" <td>2</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Year County of Program Location Program Category Service Type Age Group Primary Substance Group Admissions\n",
"0 2009 Suffolk Residential Intensive Residential Under 18 Cocaine incl Crack 1\n",
"1 2008 Onondaga Crisis Med Sup Withdrawal - Inpatient Under 18 Alcohol 5\n",
"2 2009 New York Outpatient Outpat Chem Depend for Youth Under 18 Marijuana incl Hashish 18\n",
"3 2007 Orange Crisis Medically Monitored Withdrawal Under 18 Cocaine incl Crack 2\n",
"4 2007 Suffolk Outpatient Outpatient Rehabilitation Under 18 Alcohol 2"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_train.head()"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Year</th>\n",
" <th>County of Program Location_Suffolk</th>\n",
" <th>County of Program Location_Onondaga</th>\n",
" <th>County of Program Location_New York</th>\n",
" <th>County of Program Location_Orange</th>\n",
" <th>County of Program Location_Dutchess</th>\n",
" <th>County of Program Location_Queens</th>\n",
" <th>County of Program Location_Bronx</th>\n",
" <th>County of Program Location_Greene</th>\n",
" <th>County of Program Location_Ontario</th>\n",
" <th>County of Program Location_Ulster</th>\n",
" <th>County of Program Location_Jefferson</th>\n",
" <th>County of Program Location_Sullivan</th>\n",
" <th>County of Program Location_Kings</th>\n",
" <th>County of Program Location_Oneida</th>\n",
" <th>County of Program Location_Niagara</th>\n",
" <th>County of Program Location_Broome</th>\n",
" <th>County of Program Location_Columbia</th>\n",
" <th>County of Program Location_Monroe</th>\n",
" <th>County of Program Location_Nassau</th>\n",
" <th>County of Program Location_Albany</th>\n",
" <th>County of Program Location_Westchester</th>\n",
" <th>County of Program Location_Putnam</th>\n",
" <th>County of Program Location_Fulton</th>\n",
" <th>County of Program Location_Richmond</th>\n",
" <th>County of Program Location_Rockland</th>\n",
" <th>County of Program Location_Erie</th>\n",
" <th>County of Program Location_Steuben</th>\n",
" <th>County of Program Location_Schenectady</th>\n",
" <th>County of Program Location_Delaware</th>\n",
" <th>County of Program Location_Cattaraugus</th>\n",
" <th>County of Program Location_Montgomery</th>\n",
" <th>County of Program Location_Warren</th>\n",
" <th>County of Program Location_Seneca</th>\n",
" <th>County of Program Location_Genesee</th>\n",
" <th>County of Program Location_Chemung</th>\n",
" <th>County of Program Location_Madison</th>\n",
" <th>County of Program Location_Chautauqua</th>\n",
" <th>County of Program Location_Orleans</th>\n",
" <th>County of Program Location_Herkimer</th>\n",
" <th>County of Program Location_Rensselaer</th>\n",
" <th>County of Program Location_Tompkins</th>\n",
" <th>County of Program Location_Tioga</th>\n",
" <th>County of Program Location_Oswego</th>\n",
" <th>County of Program Location_St Lawrence</th>\n",
" <th>County of Program Location_Chenango</th>\n",
" <th>County of Program Location_Livingston</th>\n",
" <th>County of Program Location_Yates</th>\n",
" <th>County of Program Location_Cortland</th>\n",
" <th>County of Program Location_Allegany</th>\n",
" <th>County of Program Location_Cayuga</th>\n",
" <th>County of Program Location_Saratoga</th>\n",
" <th>County of Program Location_Washington</th>\n",
" <th>County of Program Location_Schoharie</th>\n",
" <th>County of Program Location_Wayne</th>\n",
" <th>County of Program Location_Wyoming</th>\n",
" <th>County of Program Location_Franklin</th>\n",
" <th>County of Program Location_Essex</th>\n",
" <th>County of Program Location_Lewis</th>\n",
" <th>County of Program Location_Otsego</th>\n",
" <th>County of Program Location_Clinton</th>\n",
" <th>County of Program Location_Schuyler</th>\n",
" <th>Program Category_Residential</th>\n",
" <th>Program Category_Crisis</th>\n",
" <th>Program Category_Outpatient</th>\n",
" <th>Program Category_Inpatient</th>\n",
" <th>Program Category_Opioid Treatment Program</th>\n",
" <th>Service Type_Intensive Residential</th>\n",
" <th>Service Type_Med Sup Withdrawal - Inpatient</th>\n",
" <th>Service Type_Outpat Chem Depend for Youth</th>\n",
" <th>Service Type_Medically Monitored Withdrawal</th>\n",
" <th>Service Type_Outpatient Rehabilitation</th>\n",
" <th>Service Type_Community Residential</th>\n",
" <th>Service Type_Outpatient Clinic</th>\n",
" <th>Service Type_Inpatient Rehabilitation</th>\n",
" <th>Service Type_Methadone Clinic</th>\n",
" <th>Service Type_Supportive Living</th>\n",
" <th>Service Type_OTP Intensive Residential</th>\n",
" <th>Service Type_Medically Managed Detoxification</th>\n",
" <th>Service Type_Res Rehab for Youth</th>\n",
" <th>Service Type_Meth to Abst - Residential</th>\n",
" <th>Service Type_Specialized OP - TBI</th>\n",
" <th>Service Type_Med Sup Withdrawal - Outpatient</th>\n",
" <th>Service Type_Specialized Services OP Rehab</th>\n",
" <th>Service Type_Specialized OP - Mobile</th>\n",
" <th>Service Type_Long Term Res CD/Youth</th>\n",
" <th>Service Type_Limited Outpatient/KEEP</th>\n",
" <th>Service Type_Short Term Res CD/Youth</th>\n",
" <th>Service Type_Non-Med Sup Chem Dep OP</th>\n",
" <th>Age Group_Under 18</th>\n",
" <th>Age Group_55 and Older</th>\n",
" <th>Age Group_18 thru 24</th>\n",
" <th>Age Group_25 thru 34</th>\n",
" <th>Age Group_45 thru 54</th>\n",
" <th>Age Group_35 thru 44</th>\n",
" <th>Primary Substance Group_Cocaine incl Crack</th>\n",
" <th>Primary Substance Group_Alcohol</th>\n",
" <th>Primary Substance Group_Marijuana incl Hashish</th>\n",
" <th>Primary Substance Group_Heroin</th>\n",
" <th>Primary Substance Group_All Others</th>\n",
" <th>Primary Substance Group_Other Opioids</th>\n",
" <th>Admissions</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2009</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2008</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2009</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>18</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2007</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2007</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Year County of Program Location_Suffolk County of Program Location_Onondaga ... Primary Substance Group_All Others Primary Substance Group_Other Opioids Admissions\n",
"0 2009 1 0 ... 0 0 1\n",
"1 2008 0 1 ... 0 0 5\n",
"2 2009 0 0 ... 0 0 18\n",
"3 2007 0 0 ... 0 0 2\n",
"4 2007 1 0 ... 0 0 2\n",
"\n",
"[5 rows x 102 columns]"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_train_ohe.head()"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
"df_val_ohe = enc.transform(df_val)"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Year</th>\n",
" <th>County of Program Location_Suffolk</th>\n",
" <th>County of Program Location_Onondaga</th>\n",
" <th>County of Program Location_New York</th>\n",
" <th>County of Program Location_Orange</th>\n",
" <th>County of Program Location_Dutchess</th>\n",
" <th>County of Program Location_Queens</th>\n",
" <th>County of Program Location_Bronx</th>\n",
" <th>County of Program Location_Greene</th>\n",
" <th>County of Program Location_Ontario</th>\n",
" <th>County of Program Location_Ulster</th>\n",
" <th>County of Program Location_Jefferson</th>\n",
" <th>County of Program Location_Sullivan</th>\n",
" <th>County of Program Location_Kings</th>\n",
" <th>County of Program Location_Oneida</th>\n",
" <th>County of Program Location_Niagara</th>\n",
" <th>County of Program Location_Broome</th>\n",
" <th>County of Program Location_Columbia</th>\n",
" <th>County of Program Location_Monroe</th>\n",
" <th>County of Program Location_Nassau</th>\n",
" <th>County of Program Location_Albany</th>\n",
" <th>County of Program Location_Westchester</th>\n",
" <th>County of Program Location_Putnam</th>\n",
" <th>County of Program Location_Fulton</th>\n",
" <th>County of Program Location_Richmond</th>\n",
" <th>County of Program Location_Rockland</th>\n",
" <th>County of Program Location_Erie</th>\n",
" <th>County of Program Location_Steuben</th>\n",
" <th>County of Program Location_Schenectady</th>\n",
" <th>County of Program Location_Delaware</th>\n",
" <th>County of Program Location_Cattaraugus</th>\n",
" <th>County of Program Location_Montgomery</th>\n",
" <th>County of Program Location_Warren</th>\n",
" <th>County of Program Location_Seneca</th>\n",
" <th>County of Program Location_Genesee</th>\n",
" <th>County of Program Location_Chemung</th>\n",
" <th>County of Program Location_Madison</th>\n",
" <th>County of Program Location_Chautauqua</th>\n",
" <th>County of Program Location_Orleans</th>\n",
" <th>County of Program Location_Herkimer</th>\n",
" <th>County of Program Location_Rensselaer</th>\n",
" <th>County of Program Location_Tompkins</th>\n",
" <th>County of Program Location_Tioga</th>\n",
" <th>County of Program Location_Oswego</th>\n",
" <th>County of Program Location_St Lawrence</th>\n",
" <th>County of Program Location_Chenango</th>\n",
" <th>County of Program Location_Livingston</th>\n",
" <th>County of Program Location_Yates</th>\n",
" <th>County of Program Location_Cortland</th>\n",
" <th>County of Program Location_Allegany</th>\n",
" <th>County of Program Location_Cayuga</th>\n",
" <th>County of Program Location_Saratoga</th>\n",
" <th>County of Program Location_Washington</th>\n",
" <th>County of Program Location_Schoharie</th>\n",
" <th>County of Program Location_Wayne</th>\n",
" <th>County of Program Location_Wyoming</th>\n",
" <th>County of Program Location_Franklin</th>\n",
" <th>County of Program Location_Essex</th>\n",
" <th>County of Program Location_Lewis</th>\n",
" <th>County of Program Location_Otsego</th>\n",
" <th>County of Program Location_Clinton</th>\n",
" <th>County of Program Location_Schuyler</th>\n",
" <th>Program Category_Residential</th>\n",
" <th>Program Category_Crisis</th>\n",
" <th>Program Category_Outpatient</th>\n",
" <th>Program Category_Inpatient</th>\n",
" <th>Program Category_Opioid Treatment Program</th>\n",
" <th>Service Type_Intensive Residential</th>\n",
" <th>Service Type_Med Sup Withdrawal - Inpatient</th>\n",
" <th>Service Type_Outpat Chem Depend for Youth</th>\n",
" <th>Service Type_Medically Monitored Withdrawal</th>\n",
" <th>Service Type_Outpatient Rehabilitation</th>\n",
" <th>Service Type_Community Residential</th>\n",
" <th>Service Type_Outpatient Clinic</th>\n",
" <th>Service Type_Inpatient Rehabilitation</th>\n",
" <th>Service Type_Methadone Clinic</th>\n",
" <th>Service Type_Supportive Living</th>\n",
" <th>Service Type_OTP Intensive Residential</th>\n",
" <th>Service Type_Medically Managed Detoxification</th>\n",
" <th>Service Type_Res Rehab for Youth</th>\n",
" <th>Service Type_Meth to Abst - Residential</th>\n",
" <th>Service Type_Specialized OP - TBI</th>\n",
" <th>Service Type_Med Sup Withdrawal - Outpatient</th>\n",
" <th>Service Type_Specialized Services OP Rehab</th>\n",
" <th>Service Type_Specialized OP - Mobile</th>\n",
" <th>Service Type_Long Term Res CD/Youth</th>\n",
" <th>Service Type_Limited Outpatient/KEEP</th>\n",
" <th>Service Type_Short Term Res CD/Youth</th>\n",
" <th>Service Type_Non-Med Sup Chem Dep OP</th>\n",
" <th>Age Group_Under 18</th>\n",
" <th>Age Group_55 and Older</th>\n",
" <th>Age Group_18 thru 24</th>\n",
" <th>Age Group_25 thru 34</th>\n",
" <th>Age Group_45 thru 54</th>\n",
" <th>Age Group_35 thru 44</th>\n",
" <th>Primary Substance Group_Cocaine incl Crack</th>\n",
" <th>Primary Substance Group_Alcohol</th>\n",
" <th>Primary Substance Group_Marijuana incl Hashish</th>\n",
" <th>Primary Substance Group_Heroin</th>\n",
" <th>Primary Substance Group_All Others</th>\n",
" <th>Primary Substance Group_Other Opioids</th>\n",
" <th>Admissions</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>2015</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>2014</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>23</th>\n",
" <td>2014</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>39</th>\n",
" <td>2015</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50</th>\n",
" <td>2014</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Year County of Program Location_Suffolk County of Program Location_Onondaga ... Primary Substance Group_All Others Primary Substance Group_Other Opioids Admissions\n",
"12 2015 1 0 ... 0 0 1\n",
"18 2014 0 0 ... 0 0 1\n",
"23 2014 0 0 ... 0 0 1\n",
"39 2015 0 0 ... 0 0 1\n",
"50 2014 0 1 ... 0 0 2\n",
"\n",
"[5 rows x 102 columns]"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_val_ohe.head()"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [],
"source": [
"X_train = df_train_ohe.drop(\"Admissions\", axis=1)\n",
"X_val = df_val_ohe.drop(\"Admissions\", axis=1)\n",
"\n",
"y_train = df_train_ohe[\"Admissions\"]\n",
"y_val = df_val_ohe[\"Admissions\"]"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Year</th>\n",
" <th>County of Program Location_Suffolk</th>\n",
" <th>County of Program Location_Onondaga</th>\n",
" <th>County of Program Location_New York</th>\n",
" <th>County of Program Location_Orange</th>\n",
" <th>County of Program Location_Dutchess</th>\n",
" <th>County of Program Location_Queens</th>\n",
" <th>County of Program Location_Bronx</th>\n",
" <th>County of Program Location_Greene</th>\n",
" <th>County of Program Location_Ontario</th>\n",
" <th>County of Program Location_Ulster</th>\n",
" <th>County of Program Location_Jefferson</th>\n",
" <th>County of Program Location_Sullivan</th>\n",
" <th>County of Program Location_Kings</th>\n",
" <th>County of Program Location_Oneida</th>\n",
" <th>County of Program Location_Niagara</th>\n",
" <th>County of Program Location_Broome</th>\n",
" <th>County of Program Location_Columbia</th>\n",
" <th>County of Program Location_Monroe</th>\n",
" <th>County of Program Location_Nassau</th>\n",
" <th>County of Program Location_Albany</th>\n",
" <th>County of Program Location_Westchester</th>\n",
" <th>County of Program Location_Putnam</th>\n",
" <th>County of Program Location_Fulton</th>\n",
" <th>County of Program Location_Richmond</th>\n",
" <th>County of Program Location_Rockland</th>\n",
" <th>County of Program Location_Erie</th>\n",
" <th>County of Program Location_Steuben</th>\n",
" <th>County of Program Location_Schenectady</th>\n",
" <th>County of Program Location_Delaware</th>\n",
" <th>County of Program Location_Cattaraugus</th>\n",
" <th>County of Program Location_Montgomery</th>\n",
" <th>County of Program Location_Warren</th>\n",
" <th>County of Program Location_Seneca</th>\n",
" <th>County of Program Location_Genesee</th>\n",
" <th>County of Program Location_Chemung</th>\n",
" <th>County of Program Location_Madison</th>\n",
" <th>County of Program Location_Chautauqua</th>\n",
" <th>County of Program Location_Orleans</th>\n",
" <th>County of Program Location_Herkimer</th>\n",
" <th>County of Program Location_Rensselaer</th>\n",
" <th>County of Program Location_Tompkins</th>\n",
" <th>County of Program Location_Tioga</th>\n",
" <th>County of Program Location_Oswego</th>\n",
" <th>County of Program Location_St Lawrence</th>\n",
" <th>County of Program Location_Chenango</th>\n",
" <th>County of Program Location_Livingston</th>\n",
" <th>County of Program Location_Yates</th>\n",
" <th>County of Program Location_Cortland</th>\n",
" <th>County of Program Location_Allegany</th>\n",
" <th>County of Program Location_Cayuga</th>\n",
" <th>County of Program Location_Saratoga</th>\n",
" <th>County of Program Location_Washington</th>\n",
" <th>County of Program Location_Schoharie</th>\n",
" <th>County of Program Location_Wayne</th>\n",
" <th>County of Program Location_Wyoming</th>\n",
" <th>County of Program Location_Franklin</th>\n",
" <th>County of Program Location_Essex</th>\n",
" <th>County of Program Location_Lewis</th>\n",
" <th>County of Program Location_Otsego</th>\n",
" <th>County of Program Location_Clinton</th>\n",
" <th>County of Program Location_Schuyler</th>\n",
" <th>Program Category_Residential</th>\n",
" <th>Program Category_Crisis</th>\n",
" <th>Program Category_Outpatient</th>\n",
" <th>Program Category_Inpatient</th>\n",
" <th>Program Category_Opioid Treatment Program</th>\n",
" <th>Service Type_Intensive Residential</th>\n",
" <th>Service Type_Med Sup Withdrawal - Inpatient</th>\n",
" <th>Service Type_Outpat Chem Depend for Youth</th>\n",
" <th>Service Type_Medically Monitored Withdrawal</th>\n",
" <th>Service Type_Outpatient Rehabilitation</th>\n",
" <th>Service Type_Community Residential</th>\n",
" <th>Service Type_Outpatient Clinic</th>\n",
" <th>Service Type_Inpatient Rehabilitation</th>\n",
" <th>Service Type_Methadone Clinic</th>\n",
" <th>Service Type_Supportive Living</th>\n",
" <th>Service Type_OTP Intensive Residential</th>\n",
" <th>Service Type_Medically Managed Detoxification</th>\n",
" <th>Service Type_Res Rehab for Youth</th>\n",
" <th>Service Type_Meth to Abst - Residential</th>\n",
" <th>Service Type_Specialized OP - TBI</th>\n",
" <th>Service Type_Med Sup Withdrawal - Outpatient</th>\n",
" <th>Service Type_Specialized Services OP Rehab</th>\n",
" <th>Service Type_Specialized OP - Mobile</th>\n",
" <th>Service Type_Long Term Res CD/Youth</th>\n",
" <th>Service Type_Limited Outpatient/KEEP</th>\n",
" <th>Service Type_Short Term Res CD/Youth</th>\n",
" <th>Service Type_Non-Med Sup Chem Dep OP</th>\n",
" <th>Age Group_Under 18</th>\n",
" <th>Age Group_55 and Older</th>\n",
" <th>Age Group_18 thru 24</th>\n",
" <th>Age Group_25 thru 34</th>\n",
" <th>Age Group_45 thru 54</th>\n",
" <th>Age Group_35 thru 44</th>\n",
" <th>Primary Substance Group_Cocaine incl Crack</th>\n",
" <th>Primary Substance Group_Alcohol</th>\n",
" <th>Primary Substance Group_Marijuana incl Hashish</th>\n",
" <th>Primary Substance Group_Heroin</th>\n",
" <th>Primary Substance Group_All Others</th>\n",
" <th>Primary Substance Group_Other Opioids</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2009</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2008</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2009</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2007</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2007</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Year County of Program Location_Suffolk ... Primary Substance Group_All Others Primary Substance Group_Other Opioids\n",
"0 2009 1 ... 0 0\n",
"1 2008 0 ... 0 0\n",
"2 2009 0 ... 0 0\n",
"3 2007 0 ... 0 0\n",
"4 2007 1 ... 0 0\n",
"\n",
"[5 rows x 101 columns]"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X_train.head()"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,\n",
" max_features='auto', max_leaf_nodes=None,\n",
" min_impurity_decrease=0.0, min_impurity_split=None,\n",
" min_samples_leaf=1, min_samples_split=2,\n",
" min_weight_fraction_leaf=0.0, n_estimators=100, n_jobs=6,\n",
" oob_score=False, random_state=22, verbose=0, warm_start=False)"
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"mdl = RandomForestRegressor(n_jobs=6, n_estimators=100, random_state=22)\n",
"mdl.fit(X_train, y_train)"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [],
"source": [
"p_ohe = mdl.predict(X_val)"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([ 3.78, 3.06, 18.99, 3.19, 36.27])"
]
},
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"p_ohe[:5]"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"14.194968009749408"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from sklearn.metrics import mean_absolute_error\n",
"mean_absolute_error(y_val, p_ohe)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
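"# Ordinal Encoding\n",
"\n",
"Each categorical column is replaced by a single integer code per category. The encoder is fit on the training set only and then applied to the validation set, and the same Random Forest configuration is refit so the validation MAE can be compared with the one-hot result above."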
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Year</th>\n",
" <th>County of Program Location</th>\n",
" <th>Program Category</th>\n",
" <th>Service Type</th>\n",
" <th>Age Group</th>\n",
" <th>Primary Substance Group</th>\n",
" <th>Admissions</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2009</td>\n",
" <td>Suffolk</td>\n",
" <td>Residential</td>\n",
" <td>Intensive Residential</td>\n",
" <td>Under 18</td>\n",
" <td>Cocaine incl Crack</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2008</td>\n",
" <td>Onondaga</td>\n",
" <td>Crisis</td>\n",
" <td>Med Sup Withdrawal - Inpatient</td>\n",
" <td>Under 18</td>\n",
" <td>Alcohol</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2009</td>\n",
" <td>New York</td>\n",
" <td>Outpatient</td>\n",
" <td>Outpat Chem Depend for Youth</td>\n",
" <td>Under 18</td>\n",
" <td>Marijuana incl Hashish</td>\n",
" <td>18</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2007</td>\n",
" <td>Orange</td>\n",
" <td>Crisis</td>\n",
" <td>Medically Monitored Withdrawal</td>\n",
" <td>Under 18</td>\n",
" <td>Cocaine incl Crack</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2007</td>\n",
" <td>Suffolk</td>\n",
" <td>Outpatient</td>\n",
" <td>Outpatient Rehabilitation</td>\n",
" <td>Under 18</td>\n",
" <td>Alcohol</td>\n",
" <td>2</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Year County of Program Location Program Category Service Type Age Group Primary Substance Group Admissions\n",
"0 2009 Suffolk Residential Intensive Residential Under 18 Cocaine incl Crack 1\n",
"1 2008 Onondaga Crisis Med Sup Withdrawal - Inpatient Under 18 Alcohol 5\n",
"2 2009 New York Outpatient Outpat Chem Depend for Youth Under 18 Marijuana incl Hashish 18\n",
"3 2007 Orange Crisis Medically Monitored Withdrawal Under 18 Cocaine incl Crack 2\n",
"4 2007 Suffolk Outpatient Outpatient Rehabilitation Under 18 Alcohol 2"
]
},
"execution_count": 32,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_train.head()"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [],
"source": [
"from category_encoders.ordinal import OrdinalEncoder\n",
"enc = OrdinalEncoder(cols=['County of Program Location', 'Program Category', 'Service Type', 'Age Group', 'Primary Substance Group'])\n",
"enc.fit(df_train)\n",
"df_train_ord = enc.transform(df_train)"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Year</th>\n",
" <th>County of Program Location</th>\n",
" <th>Program Category</th>\n",
" <th>Service Type</th>\n",
" <th>Age Group</th>\n",
" <th>Primary Substance Group</th>\n",
" <th>Admissions</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2009</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2008</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2009</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>18</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2007</td>\n",
" <td>4</td>\n",
" <td>2</td>\n",
" <td>4</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2007</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>5</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Year County of Program Location Program Category Service Type Age Group Primary Substance Group Admissions\n",
"0 2009 1 1 1 1 1 1\n",
"1 2008 2 2 2 1 2 5\n",
"2 2009 3 3 3 1 3 18\n",
"3 2007 4 2 4 1 1 2\n",
"4 2007 1 3 5 1 2 2"
]
},
"execution_count": 34,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_train_ord.head()"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [],
"source": [
"df_val_ord = enc.transform(df_val)"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Year</th>\n",
" <th>County of Program Location</th>\n",
" <th>Program Category</th>\n",
" <th>Service Type</th>\n",
" <th>Age Group</th>\n",
" <th>Primary Substance Group</th>\n",
" <th>Admissions</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>2015</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>5.0</td>\n",
" <td>1</td>\n",
" <td>4</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>2014</td>\n",
" <td>4</td>\n",
" <td>2</td>\n",
" <td>4.0</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>23</th>\n",
" <td>2014</td>\n",
" <td>26</td>\n",
" <td>2</td>\n",
" <td>2.0</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>39</th>\n",
" <td>2015</td>\n",
" <td>9</td>\n",
" <td>2</td>\n",
" <td>4.0</td>\n",
" <td>1</td>\n",
" <td>4</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50</th>\n",
" <td>2014</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>2.0</td>\n",
" <td>1</td>\n",
" <td>4</td>\n",
" <td>2</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Year County of Program Location Program Category Service Type Age Group Primary Substance Group Admissions\n",
"12 2015 1 3 5.0 1 4 1\n",
"18 2014 4 2 4.0 1 3 1\n",
"23 2014 26 2 2.0 1 2 1\n",
"39 2015 9 2 4.0 1 4 1\n",
"50 2014 2 2 2.0 1 4 2"
]
},
"execution_count": 36,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_val_ord.head()"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,\n",
" max_features='auto', max_leaf_nodes=None,\n",
" min_impurity_decrease=0.0, min_impurity_split=None,\n",
" min_samples_leaf=1, min_samples_split=2,\n",
" min_weight_fraction_leaf=0.0, n_estimators=100, n_jobs=6,\n",
" oob_score=False, random_state=22, verbose=0, warm_start=False)"
]
},
"execution_count": 37,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Split the ordinal-encoded frames into the Admissions target and the feature matrix.\n",
"y_train, y_val = df_train_ord[\"Admissions\"], df_val_ord[\"Admissions\"]\n",
"X_train = df_train_ord.drop(columns=[\"Admissions\"])\n",
"X_val = df_val_ord.drop(columns=[\"Admissions\"])\n",
"\n",
"# random_state pins the forest's randomness so the fit can be repeated.\n",
"mdl = RandomForestRegressor(n_estimators=100, random_state=22, n_jobs=6)\n",
"# fit() is the last expression, so the estimator repr is the cell's output.\n",
"mdl.fit(X_train, y_train)"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {},
"outputs": [],
"source": [
"p_ord = mdl.predict(X_val)"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"14.790480996267807"
]
},
"execution_count": 39,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from sklearn.metrics import mean_absolute_error\n",
"\n",
"# Validation MAE of the forest trained on the ordinal-encoded features.\n",
"mean_absolute_error(y_true=y_val, y_pred=p_ord)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Target Encoding"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Year</th>\n",
" <th>County of Program Location</th>\n",
" <th>Program Category</th>\n",
" <th>Service Type</th>\n",
" <th>Age Group</th>\n",
" <th>Primary Substance Group</th>\n",
" <th>Admissions</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2009</td>\n",
" <td>Suffolk</td>\n",
" <td>Residential</td>\n",
" <td>Intensive Residential</td>\n",
" <td>Under 18</td>\n",
" <td>Cocaine incl Crack</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2008</td>\n",
" <td>Onondaga</td>\n",
" <td>Crisis</td>\n",
" <td>Med Sup Withdrawal - Inpatient</td>\n",
" <td>Under 18</td>\n",
" <td>Alcohol</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2009</td>\n",
" <td>New York</td>\n",
" <td>Outpatient</td>\n",
" <td>Outpat Chem Depend for Youth</td>\n",
" <td>Under 18</td>\n",
" <td>Marijuana incl Hashish</td>\n",
" <td>18</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2007</td>\n",
" <td>Orange</td>\n",
" <td>Crisis</td>\n",
" <td>Medically Monitored Withdrawal</td>\n",
" <td>Under 18</td>\n",
" <td>Cocaine incl Crack</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2007</td>\n",
" <td>Suffolk</td>\n",
" <td>Outpatient</td>\n",
" <td>Outpatient Rehabilitation</td>\n",
" <td>Under 18</td>\n",
" <td>Alcohol</td>\n",
" <td>2</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Year County of Program Location Program Category Service Type Age Group Primary Substance Group Admissions\n",
"0 2009 Suffolk Residential Intensive Residential Under 18 Cocaine incl Crack 1\n",
"1 2008 Onondaga Crisis Med Sup Withdrawal - Inpatient Under 18 Alcohol 5\n",
"2 2009 New York Outpatient Outpat Chem Depend for Youth Under 18 Marijuana incl Hashish 18\n",
"3 2007 Orange Crisis Medically Monitored Withdrawal Under 18 Cocaine incl Crack 2\n",
"4 2007 Suffolk Outpatient Outpatient Rehabilitation Under 18 Alcohol 2"
]
},
"execution_count": 40,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_train.head()"
]
},
{
"cell_type": "code",
"execution_count": 56,
"metadata": {},
"outputs": [],
"source": [
"from category_encoders.target_encoder import TargetEncoder\n",
"\n",
"# Only the categorical columns are listed, so Year and Admissions pass through unchanged.\n",
"enc = TargetEncoder(\n",
"    cols=[\n",
"        'County of Program Location',\n",
"        'Program Category',\n",
"        'Service Type',\n",
"        'Age Group',\n",
"        'Primary Substance Group',\n",
"    ],\n",
"    min_samples_leaf=200,\n",
").fit(df_train, df_train['Admissions'])\n",
"\n",
"# Encode the training rows with the encoder fitted on those same rows.\n",
"df_train_trg = enc.transform(df_train)"
]
},
{
"cell_type": "code",
"execution_count": 57,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Year</th>\n",
" <th>County of Program Location</th>\n",
" <th>Program Category</th>\n",
" <th>Service Type</th>\n",
" <th>Age Group</th>\n",
" <th>Primary Substance Group</th>\n",
" <th>Admissions</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2009</td>\n",
" <td>84.365213</td>\n",
" <td>12.540684</td>\n",
" <td>26.125614</td>\n",
" <td>25.444371</td>\n",
" <td>33.193930</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2008</td>\n",
" <td>40.103075</td>\n",
" <td>68.736003</td>\n",
" <td>63.489893</td>\n",
" <td>25.444371</td>\n",
" <td>96.876860</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2009</td>\n",
" <td>145.838906</td>\n",
" <td>57.269020</td>\n",
" <td>45.819998</td>\n",
" <td>25.444371</td>\n",
" <td>48.186532</td>\n",
" <td>18</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2007</td>\n",
" <td>29.188417</td>\n",
" <td>68.736003</td>\n",
" <td>42.527388</td>\n",
" <td>25.444371</td>\n",
" <td>33.193930</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2007</td>\n",
" <td>84.365213</td>\n",
" <td>57.269020</td>\n",
" <td>14.000000</td>\n",
" <td>25.444371</td>\n",
" <td>96.876860</td>\n",
" <td>2</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Year County of Program Location Program Category Service Type Age Group Primary Substance Group Admissions\n",
"0 2009 84.365213 12.540684 26.125614 25.444371 33.193930 1\n",
"1 2008 40.103075 68.736003 63.489893 25.444371 96.876860 5\n",
"2 2009 145.838906 57.269020 45.819998 25.444371 48.186532 18\n",
"3 2007 29.188417 68.736003 42.527388 25.444371 33.193930 2\n",
"4 2007 84.365213 57.269020 14.000000 25.444371 96.876860 2"
]
},
"execution_count": 57,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_train_trg.head()"
]
},
{
"cell_type": "code",
"execution_count": 58,
"metadata": {},
"outputs": [],
"source": [
"df_val_trg = enc.transform(df_val)"
]
},
{
"cell_type": "code",
"execution_count": 59,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,\n",
" max_features='auto', max_leaf_nodes=None,\n",
" min_impurity_decrease=0.0, min_impurity_split=None,\n",
" min_samples_leaf=1, min_samples_split=2,\n",
" min_weight_fraction_leaf=0.0, n_estimators=100, n_jobs=6,\n",
" oob_score=False, random_state=22, verbose=0, warm_start=False)"
]
},
"execution_count": 59,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Split the target-encoded frames into the Admissions target and the feature matrix.\n",
"y_train, y_val = df_train_trg[\"Admissions\"], df_val_trg[\"Admissions\"]\n",
"X_train = df_train_trg.drop(columns=[\"Admissions\"])\n",
"X_val = df_val_trg.drop(columns=[\"Admissions\"])\n",
"\n",
"# Same forest settings and seed as the ordinal-encoding run, so only the encoding differs.\n",
"mdl = RandomForestRegressor(n_estimators=100, random_state=22, n_jobs=6)\n",
"# fit() is the last expression, so the estimator repr is the cell's output.\n",
"mdl.fit(X_train, y_train)"
]
},
{
"cell_type": "code",
"execution_count": 60,
"metadata": {},
"outputs": [],
"source": [
"p_trg = mdl.predict(X_val)"
]
},
{
"cell_type": "code",
"execution_count": 61,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"14.180224851897199"
]
},
"execution_count": 61,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from sklearn.metrics import mean_absolute_error\n",
"\n",
"# Validation MAE of the forest trained on the target-encoded features.\n",
"mean_absolute_error(y_true=y_val, y_pred=p_trg)"
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"\u001b[0;31mInit signature:\u001b[0m\n",
"\u001b[0mTargetEncoder\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\u001b[0m\n",
"\u001b[0;34m\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m'verbose=0'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'cols=None'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'drop_invariant=False'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'return_df=True'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"handle_missing='value'\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"handle_unknown='value'\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'min_samples_leaf=1'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'smoothing=1.0'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
"\u001b[0;34m\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mDocstring:\u001b[0m \n",
"Target encoding for categorical features.\n",
"\n",
"For the case of categorical target: features are replaced with a blend of posterior probability of the target\n",
"given particular categorical value and the prior probability of the target over all the training data.\n",
"\n",
"For the case of continuous target: features are replaced with a blend of the expected value of the target\n",
"given particular categorical value and the expected value of the target over all the training data.\n",
"\n",
"Parameters\n",
"----------\n",
"\n",
"verbose: int\n",
" integer indicating verbosity of the output. 0 for none.\n",
"cols: list\n",
" a list of columns to encode, if None, all string columns will be encoded.\n",
"drop_invariant: bool\n",
" boolean for whether or not to drop columns with 0 variance.\n",
"return_df: bool\n",
" boolean for whether to return a pandas DataFrame from transform (otherwise it will be a numpy array).\n",
"handle_unknown: str\n",
" options are 'error', 'return_nan' and 'value', defaults to 'value', which will impute the target mean.\n",
"min_samples_leaf: int\n",
" minimum samples to take category average into account.\n",
"smoothing: float\n",
" smoothing effect to balance categorical average vs prior. Higher value means stronger regularization.\n",
" The value must be strictly bigger than 0.\n",
"\n",
"Example\n",
"-------\n",
">>> from category_encoders import *\n",
">>> import pandas as pd\n",
">>> from sklearn.datasets import load_boston\n",
">>> bunch = load_boston()\n",
">>> y = bunch.target\n",
">>> X = pd.DataFrame(bunch.data, columns=bunch.feature_names)\n",
">>> enc = TargetEncoder(cols=['CHAS', 'RAD']).fit(X, y)\n",
">>> numeric_dataset = enc.transform(X)\n",
">>> print(numeric_dataset.info())\n",
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 506 entries, 0 to 505\n",
"Data columns (total 13 columns):\n",
"CRIM 506 non-null float64\n",
"ZN 506 non-null float64\n",
"INDUS 506 non-null float64\n",
"CHAS 506 non-null float64\n",
"NOX 506 non-null float64\n",
"RM 506 non-null float64\n",
"AGE 506 non-null float64\n",
"DIS 506 non-null float64\n",
"RAD 506 non-null float64\n",
"TAX 506 non-null float64\n",
"PTRATIO 506 non-null float64\n",
"B 506 non-null float64\n",
"LSTAT 506 non-null float64\n",
"dtypes: float64(13)\n",
"memory usage: 51.5 KB\n",
"None\n",
"\n",
"References\n",
"----------\n",
"\n",
".. [1] A Preprocessing Scheme for High-Cardinality Categorical Attributes in Classification and Prediction Problems, from\n",
"https://dl.acm.org/citation.cfm?id=507538\n",
"\u001b[0;31mFile:\u001b[0m ~/anaconda3/lib/python3.6/site-packages/category_encoders/target_encoder.py\n",
"\u001b[0;31mType:\u001b[0m type\n",
"\u001b[0;31mSubclasses:\u001b[0m \n"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"?TargetEncoder"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.8"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment