Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save tatamiya/28431be826bcb6d0e94756ff8bac427e to your computer and use it in GitHub Desktop.
Save tatamiya/28431be826bcb6d0e94756ff8bac427e to your computer and use it in GitHub Desktop.
Display Feature Importances in a Recommendation Algorithm (LightFM)
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"[What's This Notebook?]\n",
"\n",
"- Display feature importances in a recommendation algorithm.\n",
" - linear coefficients base\n",
" - permutation importances\n",
"\n",
"[Settings]\n",
"- Algorithm: LightFM\n",
" - https://github.com/lyst/lightfm\n",
" - https://tatamiya-practice.hatenablog.com/entry/2020/03/21/203332\n",
"- Data: MovieLens\n",
" - https://grouplens.org/datasets/movielens/"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/tatamiya/.pyenv/versions/3.7.4/envs/recommend/lib/python3.7/site-packages/lightfm/_lightfm_fast.py:9: UserWarning: LightFM was compiled without OpenMP support. Only a single thread will be used.\n",
" warnings.warn('LightFM was compiled without OpenMP support. '\n"
]
}
],
"source": [
"from lightfm import LightFM, evaluation\n",
"import pandas as pd\n",
"import numpy as np\n",
"import scipy.sparse as sp"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'1.0.1'"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.__version__"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>movieId</th>\n",
" <th>title</th>\n",
" <th>genres</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>Toy Story (1995)</td>\n",
" <td>Adventure|Animation|Children|Comedy|Fantasy</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>Jumanji (1995)</td>\n",
" <td>Adventure|Children|Fantasy</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>Grumpier Old Men (1995)</td>\n",
" <td>Comedy|Romance</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>Waiting to Exhale (1995)</td>\n",
" <td>Comedy|Drama|Romance</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5</td>\n",
" <td>Father of the Bride Part II (1995)</td>\n",
" <td>Comedy</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" movieId title \\\n",
"0 1 Toy Story (1995) \n",
"1 2 Jumanji (1995) \n",
"2 3 Grumpier Old Men (1995) \n",
"3 4 Waiting to Exhale (1995) \n",
"4 5 Father of the Bride Part II (1995) \n",
"\n",
" genres \n",
"0 Adventure|Animation|Children|Comedy|Fantasy \n",
"1 Adventure|Children|Fantasy \n",
"2 Comedy|Romance \n",
"3 Comedy|Drama|Romance \n",
"4 Comedy "
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_movies = pd.read_csv('data/ml-latest-small/movies.csv')\n",
"df_movies.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Dataset\n",
"- create user-item interaction matrix"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>userId</th>\n",
" <th>movieId</th>\n",
" <th>rating</th>\n",
" <th>timestamp</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>4.0</td>\n",
" <td>964982703</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>4.0</td>\n",
" <td>964981247</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1</td>\n",
" <td>6</td>\n",
" <td>4.0</td>\n",
" <td>964982224</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1</td>\n",
" <td>47</td>\n",
" <td>5.0</td>\n",
" <td>964983815</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1</td>\n",
" <td>50</td>\n",
" <td>5.0</td>\n",
" <td>964982931</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" userId movieId rating timestamp\n",
"0 1 1 4.0 964982703\n",
"1 1 3 4.0 964981247\n",
"2 1 6 4.0 964982224\n",
"3 1 47 5.0 964983815\n",
"4 1 50 5.0 964982931"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_ratings = pd.read_csv('data/ml-latest-small/ratings.csv')\n",
"df_ratings.head()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"class DataSet():\n",
" def __init__(self, df, col_user, col_item, col_interaction, item_master=None, user_master=None):\n",
" self.col_user, self.col_item, self.col_interaction = col_user, col_item, col_interaction\n",
" \n",
" if user_master is None:\n",
" self.user2index, self.index2user = self._label_indexer(df[self.col_user])\n",
" else:\n",
" self.user2index, self.index2user = self._label_indexer(user_master[self.col_user])\n",
" \n",
" if item_master is None:\n",
" self.item2index, self.index2item = self._label_indexer(df[self.col_item])\n",
" else:\n",
" self.item2index, self.index2item = self._label_indexer(item_master[self.col_item])\n",
" \n",
" self.n_users, self.n_items = len(self.index2user), len(self.index2item)\n",
" \n",
" data = df[self.col_interaction].values\n",
" row_ind = df[self.col_user].map(self.user2index).values\n",
" col_ind = df[self.col_item].map(self.item2index).values\n",
" self.ui_matrix = sp.csr_matrix((data, (row_ind, col_ind)))\n",
" \n",
" \n",
" def _label_indexer(self, X):\n",
" label2index = {label: i for i, label in enumerate(X.unique())}\n",
" index2label = {i: label for label, i in label2index.items()}\n",
" \n",
" return label2index, index2label\n"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"dataset = DataSet(df_ratings, 'userId', 'movieId', 'rating', item_master=df_movies)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"csr_ui_matrix = dataset.ui_matrix"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"9742"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dataset.n_items"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"610"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dataset.n_users"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Item Features\n",
"\n",
"Create item features from the movie master:\n",
"\n",
"- year and recency\n",
"- genre"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>movieId</th>\n",
" <th>title</th>\n",
" <th>genres</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>Toy Story (1995)</td>\n",
" <td>Adventure|Animation|Children|Comedy|Fantasy</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>Jumanji (1995)</td>\n",
" <td>Adventure|Children|Fantasy</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>Grumpier Old Men (1995)</td>\n",
" <td>Comedy|Romance</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>Waiting to Exhale (1995)</td>\n",
" <td>Comedy|Drama|Romance</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5</td>\n",
" <td>Father of the Bride Part II (1995)</td>\n",
" <td>Comedy</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" movieId title \\\n",
"0 1 Toy Story (1995) \n",
"1 2 Jumanji (1995) \n",
"2 3 Grumpier Old Men (1995) \n",
"3 4 Waiting to Exhale (1995) \n",
"4 5 Father of the Bride Part II (1995) \n",
"\n",
" genres \n",
"0 Adventure|Animation|Children|Comedy|Fantasy \n",
"1 Adventure|Children|Fantasy \n",
"2 Comedy|Romance \n",
"3 Comedy|Drama|Romance \n",
"4 Comedy "
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_movies.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Genres"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"# Working copy of the movie master with the '|'-separated genre string split into a list.\n",
"df_movies_tmp = df_movies.assign(genres_split=df_movies['genres'].str.split('|'))\n",
"\n",
"# One row per (movie, genre) pair, plus a constant column for the pivot to count.\n",
"df_genres_explode = (\n",
"    df_movies_tmp[['movieId', 'genres_split']]\n",
"    .explode('genres_split')\n",
"    .assign(values=1)\n",
")\n",
"\n",
"# Movie x genre table of row counts; pairs that never occur are filled with 0.\n",
"df_genres_pivot = df_genres_explode.pivot_table(\n",
"    index='movieId',\n",
"    columns='genres_split',\n",
"    values='values',\n",
"    aggfunc='count',\n",
"    fill_value=0,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th>genres_split</th>\n",
" <th>(no genres listed)</th>\n",
" <th>Action</th>\n",
" <th>Adventure</th>\n",
" <th>Animation</th>\n",
" <th>Children</th>\n",
" <th>Comedy</th>\n",
" <th>Crime</th>\n",
" <th>Documentary</th>\n",
" <th>Drama</th>\n",
" <th>Fantasy</th>\n",
" <th>Film-Noir</th>\n",
" <th>Horror</th>\n",
" <th>IMAX</th>\n",
" <th>Musical</th>\n",
" <th>Mystery</th>\n",
" <th>Romance</th>\n",
" <th>Sci-Fi</th>\n",
" <th>Thriller</th>\n",
" <th>War</th>\n",
" <th>Western</th>\n",
" </tr>\n",
" <tr>\n",
" <th>movieId</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"genres_split (no genres listed) Action Adventure Animation Children \\\n",
"movieId \n",
"1 0 0 1 1 1 \n",
"2 0 0 1 0 1 \n",
"3 0 0 0 0 0 \n",
"4 0 0 0 0 0 \n",
"5 0 0 0 0 0 \n",
"\n",
"genres_split Comedy Crime Documentary Drama Fantasy Film-Noir Horror \\\n",
"movieId \n",
"1 1 0 0 0 1 0 0 \n",
"2 0 0 0 0 1 0 0 \n",
"3 1 0 0 0 0 0 0 \n",
"4 1 0 0 1 0 0 0 \n",
"5 1 0 0 0 0 0 0 \n",
"\n",
"genres_split IMAX Musical Mystery Romance Sci-Fi Thriller War Western \n",
"movieId \n",
"1 0 0 0 0 0 0 0 0 \n",
"2 0 0 0 0 0 0 0 0 \n",
"3 0 0 0 1 0 0 0 0 \n",
"4 0 0 0 1 0 0 0 0 \n",
"5 0 0 0 0 0 0 0 0 "
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_genres_pivot.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Year and Recency\n",
"- Extract release year from the movie title.\n",
" - in parentheses\n",
" - If no release year, then set it as 1900 tentatively.\n",
"- Scale it into [0, 1] range and call it 'recency'"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"df_movies_tmp['year'] = df_movies['title'].str.extract('.*\\((.*)\\).*')"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>movieId</th>\n",
" <th>title</th>\n",
" <th>genres</th>\n",
" <th>genres_split</th>\n",
" <th>year</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>6059</th>\n",
" <td>40697</td>\n",
" <td>Babylon 5</td>\n",
" <td>Sci-Fi</td>\n",
" <td>[Sci-Fi]</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9031</th>\n",
" <td>140956</td>\n",
" <td>Ready Player One</td>\n",
" <td>Action|Sci-Fi|Thriller</td>\n",
" <td>[Action, Sci-Fi, Thriller]</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9091</th>\n",
" <td>143410</td>\n",
" <td>Hyena Road</td>\n",
" <td>(no genres listed)</td>\n",
" <td>[(no genres listed)]</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9138</th>\n",
" <td>147250</td>\n",
" <td>The Adventures of Sherlock Holmes and Doctor W...</td>\n",
" <td>(no genres listed)</td>\n",
" <td>[(no genres listed)]</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9179</th>\n",
" <td>149334</td>\n",
" <td>Nocturnal Animals</td>\n",
" <td>Drama|Thriller</td>\n",
" <td>[Drama, Thriller]</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9259</th>\n",
" <td>156605</td>\n",
" <td>Paterson</td>\n",
" <td>(no genres listed)</td>\n",
" <td>[(no genres listed)]</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9367</th>\n",
" <td>162414</td>\n",
" <td>Moonlight</td>\n",
" <td>Drama</td>\n",
" <td>[Drama]</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9448</th>\n",
" <td>167570</td>\n",
" <td>The OA</td>\n",
" <td>(no genres listed)</td>\n",
" <td>[(no genres listed)]</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9514</th>\n",
" <td>171495</td>\n",
" <td>Cosmos</td>\n",
" <td>(no genres listed)</td>\n",
" <td>[(no genres listed)]</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9515</th>\n",
" <td>171631</td>\n",
" <td>Maria Bamford: Old Baby</td>\n",
" <td>(no genres listed)</td>\n",
" <td>[(no genres listed)]</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9518</th>\n",
" <td>171749</td>\n",
" <td>Death Note: Desu nôto (2006–2007)</td>\n",
" <td>(no genres listed)</td>\n",
" <td>[(no genres listed)]</td>\n",
" <td>2006–2007</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9525</th>\n",
" <td>171891</td>\n",
" <td>Generation Iron 2</td>\n",
" <td>(no genres listed)</td>\n",
" <td>[(no genres listed)]</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9611</th>\n",
" <td>176601</td>\n",
" <td>Black Mirror</td>\n",
" <td>(no genres listed)</td>\n",
" <td>[(no genres listed)]</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" movieId title \\\n",
"6059 40697 Babylon 5 \n",
"9031 140956 Ready Player One \n",
"9091 143410 Hyena Road \n",
"9138 147250 The Adventures of Sherlock Holmes and Doctor W... \n",
"9179 149334 Nocturnal Animals \n",
"9259 156605 Paterson \n",
"9367 162414 Moonlight \n",
"9448 167570 The OA \n",
"9514 171495 Cosmos \n",
"9515 171631 Maria Bamford: Old Baby \n",
"9518 171749 Death Note: Desu nôto (2006–2007) \n",
"9525 171891 Generation Iron 2 \n",
"9611 176601 Black Mirror \n",
"\n",
" genres genres_split year \n",
"6059 Sci-Fi [Sci-Fi] NaN \n",
"9031 Action|Sci-Fi|Thriller [Action, Sci-Fi, Thriller] NaN \n",
"9091 (no genres listed) [(no genres listed)] NaN \n",
"9138 (no genres listed) [(no genres listed)] NaN \n",
"9179 Drama|Thriller [Drama, Thriller] NaN \n",
"9259 (no genres listed) [(no genres listed)] NaN \n",
"9367 Drama [Drama] NaN \n",
"9448 (no genres listed) [(no genres listed)] NaN \n",
"9514 (no genres listed) [(no genres listed)] NaN \n",
"9515 (no genres listed) [(no genres listed)] NaN \n",
"9518 (no genres listed) [(no genres listed)] 2006–2007 \n",
"9525 (no genres listed) [(no genres listed)] NaN \n",
"9611 (no genres listed) [(no genres listed)] NaN "
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_movies_tmp[~(df_movies_tmp['year'].str.isdecimal() == True)]"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"df_movies_tmp['year'] = df_movies_tmp['year'].replace({'2006–2007': '2006'}).fillna('1900').astype(int)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"count 9742.000000\n",
"mean 1994.498255\n",
"std 18.818171\n",
"min 1900.000000\n",
"25% 1987.000000\n",
"50% 1999.000000\n",
"75% 2008.000000\n",
"max 2018.000000\n",
"Name: year, dtype: float64"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_movies_tmp['year'].describe()"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<matplotlib.axes._subplots.AxesSubplot at 0x11e203f10>"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAY4AAAD4CAYAAAD7CAEUAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8GearUAAAV+ElEQVR4nO3dfbRddX3n8fcHggg+lFBiBgM26IQ6MAWkEZg+TFUWz6sN1urCas1CVuNahbV0jZ0h2K6B1mEtdKx0mFpWack0WBXxqaaFlgmMU8bp8JAgAgGRKw8lMUIkVERbEPqdP87vlkN6783Z4Z57zk3er7XOunt/92/v8/vdneST/XD2SVUhSdKg9hp1ByRJ84vBIUnqxOCQJHVicEiSOjE4JEmdLBh1B4bhoIMOqqVLl466G5I0r2zcuPG7VbVoZ+12y+BYunQpGzZsGHU3JGleSfLwIO08VSVJ6sTgkCR1YnBIkjoxOCRJnRgckqRODA5JUicGhySpE4NDktSJwSFJ6mS3/OS4JAEsXX3tyN77oUvOGNl7D5tHHJKkTgwOSVInBockqRODQ5LUicEhSerE4JAkdWJwSJI6MTgkSZ0YHJKkTgwOSVInBockqRODQ5LUicEhSerE4JAkdWJwSJI6MTgkSZ0MLTiSvDTJrUm+nmRTkt9p9cOS3JJkIslnk7yk1fdt8xNt+dK+bV3Q6vclOWVYfZYk7dwwjzieBt5SVUcDxwCnJjkB+AhwaVX9a+AJ4JzW/hzgiVa/tLUjyRHAWcCRwKnAHybZe4j9liTNYGjBUT1Ptdl92quAtwCfb/W1wJltekWbpy0/MUla/eqqerqqHgQmgOOG1W9J0syGeo0jyd5J7gAeA9YD3wL+vqqebU02A0va9BLgEYC2/HvAj/fXp1in/71WJdmQZMO2bduGMRxJEkMOjqp6rqqOAQ6hd5Tw+iG+1xVVtbyqli9atGhYbyNJe7w5uauqqv4e+Arw74ADkixoiw4BtrTpLcChAG35jwGP99enWEeSNMeGeVfVoiQHtOn9gJOAe+kFyK+0ZiuBL7fpdW2etvx/VVW1+lntrqvDgGXArcPqtyRpZgt23mSXHQysbXdA7QVcU1V/meQe4Ook/wX4GnBla38l8MkkE8B2endSUVWbklwD3AM8C5xbVc8Nsd+SpBkMLTiq6k7gDVPUH2CKu6Kq6h+Bt0+zrYuBi2e7j5Kk7vzkuCSpE4NDktSJwSFJ6sTgkCR1YnBIkjoxOCRJnRgckqRODA5JUicGhySpk2E+ckSS9lhLV187kvd96JIzhv4eHnFIkjoxOCRJnRgckqRODA5JUicGhySpE4NDktSJwSFJ6sTgkCR1YnBIkjoxOCRJnRgckqRODA5JUidDC44khyb5SpJ7kmxK8v5WvyjJliR3tNfpfetckGQiyX1JTumrn9pqE0lWD6vPkqSdG+bTcZ8FPlhVtyd5BbAxyfq27NKq+lh/4yRHAGcBRwKvBm5Icnhb/AngJGAzcFuSdVV1zxD7LkmaxtCCo6q2Alvb9PeT3AssmWGVFcDVVfU08GCSCeC4tmyiqh4ASHJ1a2twSNIIzMk1jiRLgTcAt7TSeUnuTLImycJWWwI80rfa5labrr7je6xKsiHJhm3bts3yCCRJk4YeHEleDnwB+EBVPQlcDrwOOIbeEcnvzcb7VNUVVbW8qpYvWrRoNjYpSZrCUL8BMMk+9ELjU1X1RYCqerRv+R8Df9lmtwCH9q1+SKsxQ12SNMeGeVdVgCuBe6vq4331g/uavRW4u02vA85Ksm+Sw4BlwK3AbcCyJIcleQm9C+jrhtVvSdLMhnnE8bPArwF3Jbmj1T4EvDPJMUABDwHvA6iqTUmuoXfR+1ng3Kp6DiDJecD1wN7AmqraNMR+S5JmMMy7qr4KZIpF182wzsXAxVPUr5tpPUnS3PGT45KkTgwOSVInBockqRODQ5LUic
EhSerE4JAkdWJwSJI6MTgkSZ0YHJKkTgwOSVInBockqRODQ5LUicEhSerE4JAkdWJwSJI6MTgkSZ0YHJKkTgwOSVInAwVHkp8adkckSfPDoEccf5jk1iS/keTHhtojSdJYGyg4qurngXcBhwIbk3w6yUlD7ZkkaSwNfI2jqu4Hfhs4H/gF4LIk30jyy8PqnCRp/Ax6jeOoJJcC9wJvAX6xqv5Nm750mnUOTfKVJPck2ZTk/a1+YJL1Se5vPxe2epJclmQiyZ1Jju3b1srW/v4kK1/kmCVJL8KgRxz/HbgdOLqqzq2q2wGq6tv0jkKm8izwwao6AjgBODfJEcBq4MaqWgbc2OYBTgOWtdcq4HLoBQ1wIXA8cBxw4WTYSJLm3qDBcQbw6ar6B4AkeyXZH6CqPjnVClW1tS9gvk/vaGUJsAJY25qtBc5s0yuAq6rnZuCAJAcDpwDrq2p7VT0BrAdO7ThOSdIsGTQ4bgD265vfv9UGkmQp8AbgFmBxVW1ti74DLG7TS4BH+lbb3GrT1Xd8j1VJNiTZsG3btkG7JknqaNDgeGlVPTU506b3H2TFJC8HvgB8oKqe7F9WVQXUgH2YUVVdUVXLq2r5okWLZmOTkqQpDBocP9jhYvVPA/+ws5WS7EMvND5VVV9s5UfbKSjaz8dafQu9230nHdJq09UlSSMwaHB8APhckv+T5KvAZ4HzZlohSYArgXur6uN9i9YBk3dGrQS+3Fd/T7u76gTge+2U1vXAyUkWtoviJ7eaJGkEFgzSqKpuS/J64Cdb6b6q+tFOVvtZ4NeAu5Lc0WofAi4BrklyDvAw8I627DrgdGAC+CFwdnvv7Uk+DNzW2v1uVW0fpN+SpNk3UHA0bwSWtnWOTUJVXTVd46r6KpBpFp84RfsCzp1mW2uANR36KkkakoGCI8kngdcBdwDPtXIB0waHJGn3NOgRx3LgiHZUIEnagw16cfxu4F8NsyOSpPlh0COOg4B7ktwKPD1ZrKpfGkqvJElja9DguGiYnZAkzR+D3o77N0l+AlhWVTe051TtPdyuSZLG0aCPVf914PPAH7XSEuDPh9UpSdL4GvRU1bn0Hml+C/S+1CnJq4bWK0m7laWrrx11FzSLBr2r6umqemZyJskCZunhhJKk+WXQ4PibJB8C9mvfNf454C+G1y1J0rgaNDhWA9uAu4D30Xuu1HTf/CdJ2o0NelfVPwF/3F6SpD3YoM+qepAprmlU1WtnvUeSpLHW5VlVk14KvB04cPa7I0kadwNd46iqx/teW6rq94Ezhtw3SdIYGvRU1bF9s3vROwLp8l0ekqTdxKD/+P9e3/SzwEM8/819kqQ9yKB3Vb152B2RJM0Pg56q+g8zLa+qj89OdyRJ467LXVVvBNa1+V8EbgXuH0anJEnja9DgOAQ4tqq+D5DkIuDaqnr3sDomSRpPgz5yZDHwTN/8M60mSdrDDBocVwG3JrmoHW3cAqydaYUka5I8luTuvtpFSbYkuaO9Tu9bdkGSiST3JTmlr35qq00kWd1pdJKkWTfoXVUXJ/kr4Odb6eyq+tpOVvtT4A/ohU6/S6vqY/2FJEcAZwFHAq8GbkhyeFv8CeAkYDNwW5J1VXXPIP2WJM2+QY84APYHnqyq/wZsTnLYTI2r6iZg+4DbXgFcXVVPV9WDwAS9L446Dpioqgfa94Fc3dpKkkZk0K+OvRA4H7iglfYB/mwX3/O8JHe2U1kLW20J8Ehfm82tNl1dkjQigx5xvBX4JeAHAFX1beAVu/B+lwOvA44BtvLCT6S/KElWJdmQZMO2bdtma7OSpB0MGhzPVFXRHq2e5GW78mZV9WhVPdf3/R7HtUVbgEP7mh7SatPVp9r2FVW1vKqWL1q0aFe6J0kawKDBcU2SPwIOSPLrwA3swpc6JTm4b/atwOQdV+uAs5Ls266dLKP3AcPbgGVJDkvyEnoX0NchSRqZnd5VlSTAZ4HXA08CPwn856pav5P1PgO8CTgoyWbgQu
BNSY6hd+TyEL2voaWqNiW5BriH3kMUz62q59p2zgOuB/YG1lTVpu7DlCTNlp0GR1VVkuuq6qeAGcNih/XeOUX5yhnaXwxcPEX9OnrfcS5JGgODnqq6Pckbh9oTSdK8MOizqo4H3p3kIXp3VoXewchRw+qYJGk8zRgcSV5TVX8HnDJTO0nSnmNnRxx/Tu+puA8n+UJVvW0uOiVJGl87u8aRvunXDrMjkqT5YWfBUdNMS5L2UDs7VXV0kifpHXns16bh+Yvjrxxq7yRJY2fG4KiqveeqI5Kk+aHLY9UlSTI4JEndGBySpE4MDklSJwaHJKkTg0OS1InBIUnqxOCQJHVicEiSOjE4JEmdGBySpE4MDklSJwaHJKkTg0OS1MnQgiPJmiSPJbm7r3ZgkvVJ7m8/F7Z6klyWZCLJnUmO7VtnZWt/f5KVw+qvJGkwwzzi+FPg1B1qq4Ebq2oZcGObBzgNWNZeq4DLoRc0wIXA8cBxwIWTYSNJGo2hBUdV3QRs36G8AljbptcCZ/bVr6qem4EDkhwMnAKsr6rtVfUEsJ5/GUaSpDk019c4FlfV1jb9HWBxm14CPNLXbnOrTVeXJI3IyC6OV1UBNVvbS7IqyYYkG7Zt2zZbm5Uk7WCug+PRdgqK9vOxVt8CHNrX7pBWm67+L1TVFVW1vKqWL1q0aNY7LknqmevgWAdM3hm1EvhyX/097e6qE4DvtVNa1wMnJ1nYLoqf3GqSpBFZMKwNJ/kM8CbgoCSb6d0ddQlwTZJzgIeBd7Tm1wGnAxPAD4GzAapqe5IPA7e1dr9bVTtecJckzaGhBUdVvXOaRSdO0baAc6fZzhpgzSx2TZL0IvjJcUlSJwaHJKkTg0OS1InBIUnqxOCQJHVicEiSOjE4JEmdGBySpE4MDklSJwaHJKkTg0OS1MnQnlUlabwsXX3tqLug3YRHHJKkTgwOSVInBockqRODQ5LUicEhSerE4JAkdWJwSJI6MTgkSZ0YHJKkTgwOSVInBockqZORBEeSh5LcleSOJBta7cAk65Pc334ubPUkuSzJRJI7kxw7ij5LknpGecTx5qo6pqqWt/nVwI1VtQy4sc0DnAYsa69VwOVz3lNJ0j8bp1NVK4C1bXotcGZf/arquRk4IMnBo+igJGl0wVHA/0yyMcmqVltcVVvb9HeAxW16CfBI37qbW+0FkqxKsiHJhm3btg2r35K0xxvV93H8XFVtSfIqYH2Sb/QvrKpKUl02WFVXAFcALF++vNO6kqTBjSQ4qmpL+/lYki8BxwGPJjm4qra2U1GPteZbgEP7Vj+k1aR5yS9U0nw356eqkrwsySsmp4GTgbuBdcDK1mwl8OU2vQ54T7u76gTge32ntCRJc2wURxyLgS8lmXz/T1fVXye5DbgmyTnAw8A7WvvrgNOBCeCHwNlz32VJ0qQ5D46qegA4eor648CJU9QLOHcOuiZJGsA43Y4rSZoHDA5JUicGhySpE4NDktSJwSFJ6sTgkCR1YnBIkjoZ1bOqpJHysR/SrvOIQ5LUicEhSerE4JAkdeI1Do2U1xqk+ccjDklSJwaHJKkTg0OS1InBIUnqxOCQJHVicEiSOjE4JEmd+DkOAX6eQtLgPOKQJHVicEiSOpk3wZHk1CT3JZlIsnrU/ZGkPdW8CI4kewOfAE4DjgDemeSI0fZKkvZM8+Xi+HHARFU9AJDkamAFcM8w3swLxZI0vfkSHEuAR/rmNwPH9zdIsgpY1WafSnLfi3i/g4Dvvoj1x8nuNBbYvcazO40FHM9YyEemLA86lp8Y5D3mS3DsVFVdAVwxG9tKsqGqls/GtkZtdxoL7F7j2Z3GAo5nnM32WObFNQ5gC3Bo3/whrSZJmmPzJThuA5YlOSzJS4CzgHUj7pMk7ZHmxamqqno2yXnA9cDewJqq2jTEt5yVU15jYncaC+xe49mdxgKOZ5zN6lhSVbO5PUnSbm6+nKqSJI0Jg0OS1MkeERxJ1iR5LMndfbWjk/y/JH
cl+Yskr+xbdkF7tMl9SU7pq4/FY0+6jCfJSUk2tvrGJG/pW+enW30iyWVJMs5j6Vv+miRPJfnNvtq82zdt2VFt2aa2/KWtPvJ903U8SfZJsrbV701yQd86I98/SQ5N8pUk97Tf9/tb/cAk65Pc334ubPW03/1EkjuTHNu3rZWt/f1JVs6DsbyrjeGuJH+b5Oi+bXXfN1W127+Afw8cC9zdV7sN+IU2/V7gw236CODrwL7AYcC36F2Q37tNvxZ4SWtzxDwYzxuAV7fpfwts6VvnVuAEIMBfAaeN81j6ln8e+Bzwm21+vu6bBcCdwNFt/seBvcdl3+zCeH4VuLpN7w88BCwdl/0DHAwc26ZfAXyz/X3/KLC61VcDH2nTp7fffdq+uKXVDwQeaD8XtumFYz6Wn5nsI71HN02OZZf2zR5xxFFVNwHbdygfDtzUptcDb2vTK+j94X+6qh4EJug98uSfH3tSVc8Ak489mXNdxlNVX6uqb7f6JmC/JPsmORh4ZVXdXL0/QVcBZw6/9y/Ucd+Q5EzgQXpjmTQv9w1wMnBnVX29rft4VT03Lvum9anLeAp4WZIFwH7AM8CTjMn+qaqtVXV7m/4+cC+9p1KsANa2Zmt5/ne9Ariqem4GDmj75hRgfVVtr6on6P0OTp3DoXQeS1X9besrwM30PgsHu7hv9ojgmMYmnv8FvZ3nP2A41eNNlsxQHxfTjaff24Dbq+ppen3f3LdsnMYz5ViSvBw4H/idHdrP131zOFBJrk9ye5L/1OrjvG9g+vF8HvgBsBX4O+BjVbWdMdw/SZbSOxq/BVhcVVvbou8Ai9v0vPi3YMCx9DuH3pEU7OJY9uTgeC/wG0k20jvUe2bE/XmxZhxPkiOBjwDvG0HfuppuLBcBl1bVU6Pq2C6abjwLgJ8D3tV+vjXJiaPpYifTjec44Dng1fRO834wyWtH08Xptf+AfAH4QFU92b+sHeHNm88odB1LkjfTC47zX8z7zosPAA5DVX2D3qkCkhwOnNEWzfR4k7F97MkM4yHJIcCXgPdU1bdaeQvPH67CGI1nhrEcD/xKko8CBwD/lOQfgY3Mz32zGbipqr7bll1H73rCnzGm+wZmHM+vAn9dVT8CHkvyf4Hl9P5HOxb7J8k+9P6h/VRVfbGVH01ycFVtbaeiHmv16f4t2AK8aYf6/x5mv6fScSwkOQr4E3rXyx5v5V17nNNcXtAZ5YveRbr+C3yvaj/3oncO+b1t/kheeHH8AXoXkBa06cN4/iLSkfNgPAe0vv7yFNvY8QLs6eM8lh3WuYjnL47P132zELid3oXkBcANwBnjtG86jud84H+06ZfR+9qDo8Zl/7Tf5VXA7+9Q/6+88ILyR9v0Gbzw4vitrX4gvetsC9vrQeDAMR/La+hdr/2ZHdrv0r4ZyR/EEfyB+Qy9864/ove/vHOA99O7E+GbwCW0T9G39r9F706D++i7m4XeXRbfbMt+az6MB/hteued7+h7Tf7FXw7c3cbzB/2/g3Ecyw7rXUQLjvm6b1r7d9O7ZnD35F/ycdk3u/Bn7eX07nbbRC80/uM47R96pwOL3p1sk38XTqd3N9uNwP30wvvA1j70vkDuW8BdwPK+bb2X3j/EE8DZ82AsfwI80dd2w4vZNz5yRJLUyZ58cVyStAsMDklSJwaHJKkTg0OS1InBIUnqxOCQJHVicEiSOvn/B9GUPOrAFtMAAAAASUVORK5CYII=\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"df_movies_tmp['year'].plot.hist()"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.preprocessing import MinMaxScaler"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
"mmscaler = MinMaxScaler()\n",
"df_movies_tmp['recency'] = mmscaler.fit_transform(df_movies_tmp[['year']])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## merge"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
"df_item_features = pd.merge(df_genres_pivot, df_movies_tmp[['movieId', 'recency']], on='movieId').set_index('movieId')"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>(no genres listed)</th>\n",
" <th>Action</th>\n",
" <th>Adventure</th>\n",
" <th>Animation</th>\n",
" <th>Children</th>\n",
" <th>Comedy</th>\n",
" <th>Crime</th>\n",
" <th>Documentary</th>\n",
" <th>Drama</th>\n",
" <th>Fantasy</th>\n",
" <th>...</th>\n",
" <th>Horror</th>\n",
" <th>IMAX</th>\n",
" <th>Musical</th>\n",
" <th>Mystery</th>\n",
" <th>Romance</th>\n",
" <th>Sci-Fi</th>\n",
" <th>Thriller</th>\n",
" <th>War</th>\n",
" <th>Western</th>\n",
" <th>recency</th>\n",
" </tr>\n",
" <tr>\n",
" <th>movieId</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.805085</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.805085</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.805085</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.805085</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.805085</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 21 columns</p>\n",
"</div>"
],
"text/plain": [
" (no genres listed) Action Adventure Animation Children Comedy \\\n",
"movieId \n",
"1 0 0 1 1 1 1 \n",
"2 0 0 1 0 1 0 \n",
"3 0 0 0 0 0 1 \n",
"4 0 0 0 0 0 1 \n",
"5 0 0 0 0 0 1 \n",
"\n",
" Crime Documentary Drama Fantasy ... Horror IMAX Musical \\\n",
"movieId ... \n",
"1 0 0 0 1 ... 0 0 0 \n",
"2 0 0 0 1 ... 0 0 0 \n",
"3 0 0 0 0 ... 0 0 0 \n",
"4 0 0 1 0 ... 0 0 0 \n",
"5 0 0 0 0 ... 0 0 0 \n",
"\n",
" Mystery Romance Sci-Fi Thriller War Western recency \n",
"movieId \n",
"1 0 0 0 0 0 0 0.805085 \n",
"2 0 0 0 0 0 0 0.805085 \n",
"3 0 1 0 0 0 0 0.805085 \n",
"4 0 1 0 0 0 0 0.805085 \n",
"5 0 0 0 0 0 0 0.805085 \n",
"\n",
"[5 rows x 21 columns]"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_item_features.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Create a Sparce Matrix"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.base import BaseEstimator, TransformerMixin"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [],
"source": [
"def df2sparse(df, index, columns, value, shape):\n",
" data = df[value]\n",
" row_ind = df[index]\n",
" col_ind = df[columns]\n",
" return sp.csr_matrix((data, (row_ind, col_ind)), shape=shape)"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [],
"source": [
"class ItemFeatureConverter(BaseEstimator, TransformerMixin):\n",
" def __init__(self, dataset):\n",
" self.dataset = dataset\n",
" \n",
" def fit(self, df, y=None):\n",
" \n",
" self.ifeat2index = {col: i for i, col in enumerate(df.columns)}\n",
" self.labels_item_features = list(self.ifeat2index.keys())\n",
" self.n_item_features = len(self.ifeat2index)\n",
" \n",
" return self\n",
" \n",
" def transform(self, df):\n",
" col_item = self.dataset.col_item\n",
" \n",
" columns_target = df.columns[df.columns.isin(self.labels_item_features)]\n",
"\n",
" df_stacked = df[columns_target].stack().reset_index()\n",
" df_stacked.columns = [col_item, 'label_feature', 'value']\n",
" #df_stacked = df_stacked[df_stacked['value']!=0]\n",
" \n",
" df_stacked['item_index'] = df_stacked[col_item].map(dataset.item2index)\n",
" df_stacked['feature_index'] = df_stacked['label_feature'].map(self.ifeat2index)\n",
"\n",
" csr_item_features = df2sparse(df_stacked, 'item_index', 'feature_index', 'value',\n",
" shape=(dataset.n_items, self.n_item_features))\n",
" \n",
" return csr_item_features"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [],
"source": [
"ifeat_converter = ItemFeatureConverter(dataset)"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [],
"source": [
"csr_item_features = ifeat_converter.fit_transform(df_item_features)"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"21"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ifeat_converter.n_item_features"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<9742x21 sparse matrix of type '<class 'numpy.float64'>'\n",
"\twith 204582 stored elements in Compressed Sparse Row format>"
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"csr_item_features"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Modeling\n",
"- Using LightFM"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [],
"source": [
"from lightfm import cross_validation"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [],
"source": [
"csr_ui_matrix_train, csr_ui_matrix_test = cross_validation.random_train_test_split(csr_ui_matrix)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Make a Class"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.base import ClassifierMixin"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [],
"source": [
"class LightFMClassifier(BaseEstimator, ClassifierMixin):\n",
" def __init__(self, feature_type=None, **params):\n",
" self.model = LightFM(**params)\n",
" if feature_type in ['user', 'item', None]:\n",
" self.feature_type = feature_type\n",
" else:\n",
" raise ValueError('Invalid feature_type: ', feature_type)\n",
" \n",
" def fit(self, features, ui_matrix, epochs=10):\n",
" user_features, item_features = None, None\n",
" if self.feature_type == 'user':\n",
" user_features = features\n",
" elif self.feature_type == 'item':\n",
" item_features = features\n",
" \n",
" self.model.fit(ui_matrix,\n",
" user_features=user_features,\n",
" item_features=item_features,\n",
" epochs=epochs)\n",
" \n",
" return self\n",
" \n",
" def predict(self, features):\n",
" item_bias, item_embeddings = self.model.get_item_representations()\n",
" \n",
" recommend_score = item_bias * csr_item_features.T\n",
" return 1/(1+np.exp(-recommend_score))\n",
" \n",
" def score(self, features, ui_matrix):\n",
" user_features, item_features = None, None\n",
" if self.feature_type == 'user':\n",
" user_features = features\n",
" elif self.feature_type == 'item':\n",
" item_features = features\n",
" \n",
" score = evaluation.auc_score(self.model, ui_matrix,\n",
" user_features=user_features,\n",
" item_features=item_features).mean()\n",
" return score\n",
" \n",
" @property\n",
" def feature_importances_(self):\n",
" _, user_embeddings = self.model.get_user_representations()\n",
" item_bias, item_embeddings = self.model.get_item_representations()\n",
" \n",
" item_coefs = np.dot(user_embeddings, item_embeddings.T) + item_bias \n",
" return np.abs(item_coefs).mean(0)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## fit and score"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.pipeline import Pipeline"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [],
"source": [
"pipe_lfm = Pipeline([('item_features', ItemFeatureConverter(dataset)),\n",
" ('lfm', LightFMClassifier(feature_type='item', no_components=5))])"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Pipeline(memory=None,\n",
" steps=[('item_features',\n",
" ItemFeatureConverter(dataset=<__main__.DataSet object at 0x11a1d2d90>)),\n",
" ('lfm', LightFMClassifier(feature_type='item'))],\n",
" verbose=False)"
]
},
"execution_count": 35,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pipe_lfm.fit(df_item_features, csr_ui_matrix_train)"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.597118"
]
},
"execution_count": 36,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pipe_lfm.score(df_item_features, csr_ui_matrix_test)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Feature Importances"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Based on Embedding Features"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {},
"outputs": [],
"source": [
"labels_item_features = pipe_lfm.named_steps['item_features'].labels_item_features"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {},
"outputs": [],
"source": [
"importance = pipe_lfm.named_steps['lfm'].feature_importances_"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<matplotlib.axes._subplots.AxesSubplot at 0x10d82ad50>"
]
},
"execution_count": 39,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAbwAAAD4CAYAAACXDlMRAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8GearUAAAgAElEQVR4nO3deZRdVZ328e9DMRMSxEQ7jUDURgEZQgiRWYJpWsUW0NiAqKC2EVfLoK0t3foKigMKCI0ThoiMEpRJGmgIDSRMARLIRJhUiMqgjEHGAMnz/nF2kUtRc27Vrar7fNbKqnvPuE+55Ff7nLOfLdtEREQMdas1ugERERH9IQUvIiKaQgpeREQ0hRS8iIhoCil4ERHRFFZvdAOifSNHjvSYMWMa3YyIiEHl9ttvf9z2qPbWpeANUGPGjGHu3LmNbkZExKAi6Y8drcstzYiIaApN18OTJEC2VzS6LZ1Z9NDTjDnq8kY3IyKiXy05bu8+O3ZT9PAkjZF0r6SzgDuB/ydpjqSFkr5Zs90ny7IFks4uy0ZJurBsP0fSLmX5MZJOlzRT0v2SDu/oOJLWl/SApDXK+uG13yMiou81Uw9vM+BgYDgwGZgACLhU0u7AE8DXgZ1tPy5pw7LffwMn2b5R0ibAVcAWZd3mwERgfeBeST8D3tH2OLafkTQT2Bu4BDgAuMj2y31+1RERATRXwfuj7VsknQDsBcwry4dRFcNtgd/YfhzA9pNl/SRgy+pOKADDJQ0rny+3vQxYJulR4M3Anh0cZxrwH1QF71PAZ9s2UNIUYApAy/B2XzKKiIheaqaC91z5KeB7tn9eu1LSYR3stxqwo+0X22wPsKxm0XI6+X3avqncWt0DaLF9ZzvbTAWmAqw1erOkekdE1FEzFbxWVwHHSjrX9rOSNgJeBq4FLpb0Q9tPlFuRTwIzgMOA4wEkjbU9v5Pjd3QcgLOAXwHHdtXIrTcawdw+fHgbEdFsmuKllVq2Z1AVndmSFgEXAOvbXgx8B5glaQHww7LL4cD48hLKXcChXRy/o+MAnAu8ATivntcUERFdU+bD6z+SJgP72P5EV9uOHz/eGXgeEdEzkm63Pb69dc14S7MhJP0IeD/wgUa3JSKiGaXg9RPbHb0UExER/aBpC56k5cAiYA3gFaoXSk4aKAksSVqJga4vEzEi+kLTFjzgBdtjASS9iepFluHA0bUbSVrd9isNaF9ERNRR072l2R7bj1IN+P6CKodIulTStcA1koZJukbSHZIWSdoHXo0su0fSGZLuk3SupEmSbpL0O0kTynYTJM2WNE/SzZLe2cDLjYhoSs3cw3sN2/dLagHeVBaNA7ax/aSk1YH9bP9N0kjgFkmXlu3+Afgo8GlgDvAxYFfgQ8B/AfsC9wC72X5F0iTgu8BH2rYhSSsREX0nBa9jV9cMGBfw3ZK5uQLYiCpGDOAB24sAJC0GrrHtMsZvTNlmBHCmpM0AUz03fJ0krURE9J0UvELS26jiwR4ti56rWX0QMArY3vbLkpYAa5d1tfFiK2q+r2Dl7/dY4Drb+0kaA8zsqj1JWomIqK88w6OaAgg4Ffix2x+JPwJ4tBS7icCmPTzFCOCh8vmQXjc0IiJ6rZkL3jqS5pfbkP9HlZn5zQ62PZcqXmwR8EmqZ3I98QPge5LmkV51RERDJFpsgEq0WEREz3UWLdbMPbyIiGgiQ77gSfo7SdMl/UHS7ZKukPSOfjr3kjKMISIiGmxIP09SNUvrxcCZtg8oy7alGlJwXyPb1pVEi8VAkhixGAqGeg9vIvCy7VNbF9heANwo6XhJd5bklP0BJO0haZak30q6X9Jxkg6SdFvZ7u1lu1GSLpQ0p/zbpSx/o6QZkhZLmkY1fg9J35J0ZGsbJH1H0hH9+HuIiGh6Q73gbQXc3s7yDwNjgW2BScDxkkaXddtSTfK6BfAJ4B22JwDTqGY+B/hvqqDpHagSU6aV5U
cDN9p+F1XPcpOy/HSqtzuRtBpwAHBO20ZJmiJprqS5y59/utcXHRERrzekb2l2YlfgPNvLgb9KmgXsAPwNmGP7EQBJf6AargDVzAoTy+dJwJbVHVMAhksaBuxOVUyxfbmkp8rnJZKekLQd1e3UebafaNuoJK1ERPSdoV7wFgOTe7hPd5JTVgN2tP1i7Y41BbA906gGnf8dVY8vIiL60VAveNdSZWBOKb0nJG0DLAX2l3QmsCFVz+wrwObdPO4Mqtubx5djjrU9H7ieKjz625LeD7yhZp+LgW9R5Wh+rKsTJFosIqK+hnTBKyHO+wEnS/oq8CKwBDgSGAYsoApz/g/bf5HU3YJ3OPATSQupfofXUz33+yZwXklvuRn4U01bXpJ0HbC03EqNiIh+lKSVflJeVrkD+Kjt33W1fZJWIiJ6LkkrDSZpS+D3VFMHdVnsIiKi/pqu4JWxcvPLv79Ieqh8Xirprm4e41BJrcMMzpA0uXyeKel1f1nYvsv222z/e32vJiIiumtIP8NrTxkOMBZA0jHAs7ZPKPPUXdbV/pJWrx3IvioktXT0PC9JKwNf0kciBpem6+F1oUXSaSUpZYakdeDVntvJkuYCR0g6RtKXOzuQpL0kzZZ0h6TflHF6rfma35d0B/DRvr+kiIiAFLy2NgN+UpJSllKlqLRa0/Z42yd2dZASGP11YJLtccBc4Es1mzxhe5zt6XVse0REdKLpbml24YEyng6qSLIxNevO78FxdgS2BG4qg9HXBGZ3dSxJU4ApAC3DR/XgdBER0ZUUvNeqTVlZDqxT8/25HhxHwNW2D+xgfbvHSrRYRETfScHrG7dQDUz/B9u/l7QesJHtbk9JlKSViIj6yjO8PmD7MarczPNKGstsuh9bFhERfSBJKwNUklYiInouSSsREdH0UvAiIqIp5KWVQtK+VFP4bGH7nk62OxKYavv58v0K4GO2l9azPUlaGdiSshIx+KSHt9KBwI3lZ2eOBNZt/WL7A/UudhERUX8peECJ/doV+AxwQFnWIukESXdKWijpMEmHA38PXFfmtmuNChtZPn+pbH9n6QkiaYyku9uLLIuIiP6TW5qVfYArbd8n6QlJ2wMTqJJWxtp+RdKGtp+U9CVgou3Haw9Q9vkU8G6qgee3SpoFPEUVWXag7c9K+jVVZNk5bRuRpJWIiL6THl7lQKA113J6+T4J+LntVwBsP9nFMXYFLrb9nO1ngYuA3cq6ziLLXmV7asnrHN+y7oheX0xERLxe0/fwJG0I7AlsLclAC2BgTh1P01lkWURE9IOmL3jAZOBs259rXVBuRS4APifputpbmsAzwPrA422OcwNwhqTjqG5p7gd8oreNSrRYRER95ZZmdfvy4jbLLgRGA38CFkpaAHysrJsKXNn60kor23cAZwC3AbcC02zP68N2R0REDyRabIBKtFhERM8lWiwiIppeCh4g6dk23w+R9ONGtSciIuovL63UgaTVW4cvtPe9u/vVSrTYwJVYsYjBKQWvC5LGAKcDI4HHgE/Z/pOkM4AXge2Am8rwhtrvZwGnUsWQ/QH4tO2nJM0E5lON2zsPOLE/ryciolml4FXWkTS/5vuGwKXl84+AM22fKenTwCnAvmXdW4CdbS8vBbD2+0LgMNuzJH0LOJoqhxNgzfYeqiZpJSKi7+QZXuUF22Nb/wHfqFm3E/Cr8vlsqp5Zq9/YXt72u6QRwAa2Z5XlZwK712x3fnuNSNJKRETfScFbNc918b27+0VERB/LLc2u3Uw1g8LZwEFUiSqdsv20pKck7Wb7BqrElVld7VcrSSsREfWVgte1w4BfSvoK5aWVbu53MHCqpHWB+3uwX0RE9IEkrQxQSVqJiOi5JK1ERETTS8GLiIimMCif4Unal2qGgy1s39PO+pnAl23X7Z5gOed9tu+q1zE7k6SV/pPklIjmMFh7eAcCN5af/WVfYMue7CBpUP5BERExFA26gidpGNXg789QDRdA0jqSpku6W9
LFlBnFJR0q6fiafV8NhZb0cUm3SZov6eeSWsryZyV9R9ICSbdIerOknYEPAceX7d8uaaak8WWfkZKW1JzjUknXAtdIWk/S6eVc8yTt01+/q4iIWGnQFTxgH+BK2/cBT0jaHvg88LztLagivLYv215INfN4q/2B6ZK2KJ93Kckqy6nG2AGsB9xie1vgeuCztm+mihr7Sklj+UMXbRwHTLb9HuBrwLW2JwATqYrmeu3tJGmKpLmS5i5//unu/0YiIqJLg7HgHQhML5+nl++7A+cA2F4ILCyfHwPul7SjpDcCmwM3Ae+lKopzSobme4G3lWO+BFxWPt8OjOlFG6+2/WT5vBdwVDnPTGBtYJP2dkq0WERE3xlUz5jKjAR7AltLMtACGJjXyW7TgX8B7gEutm1JogqE/s92tn/ZKwcnLqfj39ErrPyDYe0262qjwwR8xPa9nbTxdZK0EhFRX4OthzcZONv2prbH2N4YeICqJ/YxAElbAdvU7HMx1W3Q2p7hNcBkSW8q+2woadMuzv0MsH7N9yWsvHU6uZP9rgIOK0UWSdt1cZ6IiOgDg63gHUhVwGpdCLwVGCbpbuBbVAUQANtPAXcDm9q+rSy7C/g6MKNM43M1MLqLc08HvlJePHk7cALweUnzqObK68ixwBrAQkmLy/eIiOhniRYboBItFhHRc4kWi4iIpjeoXlrpjKTlwCKqa3oA+ITtpY1tVe8laaX+kqgS0dyGUg+vddbyrYAngX9rdIMiImLgGEoFr9ZsYCMASWNLYspCSRdLekNZPlPSSWWg992SdpB0kaTfSfp264EkXSLpdkmLJU2pWf66RJay/M3lPAvKv53L8naTXSIion8MuYJXCsl7qZJRAM4Cvmp7G6pbnkfXbP5Sebh5KvBbql7hVsAhZaA6wKdtbw+MBw6vWf66RJay/BRgVlk+DljcRbJLbduTtBIR0UeGUsFbp6SZ/AV4M3C1pBHABrZnlW3OpEpladVaFBcBi20/YnsZ1QzlG5d1h0taANxSlm1WlneUyLIn8DMA28ttP03nyS6vStJKRETfGTIvrVCe4Ulal2qw979RFbjOLCs/V9R8bv2+uqQ9gEnATrafL9MOtaaqdDeRBaq0lY6SXSIioh8MpYIHQClMhwOXAD8FnpK0m+0bgE8Aszo9wGuNAJ4qx9wc2LEb+1xDFWZ9crm9Oqws+62kk2w/WiLS1rf9x44OkmixiIj6Gkq3NF9lex5VgPSBwMFUMxQsBMZSJbF015VUPb27geOobmt25QhgoqRFVLc6t+xlsktERNRRklYGqCStRET0XJJWIiKi6aXgdULS18r4u4Vl/Ny7O9huvKRTOlg3U9K9Zf/5kiZLmiZpy75tfURE1BpyL63Ui6SdgA8C42wvkzQSWLO9bW3PBTq7/3hQ2abVBV2dP9Fi9ZdosYjmlh5ex0YDj5dxedh+3PbDJZHl5pKicpuk9SXtIemyLo73qtLra/cec0RE9I0UvI7NADaWdJ+kn0p6j6Q1gfOBI0qSyiTghW4c69yaW5pv7GijJK1ERPSd3NLsgO1nJW0P7AZMpCp03wEesT2nbPM3gDKZeWdec0uzo+1tTwWmAqw1erO8PhsRUUcpeJ2wvRyYCcws4+q6nIFB0lVU0WZzbf9r37YwIiK6KwWvA5LeCayw/buyaCxwN/A+STvYniNpfdrc0rT9T/U4f5JWIiLqKwWvY8OAH0naAHgF+D0wBfhlWb4OVbGb1LgmRkREdyVpZYBK0kpERM8laSUiIppeCl5ERDSFPMOrIenvgJOBHYClwF+BI23f12a7m23v3JdtSdJK/SRhJSIgPbxXqRocdzEw0/bbbW8P/CfVEIPWbVYH6OtiFxER9ZeCt9JEqlnMT21dYHsB0CLpBkmXAncBSHq2/NxD0ixJv5V0v6TjJB1UIscWSXp72W6UpAslzSn/dmnA9UVENLXc0lxpK6oJW9szDtjK9gPtrNsW2AJ4ErgfmGZ7gqQjgM
OAI4H/Bk6yfaOkTYCryj6vIWkK1dAHWoaPWsXLiYiIWil43XNbB8UOYI7tRwAk/YEqgxNgEVWvEaqxelvWRIoNlzTM9rO1B0q0WERE30nBW2kxMLmDdc91st+yms8rar6vYOXvdzVgR9svdrcxSVqJiKivPMNb6VpgrXJbEQBJ21CFR6+qGVS3N1uPO7YOx4yIiB5IwStcRc7sB0yS9AdJi4HvAX+pw+EPB8aXmdPvAg6twzEjIqIHEi02QCVaLCKi5xItFhERTW/Iv7QiycC5tj9evq8OPALcavuDPTzWWODvbV9R/5a+VpJWVk3SVSKirWbo4T0HbFWm8wH4R+ChXh5rLPCBnuzQms4SERGN1QwFD+AKoPVP/gOB8wAkrSbpd5JG1Xz/fUlG+aikOyUtkHS9pDWBbwH7S5ovaX9J60k6vSSrzJO0TznOIZIulXQtcI2ksyTt29oYSee2bhsREf2jWQredOAASWsD2wC3AtheAZwDHFS2mwQssP0Y8A3gn2xvC3zI9ktl2fm2x9o+H/gacK3tCVSDzI+XtF451jhgsu33AL8ADgGQNALYGXjd/UpJUyTNlTR3+fNP1/2XEBHRzJqi4NleCIyh6t21ff52OvDJ8vnTVDOaA9wEnCHps0BLB4feCzhK0nxgJrA2sElZd7XtJ8v5ZwGblZ7kgcCFtl9pp51TbY+3Pb5l3RE9vs6IiOhYMz1fuhQ4AdgDeGPrQtt/lvRXSXsCEyi9PduHSno31a3Q2yVt384xBXzE9r2vWVjt1zad5Szg48ABwKfqckUREdFtzVTwTgeW2l4kaY8266ZR3do82/ZyAElvt30rcKuk9wMbA88A69fsdxVwmKTDbFvSdrbndXD+M4DbgL/YvqurxiZaLCKivpriliaA7Qdtn9LB6kuBYay8nQnV87hFku4EbgYWANdRhUDPl7Q/cCywBrCwJLMc28n5/wrc3eYcERHRT5K0AkgaTzV9Tz1yMzs6x7pUMyiMs93lGylJWomI6LkkrXRC0lHAhVSzm/fVOSZR9e5+1J1iFxER9dfvz/AkLafq6awBvEL1MsdJZYhAv7N9HHBcd7btbdKK7f8DNu1F8yIiok4a8dLKC7bHAkh6E/ArYDhwdAPa0lNjgfG8fmhDhySt3t4QhK4kWqxjiQ2LiN5o6C1N248CU4AvqLK2pF+Wl0XmSZoIIKlF0gkl+WShpMPK8iWSRpbP4yXNLJ+PkXSmpBsk/VHShyX9oBz3SklrlO22lzRL0u2SrpI0uiyfKen7JUHlPkm7dZC0MkHS7NLWmyW9s+yfpJWIiAGm4cMSbN8vqQV4E9U4NdveWtLmwAxJ76AatzYGGGv7FUkbduPQb6dKP9kSmE01Xu4/JF0M7C3pcuBHwD62HytvXX6HavA5wOq2J0j6AHC07UmSvgGMt/0FAEnDgd1KmyYB3wU+UvYfB2xj+0lJ7wG+CFxSk7RycNsGq5p8dgpAy/BR3f4dRkRE1xpe8NrYlaoIYfseSX8E3kEV+XVq663B1gSTLvyv7ZclLaJKSrmyLF9EVTzfCWwFXC2Jss0jNftfVH7eXrZvzwjgTEmbAaZ6LtnqNUkrkn5aklY+QidJK8BUgLVGb5bXZyMi6qjhBU/S24DlwKO92P0VVt6WXbvNumVQ5WVKetkrx1+soLpuAYtt79TBsZeVn8vp+Pd0LHCd7f0kjaGKF2uVpJWIiAGkoQWv9HhOBX5ckkpuoIr2urbcytwEuBe4GvicpOtab2mW3tMSYHvgf1l5K7G77gVGSdrJ9uzyXO8dthd3sk/bpJURrJxq6JAuzncGSVqJiGiYRry0sk556WMx8H/ADOCbZd1PgdXKbcjzgUNsL6OK/voTVaLJAuBjZftvAv8taS5VT6zbyuwHk4Hvl2POp3q21pm2SSs/AL4naR5d/PGQpJWIiMZK0ko/SdJKRETfS9JKgyVpJSKi8Rr+0kozSNJKRETjNU3Bq4k0a7Wv7SU9PMYhwA
zbD9exae1K0srrJWElIlZF0xQ8aiLNVsEhwJ1Anxe8iIior6Z9hidpmKRrJN1RIsf2KcvHSLpb0mmSFkuaIWkdSZOpcjTPLW9priPpG5LmlMizqSoj2CUdLumuEoM2XdJqkn5XhmFQvv++9XtERPS9Zip4rcMh5pd4sReB/WyPo4ogO7G1YAGbAT+x/S5gKVUs2QXAXOAg22Ntv0A1fnAH21sB6wAfLPsfBWxnexvg0DITxDlUYwyhSo5ZYPux2gZKmiJprqS5y5/Puy0REfXUTAXvhVKoxtrejypp5buSFlKNB9wIeHPZ9gHb88vnzqLFJkq6tYwb3BN4V1m+kKon+HGqNBiA04FPls+fpp3xeLan2h5ve3zLuiN6faEREfF6zfQMr62DgFHA9iVzcwkr48mW1Wy3nKr39hqS1qYaKD/e9p8lHVOz/97A7sA/A1+TtHXZ5q+S9gQmsLK3164krURE1Fcz9fDaGgE8WordRLo3bKA2Wqy1uD0uaRhVaguSVgM2tn0d8NVynmFl22lUtzZ/Y7tHyTAREbFqmrmHdy7wP+V25Fzgnm7scwZwqqQXgJ2A06je2vwLMKds0wKcU6YBEnCK7aVl3aVUtzITLxYR0c8SLdaPJI0HTrK9W1fbJlosIqLnOosWa+YeXr+SdBTwebp4dhcREX1jSD/Dk/R3ZRzcHyTdLumK8ur/ZR1sP03SluXzEkkj29nmGElf7mlbbB9ne1PbN/b8SiIiYlUN2R5eGVN3MXCm7QPKsm2BD3W0j+1/XYXzrd7eLOa9lWix10qsWESsqqHcw5sIvGz71NYFthcANwDDJF0g6R5J59YkpMwsz9leQ9LXJN0n6UbgnTXLZ0o6uczHd4Sk7SXNKr3JqySNrtnu+5JuK8fp8hleRETU15Dt4QFbUQ0ab892VIPEHwZuAnYB2r3VKGl74ABgLNXv6442x13T9vgyY/osYB/bj5UJYr9DNcgcYHXbEyR9ADiaKm2l7bmmAFMAWoYndSwiop6GcsHrzG22HwSQNJ8qSaWjZ2u7ARfbfr5sf2mb9eeXn++kKrJXlw5jC/BIzXYXlZ8dJrfYngpMBVhr9GZ5fTYioo6GcsFbTBkM3o62SSqr8nt4rvwUsNj2Tl2cc1XPFxERvTCU/8N7LVVW5pTSc0LSNlQ9tp64HjhD0veofl//DPy8ne3uBUZJ2sn27HKL8x22F/em8YkWi4ioryH70oqrEfX7AZPKsITFwPeoUlF6cpw7qG5bLgD+l5WJKm23e4mqR/l9SQuA+cDOvb+CiIiopyStDFBJWomI6LnOklaGbA8vIiKiVtMUPEmWdE7N99UlPdZR6ko3jvdqKksP99ujt+eMiIjeG8ovrbT1HLCVpHXKbOX/CDzU24OtSipLdyRpJekqEVFfTdPDK66gmpwV4EDgvNYVbTMyJd0paYyk9SRdLmlBWbZ/Wf9qKouk90m6o2xzTVk2QdJsSfMk3Szp1YSWiIjof81W8KYDB5TZyrcBbu3GPu8DHra9re2tgCtrV0oaRTUv3kdsbwt8tKy6B9jN9nbAN4DvdnWiEmw9V9Lc5c8/3e2LioiIrjXTLU1sL5Q0hqp3d0U3d1sEnCjp+8Bltm9os35H4HrbD5RzPFmWjwDOlLQZYGCNbrQvSSsREX2k2Xp4UM06fgI1tzOLV3jt72NtANv3AeOoCt+3JX2jm+c5Friu9Ar/ufV4ERHRGE3VwytOB5baXiRpj5rlS4APAkgaB7y1fP574Enb50haCrR9WeUW4KeS3mr7AUkbll7eCFa+FHNITxuZpJWIiPpquoJXQqNPaWfVhcAnSyLLrcB9ZfnWwPGSVgAvU81aXnu8x8osBxdJWg14lOoN0B9Q3dL8OtDcr1tGRAwASVoZoJK0EhHRc0laiYiIppeCFxERTWHIPsOTdBLwR9snl+9XAX9uTUiRdCLwkO0fdvN4ewAv2b65j5r8GkM1aSXpKRHRKEO5h3cTZXqe8jLJSO
BdNet3BnpSvPagh9P9SBqyf1BERAw2Q/k/yDcDJ5XP7wLuBEZLegPwPLAFYEmzgGHA48Ahth+RdDhwKNXYvLuAo8r35ZI+DhxGlaRyKrBJOceRtm+SdAzwduBtwJ8k3Vu2eVv5ebLt9t4SjYiIPjRkC57thyW9ImkTqp7ZbGAjYCfgaeBuqoK4TxlasD/wHeDTVAXurbaXSdrA9lJJpwLP2j4BQNKvgJNs31jOcRVVEQXYEtjV9gulAG4OTATWB+6V9DPbL7dtcxneMAWgZfiovvi1REQ0rSFb8IqbqYrdzsAPqQrezlQF7yFgL+BqSQAtwCNlv4XAuZIuAS7p4NiTgC3LvgDDJQ0rny8tMzK0utz2MmCZpEeBNwMPtj1gosUiIvrOUC94rc/xtqa6pfln4N+BvwEzgY1s79TOfnsDu1NFgn1N0tbtbLMasKPtF2sXlgL4XJttl9V8Xk43fu9JWomIqK+h/NIKVD28D1JFgy0vkV8bUN3WPA8YJWknAElrSHpXecFlY9vXAV+liggbBjxDdUuy1QyqZ3mU/cf2xwVFRETvDPWCt4jq7cxb2ix72vajwGTg+5IWAPOpeoMtwDmSFgHzgFNsLwX+B9hP0nxJuwGHA+MlLZR0F9VLLRERMUAlWmyASrRYRETPJVosIiKa3qAteJL2lWRJm3dj22mStqzDOcdI+ljN9/GSMqYuImIQGLS3NCWdD/w9cK3to/vpnHsAX7b9wb4+11qjN/Pog0/u69P0q8SKRURfG3K3NMt4t12BzwAHlGV7SJop6QJJ90g6V2WMQFk+vnx+VtLxkhZL+j9JE8r6+yV9qGwzRtINku4o/1ojxY4DdisvrnyxnPOyss+Gki4pL7HcImmbsvwYSafXnOPwfv1lRUQEMEgLHrAPcKXt+4AnJG1flm8HHEmVdPI2YJd29l2Pqlf4LqqhBt+mmrB1P+BbZZtHgX+0PQ7Yn5UTxh4F3GB7rO2TXntYvgnMs70N8F/AWTXrNgf+CZgAHC1pjfYuStIUSXMlzV3+/NPd+T1EREQ3DdaCdyAwvXyeXr4D3Gb7QdsrqIYZjGln35eAK8vnRcCsEvO1qGb7NYDTytCE31AV0K7sCpwNYPta4I2Shpd1lyJGBdsAAAyHSURBVNteZvtxqmL65vYOYHuq7fG2x7esO6Ibp4yIiO4adEkrkjYE9gS2lmSqcXMGLqd7iSYve+WDyxWt+9heUTO7wReBvwLbUv1R8OLrjtIzPU5aiYiI+hqM/+GdDJxt+3OtC8qMB7vV8RwjgAdLETyYqqjC69NWat0AHAQcW15uedz232qyNnsk0WIREfU1GG9pHghc3GbZhay8rVkPPwUOLgksm7MyG3Mh1RRBCyR9sc0+xwDbS1pI9XLLwXVsT0RErKJBOyxhqEvSSkREzw25YQkRERE9lYLXQ5JOknRkzferJE2r+X6ipC81pnUREdGRwfjSSqPdBPwLcHKZSmgkMLxm/c5Ub3l2StLqtl/paP2ih55mzFGXr2pbGy7pKhExUKSH13M3U82nB/Auqolln5H0BklrAVsAe0maI+lOSVPbJL6cLGkucERDWh8R0aRS8HrI9sPAK5I2oerNzQZupSqC46kGsP/Y9g62twLWoZqEttWaZXD5if3c9IiIppaC1zs3UxW71oI3u+b7TcBESbeWpJY9qXqCrc7v6KCJFouI6DspeL1zE1Vx25rqluYtVD28namK4U+Byba3Bk4D1q7Z9zk6kGixiIi+k5dWeudm4MvA/baXA09K2oCqJ/fZss3jZVaHycAFPT1BklYiIuorBa93FlG9nfmrNsuG2X5c0mlUPb+/AHMa0L6IiGgjSSsDVJJWIiJ6LkkrERHR9FLwIiKiKQy5Z3iSllM9T2u1L/Ar2ztLGgNcVsbH9fb4Bn5o+9/L9y9TPbs7ppN9DgWet31WR9u0NdCTVpKgEhGDzZAreMALtse2WbZzHY+/DPiwpO+VGcy7ZPvU9pZ3FS
8WERH10xS3NCU9286yQyRdIulqSUskfUHSlyTNk3RLmVm9Pa8AU2knL1PSGEnXSloo6ZqSxoKkY0pPMPFiERENMhQL3jqS5pd/bSeKbWsr4MPADsB3qG47bkeVnPLJTvb7CXCQpLajw38EnGl7G+Bc4JQO9m83XixJKxERfadZbml25Drbz1CFPz8N/E9ZvgjYpqOdbP9N0lnA4cALNat2oiqgAGcDP+jgEO3Gi9meStV7ZK3Rm2W8SEREHQ3FHl5PLKv5vKLm+wpgdUkb1/QWD22z78nAZ4D1enHeDuPFIiKibwzFHl7d2P4z0G5v0faTkn5NVfROL4tvBg6g6t0dBNzQ23MnWiwior6avYe3qk6kihhrdRjwKUkLgU+Ql1IiIgaMRIsNUIkWi4jouUSLRURE0+uy4ElaR9IsSS390aD+Vsbj/bh8PlRSh8MRJO0hqceD2Ms4v5GS1pR0vaQ8O42I6Gfd+Q/vp4GLyrxvA5Kklnq0r6NElBp7AM9SvZzSm+O/JOkaYH+qcXodGkjRYokRi4ihoDu3NA8Cfguv9nBmSrpA0j2SzpWksu69JaVkkaTTJa3V9kCSdigpJPMlHS/pzrK8pXyfU9Z/rhvnWyLp+5LuAD4qaS9JsyXdIek3ZfJVJB0n6a5y3BM6u9A2iSiH1+w3veRwHgp8sbR/N0mjJF1Y2j1H0i5l3zdKmiFpsaRpgGpOc0n5nUZERD/qtIcnaU3gbbaX1Czejmpm74eBm4BdSkzWGcB7bd9XBmV/nmqsWq1fAp+1PVvScTXLPwM8bXuHUihvkjSjo/MBN5Z1T9geJ2kkcBEwyfZzkr4KfEnST4D9gM1tW9Ws5N11FPBW28skbWB7qaRTgWdtn1B+P78CTrJ9Y4kRuwrYAjgauNH2tyTtXa6v1Z1UyS6vI2kKMAWgZfioHjQ1IiK60lUPbySwtM2y22w/aHsFMB8YA7wTeMD2fWWbM4Hda3cqxWZ927PLotrZwvcCPilpPnAr8EZgs07O16o1sWRHYEuqQjkfOBjYFHgaeBH4haQPA893cb21FgLnSvo4VX5meyYBPy7nvBQYXnqWuwPnANi+HHiqdYdy6/UlSeu3PZjtqSVybHzLum1TyyIiYlV09QzvBWDtNstq00mWd+MY3SHgMNtXvWahtEcX52tNLBFwte0DX3dgaQLwXmAy8AVgz262aW+qwvXPwNckbd3ONqsBO9p+sc05uzr2WlSFOCIi+kmnxcr2U+X52tpt/6Pexr3AGEn/YPv3VIOuZ7U51lJJz0h6t+1bqRJJWl0FfF7StbZflvQO4KEeXMctwE9azy9pPWAjqtug69q+QtJNwP3dOZik1YCNbV8n6cbS1mHAM8Dwmk1nUA02P77sN9b2fOB64GPAtyW9H3hDzbHfCDxu++XO2pCklYiI+urOSyszgF0726AUw08Bv5G0iCqLsr03Hj8DnFZuAa5HdcsRYBpwF3BHeZHl5/Sg52j7MeAQ4LyScjIb2BxYH7isLLsR+FI3D9kCnFOuZR5wiu2lVOHS+7W+tEIVHj2+vNhyF9VLLQDfBHaXtJgqTPpPNceeCAyM1y8jIppIl0krksYBX7T9iVU+mTTM9rPl81HAaNtNFb8l6SLgqJrnne1K0kpERM+pk6SVLntRtu+QdF2dxrrtLek/y3n/SNUraxrlrddLuip2ERFRf92KFrN9ep0Gdp9ve6ztrWzvXW5FDlgqM6Wrmsnckr5ds26kpJdVUlpqls+XNL3NskslfdL2S7bPknSapK/0z1VERARkeqCeeIDqzc2vl+8fBRbXbiBpC6rnf7tJWs9261ukhwPXSbqUavjEu6nGKXaor5JWkpoSEc0q4dHd9zxwt6TWe8P7A79us82BVHPhzQD2aV1YBu5PpZoB/WfAF2x3NLYvIiL6QApez0wHDpC0MdWYwIfbrN+/bHMeVfGrdQLwPuBO29f3dUMjIuK1UvB65krgH6nG5Z1fu6L0/B63/SfgGm
A7SRvWbLIN1e978zLO73UkTZE0V9Lc5c8/3d4mERHRSyl4PWD7JeB24N+BC9qsPpCqmC0B/kA1QP0j8OpA9p8CHwd+RwfP7xItFhHRd/LSSs+dCMyy/WRrhFgpaP8CbG374bJsIvD/gNOAzwG/sz1T0n3ALZJ+3dlbqklaiYiorxS8HrK9mDZvZwK7AQ+1FrviemBLSZsCX6UKuMb2w5JOpnqB5VP90OSIiKAbSSvRGElaiYjouc6SVlLwBihJz1CFcg8VI4HHG92IOsm1DFxD6XqG0rVA/13PprbbnVA0tzQHrns7+itlMJI0d6hcT65l4BpK1zOUrgUGxvXkLc2IiGgKKXgREdEUUvAGrqmNbkCdDaXrybUMXEPpeobStcAAuJ68tBIREU0hPbyIiGgKKXgREdEUUvAGIEnvk3SvpN9LOqrR7ektSadLelTSnY1uSz1I2ljSdZLukrRY0hGNblNvSVpb0m2SFpRr+Waj27SqJLVImifpska3ZVVJWiJpUZlQelAnUEjaQNIFku6RdLeknRrWljzDG1gktQD3Uc3K8CAwBzjQ9l0NbVgvSNodeBY4y/ZWjW7PqpI0Ghht+w5J61MFie87SP+3EbCe7WclrQHcCBxh+5YGN63XJH0JGA8Mt/3BRrdnVZQQ+vG2B/3Ac0lnAjfYniZpTWBd20sb0Zb08AaeCcDvbd9fZmeYTs1ksoNJmffvyUa3o15sP2L7jvL5GeBuYKPGtqp3XHm2fF2j/Bu0f/1KeguwNzCt0W2JlSSNAHYHfgHVjDONKnaQgjcQbQT8ueb7gwzS/6gOZZLGANsBtza2Jb1XbgHOBx4FrrY9aK8FOBn4D2BFoxtSJwZmSLpd0pRGN2YVvBV4DPhlud08TdJ6jWpMCl5ED0kaBlwIHGn7b41uT2/ZXm57LPAWYIKkQXnbWdIHgUdt397ottTRrrbHAe8H/q08HhiMVgfGAT+zvR3wHNCw9xJS8Aaeh4CNa76/pSyLAaA877oQONf2RY1uTz2UW0zXAe9rdFt6aRfgQ+W513RgT0nnNLZJq8b2Q+Xno8DFVI86BqMHgQdr7h5cQFUAGyIFb+CZA2wm6a3lAe8BwKUNblPw6osevwDutv3DRrdnVUgaJWmD8nkdqpek7mlsq3rH9n/afovtMVT/f7nW9scb3Kxek7ReeSmKcvtvL2BQvuls+y/AnyW9syx6L9Cwl7wyW8IAY/sVSV8ArgJagNPLpLODjqTzgD2AkZIeBI62/YvGtmqV7AJ8AlhUnn0B/JftKxrYpt4aDZxZ3gpeDfi17UH/Ov8Q8Wbg4urvK1YHfmX7ysY2aZUcBpxb/oC/nwZOfJ1hCRER0RRySzMiIppCCl5ERDSFFLyIiGgKKXgREdEUUvAiIqIppOBFRERTSMGLiIim8P8BgN1ZGzsYcXEAAAAASUVORK5CYII=\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"pd.Series(importance, index=labels_item_features).sort_values().plot.barh()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Permutation Importance"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.inspection import permutation_importance"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {},
"outputs": [],
"source": [
"result = permutation_importance(pipe_lfm, df_item_features, csr_ui_matrix_test, n_repeats=8, n_jobs=4)"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {},
"outputs": [],
"source": [
"import matplotlib.pyplot as plt\n",
"%matplotlib inline"
]
},
{
"cell_type": "code",
"execution_count": 43,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAagAAAEYCAYAAAAJeGK1AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8GearUAAAgAElEQVR4nO3deXxcZdn/8c+XUChbS0MrItIFRQgtUGhFwKBEEXcBQaEPW6GKuBRRUfEXHilK3H1Aij482LBDQEGwikKBtIWytkAXSmQRWhEQCk2BAi1tev3+uO9pTqYzmZlss13v12tembnPdp9pM1fOmft8j8wM55xzrtRsVuwOOOecc5l4gXLOOVeSvEA555wrSV6gnHPOlSQvUM4550qSFyjnnHMlyQuUc70g6WJJ/13sfriuJI2Q9A9JW/XhOv+fpBl9tb4e9mFHSW2StixmPwaKFyjX7yQtk/SWpNWSXpR0uaRtS6Bfl0s6r4D5J0ual2wzs9PM7Mf90Ldpkq7u6/X2RKb9LgNnAZeb2VsAkuZI+lJvVmhmPzGzXq2jUPF359BEH14EZgOnDmQ/isULlBsonzWzbYH9gInA2YUsrMD/vw4wSZsXuw+FikcXJwF5F/gy289rgK8UuxMDwsz84Y9+fQDLgEMTr38J/DU+PwC4F1gFLAIOScw3B2gC7gHeAt4LGPA14EngdeDHwHviOl4D/gBsEZefDMxL64vF9ZwKrAPeBlYDf4nTzwL+Gdf9GHBkbK8D1gAdcf5Vsf1y4LzE+r8MPAWsBGYC70rb9mmx76uA3wLK8p5NA65OWzbf/T4E+Dfw/4CX4/t/XGJdQ4ErgRXAcsIfC5sl3rN7gPOBV4Abs+z3p4FH4rafBaYl1j869vck4F+xD42J6TWxb6n3+SFglzhtD+D2+P49Dnwxsdyn4r/J68BzwJlZ3rsPAU8lXjfF/q+J+3BR4j39enxPn4ltv4n781rs18GZ/k1y7WOGPmXtO/AZYGH8P3EvsHdsvwrYQPi/vxr4XmzfHHgTGFXs3+1+/+wodgf8UfkPEgUK2AVYGj9gd44fgp8iHM1/LL4eEeedE3/5x8ZfykHxQ+HPwJDYvha4E9iV8MH7GHBSXH4yWQpUfH45ieIS274AvCv25xjgDWCnbta3cR3AR+IH1X7AlsB04K60bf8V2B4YSSgQn8jynm38MEwsm+9+HwKsB/4n9uPDcT92j9OvjOvajvBB+wQwJbGP64Gp8T3fKst+HwLsFd+nvYEXgSPitNGxv7+Py+8T+1sXp38XWALsDihO3wHYhlAcTo7b3je+n3vG5V4gFgxgGLBflvfu68AtaW1zgC9l+L9wO1ALbBXbjo992Rz4DvAfYHD6v0mufczQp4x9j/v4EvABQuE+ifD7smX6707a+hYDnyv273Z/P/yUiRsoN0taBcwD5gI/IXwY/M3M/mZmG8zsdmABoWClXG5mS81svZmti22/MLPXzGwp8Cgwy8yeNrNXgb8Tful7xMz+aGbPx/5cT/jrev88Fz8OuNTMHjaztcAPgAMljU7M8zMzW2Vm/yJ8lzC+gO4Vut//bWZrzWwucAvwRUk1wLHAD8zsdTNbBvwaOCGx3PNmNj2+529l6oiZzTGzJfF9Wgy0EAph0rlm9paZLSIcHe8T278EnG1mj1uwyMxeIRxJLDOzy+K2HyEcwX0hLrcO2FPSEDNrN7OHs7xP2xOOVPLxUzNbmdpPM7vazF6J2/81ocDv3s3y2fYxXba+nwr8n5k9YGYdZnYFodAdkKPfrxP2s6J5gXID5Qgz297MRpnZ1+IHwijgC5JWpR5APbBTYrlnM6zrxcTztzK87vEADEknSlqY6M84YHiei7+LcMoMADNbTTgi3Dkxz38Sz98ssK+F7He7mb2ReL089m844Uh0edq0ZB8zveddSPqApNmSVkh6lXDqMv19yravuxBO76UbBXwg7f/Dcc
A74/SjCH+8LJc0V9KBWbrXTjg6zEeXfZV0Zhwl92rc/tAM+5WU779ntr6PAr6Tts+7EP6turMd4ZRgRfMC5YrpWeCqWLhSj23M7GeJeXoTt/8GsHXqhaR3pk3vsm5JowinbL4B7GBm2xOOVJRnX54nfOCk1rcN4XTRcz3pfC8Ni9tPGUno38uEv+ZHpU1L9jF9PzPt97WE79h2MbOhwMV0vk+5PEv4/ixT+9y0/w/bmtlXAcxsvpkdDrwDuJnwvVsmi4H35bEPXdolHQx8D/giMCz++79awH5l1U3fnwWa0vZ5azNrydbvOKDjvYQjtormBcoV09XAZyV9XFKNpMGSDpH07j5a/yJgrKTxkgYTvkNIepHwHU7KNoQPhBUAkk4mHEEl53+3pC2ybK8FODlub0vCacwH4mm0YjhX0hbxg/czwB/NrIPw4dgkabtYlL9N9yPeMu33dsBKM1sjaX/gvwro1wzgx5J2i6Mz95a0A+H7ufdJOkHSoPh4v6S6uB/HSRoaT/W+RhhAkMmDwPaSkkeF6f/WmWxH+P5tBbC5pB8SvvPrlRx9/z1wWjwilaRtJH1aUuoIMFO/9yecCl1OhfMC5YrGzJ4FDieM6FpB+Gvyu/TR/0szewL4EXAH4buk9Gt5mgnfC6ySdLOZPUb4PuY+wgfDXoQRbSmthAEe/5H0cobt3QH8N+F7kxcIRwnH9sW+9MB/CKe6nicMSz7NzP4Rp00lHF0+TXhPrgUu7WZdmfb7a8CPJL0O/JDsRzOZ/E+cfxbhw7qZMEjhdeAwwnv2fNyHnxO+B4LwPdkySa8RTikel2nlZvY2YfDK8Ynm3wBHS2qXdGGWft0G3EoYNLKcMOov5+nOPGXsu5ktIIz8vIjw7/UUYVBKyk+Bs+P/0TNj23GEI9aKJzO/YaFzlUTSIYTRZn11JFp2JI0A7gb2zTbQoxxJegdhkNG+Zram2P3pb+V0cZpzzuXFzFYQrqmqKGb2EuGavKrgp/icc86VJD/F55xzriT5EZRzzrmS5N9Blajhw4fb6NGji90N55zrdw899NDLZjYivd0LVIkaPXo0CxYsKHY3nHOu30nKeE2Xn+JzzjlXkrxARZKOkGSSuh2aKukMScn4nL9JqvjQRudcaaqtrUVS1gfThnY7Pfmora0t9u504QWq0yTCVfWTcsx3Bol8NzP7lJlVfGijc640tbe357rdTd63t2hvby/y3nTlBQpQuP14PTCFGE0Ts+F+JelRSYslTZV0OiFleLak2XG+ZZKGx+ffjvM/KumM2DY6piP/XtJSSbMkbVWUHXXOlQyp1xm0JaU/9scHSQSHA7ea2ROSXpE0gRDIOBoYb2brJdWa2UpJ3wYazKxLFltc5mTCjccEPCBpLiFfazdgkpl9WdIfCNH7m4RzSjqVcH8YRo4c2V/76pwrEaVYpEqpT34EFUwCrovPr4uvDyXcSGw9gJmtzLGOeuAmM3sj3gfoT8DBcdozZrYwPn+IUPg2YWaXmNlEM5s4YsQmIy6dcxUm31NvuU7hlUKf+kPVH0FJqiXcqnsvSUa47bIB8/twM2sTzzsIt4h2zlWxSkvx6Y/98SMoOJpw07xRZjbazHYBniHcS+gr8eZgqUIG4VbLme7WeTdwhKSt443ijoxtzjnXr7odxZdjevIxbNiwIu9JV16gwum8m9LabiTcdvxfwGJJi+i8IdslwK2pQRIpZvYw4R40DwIPADPM7JF+7LdzzuU+9Tbt1bxP061cmeubjIHlYbElauLEieZJEs65aiDpITObmN7uR1DOOedKkhco55xzJaksC1SuWCJJcyRtcrjYB9vcsy/X6ZwrDbnignI9CokTKuVooVJTlgWK/GOJ+tIRQEEFKjUC0DlX2nLFBeVzDVBPliu1aKFSU3YFKkss0VaSrouRQjcRrzOSdJqkXyaWnSzpovj8eEkPSloo6f8k1cT21ZKaJC2SdL+kHSUdBHwO+GWc/z3JozRJwyUtS2xjpq
RW4E5J20i6NG7rEUmHD9R75Vy1KaUUhL5SifuUr7IrUCRiiYBULNFXgTfNrA44B5gQ572RcD1SyjHAdZLq4vMPmtl4wsWzx8V5tgHuN7N9gLuAL5vZvcBM4LtmNt7M/pmjj/sBR5vZh4FGoNXM9gcaCEVum0wLSTpV0gJJC1asWJH/O+Kc26hHp+hKuL/VrBwLVKZYog8Rs+3MbDGwOD5fATwt6QBJOwB7APcAHyUUsfmSFsbXu8Z1vg38NT7PGkuUw+2JaKTDgLPiduYAg4GMQXsedeRc75VKTE9f9bealdV3JMoeS9TdBbHXAV8E/kHIyjOFP0uuMLMfZJh/nXX+r+gg+3u0ns4CPzht2hvJbgNHmdnj3fTROdcHKvEDvRL3KV/ldgSVLZboIWLSg6RxwN6JZW4inBZMHnndCRwt6R1xmVpJo3JsOz3iaBmdpxKP7ma524CpsSgiad8c23HOFUGvRvH1cPlSixYqNeVWoLLFEo0BtpXUBvyIULAAMLN2oA0YZWYPxrbHgLOBWZIWA7cToo26cx3w3TjQ4T3Ar4CvSnoEGN7Ncj8GBhEik5bG1865EtKbEXyFxgmVcrRQqfGooxLlUUfOuWohjzpyzjlXTrxAOeecK0lVV6Ak7RAvtl0o6T+SnovPV0l6LM91nCbpxPj8cklHx+d9HrHkXDnrbYSQehgj5BFClaGshpn3BTN7BRgPIGkasNrMfiVpNJ3XP2UlaXMzu7gv+iKpxsw6+mJdzpWiVIRQr0wbWvA6qv0C10pRdUdQOdRI+r2kpZJmSUpFJs2RdIGkBcA3JU2TdGZ3K5J0mKT7JD0s6Y8KEU1IWibp55IeBr7Q/7vkqpF/QOfP36vS5QWqq92A35rZWGAVcFRi2hYx5eHXuVYiaThhGPuhZrYfsAD4dmKWV8xsPzO7Lm05jzpyfaa3p9b64lEO++9KV9Wd4svhGTNbGJ+nxxxdX8B6DiAkn98TfwG2AO7LtS4zu4RwS3kmTpzo4/9dr5TCJSTFLAD57r8XqdLlBaqrtYnnHcRU9OgN8idCHl+224EUsi7nnKtKfoqvf9wPfFDSewEUbrnxviL3yVWRUjh6SumL04SFLlNIhFApvVeuKz+C6gdmtkLSZKBF0pax+WzgieL1yrmB11cf/jatT1bjyoxHHZUojzpyzlULedSRc865cuIFyjnnXEnyAtUNSY3xot3FCnFIH8gy30RJF2aZNkfS4+qMVzpa0gxJe/Zv7105a2lpYdy4cdTU1DBu3DhaWlqK3SXnBpwPkshC0oHAZ4D9zGxtvPh2i0zzmtkCwsW42RwX50m5oe966ipNS0sLjY2NNDc3U19fz7x585gyZQoAkyZlu3Khd2pra2lvb8863c4Zgs59bZP2YcOG+T2NXL/xI6jsdgJeNrO1AGb2spk9L+n9ku6VtEjSg5K2k3SIpJw5finyUFnXjaamJpqbm2loaGDQoEE0NDTQ3NxMU1NTj9eZ62LUVGZe1hvykfmmft0Vtb7uo6s+fgSV3Szgh5KeAO4gpD/cF38eY2bzJQ0B3spjXddISs330WwzSToVOBVg5MiRvem7K2NtbW3U19d3aauvr6etra1X6+2vAuCFxfUXP4LKwsxWAxMIBWMFoTB9BXjBzObHeV4zs/V5rO44MxsfH690s81LYt7fxBEjRvTBXrhyVFdXx7x587q0zZs3j7q6ul6tN9cRUn+st6DbpjuXxgtUN8ysw8zmmNk5wDeAz+daRtJtcTDEjP7voatEjY2NTJkyhdmzZ7Nu3Tpmz57NlClTaGxsLHbXnBtQfoovC0m7AxvM7MnYNB5oAz4h6f3xFN92pJ3iM7OPD3BXXYVJDYSYOnUqbW1t1NXV0dTU1KsBEvkcoXR3qs7OGZJxeiGRQrn4UZRL5wUqu22B6ZK2B9YDTxFO910W27ciFKdDi9dFV6kmTZrUbyP2MsmnOHjckBtoHnVUojzqyDlXLTzqyDnnXFnxAuWcc64kVW
2BktQRR9stjRfdfkdS1b4fLjOPHMrO3xvX7/rqGoZyewCrE8/fQbgY99wM821ejP5NmDDBXHFde+21NmbMGGttbbW3337bWltbbcyYMXbttdcWpT/Dhg0zYOPDzhnS5XX6Y9iwYf3Wl1J7b1x5AxZYps/pTI3V8EgWqPh6V+AVwu3aJwMzgVZgLmFE353Aw8AS4PC4zGjgH8DlhJsRXkMY1XcP8CSwf5xvf0IKxSPAvcDuufrnBar4xo4da62trV3aWltbbezYsQPWh/A35KbPzczsnCE9X7aXSuG9cZUjW4Gq2lF8klab2bZpbauA3YFPAucBe5vZSkmbA1ub2WsxNPZ+YDdgFGH4+b7AUmA+sAiYAnwOONnMjoiRSG+a2XpJhwJfNbOjMvQpGXU0Yfny5f2y7y4/NTU1rFmzhkGDBm1sW7duHYMHD6ajo2NA+pB+7VGX39dpQ2Haqz1btpdK4b1xlcNH8RXudjNLxTQL+ImkxYRTgTsDO8Zpz5jZEjPbQChSd8a/CJYQjrAAhgJ/lPQocD4wNtMGzaOOSkp/RQ4VyjqP8gd02e6UynvjKpsXqEjSrkAH8FJseiMx+ThgBDDBzMYDLwKD47S1ifk2JF5voPNC6B8Ds81sHPDZxLKuhHnkUHb+3riB4EkSgKQRwMXARWZmGSJdhgIvmdk6SQ2EU3uFGAo8F59P7k1f3cDpj8ihQqUf/ST/b2aLH0pJxhD19VFUKbw3rvJV83dQHYTTcIMIUUZXAf9jZhskTQYmmtk34rzDgb8QBkssAA4gfE8F8Nd4ZISky+PrGySNTk2LNz+8gnBUdgtwvJmN7q5/niThnKsW2b6DqtoCVeq8QDnnqoUPknDOOVdWvEA555wrSV6gEiS9U9J1kv4p6SFJf5P0vgzz3VuM/jnnXDXxAhUpDIe6CZhjZu8xswnAD+i83ol4wS5mdlBxeunyVU45cbW1tUja5MG0oRnba2tri91l5waEF6hODcA6M7s41WBmi4AaSXdLmgk8BiGFIv48RNJcSX+W9LSkn0k6TtKDkpZIek+cb4SkGyXNj48PFmH/qkZLSwuNjY1Mnz6dNWvWMH36dBobG4tapLobDt7e3p4tjitje3t7e851OlcJvEB1Ggc8lGXafsA3zWyT033APsBpQB1wAvA+M9sfmAFMjfP8BjjfzN4PHBWnuX7S1NREc3MzDQ0NDBo0iIaGBpqbm2lqaip215xzBfALdfPzoJk9k2XafDN7AUDSP4FZsX0J4agMQoDsnom/eIdI2tbMVidXlJbF14fdry5tbW3U19d3aauvr6etra1IPQr68ojHj55cNfAjqE5LgQlZpr2RpR3yizraDDjAzMbHx87pxQk8i6+vlGpOXKbTdb3J2HOu0nmB6tQKbBmPYgCQtDdwcB+sexadp/uQNL4P1umyKMWcuFwFJeMgiSztqQgjL1Ku0vkpvihm8B0JXCDp+8AaYBlwcx+s/nTgtzENfXPgLsL3Vq4flFtOXHeFxqYNXD+cKzUedVSiPOrIOVctPOrIOedcWfEC5ZxzriRVfIHKN76on7a9LN6qwznnXIEqukDlE1/kylOpRRlliyvKFVtUyjFGpfYeuyqU7dqMSngAHwHuytAu4JfAo4QLao+J7YcAc4E/A08DPyPc7v3BON974nwjgBuB+fHxwdi+A2FI+VJCWsRyYDjwI+CMxPabCMkUWfs+YcIEc5lde+21NmbMGGttbbW3337bWltbbcyYMXbttdcOyPbDr03uti7OGdLjdfdEb9dT7PfYVRdggWX6DM/UWCkPwvDu8zO0HwXcDtQQjqb+BewUC9Sq+HxLwm3az43LfBO4ID6/FqiPz0cCbfH5hcAP4/NPAxYL1Gjg4di+GfBPYIfu+u4FKruxY8daa2trl7bW1lYbO3bsgGy/GgpUsd9jV12yFahqvQ6qHmgxsw7gRUlzgfcDr9GL6CLgQ8DnAczsFknt8fkySa9I2pdQEB8xs1fSO+
VRR/kphSij/owaKoUYo1J4j52r6O+g6D6+KJs+iy5KMwOYDJwMXJppBvOoo7yUQpRR+l96/bnunjx6qxTeY+cqvUBliy9aBRwjqUbSCMKRz4MFrDdbdNFdwH/Ftk8CwxLL3AR8gnCkdlvhu+JSih1llK0AdDtIIsf09Bij/upjvor9HjsHFR51ZJY1vugMYFtgEeF7ou+Z2X8k7ZHnqrNFF50LtEhaCtxL+G4r1Ze3Jc0GVsVTi66HSjHKKJ+CUE6xRaX4Hrvq41FHA0TSZsDDwBfM7Mlc83vUkXOuWnjUURFJ2hN4Crgzn+LknHOuwk/xlQozewzYtdj9cM65cuJHUM4550pSxRQoSR2SFkp6VNJfJG1f7D65ypQp1ig9yqiUIoucK1cVU6CAt+I1SeOAlcDXi90hV1lSw8Xb29szXneUfN3e3r7Jcs65wlRSgUq6D9gZwjVKku6XtFjSTZKGxfY5ks6XtEBSm6T3S/qTpCclnZdakaSbYwr60rTrqVZLapK0KK5/x9i+Y9zOovg4KLYfL+nBeJT3f5JqBvQdcc65MlNxBSp+8H8UmBmbrgS+b2Z7E+KKzknM/nYc2ngxISD268A4YLKkHeI8p1hIQZ8InJ5o3wa438z2IVwH9eXYfiEwN7bvByyVVAccQwiVHQ90EEJo0/t+aiyYC1asWNHr98L1vUKOhpIX6TrnCldJBWorSQuB/xDy7m6XNBTY3szmxnmuIKRGpKSK2BJgqZm9YGZrCUnmu8Rpp0taBNwf23aL7W8Df43PHyIEwkJIUP9fADPrMLNXCQVzAjA/9vGjZBjV51FHpa+Q6wb7IwbJuWpSScPM3zKz8ZK2JkQJfZ1QkLqTzNlLz+DbXNIhhGDYA83sTUlzgMFxnnXW+enTQffvpYArzOwH+e6MKz09LTZepJzrmUo6ggLAzN4kRBF9B3gDaJd0cJx8AuF+T/kaCrTH4rQHcEAey9wJfBXC6cZ4FHcncLSkd8T2WkmjCuiHKzG5svb6KlPPuWpWSUdQG5nZIzEnbxJwEnBxPLJ6mpAmnq9bgdMktQGPE07z5fJN4BJJUwhHVl81s/sknQ3MipFH6whHeMsL6IsrEdmOiMopa8+5cuBZfCXKs/icc9XCs/icc86VFS9QzjnnSlLVFKhEFFLqMboH65gs6V193ztXTC0tLYwbN46amhrGjRtHS0tLzmVScUceceRc/6nIQRJZvBUvku2NycCjwPO9744rBS0tLTQ2NtLc3Ex9fT3z5s1jypQpAFlvzpcatWdmMG1ol0ETuS7MleTDzp3LU9UcQaWTtK2kOyU9LGmJpMNj++gYffT7GG80S9JWko4mpElcE4/AtpL0Q0nzY0DtJYqfTpJOl/RYjFe6TtJmMUJpRJy+maSnUq9d8TQ1NdHc3ExDQwODBg2ioaGB5uZmmpqait0156pe1Yzik9RBSIwAeAb4ArC1mb0maThhCPluwCjCzQUnmtlCSX8AZprZ1fFC3TPNbEFcZ62ZrYzPrwL+YGZ/kfQ8MMbM1kra3sxWSToHeNXMLpB0GPAVMzsqrY+nAqcCjBw5csLy5T4Kvb/V1NSwZs0aBg0atLFt3bp1DB48mI6OjozLpB9BMe3VTaZ1p1p+55zLl4/i60w7H29mRxLSHX4Sr5e6gxAuu2Oc9xkzWxifJ2OM0jVIekDSEkLE0djYvphwpHU8sD62XQqcGJ+fAlyWvjKPOhp4dXV1zJs3r0vbvHnzqKur6/E605PO01PPnXP5qaYCle44YAQwIX439SKdMUbJ2KOMMUaSBgO/A442s72A3yeW/zTwW0JY7HxJm5vZs8CLkj4C7A/8ve93yRWqsbGRKVOmMHv2bNatW8fs2bOZMmUKjY2NWZfpTaHxIuVc/qppkES6ocBLZrZOUgPh1F4urwPbxeepYvSypG2Bo4EbYlLELmY2W9I84FhgW2AVMAO4GrjKzDKfP3IDKjUQYurUqbS1tVFXV0dTU1PWARJJkr
BzhnQ5recRR871nWouUNcAf4mn5xYA/8hjmcsJsUlvAQcSjpoeJSSoz4/z1ABXxww+ARea2ao4bSbh1N4mp/dc8UyaNCmvgpSUPBLyiCPn+kfVDJIoBZImAueb2cG55vWoI+dctcg2SKKaj6AGlKSzCCnnm9yo0Dnn3KaqeZDEgDKzn5nZKDObl3tu55xzFV2gJL0zXij7T0kPSfpbvK36X7PMP0PSnvH5snh9VPo80ySd2d99d30vFU+0yb2c0uKKPLrIudJQsQUqpjrcBMwxs/eY2QTgB3Re67QJM/uSmT3Ww+356dIS197envXapGzXLbW3txe5185Vr4otUEAD4bbsF6cazGwRcDewraQbJP1D0jWJiKI5cSBDF5IaJT0Rh43vnmifI+kCSQuAb0qaIGluPFq7TdJOifl+LunBuJ6cgyRc/vJJbyin7Tjngkr+q38cIQUik30JqQ/PA/cAHwQyfjckaQLhWqbxhPfr4bT1bmFmEyUNItxO/nAzWyHpGKCJkBoBsLmZ7S/pU8A5wKEZtpWMOipgV11/Fg8vTM4VRyUXqO48aGb/BpC0kBBllG3wwsHATWb2Zpx/Ztr06+PP3QlF8fb4gVYDvJCY70/xZ9boJDO7BLgEwjDzvPfG5ZXQ0NNCk1q3FyrnBlYlF6ilhHSHTHJGGRXgjfhTwFIzOzDHNnu7PZdmoK7l82sGnRtYlfwdVCuwZTxtBoCkvQlHRIW4Czgi3l5jO+CzWeZ7HBgh6cC4rUGSxmaZ1xVJxlF8WdoleXSRc0VUsX/Jm5lJOhK4QNL3gTXAMuDmAtfzsKTrgUXAS3RGGqXP97bCPaMujDFHmwMXEI7kXAno7gjI44qcKz0edVSiPOrIOVct/H5QzjnnyooXKOeccyWp4guUJJN0deL15pJWZIs7yrGu8fE6JldELS0tjBs3jpqaGsaNG0dLS0vOZVIxR6lYI48wcq70VXyBIgwDHydpq/j6Y8BzPVzXeKCgAuURSH2rpaWFxsZGpk+fzpo1a5g+fTqNjY05i1Qq5gjwCCPnykQ1FCiAvxFuww4wCWgBkLSZpCcljUi8fkrSCElfkPSopEWS7pK0BfAj4BhJCyUdI2kbSZfGCKNHJB0e1zNZ0s59smUAAB4oSURBVExJrcCdkq6UdESqMzFe6fCBfAMqRVNTE83NzTQ0NDBo0CAaGhpobm6mqakp4/z5XFzrF+A6V5qqpUBdBxwraTCwN/AAgJltINyCPXWPpkOBRWa2Avgh8HEz2wf4nJm9HduuN7PxZnY90Ai0mtn+hOy/X0raJq5rP+BoM/sw0AxMBohD0A8CbknvZExaXyBpwYoVK/r8TagEbW1t1NfXd2mrr6+nra0t6zLZClDyOijnXOmpigJlZosJ8UKTCEdTSZcCJ8bnp9B5O/Z7gMslfZkQW5TJYcBZMS5pDjAYSIXo3W5mK+P25wK7xSO1ScCNZrY+Qz8vMbOJZjZxxIgRBe9nNairq2PevK6pVPPmzaOuri7rMtkupUimmTvnSk9VFKhoJvAr4um9FDN7FnhR0keA/YG/x/bTgLOBXYCHJO2QYZ0CjopHVOPNbKSZpf6UfyNt3iuB44GTCUXR9UBjYyNTpkxh9uzZrFu3jtmzZzNlyhQaGxszzp9PAfIi5VxpqqYv8C8FVpnZEkmHpE2bQTjVd5WZdQBIeo+ZPQA8IOmThEL1OrBdYrnbgKmSpsbkin3N7JEs278ceBD4T0/vOeVg0qRJAEydOpW2tjbq6upoamra2N4dSdg5QzzCyLkyUTUFKqaXX5hl8kzCqb3LEm2/lLQb4SjpTkLU0b/oPKX3U+DHhDijxZI2A54BPpNl+y9KaqPAqCW3qUmTJuVVkJKSR0kea+RcefCoIyDepPB8M+u3GwlK2hpYAuxnZq/mmt+jjpxz1cKjjrKQdBZwI+F28P21jUOBNmB6PsXJOedcFZ3iy8bMfgb8rJ+3cQcwqj+34Zxzlabqj6AKJel8SW
ckXt8maUbi9a8lfbs4vasu3UUe1dbWwrShm0Qa9SQmyTlXHF6gCncP4UJb4sCI4UDyxoQHAffmWolHIPVOrsijVJRRMtKopzFJzrkiSV2s6I/8HsC7gGfj872AK4BZwDBgS2AVIXFiPvAocAmdg1HmEEb9LQC+0912JkyYYC67sWPHWmtra5e21tZWGzt2rAEGmJ0zJPw0M6DbZZxzxQMssAyfgz6KrwckPQN8GPgkYRj6zsB9wKuE77MOt5giIekq4A9m9hdJc4DHzOxrWdZ7KnAqwMiRIycsX768v3elbNXU1LBmzRoGDRq0sW3dunUMHjyYDRs2AIRrns59beP0zTbbLOsyHR0dA9d551wXPoqvb91LOJV3EKEw3Zd4fQ/QIOkBSUuAj9D1FOD12VZqHnWUt0Iij1J/hPUkJsk5VzxeoHom9T3UXoTTePcDB9L5/dPvCEGxewG/J2T0paRHILkeKDTyqKfLOOeKx7+o75l7gTOBpy1EI62UtD3hSOnLcZ6XJW0LHA3cUJxuVq7uIo8mTZq0MaU8FWmUPJXdk5gk59zA8++gekBSDdAOXGhmZ8e2y4EDzWx3SecRUsv/AzwBLDezafE7qDPNLGdEhCdJOOeqRbbvoLxAlSgvUM65auGDJJxzzpUVL1DOOedKUtUUKEkm6erE680lrZD01x6ub4akPXuw3CE93WYl8ugh51w2VVOgCMO7x0naKr7+GPBcT1dmZl8yv/Fgr/Q2eqi2thZJGx9MG7rxeXoGn3Ou/FRTgQL4G/Dp+HwSidu/S5om6czE60cljZa0jaRbJC2KbcfE6XPifaSQ9AlJD8d57oxt+0u6T9Ijku6VtPuA7WWZaGpqorm5mYaGBgYNGkRDQwPNzc00NTV1mS81ZDxde3t7egzVxufJDL5syzvnSlu1FajrgGMlDQb2Bh7IY5lPAM+b2T5mNg64NTlR0gjCxbhHmdk+wBfipH8AB5vZvoRsvp/k2pCkUyUtkLRgxYoVee9UuWpra6O+vr5LW319PW1tbZvMmzxS2njElEO+8znnSlNVFSgzWwyMJhw9/S3PxZYAH5P0c0kH26Y3HDwAuMvMnonbWBnbhwJ/lPQocD5d446y9a+qoo4KjStKf+SS73zOudJUVQUqmgn8isTpvWg9Xd+PwQBm9gSwH6FQnSfph3lu58fA7HjU9Vm6xh05PHrIOde9aow6uhRYZWZLJB2SaF8GfAZA0n7AmPj8XcBKM7ta0irgS2nrux/4naQxZvaMpNp4FDWUzkEYk/trZ8pZd3FFSd0dBSVP4dk5QzaJOMq1vHOudFVdgTKzfwMXZph0I3CipKWE76aeiO17Ab+UtAFYB3w1bX0r4m0y/hRvYPgSYYTgL4ArJJ0N3NIvO1MBUtl5PZGp8Ni0XnbIOVcyPOqoRHnUkXOuWnjUkXPOubLiBco551xJ8gLVDUmr48/RMSrpvMS04ZLWSboobZmFkq5La5sp6cTE699L+m5/998558qZF6j8PUNnCgWEC3KXJmeQVAfUAAdL2iYx6XTgXEnbSzoI+ADh2ijXWzHeyKONnKs8XqDy9ybQloo3Ao4B/pA2zyTgKmAWcHiq0cyWAZcQRvb9L/ANM1vf3x2uZF2Gl6dFGznnKoMXqMKkopJ2ATqA59OmHxPnaSEUq6RfEWKTHjWzuzKtvNqijnrLY4ycq2xeoApzK+Eap2OB65MT4pHVy2b2L+BOYF9JyfNOexPe7z3i9VKbqLaoo97ySyScq2xeoApgZm8DDwHfAW5ImzyJUHyWAf8EhgBHAcSC9DvgeOBJ0i72dc45t6mqS5LoA78G5prZytQppliAvgjsZWbPx7YG4L8JSedfAZ40szmSngDul/QHM/PzeD2UPHqS1CXayDlXGfwIqkBmttTMrkhrPhh4LlWcoruAPSWNAr4PnBmXfx64gDBgwvXWtFcxM1auXJl7XudcWfGooxLlUUfOuWrhUUfOOefKihco55xzJaliC5
Sk8yWdkXh9m6QZide/lvTtAtZ3SEyBcM45NwAqtkAB9wAHwcZRdsPpetv1g4B7C1jfIan15UuSj5LsB7W1tR5x5FwVqOQP0HvpzLsbCzwK7CRpGCG2qA4wSXOBbYGXgclm9oKk04HTCLeBfww4K77ukHQ8MBX4B3AxMDJu4wwzu0fSNOA9wK7AvyQ9HufZNf68wMwy3TDR5SApEWs0BDMjMdTfL9x1rsJUbIEys+clrZc0knDkcx+wM3Ag8CrQRihgh8e74h4DNAGnEArSGDNbK2l7M1sl6WJgtZn9CkDStcD5ZjYvbuM2QtED2BOoN7O3YsHaA2gAtgMel/S/ZrZuQN4I55wrUxVboKJ7CcXpIOB/CAXqIEKBeg44DLg9/hVeA7wQl1sMXCPpZuDmLOs+lHCdU+r1EEnbxuczzeytxLy3mNlaYK2kl4AdgX+nrzDeOv5UgJEjR6ZPdmTO3/NMPucqU6UXqNT3UHsRTvE9S4gpeg2YA+xsZgdmWO7TwIeAzwKNkvbKMM9mwAFmtibZGD8s30ibd23ieQdZ3nczu4SQes7EiRP9fFUGydN63bU558pfJQ+SgHAE9RlgpZl1mNlKYHvCab4WYISkAwEkDZI0Ng6o2MXMZhMSIIYSvqN6nXCKLmUW4bso4vLjB2KHqln6d0zJiCP//sm5ylPpBWoJYfTe/Wltr5rZS8DRwM8lLQIWEo62aoCrJS0BHgEuNLNVwF+AI+Mdcw8m3IRwoqTFkh4jDKJwA8DMPOLIuSrgUUclyqOOnHPVwqOOnHPOlRUvUM4550pS2RYoSUdIMkl75DHvDEl79sE2R0v6r8TriZL8olvnnOsHZVugCHewnRd/dsvMvmRmj/XBNkcDGwuUmS0ws9P7YL1Vr7a2FkmbPFKRRsmHxxs5Vx3KskDFC2LrgSnAsbHtEElzJN0g6R+SrlG8OCa2T4zPV0v6paSlku6QtH+c/rSkz8V5Rku6W9LD8ZHK4PsZcHAcyfetuM2/xmVqJd0cR/XdL2nv2D5N0qWJbVRFQSv0uqT29nbMbJMHsElbiDrq+bacc+WhLAsUcDhwq5k9AbwiaUJs3xc4gxA1tCvwwQzLbgO0mtlYwrVN5wEfA44EfhTneQn4mJntBxwDpE7jnQXcbWbjzez8rqvlXOARM9sb+H/AlYlpewAfB/YHzpE0qGe77Zxz1aNcC9Qk4Lr4/Do6T/M9aGb/NrMNhOuaRmdY9m3g1vh8CTA35uItScw/CPh9vBbqj4SCl0s9cBWAmbUCO0gaEqfdYmZrzexlQvHbMdMKJJ0qaYGkBStWrMhjk6Ut0ym7bI/erNs5V5nKLupIUi3wEWAvSUa4sNaAW8gvUmiddV78tSG1jJltSNwe41vAi8A+hCK+ZpO1FKYqo44Kucau0EKTXLcXKecqUzkeQR0NXGVmo8xstJntAjwDHNyH2xgKvBCPxE4gFEHYNO4o6W7gOAjfhwEvm9lrfdinstKTC8CzHVmlt6XijXqzLedc6SvHAjUJuCmt7UbyGM1XgN8BJ8UIpD3oDH9dTLgn1CJJ30pbZhowQdJiwmCKk/qwPxUv0wCJZKRR8uHxRs5VB486KlEedeScqxYedeScc66seIFyzjlXkrxAOeecK0k5C5SkrSTNlVSTa95yJGmypIvi89MkndjNvIckUiUK2cYyScMlbSHprsRwdhe1tLQwbtw4ampqGDduHC0tLTBtqMcaOVfF8jmCOgX4k5l19HdneqqviqeZXWxmV3YzyyGEmxr2dP1vA3cS0ilc1NLSQmNjI9OnT2fNmjVMnz6dxsZGgE1ijZxz1SOfAnUc8GfImXf3UUmPSFoSs+e2TF+RpPfHrLqFMQ/v0dheE1/Pj9O/ksf2lkn6uaSHgS9IOkzSfTE7748KeX1I+pmkx+J6f9XdjsbcvDPj89MTy10naTThrrnfiv0/WNIISTfGfs+X9MG47A6SZink/c0AkleS3hzfUxc1NTXR3NxMQ0MDgw
YNoqGhgebm5mJ3yzlXZN0WKElbALua2bJE8yZ5d5IGA5cDx5jZXoSkhK9mWOVlwFfMbDwhUSFlCuE27O8H3g98WdKYbNtLLPdKzMu7AzgbODS+XgB8W9IOhIy9sTEj77zu9jfNWcC+cbnT4ntwMXB+zOK7G/hNfP1+4ChgRlz2HGBezPu7CRiZWO+jcR83UWlRR/lqa2ujvr6+S1v6a+dc9cl1BDUcWJXWlinvbnfgmRjeCnAF8KHkQpK2B7Yzs/ti07WJyYcBJ0paCDwA7ADs1s32Uq6PPw8gFLB74jpOAkYBrxJiipolfR54M8f+Ji0GrpF0PLA+yzyHAhfFbc4EhsQjtw8BVwOY2S3AxvNU8VTp25I2SaQws0vMbKKZTRwxYkQBXS1vdXV1zJs3r0tb+mvnXPXJVaDeAganteWVK1cgAVPjkcl4MxtjZrPy2N4bieVvTyy/p5lNMbP1hATxG4DP0BkSm49PA78F9gPmZxnYsBlwQGK7O5vZ6jzWvSW9z/erGI2NjUyZMoXZs2ezbt06Zs+ezZQpU4rdLedckXVboMysHaiJp/C68zgwWtJ74+sTgLlp61oFvC7pA7Hp2MTk24CvKt6GQtL7JG2T5z4A3E841fjeuPw2cR3bAkPN7G+EANh98lmZpM2AXcxsNvB9QjbftmyaxTcLmJpYbnx8ehfxxoaSPgkMS8yzAyGnb10B+1fRJk2aRFNTE1OnTmXw4MFMnTqVpqYmgE1y95xz1SOfo59ZhFtJ3JFtBjNbI+lk4I/xSGM+4fuadFMIt7HYQChgr8b2GYRTdw/HQRArgCPy3QkzWyFpMtCSGJxxNqGg/DkWWAHfznOVNcDVkobG5S40s1WS/gLcIOlwQmE6HfhtzN/bnFCYTiPcG6pF0lLgXuBfiXU3EJLXXcKkSZOYNCk9TnESK6cVozfOuVKQM4tP0n7At8zshF5vTNo2dQpM0lnATmb2zd6ut5xI+hNwVuL7uow8i885Vy2yZfHlPIIys4clzZZU0wfXQn1a0g/idpcDk3u5vrISR0XenKs4Oeecy3OAg5ld2hcbM7Pr6Rx5V3XihbrdXQjsnHMuqrgsPkkd8ULa1GO0pHvjtNGpi4N7sX6T9OvE6zMlTcuxTLcRSq6r2trazhsWThta7O4454qk4goU8FZi2Pd4M1tmZj2OJ8pgLfB5ScPzXSBbhFKWoetVr729vfOGhc65qlWJBWoTkja5NkkhJPZmSbfH2KRvSPp2jGu6X1K2lNL1wCWEYevp6xwtqTXGI90paWRsT0YozZF0gaQFQFUNEMklpljl3e6cq2yVWKC2SpzeS781fLpxwOcJ0UNNwJtmti9wH9DdKbnfAsfFYehJ04ErYjzSNcCFWZbfIiZG/DrZWK1RR0lejJxzKZVYoJKn+I7MMe9sM3vdzFYQrsn6S2xfQtdIpS7M7DXCYIfT0yYdSGeE01WE68cyyThQpFqjjpL8tJ5zLqUSC1QhkjFKGxKvNwCbS9olcTR2WtqyFxAuPC4k8SLljdyzVJ9sxcmLlnPVqdoLVLfM7NnE0djFadNWAn8gFKmUe+mMcDoOuHtgelp5No7ic85VLR9F1ju/Br6ReD0VuEzSdwlxTScXpVdlzo+YnHOQR9SRKw6POnLOVYtsUUd+is8551xJ8gLlnHOuJA14gUpEES2VtEjSd+L9l0qepPGSPlXsfpSrVIQR04ZuHASR6VFbm+0aaedcNSlGYUhdpzQW+BjwSeCcIvSjJ8YDBRUojzPqlIowAjZGGWV6tLe3F7mnzrlSUNQjFzN7CTgV+IaCwZIuk7QkRg41AEiqkfQrSY/GGKGpsX1ZKhNP0kRJc+LzaZKukHS3pOWSPi/pF3G9t6rzzr0TJM2V9JCk2yTtFNvnSPq5pAclPSHp4HirjB8Bx8QjwGMk7S/pvtjXeyXtHpefLGmmpFbgTklXStp4A0ZJ1yjc9LCi9cUwcR9q7lz1Kvpf92b2tKQa4B3A8aHJ9p
K0BzBL0vsIw7VHA+PNbH03OXlJ7yHcvXZPQnTRUWb2vRh/9GlJtxCiiQ6Pd+Q9hhB3dEpcfnMz2z+e0jvHzA6V9ENgopl9A0DSEODg2KdDgZ8AR8Xl9wP2NrOVkj5MyO67OcYjHQSclN5hSacSCjYjR47M+z0sZT0tMF6YnHNFL1Bp6glFAzP7h6TlwPuAQ4GLzWx9nLYyj3X93czWSVpCuIX7rbE9FWO0OyGL7/b4YVgDvJBY/k/x50Nkjz0aClwhaTfAgEGJaben+mlmcyX9TtIIQgG7MbUvSWZ2CSGIlokTJ1bE+P/kZQyFFJ3Ucl6onKteRS9QknYFOoCXerD4ejpPUw5Om7YWwMw2SFpnnZ+UGwj7LWCpmR2YZd2p2KMOsr9PPybk+R0paTQwJzEtPc7oSsIR4rFUyQW8fXGNnV+n51z1Kup3UPGI4mLgolhA7iZEBBFP7Y0EHgduB76SGnCQOMW3DJgQnx9FYR4HRkg6MK5zkKSxOZZ5Hdgu8Xoo8Fx8PjnHspcDZwCY2WMF9rVipI6IuhvFN2zYsCL30jlXCopRoFK3w1gK3AHMAs6N034HbBZPy10PTDaztcAM4F/AYkmLgP+K858L/Ebh3kodhXQi3n79aODncZ0LCd8NdWc2sGdqkATwC+Cnkh4hx9Gomb0ItAGXFdLPSrLxJoTTXu12FN/KlfmcwXXOVTqPOhogkrYmfP+1n5m9mmt+jzpyzlULedRR8cQRfm3A9HyKk3POuRIYJFENzOwOYFSx++Gcc+XEj6AASavTXk+WdFGx+lNuUhFGhTzS44483sg5l84LVB9QWpxR+ut8lytXqQijQh7QNe7I442cc+kq4gOyP8Xrmy4FhhNvQmhm/5J0ObAG2Be4Jw59T76+kjCEfmvgn8ApZtYe45gWEi5KbiHc9LCoJJXF9Ubl0k/nXN/wAhVsJWlh4nUtMDM+nw5cYWZXSDoFuBBI5eq9GzjIzDpiwUq+XgxMjSkSPyIE4p4Rl9si04iVYkYdlUJiQyn0wTlXOvwUX5BKWB9vZuOBHyamHQhcG59fRTjySfmjmXWkv455e9ub2dzYfgXwocR812fqhJldYmYTzWziiBEjerVDhSr0FF366bqB6INzrrp4geqd9Dij9Nf5Lueccy6NF6jc7iXk50GIYbo71wLxWqd2SQfHphOAud0sUlR9cXRS8Ci+tGXyiTfyoyjnqot/B5XbVOAySd8lDpLIc7mTgItjgsTTBSxXdnpaOGxa3/bDOVdZPOqoRHnUkXOuWnjUkXPOubLiBco551xJ8gJVxQqNKEqPJ/KoIudcf6q6AqWg6vY7k0IjiiD7tUoeVeSc62tV8UEtabSkx2P80KPAf0uaL2mxpHMT850Y2xZJuiq2jZB0Y5x/vqQPxvZpki6VNEfS05JOz7YeSdtJekbSoDh9SPJ1H+9rX6+y5FTDPjrnqmuY+W6Eod9DCHfS3R8QMFPSh4BXgLMJUUUvq/O28r8BzjezeZJGArcBdXHaHkAD4Tbwj0v6X+B96esxs9djBt+ngZsJ11X9yczWJTvYV1FHxfoA98LhnOtL1VSglpvZ/ZJ+BRwGPBLbtyUUr30IUUUvA5hZ6r7jhxJu855azxBJ28bnt8Rb0q+V9BKwI/CRLOuZAXyPUKBOBr6c3kEzuwS4BMIw857uaL6XDvR1QRmoSxa8EDpXHaqpQKXihQT81Mz+LzlR0tQsy20GHGBma9LmB1ibaOqgm/fTzO6JpxoPAWrM7NHCuu+cc9WlKr6DSnMbcErqKEjSzpLeAbQCX5C0Q2xPneKbRUiTILaPz7H+bOsBuJIQPHtZX+xIJoUexfQmnqjQqKK+4heXO1cdqukICgAzmyWpDrgvfuiuBo43s6WSmoC5kjoIpwAnA6cDv1W4fcbmwF3Aad2sP9t6AK4BziPcB6roevJB7/FEzrmB4lFHA0jS0cDhZnZCrnk96sg5Vy2yRR15gRogkqYDnwQ+ZWZP5DH/Cm
B5N7MMB17uo+6Vqkrfx0rfP/B9rAQDsX+jzGyTm+B5gSpTkhZk+oujklT6Plb6/oHvYyUo5v5V4yAJ55xzZcALlHPOuZLkBap8XVLsDgyASt/HSt8/8H2sBEXbP/8OyjnnXEnyIyjnnHMlyQuUc865kuQFqoRJqpV0u6Qn48+MeUKSTorzPCnppER7k6RnJa0euF7nR9In4i1QnpJ0VobpW0q6Pk5/QNLoxLQfxPbHJX18IPudr57un6QdJM2WtFrSRQPd70L0Yh8/JukhSUviz48MdN/z0Yv921/SwvhYJOnIge57vnrzexinj4z/V8/slw4WcsM6fwzsA/gFcFZ8fhbw8wzz1AJPx5/D4vNhcdoBwE7A6mLvS1qfa4B/ArsCWwCLgD3T5vkacHF8fixwfXy+Z5x/S2BMXE9NsfepD/dvG6CeEKd1UbH3pZ/2cV/gXfH5OOC5Yu9PH+/f1sDm8flOwEup16X06M0+JqbfAPwROLM/+uhHUKXtcOCK+PwK4IgM83wcuN3MVppZO3A78AkAM7vfzF4YkJ4WZn/gKTN72szeBq4j7GtSct9vAD6qEJ54OHCdma01s2eAp+L6SkmP98/M3jCzecAaSltv9vERM3s+ti8FtpK05YD0On+92b83zWx9bB8MlOpItN78HiLpCOAZwr9hv/ACVdp2TBSY/xDuN5VuZ+DZxOt/x7ZSlk+fN84Tf9lfBXbIc9li683+lYu+2sejgIct3FetlPRq/yR9QNJSYAlwWqJglZIe76PC3SC+D5xLP6q6NPNSI+kO4J0ZJjUmX5iZSSrVv8ScK5ikscDPCTcQrShm9gAwNt454QpJf7e0e8qVuWmEO42vVj/eQNQLVJGZ2aHZpkl6UdJOZvaCpNS57HTPAYckXr8bmNOnnex7zwG7JF6/O7ZlmuffkjYHhgKv5LlssfVm/8pFr/ZR0ruBm4ATzeyf/d/dgvXJv6GZtcVBSuOAUrs9QW/28QPA0ZJ+AWwPbJC0xsz6dGCPn+IrbTOB1Ki8k4A/Z5jnNuAwScPiKL/DYlspmw/sJmmMpC0IX77OTJsnue9HA60WvpWdCRwbRxeNAXYDHhygfuerN/tXLnq8j5K2B24hDAC6Z8B6XJje7N+Y+GGOpFHAHsCygel2QXq8j2Z2sJmNNrPRwAXAT/q6OAE+iq+UH4Tz2XcCTwJ3ALWxfSIwIzHfKYTBAk8BJyfaf0E4r7wh/pxW7H1K9O1TwBOEUUSNse1HwOfi88GE0UFPEQrQrollG+NyjwOfLPa+9MP+LQNWEm6m+W/SRlaVyqOn+wicDbwBLEw83lHs/enD/TuBMHBgIfAwcESx96U//p8m1jGNfhrF51FHzjnnSpKf4nPOOVeSvEA555wrSV6gnHPOlSQvUM4550qSFyjnnHMlyQuUc865kuQFyjnnXEn6/w5qrJHVV6MZAAAAAElFTkSuQmCC\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# Order the features by mean permutation importance (argsort default order),\n",
"# then pull out the matching importance rows and column labels.\n",
"sorted_idx = result.importances_mean.argsort()\n",
"ranked_importances = result.importances[sorted_idx].T\n",
"ranked_labels = df_item_features.columns[sorted_idx]\n",
"\n",
"# One horizontal box per feature; each box shows the spread of that feature's\n",
"# importance values (presumably one value per permutation repeat - confirm\n",
"# against the cell that computes `result`).\n",
"fig, ax = plt.subplots()\n",
"ax.boxplot(ranked_importances, vert=False, labels=ranked_labels)\n",
"ax.set_title(\"Permutation Importances (train set)\")\n",
"fig.tight_layout()\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.4"
},
"toc": {
"base_numbering": 1,
"nav_menu": {},
"number_sections": true,
"sideBar": true,
"skip_h1_title": false,
"title_cell": "Table of Contents",
"title_sidebar": "Contents",
"toc_cell": false,
"toc_position": {},
"toc_section_display": true,
"toc_window_display": false
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment