Created
October 9, 2021 23:46
-
-
Save nogawanogawa/70bd4970e8b69b5f4b220d7e1dd8b900 to your computer and use it in GitHub Desktop.
replay_method_sample.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"nbformat": 4, | |
"nbformat_minor": 0, | |
"metadata": { | |
"colab": { | |
"name": "replay_method_sample.ipynb", | |
"provenance": [], | |
"collapsed_sections": [], | |
"authorship_tag": "ABX9TyOuiT6HnrlmBg0hyGxEMoK/", | |
"include_colab_link": true | |
}, | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3" | |
}, | |
"language_info": { | |
"name": "python" | |
} | |
}, | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "view-in-github", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"<a href=\"https://colab.research.google.com/gist/nogawanogawa/70bd4970e8b69b5f4b220d7e1dd8b900/replay_method_sample.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "4Gi-po9aa4DA" | |
}, | |
"source": [ | |
"import pandas as pd\n", | |
"import numpy as np\n", | |
"from scipy import stats\n", | |
"from sklearn.metrics import precision_score " | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "XphYUBCBa5tM" | |
}, | |
"source": [ | |
"np.random.seed(seed=42)" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "ex6cosC5a62F" | |
}, | |
"source": [ | |
"" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "YoMGcc3da-3N" | |
}, | |
"source": [ | |
"## Step1\n", | |
"ランダムにランキングが生成され、そのランキングがランダムにクリックされたときのログを生成する" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "bIBHjGLQa9NU" | |
}, | |
"source": [ | |
"# アルファベット10文字(A〜J)をアイテムプールとする\n", | |
"item_list = ['A', 'B', 'C', 'D', 'E', \n", | |
" 'F', 'G', 'H', 'I', 'J']\n", | |
"\n", | |
"T = 100000" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 419 | |
}, | |
"id": "hRXgr1Dma9lb", | |
"outputId": "1284a9cf-42e1-4a4f-87dc-3d3924b3bd60" | |
}, | |
"source": [ | |
"I = pd.DataFrame({'item_i' : np.random.choice(item_list, T)})\n", | |
"\n", | |
"#check log\n", | |
"I" | |
], | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>item_i</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>G</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>D</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>H</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>E</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>G</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>...</th>\n", | |
" <td>...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>99995</th>\n", | |
" <td>E</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>99996</th>\n", | |
" <td>D</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>99997</th>\n", | |
" <td>A</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>99998</th>\n", | |
" <td>D</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>99999</th>\n", | |
" <td>A</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>100000 rows × 1 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" item_i\n", | |
"0 G\n", | |
"1 D\n", | |
"2 H\n", | |
"3 E\n", | |
"4 G\n", | |
"... ...\n", | |
"99995 E\n", | |
"99996 D\n", | |
"99997 A\n", | |
"99998 D\n", | |
"99999 A\n", | |
"\n", | |
"[100000 rows x 1 columns]" | |
] | |
}, | |
"metadata": {}, | |
"execution_count": 4 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 419 | |
}, | |
"id": "Y_VBURBjbEsc", | |
"outputId": "edb91390-baf2-424b-f066-5cc92e783ed8" | |
}, | |
"source": [ | |
"J = pd.DataFrame({'item_j' : np.random.choice(item_list, T), 'click': np.random.binomial(1,0.2,size=T)})\n", | |
"\n", | |
"#check log\n", | |
"J" | |
], | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>item_j</th>\n", | |
" <th>click</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>G</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>A</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>D</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>G</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>E</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>...</th>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>99995</th>\n", | |
" <td>I</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>99996</th>\n", | |
" <td>J</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>99997</th>\n", | |
" <td>H</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>99998</th>\n", | |
" <td>G</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>99999</th>\n", | |
" <td>E</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>100000 rows × 2 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" item_j click\n", | |
"0 G 1\n", | |
"1 A 0\n", | |
"2 D 0\n", | |
"3 G 0\n", | |
"4 E 1\n", | |
"... ... ...\n", | |
"99995 I 0\n", | |
"99996 J 0\n", | |
"99997 H 0\n", | |
"99998 G 1\n", | |
"99999 E 0\n", | |
"\n", | |
"[100000 rows x 2 columns]" | |
] | |
}, | |
"metadata": {}, | |
"execution_count": 5 | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "_LqXHTlybGo7" | |
}, | |
"source": [ | |
"## Step2\n", | |
"Step1で作成されたログについて、適合率(Precision)を計算" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "V8a3AfaUbLys", | |
"outputId": "8100a38f-6d39-4049-a9b9-713eaba0a047" | |
}, | |
"source": [ | |
"df = pd.concat([I, J], axis=1)\n", | |
"df[df['item_i'] == df['item_j']]['click']" | |
], | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"0 1\n", | |
"21 0\n", | |
"39 0\n", | |
"41 0\n", | |
"56 0\n", | |
" ..\n", | |
"99894 0\n", | |
"99915 0\n", | |
"99916 1\n", | |
"99919 0\n", | |
"99934 0\n", | |
"Name: click, Length: 9962, dtype: int64" | |
] | |
}, | |
"metadata": {}, | |
"execution_count": 6 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "yd5HDB2SbIPg", | |
"outputId": "eb0f2444-893c-472c-b83d-8133311db7c9" | |
}, | |
"source": [ | |
"y_pred = df[df['item_i'] == df['item_j']]['click']\n", | |
"y_true = [1] * len(y_pred)\n", | |
"\n", | |
"precision_score(y_pred, y_true) #予測したもののうち、クリックされた回数の割合(注: 変数名は役割と逆で、y_predが実際のクリック、y_trueが予測。precision_score(y_true, y_pred)の引数順としては正しい)" | |
], | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"0.19795221843003413" | |
] | |
}, | |
"metadata": {}, | |
"execution_count": 7 | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "Cb_LJbJmbPbU" | |
}, | |
"source": [ | |
"## Step3\n", | |
"新しくランキングを作る" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 419 | |
}, | |
"id": "eltIZ0SabMaC", | |
"outputId": "41b0e0f2-fff2-4f63-c272-6f9129dd8495" | |
}, | |
"source": [ | |
"J_ = pd.DataFrame({'item_k' : np.random.choice(item_list, T)})\n", | |
"\n", | |
"#check log\n", | |
"J_" | |
], | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>item_k</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>B</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>C</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>D</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>B</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>C</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>...</th>\n", | |
" <td>...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>99995</th>\n", | |
" <td>G</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>99996</th>\n", | |
" <td>I</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>99997</th>\n", | |
" <td>F</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>99998</th>\n", | |
" <td>C</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>99999</th>\n", | |
" <td>G</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>100000 rows × 1 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" item_k\n", | |
"0 B\n", | |
"1 C\n", | |
"2 D\n", | |
"3 B\n", | |
"4 C\n", | |
"... ...\n", | |
"99995 G\n", | |
"99996 I\n", | |
"99997 F\n", | |
"99998 C\n", | |
"99999 G\n", | |
"\n", | |
"[100000 rows x 1 columns]" | |
] | |
}, | |
"metadata": {}, | |
"execution_count": 8 | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "Pt2MIrxdbVo1" | |
}, | |
"source": [ | |
"## Step4\n", | |
"Step1で作ったログとStep3で作った新しいランキングについて適合率(Precision)を計算" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "-sMkCVcpbWDV", | |
"outputId": "b1086ca4-6c9c-490a-af08-eb8c0c367007" | |
}, | |
"source": [ | |
"df = pd.concat([I, J, J_], axis=1)\n", | |
"df[(df['item_i'] == df['item_j']) & (df['item_i'] == df['item_k'])]['click']" | |
], | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"41 0\n", | |
"371 0\n", | |
"621 0\n", | |
"694 1\n", | |
"818 0\n", | |
" ..\n", | |
"99362 0\n", | |
"99408 0\n", | |
"99525 1\n", | |
"99785 0\n", | |
"99919 0\n", | |
"Name: click, Length: 990, dtype: int64" | |
] | |
}, | |
"metadata": {}, | |
"execution_count": 9 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "vnc95ysrcyka", | |
"outputId": "a7da7aca-cc06-40f3-841e-506a213f76ca" | |
}, | |
"source": [ | |
"y_pred = df[(df['item_i'] == df['item_j']) & (df['item_i'] == df['item_k'])]['click']\n", | |
"y_true = [1] * len(y_pred)\n", | |
"\n", | |
"precision_score(y_pred, y_true) #予測したもののうち、クリックされた回数の割合(注: 変数名は役割と逆で、y_predが実際のクリック、y_trueが予測。precision_score(y_true, y_pred)の引数順としては正しい)" | |
], | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"0.2191919191919192" | |
] | |
}, | |
"metadata": {}, | |
"execution_count": 10 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "boxvUqU_c1AI" | |
}, | |
"source": [ | |
"" | |
], | |
"execution_count": null, | |
"outputs": [] | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment