Skip to content

Instantly share code, notes, and snippets.

@jobergum
Last active January 31, 2020 12:58
Show Gist options
  • Save jobergum/53b61b5318f14f406efe85b5c98fd0b1 to your computer and use it in GitHub Desktop.
Save jobergum/53b61b5318f14f406efe85b5c98fd0b1 to your computer and use it in GitHub Desktop.
Exploring the Kaggle Home Depot Product Relevance Competition Data https://www.kaggle.com/c/home-depot-product-search-relevance
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"train = pd.read_csv('data/train.csv', encoding=\"ISO-8859-1\")\n",
"training_examples = train.shape[0]\n",
"test = pd.read_csv('data/test.csv', encoding=\"ISO-8859-1\")"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"product_descriptions = pd.read_csv('data/product_descriptions.csv')\n",
"product_attributes = pd.read_csv('data/attributes.csv')"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"all = pd.concat((train, test), axis=0, ignore_index=True,sort=True)\n",
"all = pd.merge(all, product_descriptions, how='left', on='product_uid')"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>product_title</th>\n",
" <th>product_uid</th>\n",
" <th>relevance</th>\n",
" <th>search_term</th>\n",
" <th>product_description</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2</td>\n",
" <td>Simpson Strong-Tie 12-Gauge Angle</td>\n",
" <td>100001</td>\n",
" <td>3.00</td>\n",
" <td>angle bracket</td>\n",
" <td>Not only do angles make joints stronger, they ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>3</td>\n",
" <td>Simpson Strong-Tie 12-Gauge Angle</td>\n",
" <td>100001</td>\n",
" <td>2.50</td>\n",
" <td>l bracket</td>\n",
" <td>Not only do angles make joints stronger, they ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>9</td>\n",
" <td>BEHR Premium Textured DeckOver 1-gal. #SC-141 ...</td>\n",
" <td>100002</td>\n",
" <td>3.00</td>\n",
" <td>deck over</td>\n",
" <td>BEHR Premium Textured DECKOVER is an innovativ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>16</td>\n",
" <td>Delta Vero 1-Handle Shower Only Faucet Trim Ki...</td>\n",
" <td>100005</td>\n",
" <td>2.33</td>\n",
" <td>rain shower head</td>\n",
" <td>Update your bathroom with the Delta Vero Singl...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>17</td>\n",
" <td>Delta Vero 1-Handle Shower Only Faucet Trim Ki...</td>\n",
" <td>100005</td>\n",
" <td>2.67</td>\n",
" <td>shower only faucet</td>\n",
" <td>Update your bathroom with the Delta Vero Singl...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>240755</th>\n",
" <td>240756</td>\n",
" <td>stufurhome Norma 24 in. W x 16 in. D x 34 in. ...</td>\n",
" <td>224424</td>\n",
" <td>NaN</td>\n",
" <td>24 whtie storage cabinet</td>\n",
" <td>Create a neat yet stylish storage space for or...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>240756</th>\n",
" <td>240757</td>\n",
" <td>Home Decorators Collection 49 in. D Alessandro...</td>\n",
" <td>224425</td>\n",
" <td>NaN</td>\n",
" <td>adirondeck cusion</td>\n",
" <td>Our Bullnose Adirondack Chair Cushions fit Adi...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>240757</th>\n",
" <td>240758</td>\n",
" <td>Simpson Strong-Tie HB 3-1/2 x 14 in. Top Flang...</td>\n",
" <td>224426</td>\n",
" <td>NaN</td>\n",
" <td>hb</td>\n",
" <td>Joist hangers are designed to provide support ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>240758</th>\n",
" <td>240759</td>\n",
" <td>1/4 in. -20 tpi x 1-1/2 in. Stainless Steel Bu...</td>\n",
" <td>224427</td>\n",
" <td>NaN</td>\n",
" <td>hex sockets</td>\n",
" <td>These socket cap screws are ideal for applicat...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>240759</th>\n",
" <td>240760</td>\n",
" <td>Bosch 4 in. Bi-Metal Hole Saw</td>\n",
" <td>224428</td>\n",
" <td>NaN</td>\n",
" <td>4 inch hole saw</td>\n",
" <td>The Bosch quick change bi-metal hole saws feat...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>240760 rows × 6 columns</p>\n",
"</div>"
],
"text/plain": [
" id product_title \\\n",
"0 2 Simpson Strong-Tie 12-Gauge Angle \n",
"1 3 Simpson Strong-Tie 12-Gauge Angle \n",
"2 9 BEHR Premium Textured DeckOver 1-gal. #SC-141 ... \n",
"3 16 Delta Vero 1-Handle Shower Only Faucet Trim Ki... \n",
"4 17 Delta Vero 1-Handle Shower Only Faucet Trim Ki... \n",
"... ... ... \n",
"240755 240756 stufurhome Norma 24 in. W x 16 in. D x 34 in. ... \n",
"240756 240757 Home Decorators Collection 49 in. D Alessandro... \n",
"240757 240758 Simpson Strong-Tie HB 3-1/2 x 14 in. Top Flang... \n",
"240758 240759 1/4 in. -20 tpi x 1-1/2 in. Stainless Steel Bu... \n",
"240759 240760 Bosch 4 in. Bi-Metal Hole Saw \n",
"\n",
" product_uid relevance search_term \\\n",
"0 100001 3.00 angle bracket \n",
"1 100001 2.50 l bracket \n",
"2 100002 3.00 deck over \n",
"3 100005 2.33 rain shower head \n",
"4 100005 2.67 shower only faucet \n",
"... ... ... ... \n",
"240755 224424 NaN 24 whtie storage cabinet \n",
"240756 224425 NaN adirondeck cusion \n",
"240757 224426 NaN hb \n",
"240758 224427 NaN hex sockets \n",
"240759 224428 NaN 4 inch hole saw \n",
"\n",
" product_description \n",
"0 Not only do angles make joints stronger, they ... \n",
"1 Not only do angles make joints stronger, they ... \n",
"2 BEHR Premium Textured DECKOVER is an innovativ... \n",
"3 Update your bathroom with the Delta Vero Singl... \n",
"4 Update your bathroom with the Delta Vero Singl... \n",
"... ... \n",
"240755 Create a neat yet stylish storage space for or... \n",
"240756 Our Bullnose Adirondack Chair Cushions fit Adi... \n",
"240757 Joist hangers are designed to provide support ... \n",
"240758 These socket cap screws are ideal for applicat... \n",
"240759 The Bosch quick change bi-metal hole saws feat... \n",
"\n",
"[240760 rows x 6 columns]"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"all"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.4"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment