Skip to content

Instantly share code, notes, and snippets.

@esuji5
Created June 28, 2018 12:27
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save esuji5/a24539329ea6043dead011a95f92f7f0 to your computer and use it in GitHub Desktop.
Save esuji5/a24539329ea6043dead011a95f92f7f0 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 北米メルカリの購入金額予測コンペ\n",
"\n",
"サイト:https://www.kaggle.com/c/mercari-price-suggestion-challenge/data\n",
"\n",
"- 英語の自然言語処理をやっていくのが大変そうだったので、そこ以外で頑張ってみた\n",
"- 商品名、商品説明以外の要素で試したが、精度は上がらず\n",
" - カテゴリやブランド、商品の状態だけで判断しているので、それはそう、という気持ち\n",
"- 元のコンペは、参加者ごとにVMが用意され、使えるマシーンリソースが制限された状態でのコンペだったので、その中での工夫を読んだ方が勉強になりそうとは思った"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"import pandas as pd\n",
"\n",
"# Directory holding the Kaggle Mercari files. Defaults to the original\n",
"# location; set MERCARI_DATA_DIR to run the notebook on another machine.\n",
"DATA_DIR = os.environ.get(\n",
"    'MERCARI_DATA_DIR',\n",
"    '/Users/esuji/Dropbox/program/notebooks/data/mercari',\n",
")\n",
"path = os.path.join(DATA_DIR, 'train.tsv')\n",
"\n",
"# train.tsv is tab-separated.\n",
"df = pd.read_csv(path, sep='\\t')"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>train_id</th>\n",
" <th>name</th>\n",
" <th>item_condition_id</th>\n",
" <th>category_name</th>\n",
" <th>brand_name</th>\n",
" <th>price</th>\n",
" <th>shipping</th>\n",
" <th>item_description</th>\n",
" <th>category1</th>\n",
" <th>category2</th>\n",
" <th>category3</th>\n",
" <th>price_log</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1482530</th>\n",
" <td>1482530</td>\n",
" <td>Free People Inspired Dress</td>\n",
" <td>2</td>\n",
" <td>Women/Dresses/Mid-Calf</td>\n",
" <td>Free People</td>\n",
" <td>20.0</td>\n",
" <td>1</td>\n",
" <td>Lace, says size small but fits medium perfectl...</td>\n",
" <td>Women</td>\n",
" <td>Dresses</td>\n",
" <td>Mid-Calf</td>\n",
" <td>2.995732</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1482531</th>\n",
" <td>1482531</td>\n",
" <td>Little mermaid handmade dress</td>\n",
" <td>2</td>\n",
" <td>Kids/Girls 2T-5T/Dresses</td>\n",
" <td>Disney</td>\n",
" <td>14.0</td>\n",
" <td>0</td>\n",
" <td>Little mermaid handmade dress never worn size 2t</td>\n",
" <td>Kids</td>\n",
" <td>Girls 2T-5T</td>\n",
" <td>Dresses</td>\n",
" <td>2.639057</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1482532</th>\n",
" <td>1482532</td>\n",
" <td>21 day fix containers and eating plan</td>\n",
" <td>2</td>\n",
" <td>Sports &amp; Outdoors/Exercise/Fitness accessories</td>\n",
" <td>not_setting</td>\n",
" <td>12.0</td>\n",
" <td>0</td>\n",
" <td>Used once or twice, still in great shape.</td>\n",
" <td>Sports &amp; Outdoors</td>\n",
" <td>Exercise</td>\n",
" <td>Fitness accessories</td>\n",
" <td>2.484907</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1482533</th>\n",
" <td>1482533</td>\n",
" <td>World markets lanterns</td>\n",
" <td>3</td>\n",
" <td>Home/Home Décor/Home Décor Accents</td>\n",
" <td>not_setting</td>\n",
" <td>45.0</td>\n",
" <td>1</td>\n",
" <td>There is 2 of each one that you see! So 2 red ...</td>\n",
" <td>Home</td>\n",
" <td>Home Décor</td>\n",
" <td>Home Décor Accents</td>\n",
" <td>3.806662</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1482534</th>\n",
" <td>1482534</td>\n",
" <td>Brand new lux de ville wallet</td>\n",
" <td>1</td>\n",
" <td>Women/Women's Accessories/Wallets</td>\n",
" <td>not_setting</td>\n",
" <td>22.0</td>\n",
" <td>0</td>\n",
" <td>New with tag, red with sparkle. Firm price, no...</td>\n",
" <td>Women</td>\n",
" <td>Women's Accessories</td>\n",
" <td>Wallets</td>\n",
" <td>3.091042</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" train_id name item_condition_id \\\n",
"1482530 1482530 Free People Inspired Dress 2 \n",
"1482531 1482531 Little mermaid handmade dress 2 \n",
"1482532 1482532 21 day fix containers and eating plan 2 \n",
"1482533 1482533 World markets lanterns 3 \n",
"1482534 1482534 Brand new lux de ville wallet 1 \n",
"\n",
" category_name brand_name price \\\n",
"1482530 Women/Dresses/Mid-Calf Free People 20.0 \n",
"1482531 Kids/Girls 2T-5T/Dresses Disney 14.0 \n",
"1482532 Sports & Outdoors/Exercise/Fitness accessories not_setting 12.0 \n",
"1482533 Home/Home Décor/Home Décor Accents not_setting 45.0 \n",
"1482534 Women/Women's Accessories/Wallets not_setting 22.0 \n",
"\n",
" shipping item_description \\\n",
"1482530 1 Lace, says size small but fits medium perfectl... \n",
"1482531 0 Little mermaid handmade dress never worn size 2t \n",
"1482532 0 Used once or twice, still in great shape. \n",
"1482533 1 There is 2 of each one that you see! So 2 red ... \n",
"1482534 0 New with tag, red with sparkle. Firm price, no... \n",
"\n",
" category1 category2 category3 \\\n",
"1482530 Women Dresses Mid-Calf \n",
"1482531 Kids Girls 2T-5T Dresses \n",
"1482532 Sports & Outdoors Exercise Fitness accessories \n",
"1482533 Home Home Décor Home Décor Accents \n",
"1482534 Women Women's Accessories Wallets \n",
"\n",
" price_log \n",
"1482530 2.995732 \n",
"1482531 2.639057 \n",
"1482532 2.484907 \n",
"1482533 3.806662 \n",
"1482534 3.091042 "
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Drop zero-price rows (presumably so that np.log below stays finite).\n",
"df = df[df['price'] != 0]\n",
"df.brand_name = df.brand_name.fillna('not_setting')\n",
"\n",
"# Split 'a/b/c/...' once instead of re-splitting every row inside three\n",
"# separate apply() lambdas. Only the first three levels are kept, and a level\n",
"# that does not exist becomes ''. A missing category_name (NaN), if any, ends\n",
"# up as the literal string 'nan' in category1 because of astype(str).\n",
"category_levels = df.category_name.astype(str).str.split('/')\n",
"df['category1'] = category_levels.str[0]\n",
"df['category2'] = category_levels.str[1].fillna('')\n",
"df['category3'] = category_levels.str[2].fillna('')\n",
"\n",
"# Natural log of the price.\n",
"df['price_log'] = np.log(df.price)\n",
"df.tail()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<matplotlib.axes._subplots.AxesSubplot at 0x1335b2550>"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYgAAAD8CAYAAABthzNFAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4wLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvFvnyVgAAFRJJREFUeJzt3X+w5XV93/HnK0vwBxXUQK1dWBeE0u5Mf0hu0JnG1E4T3dUAhiSGTToxlmFLG9o6mU5do5PaPzo1bZNpTUjIWhnUGggiWrasg9Cpkj8wsBASQCSshJRFIqtkIBpHArz7x/2uHG7Ovfd77t7PPed7z/Mxc+ee87nnfL/v+z3nntf9fD7fH6kqJEla6numXYAkaTYZEJKksQwISdJYBoQkaSwDQpI0lgEhSRrLgJAkjWVASJLGMiAkSWMdN+0CjsXJJ59c27dvn3YZkjQod95559er6pTVHjfogNi+fTsHDx6cdhmSNChJ/qTP4xxikiSNZUBIksYyICRJYxkQkqSxZiYgkrwpye8muSLJm6ZdjyTNu6YBkeTKJI8nuXdJ+84kDyQ5lGRv11zAN4EXA4db1iVJWl3rHsRVwM7RhiRbgMuBXcAOYHeSHcDvVtUu4D3Af2hclyRpFU0DoqpuBZ5Y0nwucKiqHqqqp4FrgAuq6rnu538GvKhlXZKk1U3jQLmtwCMj9w8Dr09yIfAW4OXAry/35CR7gD0A27Zta1imJM23mTmSuqquB67v8bh9wD6AhYWFal2XJM2raezF9Chw2sj9U7u23pKcl2Tfk08+ua6FSZKeN42AuAM4K8npSY4HLgJumGQBVbW/qvacdNJJTQqUJLXfzfVq4Dbg7CSHk1xcVc8AlwE3AfcD11bVfRMud116ENv33nhMz5ekzazpHERV7V6m/QBw4BiWux/Yv7CwcMlalyFJWtnMHEk9CecgJKm9QQaEcxCS1N4gA0KS1N4gA8IhJklqb5AB4RCTJLU3yICQJLU3yIBwiEmS2htkQDjEJEntDTIgJEntzX1AeLoNSRpvkAHhHIQktTfIgHAOQpLaG2RASJLaMyBwHkKSxpnbgDAUJGllgwwIJ6klqb1BBoST1JLU3iADQpLUngEhSRrLgJAkjWVASJLGMiAkSWMNMiDczVWS2htkQLibqyS1N8iAaMEjqyXphQwISdJYBoQkaSwDQpI0lgEhSRrLgJAkjTVTAZHkhCQHk/zotGuRpHnXNCCSXJnk8ST3LmnfmeSBJIeS7B350XuAa1vWJEnqp3UP4ipg52hDki3A5cAuYAewO8mOJD8CfAl4vHFNkqQemgZEVd0KPLGk+VzgUFU9VFVPA9cAFwBvAt4A/DRwSZKpDH95wJwkLTpuCuvcCjwycv8w8Pqqugwgyc8BX6+q58Y9OckeYA/Atm3b2lYqSXNspiapAarqqqr63yv8fF9VLVTVwimnnLKRpUnSXJlGQDwKnDZy/9SurTfP5ipJ7U0jIO4AzkpyepLjgYuAGyZZgGdzlaT2Wu/mejVwG3B2ksNJLq6qZ4DLgJuA+4Frq+q+CZdrD0KSGms6SV1Vu5dpPwAcOIbl7gf2LywsXLLWZUiSVjZzk9R9tOpBuIurJD1vkAHReg7CoJCkgQaEcxCS1N4gA8K9mCSpvUEGhCSpPQNCkjTWIAPCOQhJam+QAeEchCS1N8iAkCS1N8iAcIhJktobZEA4xCRJ7Q0yICRJ7RkQK/CUG5LmmQEhSRprkAHhJLUktTfIgNjISWqHmSTNq0EGhCSpPQNCkjSWASFJGsuAkCSNZUBIksYaZEC4m6sktTfIgJjWuZjc5VXSPBlkQEyD4SBp3hgQkqSxDAhJ0lgGhCRpLANCkjSWASFJGqtXQCT5u60LSfJ3klyR5Lok/6L1+lbjXkuS5l3fHsRvJLk9yb9M0vvggyRXJnk8yb1L2ncmeSDJoSR7Aarq/qq6FHgH8A97/w
aSpCZ6BURVvRH4GeA04M4kv53kR3o89Spg52hDki3A5cAuYAewO8mO7mfnAzcCB/r+ApKkNnrPQVTVg8D7gfcA/wj4UJIvJ7lwhefcCjyxpPlc4FBVPVRVTwPXABd0j7+hqnaxGEaD5fCUpM3guD4PSvL3gHcBbwNuBs6rqruS/E3gNuD6Cda5FXhk5P5h4PVJ3gRcCLyIFXoQSfYAewC2bds2wWolSZPoFRDArwH/A/jFqvr20caq+mqS969HIVX1eeDzPR63D9gHsLCwUOux7kkd7SE8/MG3LfszSRq6vgHxNuDbVfUsQJLvAV5cVX9RVR+fcJ2PsjiXcdSpXVtvSc4DzjvzzDMnXLUkqa++cxC3AC8Zuf/Srm0t7gDOSnJ6kuOBi4AbJlnAtM7mKknzpG9AvLiqvnn0Tnf7pas9KcnVLM5RnJ3kcJKLq+oZ4DLgJuB+4Nqqum+Sor0ehCS113eI6VtJzqmquwCSfD/w7VWeQ1XtXqb9AMewK2tV7Qf2LywsXLLWZaynleYkJGmo+gbEu4FPJvkqEOBvAD/VrKpVOAchSe31CoiquiPJ3wbO7poeqKq/bFfWqvXMVA9Ckjajvj0IgB8AtnfPOScJVfWxJlVJkqau74FyHwdeC9wNPNs1FzCVgHCISZLa69uDWAB2VNVUDkxbyiEmSWqv726u97I4MS1JmhN9exAnA19KcjvwnaONVXV+k6pW4RCTJLXXNyA+0LKISTnEJEnt9d3N9QtJXgOcVVW3JHkpsKVtaZKkaep7ydFLgOuA3+qatgKfaVWUJGn6+k5S/zyLlwF9Cr578aC/3qqo1XguJklqr29AfKe7+hsASY5j8TiIqfBsrpLUXt+A+EKSXwRe0l2L+pPA/nZlSZKmrW9A7AWOAPcA/5zFM7Guy5XkJEmzqVdAVNVzVfXhqvrJqvqJ7vZMHFU967wEqaSh6nsupj9mzJxDVZ2x7hX14IFyktTeJOdiOurFwE8Cr1z/cvrxQDlJaq/vENM3Rr4erar/Bnj5NEnaxPoeKHfOyNdCkkuZ7FoSGuG8hKQh6Psh/ysjt58BHgbese7VzCg/0CXNo77nYvrHrQuRJM2Wvnsx/cJKP6+qX12fcjaXpT2P7Xtv5OEPOnUjaRgm2YvpB4AbuvvnAbcDD7YoajXu5ipJ7fUNiFOBc6rqzwGSfAC4sar+aavCVuJurpLUXt9TbbwKeHrk/tNdm9bIiW9Js65vD+JjwO1JPt3dfzvw0TYlbU4GgqSh6bsX039M8lngjV3Tu6rq99uVJUmatr5DTAAvBZ6qqv8OHE5yeqOaJEkzoO+R1P8eeA/w3q7pe4H/2aooSdL09e1B/BhwPvAtgKr6KvCyVkVJkqav7yT101VVSQogyQktiknydhZPAngi8JGq+lyL9UiSVte3B3Ftkt8CXp7kEuAW4MN9npjkyiSPJ7l3SfvOJA8kOZRkL0BVfaaqLgEuBX6q/68hSVpvfU/3/V+B64BPAWcDv1RVv9ZzHVcBO0cbkmwBLgd2ATuA3Ul2jDzk/d3PJUlTsuoQU/dhfkt3wr6bJ11BVd2aZPuS5nOBQ1X1ULeOa4ALktwPfBD4bFXdNem6JEnrZ9UeRFU9CzyX5KR1XO9W4JGR+4e7tn8F/DDwE901J/6KJHuSHExy8MiRI+tYUj9DPeBtqHVLmp6+k9TfBO5JcjPdnkwAVfWv17OYqvoQ8KFVHrMP2AewsLDwV66TLUlaH30D4vrua708Cpw2cv/Urq0Xz+YqSe2tGBBJtlXV/6uq9T7v0h3AWd3R2I8CFwE/3ffJns1VktpbbQ7iM0dvJPnUWlaQ5GrgNuDsJIeTXFxVzwCXATcB9wPXVtV9EyzzvCT7nnzyybWUNCjOHUialtWGmDJy+4y1rKCqdi/TfgA4sMZl2oOQpMZW60HUMrenap56EJI0Lav1IP5+kqdY7Em8pLtNd7+q6sSm1S1jVnoQDv9I2sxW7EFU1ZaqOrGqXlZVx3
W3j96fSjhsRgaNpFk0yfUgZoZDTJLU3iADoqr2V9Wek05az4O7JUmjBhkQkqT2BhkQDjFJUnuDDAiHmJzYltTeIANiVvmhLWkzMSA2AYNJUguDDAjnICSpvUEGxGaeg7A3IGlWDDIgNN6xhovhJGmUASFJGmuQAeEcRD/2CCQdi0EGxGaeg5CkWTHIgJgXfXoA9hIktWJAzCA/9CXNAgNCkjSWAdGIvQBJQ2dADISBI2mjDTIg3M118zD4pNk1yIBwN1dJam+QASFJas+A0GA5PCW1ZUBo0AwJqR0DYg75oSqpDwNCkjSWAaGJ2QOR5oMBMSB+ME/ObSat3cwERJIzknwkyXXTrkWS1DggklyZ5PEk9y5p35nkgSSHkuwFqKqHqurilvVIkvpr3YO4Ctg52pBkC3A5sAvYAexOsqNxHVoDh2ek+dY0IKrqVuCJJc3nAoe6HsPTwDXABX2XmWRPkoNJDh45cmQdq5UkjZrGHMRW4JGR+4eBrUm+L8kVwOuSvHe5J1fVvqpaqKqFU045pXWtkjS3ZmaSuqq+UVWXVtVrq+o/rfTYoZ3NdbmhmrUO4Tj0I2kjTCMgHgVOG7l/atfWm2dzlaT2phEQdwBnJTk9yfHARcANkyxgaD2IlYz2BtazZzBLvYxZqkVSf613c70auA04O8nhJBdX1TPAZcBNwP3AtVV13yTLtQchSe0d13LhVbV7mfYDwIG1LjfJecB5Z5555loXMTXT+G/a/+AlrcXMTFJPwh6EJLU3yICQJLU3yIDYTJPUR01zGKjVRPlazUINkgYaEA4xSVJ7gwwISVJ7gwyIzTjENC2thnMcJpKGb5AB4RCTJLU3yICQJLVnQEiSxhpkQMzbHMS0x/P7rn/p49b7LLZDstl+x832+6ifQQaEcxCS1N4gA0KS1J4BIUkay4CQJI01yICYt0nqUatNFm7fe+MxTyiOW4bniuqvT42e9l1DMMiAcJJaktobZEBIktozICRJYxkQkqSxDAhJ0lgGhCRprEEGxDzs5jqru0qOs151jFtOy99xPZc9K6/Fsdosv8e0bZbtOMiAcDdXSWpvkAEhSWrPgJAkjWVASJLGMiAkSWMZEJKksQwISdJYx027gKOSnAD8BvA08Pmq+sSUS5Kkuda0B5HkyiSPJ7l3SfvOJA8kOZRkb9d8IXBdVV0CnN+yLknS6loPMV0F7BxtSLIFuBzYBewAdifZAZwKPNI97NnGdUmSVtE0IKrqVuCJJc3nAoeq6qGqehq4BrgAOMxiSDSvS5K0uml8EG/l+Z4CLAbDVuB64MeT/Cawf7knJ9mT5GCSg0eOHGlb6YAsd+6Xvpfr7Hvup9UuRbpaHRtxjpqldY67Pa7uSWuclcuwrsfj1sNmOf/QRjmW7bVR23pmJqmr6lvAu3o8bh+wD2BhYaFa1yVJ82oaPYhHgdNG7p/atfU2D2dzlaRpm0ZA3AGcleT0JMcDFwE3TLIAz+YqSe213s31auA24Owkh5NcXFXPAJcBNwH3A9dW1X0TLtcehCQ11nQOoqp2L9N+ADhwDMvdD+xfWFi4ZK3LkCStbJC7k9qDkKT2BhkQzkFIUnuDDAhJUnuDDAiHmCSpvVQN91izJEeAP1nj008Gvr6O5awX65qMdfU3izWBdU1qPep6TVWdstqDBh0QxyLJwapamHYdS1nXZKyrv1msCaxrUhtZ1yCHmCRJ7RkQkqSx5jkg9k27gGVY12Ssq79ZrAmsa1IbVtfczkFIklY2zz0ISdIK5i4glrke9kat+7Qk/zfJl5Lcl+TfdO0fSPJokru7r7eOPOe9Xa0PJHlLw9oeTnJPt/6DXdsrk9yc5MHu+yu69iT5UFfXHyY5p1FNZ49sk7uTPJXk3dPYXuOur76W7ZPknd3jH0zyzkZ1/ZckX+7W/ekkL+/atyf59sh2u2LkOd/fvf6HutrToK6JX7f1/ntdpq7fGanp4SR3d+0bsr1W+FyY+vuLqpqbL2AL8BXgDO
B44A+AHRu4/lcD53S3Xwb8EYvX5f4A8G/HPH5HV+OLgNO72rc0qu1h4OQlbf8Z2Nvd3gv8cnf7rcBngQBvAH5vg167PwVeM43tBfwQcA5w71q3D/BK4KHu+yu6269oUNebgeO62788Utf20cctWc7tXa3pat/VoK6JXrcWf6/j6lry818Bfmkjt9cKnwtTf3/NWw9iuethb4iqeqyq7upu/zmLpzvfusJTLgCuqarvVNUfA4dY/B02ygXAR7vbHwXePtL+sVr0ReDlSV7duJZ/AnylqlY6MLLZ9qrx11efdPu8Bbi5qp6oqj8DbgZ2rnddVfW5WjytPsAXef5a72N1tZ1YVV+sxU+aj438LutW1wqWe93W/e91pbq6XsA7gKtXWsZ6b68VPhem/v6at4BY7nrYGy7JduB1wO91TZd13cUrj3Yl2dh6C/hckjuT7OnaXlVVj3W3/xR41RTqOuoiXviHO+3tBZNvn2lst3/G4n+bR52e5PeTfCHJG7u2rV0tG1HXJK/bRm+vNwJfq6oHR9o2dHst+VyY+vtr3gJiJiT5a8CngHdX1VPAbwKvBf4B8BiL3dyN9oNVdQ6wC/j5JD80+sPuP6Wp7PKWxSsPng98smuahe31AtPcPstJ8j7gGeATXdNjwLaqeh3wC8BvJzlxA0uauddtid288J+QDd1eYz4Xvmta7695C4hjvh72sUryvSy+CT5RVdcDVNXXqurZqnoO+DDPD4tsWL1V9Wj3/XHg010NXzs6dNR9f3yj6+rsAu6qqq91NU59e3Um3T4bVl+SnwN+FPiZ7sOFbgjnG93tO1kc3/9bXQ2jw1BN6lrD67aR2+s44ELgd0bq3bDtNe5zgRl4f81bQBzz9bCPRTfG+RHg/qr61ZH20fH7HwOO7mFxA3BRkhclOR04i8XJsfWu64QkLzt6m8VJznu79R/dE+KdwP8aqetnu70p3gA8OdIVbuEF/9lNe3uNmHT73AS8OckruuGVN3dt6yrJTuDfAedX1V+MtJ+SZEt3+wwWt89DXW1PJXlD9x792ZHfZT3rmvR128i/1x8GvlxV3x062qjttdznArPw/jqWGe4hfrG4B8AfsfjfwPs2eN0/yGI38Q+Bu7uvtwIfB+7p2m8AXj3ynPd1tT7AMe5ZskJdZ7C4h8gfAPcd3S7A9wH/B3gQuAV4Zdce4PKurnuAhYbb7ATgG8BJI20bvr1YDKjHgL9kcWz34rVsHxbnBA51X+9qVNchFseij77Hruge++Pd63s3cBdw3shyFlj8wP4K8Ot0B9Guc10Tv27r/fc6rq6u/Srg0iWP3ZDtxfKfC1N/f3kktSRprHkbYpIk9WRASJLGMiAkSWMZEJKksQwISdJYBoQkaSwDQpI0lgEhSRrr/wM9t435QemXHQAAAABJRU5ErkJggg==\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Histogram of every price, 2000 bins, counts on a log axis.\n",
"df['price'].plot(kind='hist', bins=2000, logy=True)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(3945, 12)"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# NOTE: despite the '1k' in the names, the cut-offs are 500 and 300, and rows\n",
"# with 300 < price <= 500 are present in both frames.\n",
"df_b1k = df.loc[df['price'].le(500)]\n",
"df_a1k = df.loc[df['price'].gt(300)]\n",
"df_a1k.shape"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<matplotlib.axes._subplots.AxesSubplot at 0x1500f5ef0>"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYgAAAD8CAYAAABthzNFAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4wLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvFvnyVgAAEbNJREFUeJzt3WuwXWddx/Hvj5RSLiUIBGR6scWUQkYL1kOBAcbKiJMCAWQQGnFA6TQiVGF0RgoyXF44gzPKTao0Qq2g01quNhCmlDsvGNoUCr2ESsBqE9CUWwvIUEr/vjjr1GNmnZx1Ts5z9llnfz8zZ7LXs/de+T/pbn55LmvtVBWSJB3qHpMuQJK0NhkQkqReBoQkqZcBIUnqZUBIknoZEJKkXgaEJKmXASFJ6mVASJJ6HTXpApYjyTZg27HHHnvuIx7xiEmXI0mjcs0113y7qjYt9rqM+VYbMzMztWfPnkmXIUmjkuSaqppZ7HVOMUmSeo0yIJJsS7Lztttum3QpkrRujTIgqmpXVe3YuHHjpEuRpHVrlAEhSWpvlAHhFJMktTfKgHCKSZLaG2VASJLaMyAkSb1GfSX15s2bl32Ok87/SG/7zW98+rLPKUnryShHEK5BSFJ7oxxBtOTIQpJmjXIEIUlqz4CQJPUaZUB4oZwktTfKgHCRWpLaG2VASJLaMyAkSb0MCElSLwNCktRrlAHhLiZJam+UAeEuJklqb5QBIUlqz4CQJPUyICRJvQwISVIvb/c90EK3AQdvBS5pfXIEIUnqNcqA8DoISWpvlAHhdRCS1N4oA0KS1J4BIUnqZUBIknoZEJKkXgaEJKmXASFJ6mVASJJ6GRCSpF4GhCSp15oJiCRnJvlcknckOXPS9UjStGt6N9ckFwHPAA5W1S/Na98KvBXYALyzqt4IFPBD4Bhgf8u6VtpCd3r1Lq+Sxqz1COJiYOv8hiQbgAuAs4AtwPYkW4DPVdVZwCuBNzSuS5K0iKYBUVWfBb57SPMZwL6q+kZV3QFcCjyrqu7qnv8ecK+FzplkR5I9SfbceuutTeqWJE1mDeI44JZ5x/uB45I8J8mFwHuAty/05qraWVUzVTWzadOmxqVK0vRaM98oV1UfAD4w5LVJtgHbNm/e3LYoSZpikxhBHABOmHd8fNc2mN8HIUntTSIgrgZOSXJykqOBs4HLJ1CHJOkwmgZEkkuAzwOnJtmf5JyquhM4D7gC2AtcVlU3LPG8fuWoJDXWdA2iqrYv0L4b2H0E590F7JqZmTl3ueeQJB3emrmSWpK0towyIJxikqT2RhkQ7mKSpPbWzHUQ65H3aJI0ZqMcQTjFJEntjTIgnGKSpPZGGRCSpPYMCElSr1EGhGsQktTeKAPCNQhJam+UASFJas+AkCT1GmVAuAYhSe2N8krqsd/N1SusJY3BKEcQkqT2DAhJUi8DQpLUy4CQJPUaZUC4i0mS2htlQHgltSS1N8qAkCS1Z0BIknqN8kK59coL6CStJY4gJEm9DAhJUi8DQpLUy4CQJPUaZUB4oZwktTfKgPBCOUlqb5QBIUlqz+sgRsDrIyRNgiMISVIvA0KS1MuAkCT1MiAkSb0GBUSSX25diCRpbRk6gvjbJFcleWkSLz6QpCkwaJtrVT05ySnAi4FrklwF/ENVXbmSxSS5L/AZ4PVV9eGVPPd65PZXSS0NXoOoqq8BrwFeCfwa8LYkX03ynIXek+SiJAeTXH9I+9YkNyXZl+T8eU+9ErhsaV2QJLUwdA3itCRvBvYCTwG2VdWjusdvPsxbLwa2HnKuDcAFwFnAFmB7ki1JngrcCBxcaickSStv6JXUfwO8E3h1Vf14rrGqvpnkNQu9qao+m+SkQ5rPAPZV1TcAklwKPAu4H3BfZkPjx0l2V9VdQzsiSVpZQwPi6cCPq+pnAEnuARxTVf9TVe9Z4u95HHDLvOP9wOOq6rzu3L8HfHuhcEiyA9gBcO
KJJy7xt5YkDTV0DeLjwL3nHd+na1txVXXx4Raoq2pnVc1U1cymTZtalCBJYnhAHFNVP5w76B7fZ5m/5wHghHnHx3dtg/l9EJLU3tCA+FGS0+cOkvwq8OPDvP5wrgZOSXJykqOBs4HLl3ICvw9CktobugbxCuC9Sb4JBPh54PmLvSnJJcCZwIOT7AdeV1XvSnIecAWwAbioqm5YTvHq5/URklbC0Avlrk7ySODUrummqvrpgPdtX6B9N7B7cJWHSLIN2LZ58+blnkKStIil3KzvscBpwOnMXrvwwjYlLc4pJklqb9AIIsl7gF8ErgV+1jUX8O5GdS1WjyMISWps6BrEDLClqqplMUNV1S5g18zMzLmTrmVMXJuQtBRDp5iuZ3ZhWpI0JYaOIB4M3NjdxfUnc41V9cwmVUmSJm5oQLy+ZRFL5RqEJLU3aIqpqj4D3Azcs3t8NfDFhnUtVo+7mCSpsaG3+z4XeB9wYdd0HPChVkVJkiZv6CL1y4AnArfD3V8e9JBWRS3GezFJUntD1yB+UlV3JAEgyVHMXgcxEW5zXVluf5XUZ2hAfCbJq4F7d9/89lJgV7uytBYYHNJ0GzrFdD5wK3Ad8AfM3kdpwW+SkySN39Cb9d0F/H33I0maAkPvxfTv9Kw5VNXDV7yiAbwOQpLaW8q9mOYcA/w28MCVL2cYF6klqb2hF8p9Z97Pgap6C+BKpSStY0OnmE6fd3gPZkcUQ0cfkqQRGvqX/F/Pe3wns7fdeN6KVyNJWjOG7mL69daFaDy8PkKaDkOnmP7kcM9X1ZtWppxh3MUkSe0tZRfTY4HLu+NtwFXA11oUtRh3Ma1NC40sDsdRh7R2DQ2I44HTq+oHAEleD3ykqn63VWGSpMkaequNhwJ3zDu+o2uTJK1TQ0cQ7wauSvLB7vjZwD+2KUmStBYM3cX0F0k+Cjy5a/r9qvpSu7IkSZM2dIoJ4D7A7VX1VmB/kpMb1SRJWgOGfuXo64BXAq/qmu4J/FOroiRJkzd0BPFbwDOBHwFU1TeBY1sVtRi/clSS2hsaEHdUVdHd8jvJfduVtLiq2lVVOzZu3DjJMiRpXRu6i+myJBcCD0hyLvBi/PIgrQBv2yGtXUN3Mf1V913UtwOnAq+tqiubVqapZnBIk7doQCTZAHy8u2GfoSBJU2LRNYiq+hlwVxIn/CVpigxdg/ghcF2SK+l2MgFU1R83qUqSNHFDA+ID3Y8kaUocNiCSnFhV/1lV3ndJa8JSF69d7JaWb7E1iA/NPUjy/sa1SJLWkMWmmDLv8cNbFiIdieV8WZGkw1tsBFELPF5xSR6V5B1J3pfkD1v+XpKkxS0WEI9OcnuSHwCndY9vT/KDJLcvdvIkFyU5mOT6Q9q3Jrkpyb4k5wNU1d6qegnwPOCJy+2QJGllHDYgqmpDVd2/qo6tqqO6x3PH9x9w/ouBrfMbugvvLgDOArYA25Ns6Z57JvARYPcy+iJJWkFL+T6IJauqzwLfPaT5DGBfVX2jqu4ALgWe1b3+8qo6C3hBy7okSYsbeh3ESjoOuGXe8X7gcUnOBJ4D3IvDjCCS7AB2AJx44ontqpSkKTeJgOhVVZ8GPj3gdTuBnQAzMzNNF84laZo1nWJawAHghHnHx3dtg/mFQZLU3iRGEFcDp3TfaX0AOBv4naWcoKp2AbtmZmbObVCfpphXXkv/p+kIIsklwOeBU5PsT3JOVd0JnAdcAewFLquqG5Z4XkcQktRY0xFEVW1foH03R7CV1RGEJLU3iTUISdIIjDIgnGKSpPbWzDbXpXCKSUeq9c39Dnd+F7w1FqMcQUiS2hvlCCLJNmDb5s2bJ12KpoS3E9c0GuUIoqp2VdWOjRs3TroUSVq3RhkQkqT2DAhJUq9RBoTbXCWpvVEGhGsQktTeKANCktTeKLe5SlqYd6TVSjEgpDXCv9i11owyILxQTvLiPbU3yjUIF6klqb1RjiCkMfNf/hoLA0Ja4wwUTcoop5gkSe2NMiC8kl
qS2htlQLhILUntjTIgJEntuUgtaUmWumjuhX7j5QhCktTLgJAk9XKKSZpy3gNKC3EEIUnqZUBIknqNcorJu7lKms9psjZGOYLwQjlJam+UASFJam+UU0ySls67wmqpDAhJvQwUGRCSRsPQWl2uQUiSehkQkqReBoQkqZdrEJK0TOv9Ar01FRBJng08Hbg/8K6q+tiES5I0AS5Grw3Np5iSXJTkYJLrD2nfmuSmJPuSnA9QVR+qqnOBlwDPb12bJGlhqzGCuBh4O/DuuYYkG4ALgKcC+4Grk1xeVTd2L3lN97ykdcpRwtrXPCCq6rNJTjqk+QxgX1V9AyDJpcCzkuwF3gh8tKq+2Lo2Se0ZBOM1qV1MxwG3zDve37X9EfAbwHOTvKTvjUl2JNmTZM+tt97avlJJmlJrapG6qt4GvG2R1+wEdgLMzMzUatQlSdNoUgFxADhh3vHxXdsgfh+EpNU0rdNkk5piuho4JcnJSY4GzgYuH/pmvw9CktpbjW2ulwCfB05Nsj/JOVV1J3AecAWwF7isqm5oXYskabjV2MW0fYH23cDu5ZzTKSZJam+U92JyikmS2ltTu5gkaTWs93sorZRRjiCSbEuy87bbbpt0KZK0bo1yBFFVu4BdMzMz5066FklqaZKjnVGOICRJ7Y1yBOEuJklDTOsFbitllCMIdzFJUnujDAhJUnujnGKSpBackvr/RhkQrkFIGqOxBdAop5hcg5Ck9kYZEJKk9gwISVKvUQaEt9qQpPZGGRCuQUhSe6MMCElSe6Pc5ipJa9nYtrMuxBGEJKmXASFJ6jXKgHAXkyS1N8qAcBeTJLU3yoCQJLVnQEiSehkQkqReBoQkqZcBIUnqlaqadA3LluRW4D+W+LYHA99uUM5aN439ts/TYRr7DEfW71+oqk2LvWjUAbEcSfZU1cyk61ht09hv+zwdprHPsDr9dopJktTLgJAk9ZrGgNg56QImZBr7bZ+nwzT2GVah31O3BiFJGmYaRxCSpAGmKiCSbE1yU5J9Sc6fdD0rJclFSQ4muX5e2wOTXJnka92vP9e1J8nbuj+DryQ5fXKVL1+SE5J8KsmNSW5I8vKufb33+5gkVyX5ctfvN3TtJyf5Qte/f0lydNd+r+54X/f8SZOsf7mSbEjypSQf7o7XdX8Bktyc5Lok1ybZ07Wt6ud7agIiyQbgAuAsYAuwPcmWyVa1Yi4Gth7Sdj7wiao6BfhEdwyz/T+l+9kB/N0q1bjS7gT+tKq2AI8HXtb991zv/f4J8JSqejTwGGBrkscDfwm8uao2A98Dzulefw7wva79zd3rxujlwN55x+u9v3N+vaoeM2876+p+vqtqKn6AJwBXzDt+FfCqSde1gv07Cbh+3vFNwMO6xw8DbuoeXwhs73vdmH+AfwWeOk39Bu4DfBF4HLMXTB3Vtd/9WQeuAJ7QPT6qe10mXfsS+3k8s38ZPgX4MJD13N95/b4ZePAhbav6+Z6aEQRwHHDLvOP9Xdt69dCq+lb3+L+Ah3aP192fQzeN8CvAF5iCfnfTLdcCB4Erga8D36+qO7uXzO/b3f3unr8NeNDqVnzE3gL8GXBXd/wg1nd/5xTwsSTXJNnRta3q5/uoIz2B1r6qqiTrcrtakvsB7wdeUVW3J7n7ufXa76r6GfCYJA8APgg8csIlNZPkGcDBqromyZmTrmeVPamqDiR5CHBlkq/Of3I1Pt/TNII4AJww7/j4rm29+u8kDwPofj3Yta+bP4ck92Q2HP65qj7QNa/7fs+pqu8Dn2J2iuUBSeb+wTe/b3f3u3t+I/CdVS71SDwReGaSm4FLmZ1meivrt793q6oD3a8Hmf2HwBms8ud7mgLiauCUbvfD0cDZwOUTrqmly4EXdY9fxOwc/Vz7C7tdD48Hbps3ZB2NzA4V3gXsrao3zXtqvfd7UzdyIMm9mV132ctsUDy3e9mh/Z7783gu8MnqJqnHoKpeVVXHV9VJzP4/+8mqegHrtL9zktw3yb
Fzj4HfBK5ntT/fk16IWeVFn6cB/8bsnO2fT7qeFezXJcC3gJ8yO/d4DrPzrp8AvgZ8HHhg99owu5vr68B1wMyk619mn5/E7BztV4Bru5+nTUG/TwO+1PX7euC1XfvDgauAfcB7gXt17cd0x/u65x8+6T4cQd/PBD48Df3t+vfl7ueGub+vVvvz7ZXUkqRe0zTFJElaAgNCktTLgJAk9TIgJEm9DAhJUi8DQpLUy4CQJPUyICRJvf4XPz2EcPuS5WEAAAAASUVORK5CYII=\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Histogram of the df_b1k prices, 50 bins, counts on a log axis.\n",
"df_b1k['price'].plot(kind='hist', bins=50, logy=True)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"# Partition df_b1k by the shipping flag: 'ws' holds shipping == 1,\n",
"# 'wos' holds shipping == 0.\n",
"df_b1kws = df_b1k.loc[df_b1k['shipping'].eq(1)]\n",
"df_b1kwos = df_b1k.loc[df_b1k['shipping'].eq(0)]\n"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 864x576 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Bin edges 0, 5, ..., 500. The stop value has to exceed 500: np.arange\n",
"# excludes it and hist() does not count values beyond the last edge, so\n",
"# arange(0, 500, 5) silently left prices in (495, 500] out of the plot.\n",
"price_bins = np.arange(0, 505, 5)\n",
"\n",
"fig = plt.figure(figsize=(12, 8))\n",
"ax = fig.add_subplot(111)\n",
"# Overlay both groups on shared bins; the labels feed the legend so the two\n",
"# semi-transparent series can be told apart.\n",
"ax.hist(x=df_b1kws.price, bins=price_bins, alpha=0.4, ec='black', label='shipping == 1')\n",
"ax.hist(x=df_b1kwos.price, bins=price_bins, alpha=0.4, ec='black', label='shipping == 0')\n",
"ax.set_xlabel('price')\n",
"ax.set_ylabel('count')\n",
"ax.legend()\n",
"# The original author's note here read 'shipping included'; which group it\n",
"# refers to is not stated -- TODO confirm against the Kaggle data description.\n",
"ax.set_yscale('log')"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(18703, 12)\n",
"(34842, 12)\n"
]
}
],
"source": [
"# Shapes of the rows priced exactly 3 and of the rows priced at most 4.\n",
"for low_price_mask in (df.price == 3, df.price <= 4):\n",
"    print(df[low_price_mask].shape)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<matplotlib.axes._subplots.AxesSubplot at 0x118fce7b8>"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAY4AAAD8CAYAAABgmUMCAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4wLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvFvnyVgAAFGdJREFUeJzt3X2wXPV93/H3x+BnYyOKQomACDKyWzJtsCJjZmKnTl3zWBs7nVI8aVCJJ0qnMDUTdxrZzgQmHmZw64eUxiXBtcbg2CY4NrFaK8WC8diTPwAJovBooguGIlmAbDzGDh4I+Ns/9ndhUXWl/cHdu7vS+zWzs+d89+zZ7545dz/3POzZVBWSJI3qJZNuQJI0WwwOSVIXg0OS1MXgkCR1MTgkSV0MDklSF4NDktTF4JAkdTE4JEldDp10A+Nw5JFH1sqVKyfdhiTNlFtvvfV7VbV8f9MdkMGxcuVKtm7dOuk2JGmmJHlwlOncVSVJ6mJwSJK6GBySpC4GhySpi8EhSepicEiSuhgckqQuBockqYvBIUnqckB+c/zFWrn+axN53QcuO2sirytJPdzikCR1MTgkSV0MDklSF4NDktTF4JAkdTE4JEldDA5JUheDQ5LUZWzBkeTYJN9IcneSu5K8v9UvSbIzybZ2O3PoOR9MMpfk3iSnDdVPb7W5JOvH1bMkaf/G+c3xp4EPVNVtSQ4Dbk2yuT32yar62PDESU4EzgV+AfhZ4IYkr28Pfwp4B7AD2JJkY1XdPcbeJUkLGFtwVNUuYFcb/lGSe4AV+3jK2cA1VfUk8J0kc8DJ7bG5qrofIMk1bVqDQ5ImYEmOcSRZCbwRuLmVLkxye5INSZa12grgoaGn7Wi1heqSpAkYe3AkeQ3wZeCiqnocuAL4eeAkBlskH1+k11mXZGuSrbt3716MWUqS9mKswZHkpQxC4/NV9RWAqnqkqp6pqp8Cn+a53VE7gWOHnn5Mqy1Uf56qurKq1lTVmuXLly/+m5EkAeM9qyrAZ4B7quoTQ/WjhyZ7D3BnG94InJvk5UmOB1YBtwBbgFVJjk/yMgYH0DeOq29J0r6N86yqXwZ+A7gjybZW+xDw3iQnAQU8APw2QFXdleRaBge9nwYuqKpnAJJcCFwPHAJsqKq7xti3JGkfxnlW1V8B2ctDm/bxnEuBS/dS37Sv50mSlo7fHJckdTE4JEldDA5JUheDQ5LUxeCQJHUxOCRJXQwOSVIXg0OS1MXgkCR1MTgkSV0MDklSF4NDktTF4JAkdTE4JEldDA5JUheDQ5LUxeCQJHUxOCRJXQwOSVIXg0OS1MXgkCR1MTgkSV0MDklSF4NDktTF4JAkdTE4JEldDA5JUheDQ5LUxeCQJHUxOCRJXcYWHEmOTfKNJHcnuSvJ+1v9iCSbk2xv98taPUkuTzKX5PYkq4fmtbZNvz3J2nH1LEnav3FucTwNfKCqTgROAS5IciKwHrixqlYBN7ZxgDOAVe22DrgCBkEDXAy8GTgZuHg+bCRJS29swVFVu6rqtjb8I+AeYAVwNnBVm+wq4N1t+Gzg6hq4CTg8ydHAacDmqnqsqn4AbAZOH1ffkqR9W5JjHElWAm8EbgaOqqpd7aGHgaPa8ArgoaGn7Wi1heqSpAkYe3AkeQ3wZeCiqnp8+LGqKqAW6XXWJdmaZOvu3bsXY5aSpL0Ya3AkeSmD0Ph8VX2llR9pu6Bo94+2+k7g2KGnH9NqC9Wfp6qurKo1VbVm+fLli/tGJEnPGudZVQE+A9xTVZ8YemgjMH9m1Frgq0P189rZVacAP2y7tK4HTk2yrB0UP7XVJEkTcOgY5/3LwG8AdyTZ1mofAi4Drk3yPuBB4Jz22CbgTGAOeAI4H6CqHkvyEWBLm+4PquqxMfYtSdqHsQVHVf0VkAUefvtepi/gggXmtQHYsHjdSZJeKL85LknqYnBIkroYHJKkLgaHJKmLwSFJ6mJwSJ
K6GBySpC4GhySpi8EhSepicEiSuhgckqQuBockqYvBIUnqYnBIkroYHJKkLgaHJKmLwSFJ6jJScCT5J+NuRJI0G0bd4vgfSW5J8h+SvG6sHUmSptpIwVFVbwV+HTgWuDXJF5K8Y6ydSZKm0sjHOKpqO/B7wO8C/wy4PMm3k/zauJqTJE2fUY9x/NMknwTuAf458M6q+sdt+JNj7E+SNGUOHXG6/w78T+BDVfWT+WJVfTfJ742lM0nSVBo1OM4CflJVzwAkeQnwiqp6oqo+N7buJElTZ9RjHDcArxwaf1WrSZIOMqMGxyuq6sfzI234VeNpSZI0zUYNjr9Lsnp+JMkvAT/Zx/SSpAPUqMc4LgK+lOS7QIB/CPybsXUlSZpaIwVHVW1J8o+AN7TSvVX19+NrS5I0rUbd4gB4E7CyPWd1Eqrq6rF0JUmaWqN+AfBzwMeAtzAIkDcBa/bznA1JHk1y51DtkiQ7k2xrtzOHHvtgkrkk9yY5bah+eqvNJVnf+f4kSYts1C2ONcCJVVUd8/4s8EfAnlsln6yqjw0XkpwInAv8AvCzwA1JXt8e/hTwDmAHsCXJxqq6u6MPSdIiGvWsqjsZHBAfWVV9C3hsxMnPBq6pqier6jvAHHByu81V1f1V9RRwTZtWkjQho25xHAncneQW4Mn5YlW96wW85oVJzgO2Ah+oqh8AK4CbhqbZ0WoAD+1Rf/PeZppkHbAO4LjjjnsBbUmSRjFqcFyySK93BfARoNr9x4HfXIwZV9WVwJUAa9as6dmlJknqMOrpuN9M8nPAqqq6IcmrgEN6X6yqHpkfTvJp4H+30Z0Mfutj3jGtxj7qkqQJGPWsqt8C/hz4k1ZaAfxF74slOXpo9D0Mjp0AbATOTfLyJMcDq4BbgC3AqiTHJ3kZgwPoG3tfV5K0eEbdVXUBgwPVN8PgR52S/My+npDki8DbgCOT7AAuBt6W5CQGu6oeAH67ze+uJNcCdwNPAxcMXYn3QuB6Bls4G6rqrp43KElaXKMGx5NV9VQSAJIcyuDDf0FV9d69lD+zj+kvBS7dS30TsGnEPiVJYzbq6bjfTPIh4JXtt8a/BPyv8bUlSZpWowbHemA3cAeD3UubGPz+uCTpIDPqWVU/BT7dbpKkg9hIwZHkO+zlmEZVnbDoHUmSplrPtarmvQL418ARi9+OJGnajXSMo6q+P3TbWVV/CJw15t4kSVNo1F1Vq4dGX8JgC6TntzwkSQeIUT/8Pz40/DSDL++ds+jdSJKm3qhnVf3quBuRJM2GUXdV/c6+Hq+qTyxOO5KkaddzVtWbeO4Cg+9kcBHC7eNoSpI0vUYNjmOA1VX1Ixj8djjwtar6t+NqTJI0nUa95MhRwFND40+1miTpIDPqFsfVwC1Jrmvj7wauGk9LkqRpNupZVZcm+Uvgra10flX99fjakiRNq1F3VQG8Cni8qv4bsKP9Up8k6SAz6k/HXgz8LvDBVnop8KfjakqSNL1G3eJ4D/Au4O8Aquq7wGHjakqSNL1GDY6nqqpol1ZP8urxtSRJmmajBse1Sf4EODzJbwE34I86SdJBadSzqj7Wfmv8ceANwO9X1eaxdiZJmkr7DY4khwA3tAsdGhaSdJDb766qqnoG+GmS1y1BP5KkKTfqN8d/DNyRZDPtzCqAqvqPY+lKkjS1Rg2Or7SbJOkgt8/gSHJcVf3fqvK6VJIkYP/HOP5ifiDJl8fciyRpBuwvODI0fMI4G5EkzYb9BUctMCxJOkjt7+D4LyZ5nMGWxyvbMG28quq1Y+1OkjR19rnFUVWHVNVrq+qwqjq0Dc+P7zM0kmxI8miSO4dqRyTZnGR7u1/W6klyeZK5JLcnWT30nLVt+u1J1r7YNyxJenF6fo+j12eB0/eorQdurKpVwI1tHOAMYFW7rQOugEHQABcDbwZOBi6eDxtJ0mSMLTiq6lvAY3uUz+a5n5y9isFP0M7Xr66BmxhcTPFo4DRgc1U9VlU/YHDJkz3DSJK0hM
a5xbE3R1XVrjb8MHBUG14BPDQ03Y5WW6j+/0myLsnWJFt37969uF1Lkp611MHxrOHf91ik+V1ZVWuqas3y5csXa7aSpD0sdXA80nZB0e4fbfWdwLFD0x3TagvVJUkTstTBsRGYPzNqLfDVofp57eyqU4Aftl1a1wOnJlnWDoqf2mqSpAkZ9SKH3ZJ8EXgbcGSSHQzOjrqMwa8Jvg94EDinTb4JOBOYA54AzgeoqseSfATY0qb7g6ra84C7JGkJjS04quq9Czz09r1MW8AFC8xnA7BhEVuTJL0IEzs4LkmaTQaHJKmLwSFJ6mJwSJK6GBySpC4GhySpi8EhSepicEiSuhgckqQuBockqYvBIUnqYnBIkroYHJKkLgaHJKmLwSFJ6mJwSJK6GBySpC4GhySpi8EhSepicEiSuhgckqQuBockqYvBIUnqcuikG9BzVq7/2sRe+4HLzprYa0uaLW5xSJK6GBySpC4GhySpi8EhSepicEiSukwkOJI8kOSOJNuSbG21I5JsTrK93S9r9SS5PMlcktuTrJ5Ez5KkgUlucfxqVZ1UVWva+HrgxqpaBdzYxgHOAFa12zrgiiXvVJL0rGnaVXU2cFUbvgp491D96hq4CTg8ydGTaFCSNLngKODrSW5Nsq7VjqqqXW34YeCoNrwCeGjouTtaTZI0AZP65vhbqmpnkp8BNif59vCDVVVJqmeGLYDWARx33HGL16kk6XkmssVRVTvb/aPAdcDJwCPzu6Da/aNt8p3AsUNPP6bV9pznlVW1pqrWLF++fJztS9JBbcmDI8mrkxw2PwycCtwJbATWtsnWAl9twxuB89rZVacAPxzapSVJWmKT2FV1FHBdkvnX/0JV/Z8kW4Brk7wPeBA4p02/CTgTmAOeAM5f+pYlSfOWPDiq6n7gF/dS/z7w9r3UC7hgCVqTJI1gmk7HlSTNAINDktTF4JAkdTE4JEldDA5JUheDQ5LUxeCQJHUxOCRJXSZ1kUNNmZXrvzaR133gsrMm8rqSXji3OCRJXQwOSVIXg0OS1MXgkCR1MTgkSV0MDklSF4NDktTF4JAkdTE4JEldDA5JUheDQ5LUxeCQJHUxOCRJXQwOSVIXL6uuifJy7tLscYtDktTF4JAkdTE4JEldDA5JUhcPjuugNKmD8uCBec0+tzgkSV1mJjiSnJ7k3iRzSdZPuh9JOljNxK6qJIcAnwLeAewAtiTZWFV3T7YzqZ/fXdGsm4ngAE4G5qrqfoAk1wBnAwaHNKJJHteZFMNyPGYlOFYADw2N7wDePKFeJM0Iw3I8ZiU49ivJOmBdG/1xknsXmPRI4HtL09WimLV+YfZ6nrV+YfZ6nrV+YfZ6PhL4Xj76oubxc6NMNCvBsRM4dmj8mFZ7VlVdCVy5vxkl2VpVaxa3vfGZtX5h9nqetX5h9nqetX5h9npeyn5n5ayqLcCqJMcneRlwLrBxwj1J0kFpJrY4qurpJBcC1wOHABuq6q4JtyVJB6WZCA6AqtoEbFqEWe13d9aUmbV+YfZ6nrV+YfZ6nrV+YfZ6XrJ+U1VL9VqSpAPArBzjkCRNiQMqOJIcm+QbSe5OcleS97f6JUl2JtnWbmcOPeeD7TIm9yY5bUJ9P5Dkjtbb1lY7IsnmJNvb/bJWT5LLW8+3J1m9xL2+YWg5bkvyeJKLpm0ZJ9mQ5NEkdw7VupdpkrVt+u1J1i5xv/81ybdbT9clObzVVyb5ydCy/uOh5/xSW5fm2nvKEvfcvR4s1eWEFuj3z4Z6fSDJtlaflmW80GfaZNflqjpgbsDRwOo2fBjwt8CJwCXAf9rL9CcCfwO8HDgeuA84ZAJ9PwAcuUftvwDr2/B64KNt+EzgL4EApwA3T3B5HwI8zODc76laxsCvAKuBO1/oMgWOAO5v98va8LIl7PdU4NA2/NGhflcOT7fHfG5p7yHtPZ2xxMu4az1ot/uAE4CXtWlOXKp+93j848DvT9kyXugzbaLr8gG1xVFVu6rqtjb8I+
AeBt86X8jZwDVV9WRVfQeYY3B5k2lwNnBVG74KePdQ/eoauAk4PMnRk2gQeDtwX1U9uI9pJrKMq+pbwGN76aVnmZ4GbK6qx6rqB8Bm4PSl6reqvl5VT7fRmxh8f2lBrefXVtVNNfi0uJrn3uOiW2AZL2Sh9eDZywlV1VPA/OWElrTfttVwDvDFfc1jAst4oc+0ia7LB1RwDEuyEngjcHMrXdg23TbMb9ax90uZ7CtoxqWArye5NYNvwAMcVVW72vDDwFFteFp6hsH3aYb/0KZ5GUP/Mp2m3n+TwX+S845P8tdJvpnkra22gkGP8ybVb896MC3L+K3AI1W1fag2Vct4j8+0ia7LB2RwJHkN8GXgoqp6HLgC+HngJGAXg03SafKWqloNnAFckORXhh9s/9lM1elvGXwR813Al1pp2pfx80zjMl1Ikg8DTwOfb6VdwHFV9Ubgd4AvJHntpPrbw0ytB0Pey/P/CZqqZbyXz7RnTWJdPuCCI8lLGSzgz1fVVwCq6pGqeqaqfgp8mud2lez3UiZLoap2tvtHgesY9PfI/C6odv9om3wqemYQcrdV1SMw/cu46V2mE+89yb8D/iXw6+0Dgra75/tt+FYGxwhe33ob3p215P2+gPVgGpbxocCvAX82X5umZby3zzQmvC4fUMHR9lN+Brinqj4xVB8+BvAeYP6sio3AuUlenuR4YBWDA19LJsmrkxw2P8zggOidrbf5Mx/WAl8d6vm8dvbEKcAPhzZZl9Lz/kOb5mU8pHeZXg+cmmRZ2+VyaqstiSSnA/8ZeFdVPTFUX57Bb9SQ5AQGy/T+1vPjSU5pfwvnDb3Hpeq5dz2YhssJ/Qvg21X17C6oaVnGC32mMel1ebGO/k/DDXgLg02224Ft7XYm8DngjlbfCBw99JwPM/hv4l7GeHbEPno+gcGZJH8D3AV8uNX/AXAjsB24ATii1cPgR63ua+9pzQR6fjXwfeB1Q7WpWsYMQm0X8PcM9ue+74UsUwbHFuba7fwl7neOwX7p+XX5j9u0/6qtK9uA24B3Ds1nDYMP6/uAP6J9yXcJe+5eD9rf6N+2xz68lP22+meBf7/HtNOyjBf6TJvouuw3xyVJXQ6oXVWSpPEzOCRJXQwOSVIXg0OS1MXgkCR1MTgkSV0MDklSF4NDktTl/wExXSqVif7bTAAAAABJRU5ErkJggg==\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Histogram of the df_a1k prices (price > 300), 10 bins, linear counts.\n",
"df_a1k['price'].plot(kind='hist', bins=10)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead tr th {\n",
" text-align: left;\n",
" }\n",
"\n",
" .dataframe thead tr:last-of-type th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr>\n",
" <th></th>\n",
" <th colspan=\"8\" halign=\"left\">price</th>\n",
" <th colspan=\"2\" halign=\"left\">price_log</th>\n",
" <th>...</th>\n",
" <th colspan=\"2\" halign=\"left\">shipping</th>\n",
" <th colspan=\"8\" halign=\"left\">train_id</th>\n",
" </tr>\n",
" <tr>\n",
" <th></th>\n",
" <th>count</th>\n",
" <th>mean</th>\n",
" <th>std</th>\n",
" <th>min</th>\n",
" <th>25%</th>\n",
" <th>50%</th>\n",
" <th>75%</th>\n",
" <th>max</th>\n",
" <th>count</th>\n",
" <th>mean</th>\n",
" <th>...</th>\n",
" <th>75%</th>\n",
" <th>max</th>\n",
" <th>count</th>\n",
" <th>mean</th>\n",
" <th>std</th>\n",
" <th>min</th>\n",
" <th>25%</th>\n",
" <th>50%</th>\n",
" <th>75%</th>\n",
" <th>max</th>\n",
" </tr>\n",
" <tr>\n",
" <th>item_condition_id</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>639922.0</td>\n",
" <td>26.115753</td>\n",
" <td>30.251765</td>\n",
" <td>3.0</td>\n",
" <td>10.0</td>\n",
" <td>18.0</td>\n",
" <td>30.0</td>\n",
" <td>500.0</td>\n",
" <td>639922.0</td>\n",
" <td>2.902286</td>\n",
" <td>...</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>639922.0</td>\n",
" <td>741459.580829</td>\n",
" <td>427816.130386</td>\n",
" <td>2.0</td>\n",
" <td>371215.25</td>\n",
" <td>741409.0</td>\n",
" <td>1111889.5</td>\n",
" <td>1482534.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>374837.0</td>\n",
" <td>26.845917</td>\n",
" <td>33.328451</td>\n",
" <td>3.0</td>\n",
" <td>11.0</td>\n",
" <td>17.0</td>\n",
" <td>29.0</td>\n",
" <td>500.0</td>\n",
" <td>374837.0</td>\n",
" <td>2.933569</td>\n",
" <td>...</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>374837.0</td>\n",
" <td>740761.457052</td>\n",
" <td>428457.002174</td>\n",
" <td>10.0</td>\n",
" <td>368816.00</td>\n",
" <td>740986.0</td>\n",
" <td>1111606.0</td>\n",
" <td>1482532.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>431479.0</td>\n",
" <td>25.878923</td>\n",
" <td>32.656219</td>\n",
" <td>3.0</td>\n",
" <td>11.0</td>\n",
" <td>16.0</td>\n",
" <td>28.0</td>\n",
" <td>500.0</td>\n",
" <td>431479.0</td>\n",
" <td>2.907268</td>\n",
" <td>...</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>431479.0</td>\n",
" <td>741246.709872</td>\n",
" <td>427802.378834</td>\n",
" <td>0.0</td>\n",
" <td>371171.00</td>\n",
" <td>741197.0</td>\n",
" <td>1112085.5</td>\n",
" <td>1482533.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>31927.0</td>\n",
" <td>23.940693</td>\n",
" <td>30.365606</td>\n",
" <td>3.0</td>\n",
" <td>10.0</td>\n",
" <td>15.0</td>\n",
" <td>26.0</td>\n",
" <td>499.0</td>\n",
" <td>31927.0</td>\n",
" <td>2.823294</td>\n",
" <td>...</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>31927.0</td>\n",
" <td>743054.771072</td>\n",
" <td>427586.277391</td>\n",
" <td>37.0</td>\n",
" <td>373386.00</td>\n",
" <td>742436.0</td>\n",
" <td>1113253.0</td>\n",
" <td>1482464.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>2381.0</td>\n",
" <td>31.524570</td>\n",
" <td>38.246491</td>\n",
" <td>3.0</td>\n",
" <td>11.0</td>\n",
" <td>19.0</td>\n",
" <td>35.0</td>\n",
" <td>404.0</td>\n",
" <td>2381.0</td>\n",
" <td>3.029964</td>\n",
" <td>...</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>2381.0</td>\n",
" <td>734574.986560</td>\n",
" <td>430443.262412</td>\n",
" <td>258.0</td>\n",
" <td>360446.00</td>\n",
" <td>730864.0</td>\n",
" <td>1100051.0</td>\n",
" <td>1482163.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 32 columns</p>\n",
"</div>"
],
"text/plain": [
" price \\\n",
" count mean std min 25% 50% 75% \n",
"item_condition_id \n",
"1 639922.0 26.115753 30.251765 3.0 10.0 18.0 30.0 \n",
"2 374837.0 26.845917 33.328451 3.0 11.0 17.0 29.0 \n",
"3 431479.0 25.878923 32.656219 3.0 11.0 16.0 28.0 \n",
"4 31927.0 23.940693 30.365606 3.0 10.0 15.0 26.0 \n",
"5 2381.0 31.524570 38.246491 3.0 11.0 19.0 35.0 \n",
"\n",
" price_log ... shipping \\\n",
" max count mean ... 75% max \n",
"item_condition_id ... \n",
"1 500.0 639922.0 2.902286 ... 1.0 1.0 \n",
"2 500.0 374837.0 2.933569 ... 1.0 1.0 \n",
"3 500.0 431479.0 2.907268 ... 1.0 1.0 \n",
"4 499.0 31927.0 2.823294 ... 1.0 1.0 \n",
"5 404.0 2381.0 3.029964 ... 1.0 1.0 \n",
"\n",
" train_id \\\n",
" count mean std min 25% \n",
"item_condition_id \n",
"1 639922.0 741459.580829 427816.130386 2.0 371215.25 \n",
"2 374837.0 740761.457052 428457.002174 10.0 368816.00 \n",
"3 431479.0 741246.709872 427802.378834 0.0 371171.00 \n",
"4 31927.0 743054.771072 427586.277391 37.0 373386.00 \n",
"5 2381.0 734574.986560 430443.262412 258.0 360446.00 \n",
"\n",
" \n",
" 50% 75% max \n",
"item_condition_id \n",
"1 741409.0 1111889.5 1482534.0 \n",
"2 740986.0 1111606.0 1482532.0 \n",
"3 741197.0 1112085.5 1482533.0 \n",
"4 742436.0 1113253.0 1482464.0 \n",
"5 730864.0 1100051.0 1482163.0 \n",
"\n",
"[5 rows x 32 columns]"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_b1k.groupby(['item_condition_id']).describe()"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead tr th {\n",
" text-align: left;\n",
" }\n",
"\n",
" .dataframe thead tr:last-of-type th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr>\n",
" <th></th>\n",
" <th></th>\n",
" <th colspan=\"8\" halign=\"left\">price</th>\n",
" <th colspan=\"8\" halign=\"left\">shipping</th>\n",
" </tr>\n",
" <tr>\n",
" <th></th>\n",
" <th></th>\n",
" <th>count</th>\n",
" <th>mean</th>\n",
" <th>std</th>\n",
" <th>min</th>\n",
" <th>25%</th>\n",
" <th>50%</th>\n",
" <th>75%</th>\n",
" <th>max</th>\n",
" <th>count</th>\n",
" <th>mean</th>\n",
" <th>std</th>\n",
" <th>min</th>\n",
" <th>25%</th>\n",
" <th>50%</th>\n",
" <th>75%</th>\n",
" <th>max</th>\n",
" </tr>\n",
" <tr>\n",
" <th>category1</th>\n",
" <th>category2</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th rowspan=\"7\" valign=\"top\">Beauty</th>\n",
" <th>Bath &amp; Body</th>\n",
" <td>7752.0</td>\n",
" <td>19.0</td>\n",
" <td>17.1</td>\n",
" <td>3.0</td>\n",
" <td>9.0</td>\n",
" <td>15.0</td>\n",
" <td>23.0</td>\n",
" <td>290.0</td>\n",
" <td>7752.0</td>\n",
" <td>0.5</td>\n",
" <td>0.5</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Fragrance</th>\n",
" <td>24275.0</td>\n",
" <td>23.8</td>\n",
" <td>20.4</td>\n",
" <td>3.0</td>\n",
" <td>12.0</td>\n",
" <td>18.0</td>\n",
" <td>30.0</td>\n",
" <td>381.0</td>\n",
" <td>24275.0</td>\n",
" <td>0.5</td>\n",
" <td>0.5</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Hair Care</th>\n",
" <td>7767.0</td>\n",
" <td>19.4</td>\n",
" <td>16.2</td>\n",
" <td>3.0</td>\n",
" <td>10.0</td>\n",
" <td>15.0</td>\n",
" <td>23.0</td>\n",
" <td>239.0</td>\n",
" <td>7767.0</td>\n",
" <td>0.4</td>\n",
" <td>0.5</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Makeup</th>\n",
" <td>124546.0</td>\n",
" <td>18.6</td>\n",
" <td>17.1</td>\n",
" <td>3.0</td>\n",
" <td>10.0</td>\n",
" <td>15.0</td>\n",
" <td>22.0</td>\n",
" <td>465.0</td>\n",
" <td>124546.0</td>\n",
" <td>0.7</td>\n",
" <td>0.5</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Other</th>\n",
" <td>489.0</td>\n",
" <td>23.0</td>\n",
" <td>32.2</td>\n",
" <td>3.0</td>\n",
" <td>8.0</td>\n",
" <td>14.0</td>\n",
" <td>25.0</td>\n",
" <td>259.0</td>\n",
" <td>489.0</td>\n",
" <td>0.6</td>\n",
" <td>0.5</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Skin Care</th>\n",
" <td>29815.0</td>\n",
" <td>20.3</td>\n",
" <td>21.6</td>\n",
" <td>3.0</td>\n",
" <td>10.0</td>\n",
" <td>14.0</td>\n",
" <td>24.0</td>\n",
" <td>425.0</td>\n",
" <td>29815.0</td>\n",
" <td>0.6</td>\n",
" <td>0.5</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Tools &amp; Accessories</th>\n",
" <td>13048.0</td>\n",
" <td>19.9</td>\n",
" <td>22.0</td>\n",
" <td>3.0</td>\n",
" <td>9.0</td>\n",
" <td>14.0</td>\n",
" <td>22.0</td>\n",
" <td>375.0</td>\n",
" <td>13048.0</td>\n",
" <td>0.6</td>\n",
" <td>0.5</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"8\" valign=\"top\">Electronics</th>\n",
" <th>Cameras &amp; Photography</th>\n",
" <td>3949.0</td>\n",
" <td>67.4</td>\n",
" <td>82.7</td>\n",
" <td>3.0</td>\n",
" <td>19.0</td>\n",
" <td>39.0</td>\n",
" <td>75.0</td>\n",
" <td>500.0</td>\n",
" <td>3949.0</td>\n",
" <td>0.4</td>\n",
" <td>0.5</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Car Audio, Video &amp; GPS</th>\n",
" <td>516.0</td>\n",
" <td>37.9</td>\n",
" <td>39.3</td>\n",
" <td>3.0</td>\n",
" <td>15.0</td>\n",
" <td>25.0</td>\n",
" <td>45.0</td>\n",
" <td>309.0</td>\n",
" <td>516.0</td>\n",
" <td>0.4</td>\n",
" <td>0.5</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Cell Phones &amp; Accessories</th>\n",
" <td>53153.0</td>\n",
" <td>28.8</td>\n",
" <td>55.2</td>\n",
" <td>3.0</td>\n",
" <td>8.0</td>\n",
" <td>11.0</td>\n",
" <td>20.0</td>\n",
" <td>500.0</td>\n",
" <td>53153.0</td>\n",
" <td>0.7</td>\n",
" <td>0.5</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Computers &amp; Tablets</th>\n",
" <td>7239.0</td>\n",
" <td>70.9</td>\n",
" <td>84.8</td>\n",
" <td>3.0</td>\n",
" <td>18.0</td>\n",
" <td>40.0</td>\n",
" <td>89.0</td>\n",
" <td>500.0</td>\n",
" <td>7239.0</td>\n",
" <td>0.4</td>\n",
" <td>0.5</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Media</th>\n",
" <td>11593.0</td>\n",
" <td>14.6</td>\n",
" <td>13.4</td>\n",
" <td>3.0</td>\n",
" <td>7.0</td>\n",
" <td>10.0</td>\n",
" <td>16.0</td>\n",
" <td>304.0</td>\n",
" <td>11593.0</td>\n",
" <td>0.6</td>\n",
" <td>0.5</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Other</th>\n",
" <td>773.0</td>\n",
" <td>37.3</td>\n",
" <td>47.0</td>\n",
" <td>3.0</td>\n",
" <td>12.0</td>\n",
" <td>21.0</td>\n",
" <td>43.0</td>\n",
" <td>415.0</td>\n",
" <td>773.0</td>\n",
" <td>0.5</td>\n",
" <td>0.5</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>TV, Audio &amp; Surveillance</th>\n",
" <td>9112.0</td>\n",
" <td>43.7</td>\n",
" <td>45.5</td>\n",
" <td>3.0</td>\n",
" <td>14.0</td>\n",
" <td>26.0</td>\n",
" <td>59.0</td>\n",
" <td>456.0</td>\n",
" <td>9112.0</td>\n",
" <td>0.5</td>\n",
" <td>0.5</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Video Games &amp; Consoles</th>\n",
" <td>35940.0</td>\n",
" <td>31.8</td>\n",
" <td>43.4</td>\n",
" <td>3.0</td>\n",
" <td>11.0</td>\n",
" <td>19.0</td>\n",
" <td>34.0</td>\n",
" <td>500.0</td>\n",
" <td>35940.0</td>\n",
" <td>0.5</td>\n",
" <td>0.5</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"15\" valign=\"top\">Handmade</th>\n",
" <th>Accessories</th>\n",
" <td>5953.0</td>\n",
" <td>19.1</td>\n",
" <td>27.8</td>\n",
" <td>3.0</td>\n",
" <td>6.0</td>\n",
" <td>10.0</td>\n",
" <td>20.0</td>\n",
" <td>301.0</td>\n",
" <td>5953.0</td>\n",
" <td>0.7</td>\n",
" <td>0.4</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Art</th>\n",
" <td>655.0</td>\n",
" <td>14.8</td>\n",
" <td>16.3</td>\n",
" <td>3.0</td>\n",
" <td>6.0</td>\n",
" <td>10.0</td>\n",
" <td>18.0</td>\n",
" <td>190.0</td>\n",
" <td>655.0</td>\n",
" <td>0.6</td>\n",
" <td>0.5</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Bags and Purses</th>\n",
" <td>3169.0</td>\n",
" <td>34.8</td>\n",
" <td>35.3</td>\n",
" <td>3.0</td>\n",
" <td>15.0</td>\n",
" <td>24.0</td>\n",
" <td>43.0</td>\n",
" <td>500.0</td>\n",
" <td>3169.0</td>\n",
" <td>0.4</td>\n",
" <td>0.5</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Books and Zines</th>\n",
" <td>46.0</td>\n",
" <td>14.2</td>\n",
" <td>17.1</td>\n",
" <td>3.0</td>\n",
" <td>7.0</td>\n",
" <td>10.0</td>\n",
" <td>18.8</td>\n",
" <td>115.0</td>\n",
" <td>46.0</td>\n",
" <td>0.5</td>\n",
" <td>0.5</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Candles</th>\n",
" <td>64.0</td>\n",
" <td>21.7</td>\n",
" <td>19.3</td>\n",
" <td>3.0</td>\n",
" <td>10.0</td>\n",
" <td>16.0</td>\n",
" <td>24.2</td>\n",
" <td>95.0</td>\n",
" <td>64.0</td>\n",
" <td>0.2</td>\n",
" <td>0.4</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Ceramics and Pottery</th>\n",
" <td>57.0</td>\n",
" <td>20.3</td>\n",
" <td>15.3</td>\n",
" <td>3.0</td>\n",
" <td>9.0</td>\n",
" <td>15.0</td>\n",
" <td>27.0</td>\n",
" <td>66.0</td>\n",
" <td>57.0</td>\n",
" <td>0.3</td>\n",
" <td>0.5</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Children</th>\n",
" <td>1242.0</td>\n",
" <td>12.7</td>\n",
" <td>13.6</td>\n",
" <td>3.0</td>\n",
" <td>7.0</td>\n",
" <td>9.0</td>\n",
" <td>13.0</td>\n",
" <td>175.0</td>\n",
" <td>1242.0</td>\n",
" <td>0.6</td>\n",
" <td>0.5</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Clothing</th>\n",
" <td>5637.0</td>\n",
" <td>18.8</td>\n",
" <td>16.9</td>\n",
" <td>3.0</td>\n",
" <td>10.0</td>\n",
" <td>15.0</td>\n",
" <td>22.0</td>\n",
" <td>459.0</td>\n",
" <td>5637.0</td>\n",
" <td>0.5</td>\n",
" <td>0.5</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Crochet</th>\n",
" <td>234.0</td>\n",
" <td>19.1</td>\n",
" <td>23.6</td>\n",
" <td>4.0</td>\n",
" <td>9.0</td>\n",
" <td>14.0</td>\n",
" <td>19.0</td>\n",
" <td>230.0</td>\n",
" <td>234.0</td>\n",
" <td>0.5</td>\n",
" <td>0.5</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Dolls and Miniatures</th>\n",
" <td>49.0</td>\n",
" <td>27.5</td>\n",
" <td>28.5</td>\n",
" <td>5.0</td>\n",
" <td>10.0</td>\n",
" <td>15.0</td>\n",
" <td>38.0</td>\n",
" <td>129.0</td>\n",
" <td>49.0</td>\n",
" <td>0.4</td>\n",
" <td>0.5</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Furniture</th>\n",
" <td>3.0</td>\n",
" <td>11.7</td>\n",
" <td>1.5</td>\n",
" <td>10.0</td>\n",
" <td>11.0</td>\n",
" <td>12.0</td>\n",
" <td>12.5</td>\n",
" <td>13.0</td>\n",
" <td>3.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Geekery</th>\n",
" <td>110.0</td>\n",
" <td>12.0</td>\n",
" <td>7.6</td>\n",
" <td>4.0</td>\n",
" <td>7.0</td>\n",
" <td>10.0</td>\n",
" <td>15.0</td>\n",
" <td>55.0</td>\n",
" <td>110.0</td>\n",
" <td>0.7</td>\n",
" <td>0.5</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Glass</th>\n",
" <td>592.0</td>\n",
" <td>18.8</td>\n",
" <td>14.6</td>\n",
" <td>3.0</td>\n",
" <td>11.0</td>\n",
" <td>15.0</td>\n",
" <td>22.0</td>\n",
" <td>180.0</td>\n",
" <td>592.0</td>\n",
" <td>0.4</td>\n",
" <td>0.5</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Holidays</th>\n",
" <td>325.0</td>\n",
" <td>15.9</td>\n",
" <td>10.3</td>\n",
" <td>3.0</td>\n",
" <td>9.0</td>\n",
" <td>14.0</td>\n",
" <td>20.0</td>\n",
" <td>79.0</td>\n",
" <td>325.0</td>\n",
" <td>0.5</td>\n",
" <td>0.5</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Housewares</th>\n",
" <td>431.0</td>\n",
" <td>17.3</td>\n",
" <td>13.7</td>\n",
" <td>3.0</td>\n",
" <td>10.0</td>\n",
" <td>14.0</td>\n",
" <td>20.0</td>\n",
" <td>115.0</td>\n",
" <td>431.0</td>\n",
" <td>0.4</td>\n",
" <td>0.5</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"12\" valign=\"top\">Vintage &amp; Collectibles</th>\n",
" <th>Collectibles</th>\n",
" <td>5810.0</td>\n",
" <td>24.2</td>\n",
" <td>28.0</td>\n",
" <td>3.0</td>\n",
" <td>11.0</td>\n",
" <td>16.0</td>\n",
" <td>26.0</td>\n",
" <td>473.0</td>\n",
" <td>5810.0</td>\n",
" <td>0.5</td>\n",
" <td>0.5</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Electronics</th>\n",
" <td>2729.0</td>\n",
" <td>27.2</td>\n",
" <td>29.8</td>\n",
" <td>3.0</td>\n",
" <td>11.0</td>\n",
" <td>18.0</td>\n",
" <td>32.0</td>\n",
" <td>350.0</td>\n",
" <td>2729.0</td>\n",
" <td>0.5</td>\n",
" <td>0.5</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Furniture</th>\n",
" <td>10.0</td>\n",
" <td>20.3</td>\n",
" <td>16.3</td>\n",
" <td>4.0</td>\n",
" <td>9.0</td>\n",
" <td>15.0</td>\n",
" <td>25.8</td>\n",
" <td>55.0</td>\n",
" <td>10.0</td>\n",
" <td>0.2</td>\n",
" <td>0.4</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Home Decor</th>\n",
" <td>476.0</td>\n",
" <td>23.6</td>\n",
" <td>22.0</td>\n",
" <td>3.0</td>\n",
" <td>11.0</td>\n",
" <td>17.0</td>\n",
" <td>26.0</td>\n",
" <td>175.0</td>\n",
" <td>476.0</td>\n",
" <td>0.3</td>\n",
" <td>0.4</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Housewares</th>\n",
" <td>1732.0</td>\n",
" <td>27.0</td>\n",
" <td>20.5</td>\n",
" <td>3.0</td>\n",
" <td>15.0</td>\n",
" <td>22.0</td>\n",
" <td>33.0</td>\n",
" <td>230.0</td>\n",
" <td>1732.0</td>\n",
" <td>0.2</td>\n",
" <td>0.4</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Jewelry</th>\n",
" <td>2521.0</td>\n",
" <td>28.5</td>\n",
" <td>38.3</td>\n",
" <td>3.0</td>\n",
" <td>10.0</td>\n",
" <td>17.0</td>\n",
" <td>30.0</td>\n",
" <td>500.0</td>\n",
" <td>2521.0</td>\n",
" <td>0.6</td>\n",
" <td>0.5</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Other</th>\n",
" <td>410.0</td>\n",
" <td>20.2</td>\n",
" <td>27.3</td>\n",
" <td>3.0</td>\n",
" <td>8.0</td>\n",
" <td>14.0</td>\n",
" <td>24.0</td>\n",
" <td>279.0</td>\n",
" <td>410.0</td>\n",
" <td>0.6</td>\n",
" <td>0.5</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Paper Ephemera</th>\n",
" <td>165.0</td>\n",
" <td>20.6</td>\n",
" <td>34.7</td>\n",
" <td>3.0</td>\n",
" <td>8.0</td>\n",
" <td>11.0</td>\n",
" <td>25.0</td>\n",
" <td>400.0</td>\n",
" <td>165.0</td>\n",
" <td>0.6</td>\n",
" <td>0.5</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Serving</th>\n",
" <td>1886.0</td>\n",
" <td>25.8</td>\n",
" <td>18.9</td>\n",
" <td>3.0</td>\n",
" <td>16.0</td>\n",
" <td>22.0</td>\n",
" <td>28.0</td>\n",
" <td>219.0</td>\n",
" <td>1886.0</td>\n",
" <td>0.5</td>\n",
" <td>0.5</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Supplies</th>\n",
" <td>1112.0</td>\n",
" <td>20.9</td>\n",
" <td>19.7</td>\n",
" <td>3.0</td>\n",
" <td>10.0</td>\n",
" <td>16.0</td>\n",
" <td>25.0</td>\n",
" <td>226.0</td>\n",
" <td>1112.0</td>\n",
" <td>0.5</td>\n",
" <td>0.5</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Toy</th>\n",
" <td>7335.0</td>\n",
" <td>25.9</td>\n",
" <td>29.7</td>\n",
" <td>3.0</td>\n",
" <td>11.0</td>\n",
" <td>17.0</td>\n",
" <td>29.0</td>\n",
" <td>415.0</td>\n",
" <td>7335.0</td>\n",
" <td>0.6</td>\n",
" <td>0.5</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Trading Cards</th>\n",
" <td>6480.0</td>\n",
" <td>14.8</td>\n",
" <td>24.1</td>\n",
" <td>3.0</td>\n",
" <td>4.0</td>\n",
" <td>8.0</td>\n",
" <td>16.0</td>\n",
" <td>500.0</td>\n",
" <td>6480.0</td>\n",
" <td>0.9</td>\n",
" <td>0.3</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"17\" valign=\"top\">Women</th>\n",
" <th>Athletic Apparel</th>\n",
" <td>123395.0</td>\n",
" <td>28.8</td>\n",
" <td>23.6</td>\n",
" <td>3.0</td>\n",
" <td>14.0</td>\n",
" <td>22.0</td>\n",
" <td>36.0</td>\n",
" <td>484.0</td>\n",
" <td>123395.0</td>\n",
" <td>0.4</td>\n",
" <td>0.5</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Coats &amp; Jackets</th>\n",
" <td>15068.0</td>\n",
" <td>33.9</td>\n",
" <td>30.3</td>\n",
" <td>3.0</td>\n",
" <td>16.0</td>\n",
" <td>25.0</td>\n",
" <td>41.0</td>\n",
" <td>486.0</td>\n",
" <td>15068.0</td>\n",
" <td>0.3</td>\n",
" <td>0.5</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Dresses</th>\n",
" <td>45724.0</td>\n",
" <td>29.4</td>\n",
" <td>29.3</td>\n",
" <td>3.0</td>\n",
" <td>13.0</td>\n",
" <td>20.0</td>\n",
" <td>36.0</td>\n",
" <td>473.0</td>\n",
" <td>45724.0</td>\n",
" <td>0.4</td>\n",
" <td>0.5</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Jeans</th>\n",
" <td>32305.0</td>\n",
" <td>25.9</td>\n",
" <td>18.5</td>\n",
" <td>3.0</td>\n",
" <td>14.0</td>\n",
" <td>20.0</td>\n",
" <td>31.0</td>\n",
" <td>456.0</td>\n",
" <td>32305.0</td>\n",
" <td>0.3</td>\n",
" <td>0.5</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Jewelry</th>\n",
" <td>56728.0</td>\n",
" <td>27.0</td>\n",
" <td>39.5</td>\n",
" <td>3.0</td>\n",
" <td>8.0</td>\n",
" <td>14.0</td>\n",
" <td>30.0</td>\n",
" <td>500.0</td>\n",
" <td>56728.0</td>\n",
" <td>0.6</td>\n",
" <td>0.5</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Maternity</th>\n",
" <td>3356.0</td>\n",
" <td>21.1</td>\n",
" <td>16.5</td>\n",
" <td>3.0</td>\n",
" <td>10.0</td>\n",
" <td>15.0</td>\n",
" <td>25.0</td>\n",
" <td>190.0</td>\n",
" <td>3356.0</td>\n",
" <td>0.4</td>\n",
" <td>0.5</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Other</th>\n",
" <td>7252.0</td>\n",
" <td>25.4</td>\n",
" <td>29.1</td>\n",
" <td>3.0</td>\n",
" <td>11.0</td>\n",
" <td>17.0</td>\n",
" <td>29.0</td>\n",
" <td>496.0</td>\n",
" <td>7252.0</td>\n",
" <td>0.4</td>\n",
" <td>0.5</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Pants</th>\n",
" <td>8403.0</td>\n",
" <td>19.6</td>\n",
" <td>16.0</td>\n",
" <td>3.0</td>\n",
" <td>10.0</td>\n",
" <td>15.0</td>\n",
" <td>22.0</td>\n",
" <td>259.0</td>\n",
" <td>8403.0</td>\n",
" <td>0.3</td>\n",
" <td>0.5</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Shoes</th>\n",
" <td>77575.0</td>\n",
" <td>35.8</td>\n",
" <td>34.1</td>\n",
" <td>3.0</td>\n",
" <td>16.0</td>\n",
" <td>26.0</td>\n",
" <td>41.0</td>\n",
" <td>500.0</td>\n",
" <td>77575.0</td>\n",
" <td>0.2</td>\n",
" <td>0.4</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Skirts</th>\n",
" <td>10620.0</td>\n",
" <td>21.6</td>\n",
" <td>15.6</td>\n",
" <td>3.0</td>\n",
" <td>12.0</td>\n",
" <td>18.0</td>\n",
" <td>27.0</td>\n",
" <td>306.0</td>\n",
" <td>10620.0</td>\n",
" <td>0.4</td>\n",
" <td>0.5</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Suits &amp; Blazers</th>\n",
" <td>1910.0</td>\n",
" <td>19.2</td>\n",
" <td>12.1</td>\n",
" <td>3.0</td>\n",
" <td>12.0</td>\n",
" <td>16.0</td>\n",
" <td>24.0</td>\n",
" <td>173.0</td>\n",
" <td>1910.0</td>\n",
" <td>0.3</td>\n",
" <td>0.5</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Sweaters</th>\n",
" <td>34298.0</td>\n",
" <td>26.5</td>\n",
" <td>21.9</td>\n",
" <td>3.0</td>\n",
" <td>13.0</td>\n",
" <td>20.0</td>\n",
" <td>33.0</td>\n",
" <td>450.0</td>\n",
" <td>34298.0</td>\n",
" <td>0.3</td>\n",
" <td>0.5</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Swimwear</th>\n",
" <td>18416.0</td>\n",
" <td>21.8</td>\n",
" <td>19.7</td>\n",
" <td>3.0</td>\n",
" <td>12.0</td>\n",
" <td>16.0</td>\n",
" <td>25.0</td>\n",
" <td>418.0</td>\n",
" <td>18416.0</td>\n",
" <td>0.3</td>\n",
" <td>0.5</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Tops &amp; Blouses</th>\n",
" <td>106889.0</td>\n",
" <td>18.2</td>\n",
" <td>14.8</td>\n",
" <td>3.0</td>\n",
" <td>10.0</td>\n",
" <td>14.0</td>\n",
" <td>22.0</td>\n",
" <td>450.0</td>\n",
" <td>106889.0</td>\n",
" <td>0.4</td>\n",
" <td>0.5</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Underwear</th>\n",
" <td>33756.0</td>\n",
" <td>18.1</td>\n",
" <td>13.5</td>\n",
" <td>3.0</td>\n",
" <td>10.0</td>\n",
" <td>14.0</td>\n",
" <td>22.0</td>\n",
" <td>359.0</td>\n",
" <td>33756.0</td>\n",
" <td>0.5</td>\n",
" <td>0.5</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Women's Accessories</th>\n",
" <td>42292.0</td>\n",
" <td>30.3</td>\n",
" <td>38.6</td>\n",
" <td>3.0</td>\n",
" <td>10.0</td>\n",
" <td>18.0</td>\n",
" <td>34.0</td>\n",
" <td>500.0</td>\n",
" <td>42292.0</td>\n",
" <td>0.5</td>\n",
" <td>0.5</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Women's Handbags</th>\n",
" <td>45480.0</td>\n",
" <td>52.0</td>\n",
" <td>58.1</td>\n",
" <td>3.0</td>\n",
" <td>17.0</td>\n",
" <td>31.0</td>\n",
" <td>64.0</td>\n",
" <td>500.0</td>\n",
" <td>45480.0</td>\n",
" <td>0.3</td>\n",
" <td>0.5</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>nan</th>\n",
" <th></th>\n",
" <td>6311.0</td>\n",
" <td>25.1</td>\n",
" <td>29.7</td>\n",
" <td>3.0</td>\n",
" <td>10.0</td>\n",
" <td>16.0</td>\n",
" <td>28.0</td>\n",
" <td>445.0</td>\n",
" <td>6311.0</td>\n",
" <td>0.4</td>\n",
" <td>0.5</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>139 rows × 16 columns</p>\n",
"</div>"
],
"text/plain": [
" price \\\n",
" count mean std min \n",
"category1 category2 \n",
"Beauty Bath & Body 7752.0 19.0 17.1 3.0 \n",
" Fragrance 24275.0 23.8 20.4 3.0 \n",
" Hair Care 7767.0 19.4 16.2 3.0 \n",
" Makeup 124546.0 18.6 17.1 3.0 \n",
" Other 489.0 23.0 32.2 3.0 \n",
" Skin Care 29815.0 20.3 21.6 3.0 \n",
" Tools & Accessories 13048.0 19.9 22.0 3.0 \n",
"Electronics Cameras & Photography 3949.0 67.4 82.7 3.0 \n",
" Car Audio, Video & GPS 516.0 37.9 39.3 3.0 \n",
" Cell Phones & Accessories 53153.0 28.8 55.2 3.0 \n",
" Computers & Tablets 7239.0 70.9 84.8 3.0 \n",
" Media 11593.0 14.6 13.4 3.0 \n",
" Other 773.0 37.3 47.0 3.0 \n",
" TV, Audio & Surveillance 9112.0 43.7 45.5 3.0 \n",
" Video Games & Consoles 35940.0 31.8 43.4 3.0 \n",
"Handmade Accessories 5953.0 19.1 27.8 3.0 \n",
" Art 655.0 14.8 16.3 3.0 \n",
" Bags and Purses 3169.0 34.8 35.3 3.0 \n",
" Books and Zines 46.0 14.2 17.1 3.0 \n",
" Candles 64.0 21.7 19.3 3.0 \n",
" Ceramics and Pottery 57.0 20.3 15.3 3.0 \n",
" Children 1242.0 12.7 13.6 3.0 \n",
" Clothing 5637.0 18.8 16.9 3.0 \n",
" Crochet 234.0 19.1 23.6 4.0 \n",
" Dolls and Miniatures 49.0 27.5 28.5 5.0 \n",
" Furniture 3.0 11.7 1.5 10.0 \n",
" Geekery 110.0 12.0 7.6 4.0 \n",
" Glass 592.0 18.8 14.6 3.0 \n",
" Holidays 325.0 15.9 10.3 3.0 \n",
" Housewares 431.0 17.3 13.7 3.0 \n",
"... ... ... ... ... \n",
"Vintage & Collectibles Collectibles 5810.0 24.2 28.0 3.0 \n",
" Electronics 2729.0 27.2 29.8 3.0 \n",
" Furniture 10.0 20.3 16.3 4.0 \n",
" Home Decor 476.0 23.6 22.0 3.0 \n",
" Housewares 1732.0 27.0 20.5 3.0 \n",
" Jewelry 2521.0 28.5 38.3 3.0 \n",
" Other 410.0 20.2 27.3 3.0 \n",
" Paper Ephemera 165.0 20.6 34.7 3.0 \n",
" Serving 1886.0 25.8 18.9 3.0 \n",
" Supplies 1112.0 20.9 19.7 3.0 \n",
" Toy 7335.0 25.9 29.7 3.0 \n",
" Trading Cards 6480.0 14.8 24.1 3.0 \n",
"Women Athletic Apparel 123395.0 28.8 23.6 3.0 \n",
" Coats & Jackets 15068.0 33.9 30.3 3.0 \n",
" Dresses 45724.0 29.4 29.3 3.0 \n",
" Jeans 32305.0 25.9 18.5 3.0 \n",
" Jewelry 56728.0 27.0 39.5 3.0 \n",
" Maternity 3356.0 21.1 16.5 3.0 \n",
" Other 7252.0 25.4 29.1 3.0 \n",
" Pants 8403.0 19.6 16.0 3.0 \n",
" Shoes 77575.0 35.8 34.1 3.0 \n",
" Skirts 10620.0 21.6 15.6 3.0 \n",
" Suits & Blazers 1910.0 19.2 12.1 3.0 \n",
" Sweaters 34298.0 26.5 21.9 3.0 \n",
" Swimwear 18416.0 21.8 19.7 3.0 \n",
" Tops & Blouses 106889.0 18.2 14.8 3.0 \n",
" Underwear 33756.0 18.1 13.5 3.0 \n",
" Women's Accessories 42292.0 30.3 38.6 3.0 \n",
" Women's Handbags 45480.0 52.0 58.1 3.0 \n",
"nan 6311.0 25.1 29.7 3.0 \n",
"\n",
" \\\n",
" 25% 50% 75% max \n",
"category1 category2 \n",
"Beauty Bath & Body 9.0 15.0 23.0 290.0 \n",
" Fragrance 12.0 18.0 30.0 381.0 \n",
" Hair Care 10.0 15.0 23.0 239.0 \n",
" Makeup 10.0 15.0 22.0 465.0 \n",
" Other 8.0 14.0 25.0 259.0 \n",
" Skin Care 10.0 14.0 24.0 425.0 \n",
" Tools & Accessories 9.0 14.0 22.0 375.0 \n",
"Electronics Cameras & Photography 19.0 39.0 75.0 500.0 \n",
" Car Audio, Video & GPS 15.0 25.0 45.0 309.0 \n",
" Cell Phones & Accessories 8.0 11.0 20.0 500.0 \n",
" Computers & Tablets 18.0 40.0 89.0 500.0 \n",
" Media 7.0 10.0 16.0 304.0 \n",
" Other 12.0 21.0 43.0 415.0 \n",
" TV, Audio & Surveillance 14.0 26.0 59.0 456.0 \n",
" Video Games & Consoles 11.0 19.0 34.0 500.0 \n",
"Handmade Accessories 6.0 10.0 20.0 301.0 \n",
" Art 6.0 10.0 18.0 190.0 \n",
" Bags and Purses 15.0 24.0 43.0 500.0 \n",
" Books and Zines 7.0 10.0 18.8 115.0 \n",
" Candles 10.0 16.0 24.2 95.0 \n",
" Ceramics and Pottery 9.0 15.0 27.0 66.0 \n",
" Children 7.0 9.0 13.0 175.0 \n",
" Clothing 10.0 15.0 22.0 459.0 \n",
" Crochet 9.0 14.0 19.0 230.0 \n",
" Dolls and Miniatures 10.0 15.0 38.0 129.0 \n",
" Furniture 11.0 12.0 12.5 13.0 \n",
" Geekery 7.0 10.0 15.0 55.0 \n",
" Glass 11.0 15.0 22.0 180.0 \n",
" Holidays 9.0 14.0 20.0 79.0 \n",
" Housewares 10.0 14.0 20.0 115.0 \n",
"... ... ... ... ... \n",
"Vintage & Collectibles Collectibles 11.0 16.0 26.0 473.0 \n",
" Electronics 11.0 18.0 32.0 350.0 \n",
" Furniture 9.0 15.0 25.8 55.0 \n",
" Home Decor 11.0 17.0 26.0 175.0 \n",
" Housewares 15.0 22.0 33.0 230.0 \n",
" Jewelry 10.0 17.0 30.0 500.0 \n",
" Other 8.0 14.0 24.0 279.0 \n",
" Paper Ephemera 8.0 11.0 25.0 400.0 \n",
" Serving 16.0 22.0 28.0 219.0 \n",
" Supplies 10.0 16.0 25.0 226.0 \n",
" Toy 11.0 17.0 29.0 415.0 \n",
" Trading Cards 4.0 8.0 16.0 500.0 \n",
"Women Athletic Apparel 14.0 22.0 36.0 484.0 \n",
" Coats & Jackets 16.0 25.0 41.0 486.0 \n",
" Dresses 13.0 20.0 36.0 473.0 \n",
" Jeans 14.0 20.0 31.0 456.0 \n",
" Jewelry 8.0 14.0 30.0 500.0 \n",
" Maternity 10.0 15.0 25.0 190.0 \n",
" Other 11.0 17.0 29.0 496.0 \n",
" Pants 10.0 15.0 22.0 259.0 \n",
" Shoes 16.0 26.0 41.0 500.0 \n",
" Skirts 12.0 18.0 27.0 306.0 \n",
" Suits & Blazers 12.0 16.0 24.0 173.0 \n",
" Sweaters 13.0 20.0 33.0 450.0 \n",
" Swimwear 12.0 16.0 25.0 418.0 \n",
" Tops & Blouses 10.0 14.0 22.0 450.0 \n",
" Underwear 10.0 14.0 22.0 359.0 \n",
" Women's Accessories 10.0 18.0 34.0 500.0 \n",
" Women's Handbags 17.0 31.0 64.0 500.0 \n",
"nan 10.0 16.0 28.0 445.0 \n",
"\n",
" shipping \\\n",
" count mean std min \n",
"category1 category2 \n",
"Beauty Bath & Body 7752.0 0.5 0.5 0.0 \n",
" Fragrance 24275.0 0.5 0.5 0.0 \n",
" Hair Care 7767.0 0.4 0.5 0.0 \n",
" Makeup 124546.0 0.7 0.5 0.0 \n",
" Other 489.0 0.6 0.5 0.0 \n",
" Skin Care 29815.0 0.6 0.5 0.0 \n",
" Tools & Accessories 13048.0 0.6 0.5 0.0 \n",
"Electronics Cameras & Photography 3949.0 0.4 0.5 0.0 \n",
" Car Audio, Video & GPS 516.0 0.4 0.5 0.0 \n",
" Cell Phones & Accessories 53153.0 0.7 0.5 0.0 \n",
" Computers & Tablets 7239.0 0.4 0.5 0.0 \n",
" Media 11593.0 0.6 0.5 0.0 \n",
" Other 773.0 0.5 0.5 0.0 \n",
" TV, Audio & Surveillance 9112.0 0.5 0.5 0.0 \n",
" Video Games & Consoles 35940.0 0.5 0.5 0.0 \n",
"Handmade Accessories 5953.0 0.7 0.4 0.0 \n",
" Art 655.0 0.6 0.5 0.0 \n",
" Bags and Purses 3169.0 0.4 0.5 0.0 \n",
" Books and Zines 46.0 0.5 0.5 0.0 \n",
" Candles 64.0 0.2 0.4 0.0 \n",
" Ceramics and Pottery 57.0 0.3 0.5 0.0 \n",
" Children 1242.0 0.6 0.5 0.0 \n",
" Clothing 5637.0 0.5 0.5 0.0 \n",
" Crochet 234.0 0.5 0.5 0.0 \n",
" Dolls and Miniatures 49.0 0.4 0.5 0.0 \n",
" Furniture 3.0 0.0 0.0 0.0 \n",
" Geekery 110.0 0.7 0.5 0.0 \n",
" Glass 592.0 0.4 0.5 0.0 \n",
" Holidays 325.0 0.5 0.5 0.0 \n",
" Housewares 431.0 0.4 0.5 0.0 \n",
"... ... ... ... ... \n",
"Vintage & Collectibles Collectibles 5810.0 0.5 0.5 0.0 \n",
" Electronics 2729.0 0.5 0.5 0.0 \n",
" Furniture 10.0 0.2 0.4 0.0 \n",
" Home Decor 476.0 0.3 0.4 0.0 \n",
" Housewares 1732.0 0.2 0.4 0.0 \n",
" Jewelry 2521.0 0.6 0.5 0.0 \n",
" Other 410.0 0.6 0.5 0.0 \n",
" Paper Ephemera 165.0 0.6 0.5 0.0 \n",
" Serving 1886.0 0.5 0.5 0.0 \n",
" Supplies 1112.0 0.5 0.5 0.0 \n",
" Toy 7335.0 0.6 0.5 0.0 \n",
" Trading Cards 6480.0 0.9 0.3 0.0 \n",
"Women Athletic Apparel 123395.0 0.4 0.5 0.0 \n",
" Coats & Jackets 15068.0 0.3 0.5 0.0 \n",
" Dresses 45724.0 0.4 0.5 0.0 \n",
" Jeans 32305.0 0.3 0.5 0.0 \n",
" Jewelry 56728.0 0.6 0.5 0.0 \n",
" Maternity 3356.0 0.4 0.5 0.0 \n",
" Other 7252.0 0.4 0.5 0.0 \n",
" Pants 8403.0 0.3 0.5 0.0 \n",
" Shoes 77575.0 0.2 0.4 0.0 \n",
" Skirts 10620.0 0.4 0.5 0.0 \n",
" Suits & Blazers 1910.0 0.3 0.5 0.0 \n",
" Sweaters 34298.0 0.3 0.5 0.0 \n",
" Swimwear 18416.0 0.3 0.5 0.0 \n",
" Tops & Blouses 106889.0 0.4 0.5 0.0 \n",
" Underwear 33756.0 0.5 0.5 0.0 \n",
" Women's Accessories 42292.0 0.5 0.5 0.0 \n",
" Women's Handbags 45480.0 0.3 0.5 0.0 \n",
"nan 6311.0 0.4 0.5 0.0 \n",
"\n",
" \n",
" 25% 50% 75% max \n",
"category1 category2 \n",
"Beauty Bath & Body 0.0 1.0 1.0 1.0 \n",
" Fragrance 0.0 0.0 1.0 1.0 \n",
" Hair Care 0.0 0.0 1.0 1.0 \n",
" Makeup 0.0 1.0 1.0 1.0 \n",
" Other 0.0 1.0 1.0 1.0 \n",
" Skin Care 0.0 1.0 1.0 1.0 \n",
" Tools & Accessories 0.0 1.0 1.0 1.0 \n",
"Electronics Cameras & Photography 0.0 0.0 1.0 1.0 \n",
" Car Audio, Video & GPS 0.0 0.0 1.0 1.0 \n",
" Cell Phones & Accessories 0.0 1.0 1.0 1.0 \n",
" Computers & Tablets 0.0 0.0 1.0 1.0 \n",
" Media 0.0 1.0 1.0 1.0 \n",
" Other 0.0 1.0 1.0 1.0 \n",
" TV, Audio & Surveillance 0.0 1.0 1.0 1.0 \n",
" Video Games & Consoles 0.0 1.0 1.0 1.0 \n",
"Handmade Accessories 0.0 1.0 1.0 1.0 \n",
" Art 0.0 1.0 1.0 1.0 \n",
" Bags and Purses 0.0 0.0 1.0 1.0 \n",
" Books and Zines 0.0 0.0 1.0 1.0 \n",
" Candles 0.0 0.0 0.0 1.0 \n",
" Ceramics and Pottery 0.0 0.0 1.0 1.0 \n",
" Children 0.0 1.0 1.0 1.0 \n",
" Clothing 0.0 0.0 1.0 1.0 \n",
" Crochet 0.0 1.0 1.0 1.0 \n",
" Dolls and Miniatures 0.0 0.0 1.0 1.0 \n",
" Furniture 0.0 0.0 0.0 0.0 \n",
" Geekery 0.0 1.0 1.0 1.0 \n",
" Glass 0.0 0.0 1.0 1.0 \n",
" Holidays 0.0 0.0 1.0 1.0 \n",
" Housewares 0.0 0.0 1.0 1.0 \n",
"... ... ... ... ... \n",
"Vintage & Collectibles Collectibles 0.0 1.0 1.0 1.0 \n",
" Electronics 0.0 1.0 1.0 1.0 \n",
" Furniture 0.0 0.0 0.0 1.0 \n",
" Home Decor 0.0 0.0 1.0 1.0 \n",
" Housewares 0.0 0.0 0.0 1.0 \n",
" Jewelry 0.0 1.0 1.0 1.0 \n",
" Other 0.0 1.0 1.0 1.0 \n",
" Paper Ephemera 0.0 1.0 1.0 1.0 \n",
" Serving 0.0 0.0 1.0 1.0 \n",
" Supplies 0.0 1.0 1.0 1.0 \n",
" Toy 0.0 1.0 1.0 1.0 \n",
" Trading Cards 1.0 1.0 1.0 1.0 \n",
"Women Athletic Apparel 0.0 0.0 1.0 1.0 \n",
" Coats & Jackets 0.0 0.0 1.0 1.0 \n",
" Dresses 0.0 0.0 1.0 1.0 \n",
" Jeans 0.0 0.0 1.0 1.0 \n",
" Jewelry 0.0 1.0 1.0 1.0 \n",
" Maternity 0.0 0.0 1.0 1.0 \n",
" Other 0.0 0.0 1.0 1.0 \n",
" Pants 0.0 0.0 1.0 1.0 \n",
" Shoes 0.0 0.0 0.0 1.0 \n",
" Skirts 0.0 0.0 1.0 1.0 \n",
" Suits & Blazers 0.0 0.0 1.0 1.0 \n",
" Sweaters 0.0 0.0 1.0 1.0 \n",
" Swimwear 0.0 0.0 1.0 1.0 \n",
" Tops & Blouses 0.0 0.0 1.0 1.0 \n",
" Underwear 0.0 0.0 1.0 1.0 \n",
" Women's Accessories 0.0 0.0 1.0 1.0 \n",
" Women's Handbags 0.0 0.0 1.0 1.0 \n",
"nan 0.0 0.0 1.0 1.0 \n",
"\n",
"[139 rows x 16 columns]"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Summary statistics of price and shipping for every (category1, category2) pair.\n",
"# The two columns are selected before describe() so that statistics are not\n",
"# computed for columns that would be thrown away immediately afterwards.\n",
"df_b1k.groupby(['category1','category2'])[['price','shipping']].describe().round(1)\n"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"import seaborn as sns#; sns.set(style=\"ticks\", color_codes=True)\n",
"\n",
"# g = sns.pairplot(df[['item_condition_id', 'shipping', 'price']])"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/local/lib/python3.6/site-packages/sklearn/cross_validation.py:41: DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved. Also note that the interface of the new CV iterators are different from that of this module. This module will be removed in 0.20.\n",
" \"This module will be removed in 0.20.\", DeprecationWarning)\n"
]
}
],
"source": [
"# sklearn.cross_validation was deprecated in 0.18 and is removed in 0.20;\n",
"# sklearn.model_selection provides the same helpers.\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.model_selection import cross_val_score\n",
"from sklearn.linear_model import LinearRegression\n",
"from sklearn.metrics import accuracy_score, classification_report\n",
"from sklearn.linear_model import Ridge, RidgeCV, ElasticNet, LassoCV, LassoLarsCV\n",
"\n",
"def get_clf_datas(df, clf=LinearRegression, rate=0.3, verbose=True):\n",
"    \"\"\"Split a frame into train/test arrays and instantiate an estimator.\n",
"\n",
"    The frame must already be in its final form with the column to predict\n",
"    at the right-most position; every other column is used as a feature.\n",
"\n",
"    Args:\n",
"        df: DataFrame whose last column is the value to predict.\n",
"        clf: Estimator class (not an instance) to construct.\n",
"        rate: Passed to train_test_split as test_size.\n",
"        verbose: When true, print the number of train and test rows.\n",
"\n",
"    Returns:\n",
"        Tuple (estimator, X_train, X_test, y_train, y_test).\n",
"\n",
"    Raises:\n",
"        TypeError: If clf cannot be constructed even without keyword arguments.\n",
"    \"\"\"\n",
"    datas = df.values\n",
"    X, y = datas[:, :-1], datas[:, -1]\n",
"    # random_state is pinned so that repeated runs produce the same split\n",
"    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=rate, random_state=0)\n",
"\n",
"    # Any estimator class may be passed in. Start with the richest keyword set and\n",
"    # retry with fewer keywords only when the constructor rejects one (TypeError);\n",
"    # any other error is a real problem and is allowed to propagate.\n",
"    try:\n",
"        model = clf(normalize=False, n_jobs=-1)\n",
"    except TypeError:\n",
"        try:\n",
"            model = clf(normalize=False)\n",
"        except TypeError:\n",
"            model = clf()\n",
"    if verbose:\n",
"        print('train:', X_train.shape[0], '\\ntest: ', X_test.shape[0])\n",
"    return model, X_train, X_test, y_train, y_test"
]
},
{
"cell_type": "code",
"execution_count": 54,
"metadata": {},
"outputs": [],
"source": [
"# Columns left out of the modelling frame. category1, category2 and\n",
"# item_condition_id are not in this list, so they stay in as features.\n",
"df_t = df.drop(columns=[\n",
"    'train_id',\n",
"    'name',\n",
"    'category_name',\n",
"    'item_description',\n",
"    'brand_name',\n",
"    'category3',\n",
"])\n"
]
},
{
"cell_type": "code",
"execution_count": 55,
"metadata": {},
"outputs": [],
"source": [
"df_t = pd.get_dummies(df_t)"
]
},
{
"cell_type": "code",
"execution_count": 56,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>item_condition_id</th>\n",
" <th>shipping</th>\n",
" <th>category1_Beauty</th>\n",
" <th>category1_Electronics</th>\n",
" <th>category1_Handmade</th>\n",
" <th>category1_Home</th>\n",
" <th>category1_Kids</th>\n",
" <th>category1_Men</th>\n",
" <th>category1_Other</th>\n",
" <th>category1_Sports &amp; Outdoors</th>\n",
" <th>...</th>\n",
" <th>category2_Toy</th>\n",
" <th>category2_Toys</th>\n",
" <th>category2_Trading Cards</th>\n",
" <th>category2_Underwear</th>\n",
" <th>category2_Video Games &amp; Consoles</th>\n",
" <th>category2_Weddings</th>\n",
" <th>category2_Women's Accessories</th>\n",
" <th>category2_Women's Handbags</th>\n",
" <th>category2_Woodworking</th>\n",
" <th>price_c</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1482530</th>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1.097189</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1482531</th>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.970422</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1482532</th>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.910235</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1482533</th>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1.336753</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1482534</th>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1.128508</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 128 columns</p>\n",
"</div>"
],
"text/plain": [
" item_condition_id shipping category1_Beauty category1_Electronics \\\n",
"1482530 2 1 0 0 \n",
"1482531 2 0 0 0 \n",
"1482532 2 0 0 0 \n",
"1482533 3 1 0 0 \n",
"1482534 1 0 0 0 \n",
"\n",
" category1_Handmade category1_Home category1_Kids category1_Men \\\n",
"1482530 0 0 0 0 \n",
"1482531 0 0 1 0 \n",
"1482532 0 0 0 0 \n",
"1482533 0 1 0 0 \n",
"1482534 0 0 0 0 \n",
"\n",
" category1_Other category1_Sports & Outdoors ... \\\n",
"1482530 0 0 ... \n",
"1482531 0 0 ... \n",
"1482532 0 1 ... \n",
"1482533 0 0 ... \n",
"1482534 0 0 ... \n",
"\n",
" category2_Toy category2_Toys category2_Trading Cards \\\n",
"1482530 0 0 0 \n",
"1482531 0 0 0 \n",
"1482532 0 0 0 \n",
"1482533 0 0 0 \n",
"1482534 0 0 0 \n",
"\n",
" category2_Underwear category2_Video Games & Consoles \\\n",
"1482530 0 0 \n",
"1482531 0 0 \n",
"1482532 0 0 \n",
"1482533 0 0 \n",
"1482534 0 0 \n",
"\n",
" category2_Weddings category2_Women's Accessories \\\n",
"1482530 0 0 \n",
"1482531 0 0 \n",
"1482532 0 0 \n",
"1482533 0 0 \n",
"1482534 0 1 \n",
"\n",
" category2_Women's Handbags category2_Woodworking price_c \n",
"1482530 0 0 1.097189 \n",
"1482531 0 0 0.970422 \n",
"1482532 0 0 0.910235 \n",
"1482533 0 0 1.336753 \n",
"1482534 0 0 1.128508 \n",
"\n",
"[5 rows x 128 columns]"
]
},
"execution_count": 56,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# df_t['price'] = df_t.price_c\n",
"df_t['price_c'] = np.log(df_t.price_log)\n",
"df_t = df_t.drop(['price_log','price'], axis=1)\n",
"df_t.tail()"
]
},
{
"cell_type": "code",
"execution_count": 57,
"metadata": {},
"outputs": [],
"source": [
"# Draw a 90% random subsample; random_state is pinned so a re-run selects the same rows.\n",
"# NOTE(review): the training cell further down passes df_t, not df_s, to\n",
"# get_clf_datas - confirm which frame was meant to be used.\n",
"df_s = df_t.sample(frac=0.9, random_state=0)"
]
},
{
"cell_type": "code",
"execution_count": 63,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"train: 74083 \n",
"test: 1407578\n",
"train 0.22748858984632006\n",
"test 0.22280029240487786\n"
]
}
],
"source": [
"import xgboost as xgb\n",
"\n",
"# Only the split is needed here; the estimator built by get_clf_datas is discarded.\n",
"# rate=0.95 is forwarded as the test_size of the split.\n",
"X_train, X_test, y_train, y_test = get_clf_datas(df_t, rate=0.95)[1:]\n",
"\n",
"# Settings tried earlier, kept for reference:\n",
"#   xgb.XGBRegressor(n_estimators=360, max_depth=2, learning_rate=0.1)\n",
"#   n_estimators=360, learning_rate=0.001\n",
"clf = xgb.XGBRegressor(max_depth=5)\n",
"\n",
"# Build the prediction model\n",
"clf.fit(X_train, y_train)\n",
"\n",
"# Coefficient of determination on the training rows, then on the held-out rows\n",
"for split_name, features, target in (('train', X_train, y_train), ('test', X_test, y_test)):\n",
"    print(split_name, clf.score(features, target))\n",
"\n",
"# Partial regression coefficients\n",
"# print(pd.DataFrame({\"Name\":df_t.columns[:-1],\n",
"#                     \"Coefficients\":clf.coef_}).sort_values(by='Coefficients') )"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment