Skip to content

Instantly share code, notes, and snippets.

@thiagolcks
Created December 10, 2017 02:56
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save thiagolcks/a46947123eb9f3d7c4656682c4837527 to your computer and use it in GitHub Desktop.
Save thiagolcks/a46947123eb9f3d7c4656682c4837527 to your computer and use it in GitHub Desktop.
CDiscount - Error
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import bson\n",
"import numpy as np\n",
"import pandas as pd\n",
"import os\n",
"from tqdm import tqdm_notebook"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Test Files"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "ff0cef286f9f427d96004de64e06285b",
"version_major": 2,
"version_minor": 0
},
"text/html": [
"<p>Failed to display Jupyter Widget of type <code>HBox</code>.</p>\n",
"<p>\n",
" If you're reading this message in the Jupyter Notebook or JupyterLab Notebook, it may mean\n",
" that the widgets JavaScript is still loading. If this message persists, it\n",
" likely means that the widgets JavaScript library is either not installed or\n",
" not enabled. See the <a href=\"https://ipywidgets.readthedocs.io/en/stable/user_install.html\">Jupyter\n",
" Widgets Documentation</a> for setup instructions.\n",
"</p>\n",
"<p>\n",
" If you're reading this message in another frontend (for example, a static\n",
" rendering on GitHub or <a href=\"https://nbviewer.jupyter.org/\">NBViewer</a>),\n",
" it may mean that your frontend doesn't currently support widgets.\n",
"</p>\n"
],
"text/plain": [
"HBox(children=(IntProgress(value=0, max=1768182), HTML(value='')))"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n"
]
}
],
"source": [
"out_folder = 'data/Cdiscount/test'\n",
"\n",
"# Create output folder\n",
"if not os.path.exists(out_folder):\n",
" os.makedirs(out_folder)\n",
"\n",
"num_products = 1768182 # 7069896 for train and 1768182 for test\n",
"\n",
"bar = tqdm_notebook(total=num_products)\n",
"with open('data/cdiscount/test.bson', 'rb') as fbson:\n",
"\n",
" data = bson.decode_file_iter(fbson)\n",
" \n",
" for c, d in enumerate(data):\n",
" _id = d['_id']\n",
" for e, pic in enumerate(d['imgs']):\n",
" fname = os.path.join(out_folder, '{}-{}.jpg'.format(_id, e))\n",
" with open(fname, 'wb') as f:\n",
" f.write(pic['picture'])\n",
"\n",
" bar.update()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Train Files"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"out_folder = 'data/cdiscount/train'\n",
"\n",
"# Create output folder\n",
"if not os.path.exists(out_folder):\n",
" os.makedirs(out_folder)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "68b2c0e61aaa4275862113914a68a152",
"version_major": 2,
"version_minor": 0
},
"text/html": [
"<p>Failed to display Jupyter Widget of type <code>HBox</code>.</p>\n",
"<p>\n",
" If you're reading this message in the Jupyter Notebook or JupyterLab Notebook, it may mean\n",
" that the widgets JavaScript is still loading. If this message persists, it\n",
" likely means that the widgets JavaScript library is either not installed or\n",
" not enabled. See the <a href=\"https://ipywidgets.readthedocs.io/en/stable/user_install.html\">Jupyter\n",
" Widgets Documentation</a> for setup instructions.\n",
"</p>\n",
"<p>\n",
" If you're reading this message in another frontend (for example, a static\n",
" rendering on GitHub or <a href=\"https://nbviewer.jupyter.org/\">NBViewer</a>),\n",
" it may mean that your frontend doesn't currently support widgets.\n",
"</p>\n"
],
"text/plain": [
"HBox(children=(IntProgress(value=0, max=5270), HTML(value='')))"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Create categories folders\n",
"categories = pd.read_csv('data/cdiscount/category_names.csv', index_col='category_id')\n",
"\n",
"for category in tqdm_notebook(categories.index):\n",
" os.mkdir(os.path.join(out_folder, str(category)))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "cd9ddd1cf1124bc2ae2a449042354792",
"version_major": 2,
"version_minor": 0
},
"text/html": [
"<p>Failed to display Jupyter Widget of type <code>HBox</code>.</p>\n",
"<p>\n",
" If you're reading this message in the Jupyter Notebook or JupyterLab Notebook, it may mean\n",
" that the widgets JavaScript is still loading. If this message persists, it\n",
" likely means that the widgets JavaScript library is either not installed or\n",
" not enabled. See the <a href=\"https://ipywidgets.readthedocs.io/en/stable/user_install.html\">Jupyter\n",
" Widgets Documentation</a> for setup instructions.\n",
"</p>\n",
"<p>\n",
" If you're reading this message in another frontend (for example, a static\n",
" rendering on GitHub or <a href=\"https://nbviewer.jupyter.org/\">NBViewer</a>),\n",
" it may mean that your frontend doesn't currently support widgets.\n",
"</p>\n"
],
"text/plain": [
"HBox(children=(IntProgress(value=0, max=7069896), HTML(value='')))"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Exception in thread Thread-4:\n",
"Traceback (most recent call last):\n",
" File \"/home/paperspace/anaconda3/envs/fastai/lib/python3.6/threading.py\", line 916, in _bootstrap_inner\n",
" self.run()\n",
" File \"/home/paperspace/anaconda3/envs/fastai/lib/python3.6/site-packages/tqdm/_tqdm.py\", line 144, in run\n",
" for instance in self.tqdm_cls._instances:\n",
" File \"/home/paperspace/anaconda3/envs/fastai/lib/python3.6/_weakrefset.py\", line 60, in __iter__\n",
" for itemref in self.data:\n",
"RuntimeError: Set changed size during iteration\n",
"\n"
]
}
],
"source": [
"num_products = 7069896 # 7069896 for train and 1768182 for test\n",
"\n",
"bar = tqdm_notebook(total=num_products)\n",
"with open('data/cdiscount/train.bson', 'rb') as fbson:\n",
"\n",
" data = bson.decode_file_iter(fbson)\n",
" \n",
" for c, d in enumerate(data):\n",
" category = d['category_id']\n",
" _id = d['_id']\n",
" for e, pic in enumerate(d['imgs']):\n",
" fname = os.path.join(out_folder, str(category), '{}-{}.jpg'.format(_id, e))\n",
" with open(fname, 'wb') as f:\n",
" f.write(pic['picture'])\n",
"\n",
" bar.update()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Display the source blob
Display the rendered blob
Raw
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment