Skip to content

Instantly share code, notes, and snippets.

@pilifjed
Created May 21, 2020 15:39
Show Gist options
  • Save pilifjed/3fb5c14037e92ef0a767d0db84a50b98 to your computer and use it in GitHub Desktop.
Save pilifjed/3fb5c14037e92ef0a767d0db84a50b98 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 115,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<script>\n",
"code_show=true; \n",
"function code_toggle() {\n",
" if (code_show){\n",
" $('div.input').hide();\n",
" } else {\n",
" $('div.input').show();\n",
" }\n",
" code_show = !code_show\n",
"} \n",
"$( document ).ready(code_toggle);\n",
"</script>\n",
"The raw code for this IPython notebook is by default hidden for easier reading.\n",
"To toggle on/off the raw code, click <a href=\"javascript:code_toggle()\">here</a>."
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"execution_count": 115,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from IPython.display import HTML\n",
"\n",
"# Markup injected into the page: a script that hides every input cell once the\n",
"# document is ready, plus a link that toggles the cells back on and off.\n",
"_toggle_markup = '''<script>\n",
"code_show=true; \n",
"function code_toggle() {\n",
" if (code_show){\n",
" $('div.input').hide();\n",
" } else {\n",
" $('div.input').show();\n",
" }\n",
" code_show = !code_show\n",
"} \n",
"$( document ).ready(code_toggle);\n",
"</script>\n",
"The raw code for this IPython notebook is by default hidden for easier reading.\n",
"To toggle on/off the raw code, click <a href=\"javascript:code_toggle()\">here</a>.'''\n",
"\n",
"# Keep the HTML object as the cell's final expression so the notebook renders it.\n",
"HTML(_toggle_markup)"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"import os\n",
"from matplotlib import pyplot as plt\n",
"from time import ctime\n",
"from sklearn.cluster import KMeans\n",
"from sklearn.manifold import TSNE\n",
"import ipywidgets as widgets\n",
"from IPython.display import display\n",
"from functools import reduce\n",
"from matplotlib.cm import get_cmap"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "bd9c3669b3184d6f98078337d2bfd6f4",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Text(value='../allegro-ED-project/db', description='Path to db: ')"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Editable text box holding the directory that contains the CSV snapshots.\n",
"db_path_widget = widgets.Text(\n",
"    value=\"../allegro-ED-project/db\",\n",
"    description=\"Path to db: \",\n",
"    disabled=False,\n",
")\n",
"display(db_path_widget)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Snapshot of the widget text taken when this cell runs; editing the widget\n",
"# afterwards has no effect until the cell is executed again.\n",
"DB_PATH = db_path_widget.value\n",
"\n",
"def extract_datetime(fname, name):\n",
"    \"\"\"Return the integer encoded in a snapshot file name.\n",
"\n",
"    Every occurrence of ``name + '_'`` and of ``'.csv'`` is removed from\n",
"    ``fname`` and the remaining text is converted with ``int``.\n",
"\n",
"    Raises:\n",
"        ValueError: if the remaining text is not a valid integer literal.\n",
"    \"\"\"\n",
"    stamp_text = fname.replace(name + \"_\", \"\")\n",
"    stamp_text = stamp_text.replace(\".csv\", \"\")\n",
"    return int(stamp_text)\n",
"\n",
"def read_series(product, features=['delivery_cost', 'cost', 'stock'], functions=[np.mean, np.std], db_path=DB_PATH,return_columns=False):\n",
"    \"\"\"Aggregate every CSV snapshot of ``product`` into one time-indexed frame.\n",
"\n",
"    Each file in ``db_path`` whose name contains ``product`` contributes one\n",
"    row: the integer parsed from its name by ``extract_datetime`` (interpreted\n",
"    as seconds since the epoch) followed by ``function(column)`` for every\n",
"    combination of ``functions`` and ``features``.\n",
"\n",
"    Args:\n",
"        product: text that must occur in a file name for the file to be used.\n",
"        features: CSV column names to aggregate.\n",
"        functions: callables applied to each feature column; their\n",
"            ``__name__`` becomes the suffix of the output column names.\n",
"        db_path: directory containing the CSV files.\n",
"        return_columns: when true, also return the list of column names\n",
"            (including the leading ``'time'`` entry).\n",
"\n",
"    Returns:\n",
"        A DataFrame indexed by ``time`` with float32 columns named\n",
"        ``<feature>_<function name>``, or ``(frame, columns)`` when\n",
"        ``return_columns`` is true.\n",
"\n",
"    Raises:\n",
"        AssertionError: if no file name in ``db_path`` contains ``product``.\n",
"    \"\"\"\n",
"    # The list defaults in the signature are only read, never mutated.\n",
"    fnames = sorted(fname for fname in os.listdir(db_path) if product in fname)\n",
"    assert fnames, \"Given product does not exist in database or provided directory is incorrect\"\n",
"    out = []\n",
"    for fname in fnames:\n",
"        data = pd.read_csv(os.path.join(db_path, fname))\n",
"        metrics = [extract_datetime(fname, product)]\n",
"        # Function-major order; it has to match the column names built below.\n",
"        # (The caller's `functions` is honoured here instead of being overwritten.)\n",
"        for function in functions:\n",
"            for feature in features:\n",
"                metrics.append(function(data.loc[:, feature]))\n",
"        out.append(metrics)\n",
"    columns = ['time'] + [\"{0}_{1}\".format(feature, function.__name__) for function in functions for feature in features]\n",
"    series = pd.DataFrame(np.array(out), columns=columns)\n",
"    # Plain column assignment replaces the column, so the converted dtype is\n",
"    # kept; `.loc[:, col] = ...` may instead write into the existing float64\n",
"    # column on newer pandas versions.\n",
"    series['time'] = pd.to_datetime(series['time'], unit='s')\n",
"    for column in columns[1:]:\n",
"        series[column] = series[column].astype(np.float32)\n",
"    series = series.set_index('time')\n",
"    if return_columns:\n",
"        return series, columns\n",
"    return series\n",
"\n",
"def filter_latest(index):\n",
"    \"\"\"Return the last entry of every run of consecutive same-day timestamps.\n",
"\n",
"    ``index`` is walked in order; whenever the calendar date changes, the\n",
"    entry seen just before the change is kept. The final entry is kept as\n",
"    well, so the last day is represented like every other day.\n",
"\n",
"    Args:\n",
"        index: iterable of objects exposing ``.date()``, expected to be in\n",
"            chronological order.\n",
"\n",
"    Returns:\n",
"        A list with one entry per run of equal dates; empty for empty input.\n",
"    \"\"\"\n",
"    out = []\n",
"    prev = None\n",
"    for curr in index:\n",
"        if prev is not None and prev.date() != curr.date():\n",
"            out.append(prev)\n",
"        prev = curr\n",
"    if prev is not None:\n",
"        # Flush the trailing run; without this the most recent day is lost.\n",
"        out.append(prev)\n",
"    return out\n",
"\n",
"def standarize(data, features=None):\n",
"    \"\"\"Return a frame shaped like ``data`` with the chosen columns z-scored.\n",
"\n",
"    For every selected column the column mean (``np.mean``) is subtracted and\n",
"    the result is divided by the column's ``np.std``. Columns that are not\n",
"    selected keep the placeholder values produced by ``reindex_like`` on an\n",
"    empty frame.\n",
"\n",
"    Args:\n",
"        data: DataFrame to standardise.\n",
"        features: column labels to process; all columns when ``None``.\n",
"    \"\"\"\n",
"    selected = data.columns.values if features is None else features\n",
"    out_df = pd.DataFrame().reindex_like(data)\n",
"    for feature in selected:\n",
"        column = data.loc[:, feature]\n",
"        out_df.loc[:, feature] = (column - np.mean(column)) / np.std(column)\n",
"    return out_df\n",
"\n",
"def read_preprocessed_series(product):\n",
"    \"\"\"Load ``product`` and return its standardised, date-indexed metrics.\n",
"\n",
"    The raw series from ``read_series`` is reduced to the rows whose\n",
"    timestamps ``filter_latest`` selects, the index is replaced by the plain\n",
"    calendar dates, and the result is passed through ``standarize``.\n",
"    \"\"\"\n",
"    raw = read_series(product)\n",
"    kept_stamps = filter_latest(raw.index)\n",
"    daily = raw.loc[kept_stamps]\n",
"    daily.index = daily.index.map(lambda stamp: stamp.date())\n",
"    return standarize(daily)\n",
"\n",
"def get_db_available_products(db_path=DB_PATH):\n",
"    \"\"\"Return the set of product names found in ``db_path``.\n",
"\n",
"    A product name is the part of a directory entry's name before the first\n",
"    underscore. The ``db_path`` argument is the directory that gets listed\n",
"    (previously the global ``DB_PATH`` was always used, whatever was passed).\n",
"    \"\"\"\n",
"    return {fname.split(\"_\")[0] for fname in os.listdir(db_path)}\n",
"\n",
"def get_layout(checkboxes,width=4):\n",
"    \"\"\"Arrange widgets in a grid: rows of ``width`` items stacked vertically.\n",
"\n",
"    Args:\n",
"        checkboxes: iterable of widgets, laid out in iteration order.\n",
"        width: maximum number of widgets per row (previously ignored in\n",
"            favour of a hard-coded 4).\n",
"\n",
"    Returns:\n",
"        A ``widgets.VBox`` whose children are ``widgets.HBox`` rows; the last\n",
"        row may be shorter than ``width``.\n",
"\n",
"    Raises:\n",
"        ValueError: if ``width`` is less than 1.\n",
"    \"\"\"\n",
"    if width < 1:\n",
"        raise ValueError(\"width must be a positive integer\")\n",
"    items = list(checkboxes)\n",
"    rows = [widgets.HBox(items[start:start + width]) for start in range(0, len(items), width)]\n",
"    return widgets.VBox(rows)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"products = get_db_available_products()\n",
"\n",
"# One standardised frame per product, in the iteration order of `products`;\n",
"# later cells zip `products` with these lists and rely on that shared order.\n",
"series = [read_preprocessed_series(product) for product in products]\n",
"\n",
"# Union of every index value seen in any product, so all frames can be\n",
"# aligned on one common time axis (an empty product list yields an empty set).\n",
"indexes = set().union(*(s.index.values for s in series))\n",
"# The target index is the same for every product, so build it only once.\n",
"common_index = pd.to_datetime(sorted(indexes))\n",
"# Reindexing introduces gaps for dates a product lacks; fill them by\n",
"# time-weighted interpolation, called as a method on the frame itself.\n",
"interpolated = [s.reindex(index=common_index).interpolate(method='time') for s in series]"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "0364d55df9074fb3bce3e09a8951087f",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"VBox(children=(HBox(children=(Dropdown(description='metric', index=1, options=('delivery_cost_mean', 'cost_mea…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "9bba2330cb8347cc9e937ba68ccf9d3f",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Output()"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Metric columns taken from the first product's frame; the second one is the\n",
"# initial dropdown selection.\n",
"_metric_names = interpolated[0].columns.values\n",
"metric_value = widgets.Dropdown(\n",
"    options=_metric_names,\n",
"    value=_metric_names[1],\n",
"    description=\"metric\",\n",
")\n",
"n_clusters = widgets.BoundedIntText(value=4, min=2, max=len(products), description=\"clusters\")\n",
"show_clusters = widgets.Checkbox(True, description=\"show clusters\")\n",
"first_derivative = widgets.Checkbox(False, description=\"first derivative\")\n",
"\n",
"# One visibility checkbox per product, keyed by product name.\n",
"checkbox = {name: widgets.Checkbox(True, description=name) for name in products}\n",
"other = dict(\n",
"    metric_value=metric_value,\n",
"    n_clusters=n_clusters,\n",
"    show_clusters=show_clusters,\n",
"    first_derivative=first_derivative,\n",
")\n",
"# Control widgets first, then the per-product checkboxes.\n",
"param_dict = dict(other)\n",
"param_dict.update(checkbox)\n",
"checkbox_layout = get_layout(list(checkbox.values()))\n",
"controls_row = widgets.HBox([metric_value,n_clusters, show_clusters, first_derivative])\n",
"ui = widgets.VBox([controls_row, checkbox_layout])\n",
"cmap = get_cmap('gist_ncar')\n",
"\n",
"def draw_metric(metric_value, n_clusters, show_clusters, first_derivative, **chb):\n",
"    \"\"\"Plot one metric for every checked product, coloured by cluster or by position.\n",
"\n",
"    Args:\n",
"        metric_value: name of the column to plot.\n",
"        n_clusters: number of KMeans clusters used when ``show_clusters`` is set.\n",
"        show_clusters: colour lines by KMeans cluster; otherwise each product\n",
"            gets a colour derived from its position.\n",
"        first_derivative: plot first differences instead of the raw values.\n",
"        **chb: one boolean per product, in the same order as ``interpolated``;\n",
"            only products whose flag is true are drawn.\n",
"    \"\"\"\n",
"    frames = [frame.diff()[1:] for frame in interpolated] if first_derivative else interpolated\n",
"    clusterised = np.array([frame[metric_value] for frame in frames])\n",
"    flags = list(chb.values())\n",
"    if show_clusters:\n",
"        color_number = n_clusters\n",
"        predictions = KMeans(n_clusters=n_clusters).fit_predict(clusterised)\n",
"    else:\n",
"        color_number = len(flags)\n",
"        predictions = list(range(0, color_number))\n",
"    for frame, is_checked, group in zip(frames, flags, predictions):\n",
"        if not is_checked:\n",
"            continue\n",
"        frame[metric_value].plot(figsize=(20,10), marker=\"o\", color=cmap(group / color_number))\n",
"    # Legend entries follow plotting order: the checked products only.\n",
"    plt.legend([name for name, is_checked in zip(products, flags) if is_checked])\n",
"\n",
"# Redraw the plot whenever any of the bound widgets changes.\n",
"out = widgets.interactive_output(draw_metric, param_dict)\n",
"display(ui, out)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "4c6622f7a932465dbbb57b08d85e29b4",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"HBox(children=(BoundedIntText(value=5, description='perplexity', min=1),))"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "0d8887069dc249d395147f51d1b54e7a",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Output()"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"perplexity = widgets.BoundedIntText(value=5, min=1, max=100, description=\"perplexity\")\n",
"tsne_ui = widgets.HBox([perplexity])\n",
"# `first_derivative` is bound as well, so the plot receives the checkbox *value*.\n",
"tsne_other = {\"metric_value\": metric_value, \"n_clusters\": n_clusters, \"show_clusters\":show_clusters, \"first_derivative\": first_derivative, \"perplexity\": perplexity}\n",
"tsne_param_dict = {**tsne_other, **checkbox}\n",
"\n",
"def draw_tsne(metric_value, n_clusters, perplexity, show_clusters, first_derivative=False, **chb):\n",
"    \"\"\"Scatter-plot a 2-D t-SNE embedding of the products' series for one metric.\n",
"\n",
"    Args:\n",
"        metric_value: name of the column fed to the embedding.\n",
"        n_clusters: number of KMeans clusters used when ``show_clusters`` is set.\n",
"        perplexity: perplexity passed to ``TSNE``.\n",
"        show_clusters: colour points by KMeans cluster; otherwise by position.\n",
"        first_derivative: embed first differences instead of the raw values.\n",
"            Previously the function read the global checkbox widget here,\n",
"            which is always truthy, so differencing could not be turned off.\n",
"        **chb: one boolean per product, in the same order as ``interpolated``;\n",
"            only products whose flag is true are drawn.\n",
"    \"\"\"\n",
"    if first_derivative:\n",
"        to_process = [i.diff()[1:] for i in interpolated]\n",
"    else:\n",
"        to_process = interpolated\n",
"    clusterised = np.array([i[metric_value] for i in to_process])\n",
"    # Boolean mask selecting the checked products.\n",
"    selected = np.array(list(chb.values()), dtype=bool)\n",
"    if show_clusters:\n",
"        color_number = n_clusters\n",
"        predictions = KMeans(n_clusters=n_clusters).fit_predict(clusterised)\n",
"    else:\n",
"        color_number = len(selected)\n",
"        predictions = np.arange(color_number)\n",
"    # The embedding uses every product; unchecked ones are only hidden afterwards.\n",
"    X_embedded = TSNE(n_components=2, perplexity=perplexity).fit_transform(clusterised)\n",
"    filtered_X = X_embedded[selected, :]\n",
"    filtered_pred = predictions[selected] / color_number\n",
"    plt.figure(figsize=(20,10))\n",
"    plt.scatter(filtered_X[:,0], filtered_X[:,1], c=cmap(filtered_pred))\n",
"\n",
"out = widgets.interactive_output(draw_tsne, tsne_param_dict)\n",
"display(tsne_ui, out)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.9"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment