Skip to content

Instantly share code, notes, and snippets.

@x1001000
Created November 4, 2022 01:22
Show Gist options
  • Save x1001000/9c3023e278d7a9c6b162cd5a42155ea9 to your computer and use it in GitHub Desktop.
Save x1001000/9c3023e278d7a9c6b162cd5a42155ea9 to your computer and use it in GitHub Desktop.
HuangShiuan's code with its output
Display the source blob
Display the rendered blob
Raw
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": [],
"authorship_tag": "ABX9TyNC9F+cWBz7lhcWPRXiAczs",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/x1001000/9c3023e278d7a9c6b162cd5a42155ea9/huangshiuans-code-with-its-output.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "code",
"source": [
"import numpy as np\n",
"from sklearn.datasets import load_diabetes\n",
"\n",
"# == cut target into 10 equal-width levels (labels 1..bins)\n",
"data, target = load_diabetes(return_X_y=True)\n",
"bins = 10\n",
"target_min = target.min()\n",
"bin_len = (target.max() - target_min) / bins\n",
"# interior edges only: the lowest and highest levels are open-ended\n",
"cuts = target_min + bin_len * np.arange(1, bins)\n",
"# right=True makes every level right-inclusive, (cuts[k-1], cuts[k]].\n",
"# The previous loop tested the top level with `>=`, so a value equal to the\n",
"# last cut was moved out of level bins-1 and into level bins.\n",
"label = np.digitize(target, cuts, right=True) + 1\n",
"\n",
"# == separate data into train and test (the last sample is the test point)\n",
"data_tra = data[:-1, :]\n",
"label_tra = label[:-1].reshape(-1, 1)\n",
"data_tes = data[-1, :].reshape(1, -1)\n",
"label_tes = label[-1].reshape(1, 1)"
],
"metadata": {
"id": "lRGea8vDCJsP"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "bRyTgBq8zI4J"
},
"outputs": [],
"source": [
"# %% initial data\n",
"def gen_data(bins=10):\n",
"    \"\"\"Load the diabetes data, bin its target and hold out the last sample.\n",
"\n",
"    The continuous target is cut into `bins` equal-width levels labelled\n",
"    1..bins. Every level is right-inclusive: a value equal to a cut belongs\n",
"    to the level below that cut.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    bins : int, default 10\n",
"        Number of levels to cut the target into; must be at least 1.\n",
"\n",
"    Returns\n",
"    -------\n",
"    data_tra : ndarray, shape (n_samples - 1, n_features)\n",
"        Features of every sample except the last one.\n",
"    label_tra : ndarray of int, shape (n_samples - 1, 1)\n",
"        Level of every sample except the last one.\n",
"    data_tes : ndarray, shape (1, n_features)\n",
"        Features of the last sample.\n",
"    label_tes : ndarray of int, shape (1, 1)\n",
"        Level of the last sample.\n",
"\n",
"    Raises\n",
"    ------\n",
"    ValueError\n",
"        If `bins` is smaller than 1.\n",
"    \"\"\"\n",
"    # imported here so the function can be copied into another notebook as-is\n",
"    import numpy as np\n",
"    from sklearn.datasets import load_diabetes\n",
"\n",
"    if bins < 1:\n",
"        raise ValueError('bins must be a positive integer')\n",
"\n",
"    # == cut target into `bins` levels\n",
"    data, target = load_diabetes(return_X_y=True)\n",
"    target_min = target.min()\n",
"    bin_len = (target.max() - target_min) / bins\n",
"    # interior edges only: the lowest and highest levels are open-ended\n",
"    cuts = target_min + bin_len * np.arange(1, bins)\n",
"    # right=True keeps all edges right-inclusive; the former loop used `>=` for\n",
"    # the top level only, which disagreed with the other edges\n",
"    label = np.digitize(target, cuts, right=True) + 1\n",
"\n",
"    # == separate data into train and test (the last sample is the test point)\n",
"    data_tra = data[:-1, :]\n",
"    label_tra = label[:-1].reshape(-1, 1)\n",
"    data_tes = data[-1, :].reshape(1, -1)\n",
"    label_tes = label[-1].reshape(1, 1)\n",
"\n",
"    return data_tra, label_tra, data_tes, label_tes"
]
},
{
"cell_type": "code",
"source": [
"# %% generate contour plot\n",
"def gen_contourplot(data_tra, label_tra, data_tes, label_tes, mesh=2000, random_state=0):\n",
"    \"\"\"Draw a 2-D t-SNE map shaded by training label and mark the test sample.\n",
"\n",
"    Train and test rows are stacked, the label column is appended to the\n",
"    features, and the result is embedded with t-SNE. The training points are\n",
"    spread over a regular grid by nearest-neighbour interpolation and drawn\n",
"    as filled contours; the last embedded row is drawn as a green marker.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    data_tra, data_tes : ndarray\n",
"        Training and test features; they are stacked row-wise.\n",
"    label_tra, label_tes : ndarray\n",
"        Column vectors of labels matching the rows of the feature arrays.\n",
"        Only the last stacked row is treated as the test sample.\n",
"    mesh : int, default 2000\n",
"        Number of grid points along each axis of the interpolation grid.\n",
"    random_state : int or None, default 0\n",
"        Seed handed to TSNE so a re-run draws the same map; pass None to get\n",
"        a different embedding on every call.\n",
"\n",
"    Returns\n",
"    -------\n",
"    matplotlib.figure.Figure\n",
"        The figure holding the contour map.\n",
"    \"\"\"\n",
"    # imported here so the function can be copied into another notebook as-is\n",
"    import numpy as np\n",
"    import matplotlib.pyplot as plt\n",
"    from scipy.interpolate import griddata\n",
"    from sklearn.manifold import TSNE\n",
"\n",
"    # == tsne transform (the label is embedded together with the features)\n",
"    tsne_data = np.vstack([data_tra, data_tes])\n",
"    tsne_level = np.vstack([label_tra, label_tes])\n",
"    tsne_trs = TSNE(learning_rate=200, init='pca', random_state=random_state).fit_transform(\n",
"        np.hstack([tsne_data, tsne_level]))\n",
"\n",
"    # == generate plt data (every row but the last, which is the test sample)\n",
"    plt_x = tsne_trs[:-1, 0]\n",
"    plt_y = tsne_trs[:-1, 1]\n",
"    plt_z = tsne_level[:-1, 0]\n",
"    # pad the grid by one unit so the outermost points are not on the border\n",
"    x_pt = np.linspace(plt_x.min() - 1, plt_x.max() + 1, mesh)\n",
"    y_pt = np.linspace(plt_y.min() - 1, plt_y.max() + 1, mesh)\n",
"    x_grid, y_grid = np.meshgrid(x_pt, y_pt)\n",
"    z_pt = griddata((plt_x, plt_y), plt_z, (x_grid, y_grid), method='nearest')\n",
"\n",
"    # == plot data in 2-D\n",
"    fig, ax = plt.subplots(dpi=130, figsize=(5, 4))\n",
"    fig.subplots_adjust(top=1, bottom=0, right=1, left=0, hspace=0, wspace=0)\n",
"    # the integer is the number of contour levels matplotlib is asked for\n",
"    ax.contourf(x_pt, y_pt, z_pt, int(plt_z.max()) + 1, cmap='YlGnBu', alpha=0.8)\n",
"    test_x, test_y = tsne_trs[-1, :]\n",
"    ax.scatter(test_x, test_y, s=80, marker='v', linewidths=1.5,\n",
"               edgecolors='#008800', color='#00FF00')\n",
"\n",
"    # == add info in plot (placed near the lower-right corner of the axes)\n",
"    y_min, y_max = ax.get_ylim()\n",
"    x_min, x_max = ax.get_xlim()\n",
"    unit_y = (y_max - y_min) / 100\n",
"    unit_x = (x_max - x_min) / 100\n",
"    bbox_props = dict(boxstyle='round', fc='w', ec='0.5', alpha=0.5)\n",
"    ax.annotate('demo version', xy=(x_max - 15 * unit_x, y_min + 5 * unit_y),\n",
"                fontsize=6, alpha=0.5, bbox=bbox_props)\n",
"    ax.axis('off')\n",
"\n",
"    return fig"
],
"metadata": {
"id": "wqfmtY-T0G_U"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"data_tra,label_tra,data_tes,label_tes = gen_data()"
],
"metadata": {
"id": "uE30OjmY0J2p"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"fig = gen_contourplot(data_tra,label_tra,data_tes,label_tes)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 617
},
"id": "dFX9wPhd0M0H",
"outputId": "f94a1a99-758f-4b4f-dc7d-2a159a190f65"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"/usr/local/lib/python3.7/dist-packages/sklearn/manifold/_t_sne.py:986: FutureWarning: The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.\n",
" FutureWarning,\n"
]
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"<Figure size 650x520 with 1 Axes>"
],
"image/png": "\n"
},
"metadata": {
"needs_background": "light"
}
}
]
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment