andreagrioni/python.ipynb

## python.ipynb
{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "name": "Python.ipynb",
      "provenance": [],
      "collapsed_sections": [],
      "authorship_tag": "ABX9TyM2oebI22AmhLNak77jlLql",
      "include_colab_link": true
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "view-in-github",
        "colab_type": "text"
      },
      "source": [
        "<a href=\"https://colab.research.google.com/gist/andreagrioni/10fb5153b1b5480e229df807a26b6571/python.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "RxipTB-R0Wmy"
      },
      "source": [
        "## Unbalance class\n",
        "\n",
        "Code from [sklearn documentation](https://scikit-learn.org/stable/modules/cross_validation.html#stratification).\n"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "NJ69tCqszcOW",
        "outputId": "0e282c7d-ce0e-4b92-8ece-bea440ff6c6e"
      },
      "source": [
        "# import modules\n",
        "from sklearn.model_selection import StratifiedKFold, KFold\n",
        "# import numpy\n",
        "import numpy as np\n",
        "## generate dummy dataset\n",
        "## the dummy dataset is binary labels, 45 labeled 0 and 5 labeld 1.\n",
        "X, y = np.ones((100, 1)), np.hstack(([0] * 80, [1] * 20))\n",
        "## create StratifiedKFold object\n",
        "## it will allow splitting of dataset into train test\n",
        "## while keeping class ratio\n",
        "skf = StratifiedKFold(n_splits=3, random_state=1, shuffle=True)\n",
        "## split dataset into\n",
        "## train and test\n",
        "for train, test in skf.split(X, y):\n",
        "    print('train -  {}   |   test -  {}'.format(\n",
        "        np.bincount(y[train]), np.bincount(y[test])))"
      ],
      "execution_count": 8,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "train -  [53 13]   |   test -  [27  7]\n",
            "train -  [53 14]   |   test -  [27  6]\n",
            "train -  [54 13]   |   test -  [26  7]\n"
          ]
        }
      ]
    }
  ]
}
	{
	"nbformat": 4,
	"nbformat_minor": 0,
	"metadata": {
	"colab": {
	"name": "Python.ipynb",
	"provenance": [],
	"collapsed_sections": [],
	"authorship_tag": "ABX9TyM2oebI22AmhLNak77jlLql",
	"include_colab_link": true
	},
	"kernelspec": {
	"name": "python3",
	"display_name": "Python 3"
	},
	"language_info": {
	"name": "python"
	}
	},
	"cells": [
	{
	"cell_type": "markdown",
	"metadata": {
	"id": "view-in-github",
	"colab_type": "text"
	},
	"source": [
	"<a href=\"https://colab.research.google.com/gist/andreagrioni/10fb5153b1b5480e229df807a26b6571/python.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {
	"id": "RxipTB-R0Wmy"
	},
	"source": [
	"## Unbalance class\n",
	"\n",
	"Code from [sklearn documentation](https://scikit-learn.org/stable/modules/cross_validation.html#stratification).\n"
	]
	},
	{
	"cell_type": "code",
	"metadata": {
	"colab": {
	"base_uri": "https://localhost:8080/"
	},
	"id": "NJ69tCqszcOW",
	"outputId": "0e282c7d-ce0e-4b92-8ece-bea440ff6c6e"
	},
	"source": [
	"# import modules\n",
	"from sklearn.model_selection import StratifiedKFold, KFold\n",
	"# import numpy\n",
	"import numpy as np\n",
	"## generate dummy dataset\n",
	"## the dummy dataset is binary labels, 45 labeled 0 and 5 labeld 1.\n",
	"X, y = np.ones((100, 1)), np.hstack(([0] * 80, [1] * 20))\n",
	"## create StratifiedKFold object\n",
	"## it will allow splitting of dataset into train test\n",
	"## while keeping class ratio\n",
	"skf = StratifiedKFold(n_splits=3, random_state=1, shuffle=True)\n",
	"## split dataset into\n",
	"## train and test\n",
	"for train, test in skf.split(X, y):\n",
	" print('train - {} \| test - {}'.format(\n",
	" np.bincount(y[train]), np.bincount(y[test])))"
	],
	"execution_count": 8,
	"outputs": [
	{
	"output_type": "stream",
	"name": "stdout",
	"text": [
	"train - [53 13] \| test - [27 7]\n",
	"train - [53 14] \| test - [27 6]\n",
	"train - [54 13] \| test - [26 7]\n"
	]
	}
	]
	}
	]
	}