Created
November 22, 2021 08:55
-
-
Save andreagrioni/10fb5153b1b5480e229df807a26b6571 to your computer and use it in GitHub Desktop.
Python.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"nbformat": 4, | |
"nbformat_minor": 0, | |
"metadata": { | |
"colab": { | |
"name": "Python.ipynb", | |
"provenance": [], | |
"collapsed_sections": [], | |
"authorship_tag": "ABX9TyM2oebI22AmhLNak77jlLql", | |
"include_colab_link": true | |
}, | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3" | |
}, | |
"language_info": { | |
"name": "python" | |
} | |
}, | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "view-in-github", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"<a href=\"https://colab.research.google.com/gist/andreagrioni/10fb5153b1b5480e229df807a26b6571/python.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "RxipTB-R0Wmy" | |
}, | |
"source": [ | |
"## Unbalanced classes\n", | |
"\n", | |
"Code from [sklearn documentation](https://scikit-learn.org/stable/modules/cross_validation.html#stratification).\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "NJ69tCqszcOW", | |
"outputId": "0e282c7d-ce0e-4b92-8ece-bea440ff6c6e" | |
}, | |
"source": [ | |
"# import modules\n", | |
"from sklearn.model_selection import StratifiedKFold, KFold\n", | |
"# import numpy\n", | |
"import numpy as np\n", | |
"## generate dummy dataset\n", | |
"## the dummy dataset has binary labels: 80 labeled 0 and 20 labeled 1.\n", | |
"X, y = np.ones((100, 1)), np.hstack(([0] * 80, [1] * 20))\n", | |
"## create StratifiedKFold object\n", | |
"## it splits the dataset into train/test folds\n", | |
"## while preserving the class ratio in each fold\n", | |
"skf = StratifiedKFold(n_splits=3, random_state=1, shuffle=True)\n", | |
"## split dataset into\n", | |
"## train and test\n", | |
"for train, test in skf.split(X, y):\n", | |
" print('train - {} | test - {}'.format(\n", | |
" np.bincount(y[train]), np.bincount(y[test])))" | |
], | |
"execution_count": 8, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"train - [53 13] | test - [27 7]\n", | |
"train - [53 14] | test - [27 6]\n", | |
"train - [54 13] | test - [26 7]\n" | |
] | |
} | |
] | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment