Skip to content

Instantly share code, notes, and snippets.

@AhmedCoolProjects
Created November 11, 2022 11:14
Show Gist options
  • Save AhmedCoolProjects/28d7c1cf360f4e8519d7aa62a0e72be9 to your computer and use it in GitHub Desktop.
Save AhmedCoolProjects/28d7c1cf360f4e8519d7aa62a0e72be9 to your computer and use it in GitHub Desktop.
TP 0 - Data Mining 2022-2023.ipynb
Display the source blob
Display the rendered blob
Raw
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": [],
"collapsed_sections": [],
"authorship_tag": "ABX9TyNL5OdIMKU188usJUK/ZYRw",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/AhmedCoolProjects/28d7c1cf360f4e8519d7aa62a0e72be9/tp-0-data-mining-2022-2023.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"id": "8J2eDuxtKeBI"
},
"outputs": [],
"source": [
"import numpy as np"
]
},
{
"cell_type": "code",
"source": [
"myarray = np.array([[1, 2, 3, 4], [11, 12, 13, 14], [21, 22, 23, 24], [31, 32, 33, 34]])"
],
"metadata": {
"id": "vLbkn-lpKjWc"
},
"execution_count": 2,
"outputs": []
},
{
"cell_type": "code",
"source": [
"print(myarray)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "C0hX_biNKloQ",
"outputId": "dd66c9e8-65be-4d0d-8eed-91c51901e4ac"
},
"execution_count": 3,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"[[ 1 2 3 4]\n",
" [11 12 13 14]\n",
" [21 22 23 24]\n",
" [31 32 33 34]]\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"print(myarray.mean())"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "XsAktHOJKyCI",
"outputId": "6edaf6ba-9478-41b9-9dba-3886dbae4dc6"
},
"execution_count": 4,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"17.5\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"print(myarray.max())"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "5QEFFyHeK4J4",
"outputId": "fdc6530d-0b35-4224-b66b-68a91b775cf7"
},
"execution_count": 5,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"34\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"import pandas as pd"
],
"metadata": {
"id": "krTPq6rAK5-_"
},
"execution_count": 6,
"outputs": []
},
{
"cell_type": "code",
"source": [
"ligne_noms = ['ali', 'badr', 'cimon','rachid']"
],
"metadata": {
"id": "jCq4r3Q7MGTz"
},
"execution_count": 7,
"outputs": []
},
{
"cell_type": "code",
"source": [
"colonne_noms = ['age', 'score', 'bonus', \"othernumber\"]"
],
"metadata": {
"id": "5HE_OTdgMWMt"
},
"execution_count": 11,
"outputs": []
},
{
"cell_type": "code",
"source": [
"data_df = pd.DataFrame(myarray, index=ligne_noms, columns=colonne_noms)"
],
"metadata": {
"id": "VyIClssfMXsI"
},
"execution_count": 12,
"outputs": []
},
{
"cell_type": "code",
"source": [
"print(data_df)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "BqQGZRoYMhMp",
"outputId": "a268ab4f-246f-464b-a3eb-de9320ff3008"
},
"execution_count": 13,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
" age score bonus othernumber\n",
"ali 1 2 3 4\n",
"badr 11 12 13 14\n",
"cimon 21 22 23 24\n",
"rachid 31 32 33 34\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"sorted_dataframe = data_df.sort_index()\n",
"print(sorted_dataframe)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "4qBOhrXSMj-_",
"outputId": "fc748562-8912-4244-ca2b-e50596a62efd"
},
"execution_count": 15,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
" age score bonus othernumber\n",
"ali 1 2 3 4\n",
"badr 11 12 13 14\n",
"cimon 21 22 23 24\n",
"rachid 31 32 33 34\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"sorted_dataframe = data_df.sort_index(axis=1)\n",
"print(sorted_dataframe)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "ckIx4N9qNAIP",
"outputId": "a1c86402-5727-45aa-9a53-dbbfd56c6ff8"
},
"execution_count": 16,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
" age bonus othernumber score\n",
"ali 1 3 4 2\n",
"badr 11 13 14 12\n",
"cimon 21 23 24 22\n",
"rachid 31 33 34 32\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"sorted_dataframe = data_df.sort_values(by='age')\n",
"print(sorted_dataframe)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "aTq50acANi1N",
"outputId": "8d54643b-78a0-4e55-ab49-0327d254d24d"
},
"execution_count": 17,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
" age score bonus othernumber\n",
"ali 1 2 3 4\n",
"badr 11 12 13 14\n",
"cimon 21 22 23 24\n",
"rachid 31 32 33 34\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"myarray = np.array([[15,5,1], [29,12,3], [43,33,5],[25,27,4]])\n",
"ligne_noms = ['ali', 'badr', 'cimon','rachid']\n",
"colonne_noms = ['age', 'score', 'bonus']\n",
"new_df = pd.DataFrame(myarray, index=ligne_noms, columns=colonne_noms)"
],
"metadata": {
"id": "7Qgd69XiOiv0"
},
"execution_count": 20,
"outputs": []
},
{
"cell_type": "code",
"source": [
"grouped = new_df.groupby('age')\n",
"print(grouped.head())"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "gzYkceOKNz2A",
"outputId": "981b1c04-1fa0-462c-c8da-f83eadc36f46"
},
"execution_count": 21,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
" age score bonus\n",
"ali 15 5 1\n",
"badr 29 12 3\n",
"cimon 43 33 5\n",
"rachid 25 27 4\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"grouped = data_df.groupby(['age', 'score'])\n",
"print(grouped.head())"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "-r-MvZBoOZPB",
"outputId": "fb8697f1-148d-4401-8b64-b361b4155d4b"
},
"execution_count": 22,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
" age score bonus othernumber\n",
"ali 1 2 3 4\n",
"badr 11 12 13 14\n",
"cimon 21 22 23 24\n",
"rachid 31 32 33 34\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"print(grouped.groups)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "OMkN9axJPWvX",
"outputId": "7efce830-fd4e-4d4c-cdd8-c05dabeb0fc9"
},
"execution_count": 23,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"{(1, 2): ['ali'], (11, 12): ['badr'], (21, 22): ['cimon'], (31, 32): ['rachid']}\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"print(grouped.get_group((1, 2)))"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "sZ-Xbc9ZPkvS",
"outputId": "e32c3aa9-e142-4398-d3b1-0c6793bbd743"
},
"execution_count": 26,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
" age score bonus othernumber\n",
"ali 1 2 3 4\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"df = pd.DataFrame({'Nom': ['Amin', 'Adil', 'Ahmed', 'Badr','Mohamed'],\n",
" 'Diplome': ['Ingénieur', 'DESA', 'PHD',\n",
" 'ingénieur', 'Phd'],\n",
" 'Age': [22, 25, 35, 23, 30]})\n",
"print(df)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "i94cbfntPxWx",
"outputId": "c3d3b372-5f84-4112-e173-112232e4b061"
},
"execution_count": 27,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
" Nom Diplome Age\n",
"0 Amin Ingénieur 22\n",
"1 Adil DESA 25\n",
"2 Ahmed PHD 35\n",
"3 Badr ingénieur 23\n",
"4 Mohamed Phd 30\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"print(df.columns)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "oAXZd8k6Qnwq",
"outputId": "bf03689a-16a5-4697-9c2b-1c494a3dca14"
},
"execution_count": 29,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Index(['Nom', 'Diplome', 'Age'], dtype='object')\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"print(df.columns)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "4QtNqkcYQ7yr",
"outputId": "3c5b0aab-f939-480e-ba62-e0832d6735e7"
},
"execution_count": 30,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Index(['Nom', 'Diplome', 'Age'], dtype='object')\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"print(df.shape)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "BNcSOQFFQ-pt",
"outputId": "a675c872-773c-4d77-d064-b6fc45a72082"
},
"execution_count": 31,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"(5, 3)\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"print(df.head())"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "vY3Wj3GjRQkb",
"outputId": "542d4751-8872-43b6-a82a-fed58de80c05"
},
"execution_count": 32,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
" Nom Diplome Age\n",
"0 Amin Ingénieur 22\n",
"1 Adil DESA 25\n",
"2 Ahmed PHD 35\n",
"3 Badr ingénieur 23\n",
"4 Mohamed Phd 30\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"print(df.head(3))"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "Z1fJa65dRkF4",
"outputId": "69833628-49b1-417e-aca3-09a00f93e3a0"
},
"execution_count": 33,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
" Nom Diplome Age\n",
"0 Amin Ingénieur 22\n",
"1 Adil DESA 25\n",
"2 Ahmed PHD 35\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"print(df.tail())"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "B8yS7mFnRlxE",
"outputId": "25b1b4ca-f1c2-4ac9-9701-278840db6b49"
},
"execution_count": 34,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
" Nom Diplome Age\n",
"0 Amin Ingénieur 22\n",
"1 Adil DESA 25\n",
"2 Ahmed PHD 35\n",
"3 Badr ingénieur 23\n",
"4 Mohamed Phd 30\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"print(df.tail(2))"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "-kmBkHmRR4fs",
"outputId": "08856427-9093-4eab-bb74-6fcfaeeb5048"
},
"execution_count": 35,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
" Nom Diplome Age\n",
"3 Badr ingénieur 23\n",
"4 Mohamed Phd 30\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"print(df[1:3])"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "0VaKBy-9R5mK",
"outputId": "804efa96-d777-4c79-b8ab-f0f4c8716412"
},
"execution_count": 37,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
" Nom Diplome Age\n",
"1 Adil DESA 25\n",
"2 Ahmed PHD 35\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"df.Nom"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "J4_NzmmlSQxG",
"outputId": "5ee56d2f-e9f1-42a3-de6c-3de8fc48b1dd"
},
"execution_count": 38,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"0 Amin\n",
"1 Adil\n",
"2 Ahmed\n",
"3 Badr\n",
"4 Mohamed\n",
"Name: Nom, dtype: object"
]
},
"metadata": {},
"execution_count": 38
}
]
},
{
"cell_type": "code",
"source": [
"print(df[\"Diplome\"])"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "LO5lac9ySck_",
"outputId": "796a8d25-dc80-4ed1-d824-879366334054"
},
"execution_count": 39,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"0 Ingénieur\n",
"1 DESA\n",
"2 PHD\n",
"3 ingénieur\n",
"4 Phd\n",
"Name: Diplome, dtype: object\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"print(df.loc[:10, [\"Nom\", \"Age\"]])"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "RyadRURYSmfM",
"outputId": "5115fb94-190a-4ccc-9243-9c02dd874b71"
},
"execution_count": 41,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
" Nom Age\n",
"0 Amin 22\n",
"1 Adil 25\n",
"2 Ahmed 35\n",
"3 Badr 23\n",
"4 Mohamed 30\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"print(df.iloc[:10, [0, 1]])"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "rI0pCvddS0Zy",
"outputId": "37c90278-9ffd-445b-d10e-ad434c197fb5"
},
"execution_count": 42,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
" Nom Diplome\n",
"0 Amin Ingénieur\n",
"1 Adil DESA\n",
"2 Ahmed PHD\n",
"3 Badr ingénieur\n",
"4 Mohamed Phd\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"print(df.describe())"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "k4U1qoCKTHly",
"outputId": "4e5bf3db-216c-40c1-b657-6c4523d55199"
},
"execution_count": 43,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
" Age\n",
"count 5.00000\n",
"mean 27.00000\n",
"std 5.43139\n",
"min 22.00000\n",
"25% 23.00000\n",
"50% 25.00000\n",
"75% 30.00000\n",
"max 35.00000\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"print(new_df.describe())"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "Hlz9ujI3TVsm",
"outputId": "da16e816-5dd0-4521-f1df-24e613505185"
},
"execution_count": 44,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
" age score bonus\n",
"count 4.000000 4.000000 4.000000\n",
"mean 28.000000 19.250000 3.250000\n",
"std 11.604597 12.971122 1.707825\n",
"min 15.000000 5.000000 1.000000\n",
"25% 22.500000 10.250000 2.500000\n",
"50% 27.000000 19.500000 3.500000\n",
"75% 32.500000 28.500000 4.250000\n",
"max 43.000000 33.000000 5.000000\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"from sklearn import datasets"
],
"metadata": {
"id": "g7U1QFcYTkOp"
},
"execution_count": 45,
"outputs": []
},
{
"cell_type": "code",
"source": [
"iris=datasets.load_iris()"
],
"metadata": {
"id": "P47OZ9gLUtiZ"
},
"execution_count": 46,
"outputs": []
},
{
"cell_type": "code",
"source": [
"print(iris.data)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "eV_eP6BRUuL7",
"outputId": "e0ce0e6b-d051-4c28-de01-a0df89e93913"
},
"execution_count": 48,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"[[5.1 3.5 1.4 0.2]\n",
" [4.9 3. 1.4 0.2]\n",
" [4.7 3.2 1.3 0.2]\n",
" [4.6 3.1 1.5 0.2]\n",
" [5. 3.6 1.4 0.2]\n",
" [5.4 3.9 1.7 0.4]\n",
" [4.6 3.4 1.4 0.3]\n",
" [5. 3.4 1.5 0.2]\n",
" [4.4 2.9 1.4 0.2]\n",
" [4.9 3.1 1.5 0.1]\n",
" [5.4 3.7 1.5 0.2]\n",
" [4.8 3.4 1.6 0.2]\n",
" [4.8 3. 1.4 0.1]\n",
" [4.3 3. 1.1 0.1]\n",
" [5.8 4. 1.2 0.2]\n",
" [5.7 4.4 1.5 0.4]\n",
" [5.4 3.9 1.3 0.4]\n",
" [5.1 3.5 1.4 0.3]\n",
" [5.7 3.8 1.7 0.3]\n",
" [5.1 3.8 1.5 0.3]\n",
" [5.4 3.4 1.7 0.2]\n",
" [5.1 3.7 1.5 0.4]\n",
" [4.6 3.6 1. 0.2]\n",
" [5.1 3.3 1.7 0.5]\n",
" [4.8 3.4 1.9 0.2]\n",
" [5. 3. 1.6 0.2]\n",
" [5. 3.4 1.6 0.4]\n",
" [5.2 3.5 1.5 0.2]\n",
" [5.2 3.4 1.4 0.2]\n",
" [4.7 3.2 1.6 0.2]\n",
" [4.8 3.1 1.6 0.2]\n",
" [5.4 3.4 1.5 0.4]\n",
" [5.2 4.1 1.5 0.1]\n",
" [5.5 4.2 1.4 0.2]\n",
" [4.9 3.1 1.5 0.2]\n",
" [5. 3.2 1.2 0.2]\n",
" [5.5 3.5 1.3 0.2]\n",
" [4.9 3.6 1.4 0.1]\n",
" [4.4 3. 1.3 0.2]\n",
" [5.1 3.4 1.5 0.2]\n",
" [5. 3.5 1.3 0.3]\n",
" [4.5 2.3 1.3 0.3]\n",
" [4.4 3.2 1.3 0.2]\n",
" [5. 3.5 1.6 0.6]\n",
" [5.1 3.8 1.9 0.4]\n",
" [4.8 3. 1.4 0.3]\n",
" [5.1 3.8 1.6 0.2]\n",
" [4.6 3.2 1.4 0.2]\n",
" [5.3 3.7 1.5 0.2]\n",
" [5. 3.3 1.4 0.2]\n",
" [7. 3.2 4.7 1.4]\n",
" [6.4 3.2 4.5 1.5]\n",
" [6.9 3.1 4.9 1.5]\n",
" [5.5 2.3 4. 1.3]\n",
" [6.5 2.8 4.6 1.5]\n",
" [5.7 2.8 4.5 1.3]\n",
" [6.3 3.3 4.7 1.6]\n",
" [4.9 2.4 3.3 1. ]\n",
" [6.6 2.9 4.6 1.3]\n",
" [5.2 2.7 3.9 1.4]\n",
" [5. 2. 3.5 1. ]\n",
" [5.9 3. 4.2 1.5]\n",
" [6. 2.2 4. 1. ]\n",
" [6.1 2.9 4.7 1.4]\n",
" [5.6 2.9 3.6 1.3]\n",
" [6.7 3.1 4.4 1.4]\n",
" [5.6 3. 4.5 1.5]\n",
" [5.8 2.7 4.1 1. ]\n",
" [6.2 2.2 4.5 1.5]\n",
" [5.6 2.5 3.9 1.1]\n",
" [5.9 3.2 4.8 1.8]\n",
" [6.1 2.8 4. 1.3]\n",
" [6.3 2.5 4.9 1.5]\n",
" [6.1 2.8 4.7 1.2]\n",
" [6.4 2.9 4.3 1.3]\n",
" [6.6 3. 4.4 1.4]\n",
" [6.8 2.8 4.8 1.4]\n",
" [6.7 3. 5. 1.7]\n",
" [6. 2.9 4.5 1.5]\n",
" [5.7 2.6 3.5 1. ]\n",
" [5.5 2.4 3.8 1.1]\n",
" [5.5 2.4 3.7 1. ]\n",
" [5.8 2.7 3.9 1.2]\n",
" [6. 2.7 5.1 1.6]\n",
" [5.4 3. 4.5 1.5]\n",
" [6. 3.4 4.5 1.6]\n",
" [6.7 3.1 4.7 1.5]\n",
" [6.3 2.3 4.4 1.3]\n",
" [5.6 3. 4.1 1.3]\n",
" [5.5 2.5 4. 1.3]\n",
" [5.5 2.6 4.4 1.2]\n",
" [6.1 3. 4.6 1.4]\n",
" [5.8 2.6 4. 1.2]\n",
" [5. 2.3 3.3 1. ]\n",
" [5.6 2.7 4.2 1.3]\n",
" [5.7 3. 4.2 1.2]\n",
" [5.7 2.9 4.2 1.3]\n",
" [6.2 2.9 4.3 1.3]\n",
" [5.1 2.5 3. 1.1]\n",
" [5.7 2.8 4.1 1.3]\n",
" [6.3 3.3 6. 2.5]\n",
" [5.8 2.7 5.1 1.9]\n",
" [7.1 3. 5.9 2.1]\n",
" [6.3 2.9 5.6 1.8]\n",
" [6.5 3. 5.8 2.2]\n",
" [7.6 3. 6.6 2.1]\n",
" [4.9 2.5 4.5 1.7]\n",
" [7.3 2.9 6.3 1.8]\n",
" [6.7 2.5 5.8 1.8]\n",
" [7.2 3.6 6.1 2.5]\n",
" [6.5 3.2 5.1 2. ]\n",
" [6.4 2.7 5.3 1.9]\n",
" [6.8 3. 5.5 2.1]\n",
" [5.7 2.5 5. 2. ]\n",
" [5.8 2.8 5.1 2.4]\n",
" [6.4 3.2 5.3 2.3]\n",
" [6.5 3. 5.5 1.8]\n",
" [7.7 3.8 6.7 2.2]\n",
" [7.7 2.6 6.9 2.3]\n",
" [6. 2.2 5. 1.5]\n",
" [6.9 3.2 5.7 2.3]\n",
" [5.6 2.8 4.9 2. ]\n",
" [7.7 2.8 6.7 2. ]\n",
" [6.3 2.7 4.9 1.8]\n",
" [6.7 3.3 5.7 2.1]\n",
" [7.2 3.2 6. 1.8]\n",
" [6.2 2.8 4.8 1.8]\n",
" [6.1 3. 4.9 1.8]\n",
" [6.4 2.8 5.6 2.1]\n",
" [7.2 3. 5.8 1.6]\n",
" [7.4 2.8 6.1 1.9]\n",
" [7.9 3.8 6.4 2. ]\n",
" [6.4 2.8 5.6 2.2]\n",
" [6.3 2.8 5.1 1.5]\n",
" [6.1 2.6 5.6 1.4]\n",
" [7.7 3. 6.1 2.3]\n",
" [6.3 3.4 5.6 2.4]\n",
" [6.4 3.1 5.5 1.8]\n",
" [6. 3. 4.8 1.8]\n",
" [6.9 3.1 5.4 2.1]\n",
" [6.7 3.1 5.6 2.4]\n",
" [6.9 3.1 5.1 2.3]\n",
" [5.8 2.7 5.1 1.9]\n",
" [6.8 3.2 5.9 2.3]\n",
" [6.7 3.3 5.7 2.5]\n",
" [6.7 3. 5.2 2.3]\n",
" [6.3 2.5 5. 1.9]\n",
" [6.5 3. 5.2 2. ]\n",
" [6.2 3.4 5.4 2.3]\n",
" [5.9 3. 5.1 1.8]]\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"print(iris.target)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "DcDrV4gDVSAD",
"outputId": "cbd20dab-261b-45ad-da81-50a482fe9974"
},
"execution_count": 49,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
" 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1\n",
" 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2\n",
" 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2\n",
" 2 2]\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"print(iris.target_names)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "3D8dr0qTVhsv",
"outputId": "6dbfb470-14e8-4dec-ad10-6784583fd681"
},
"execution_count": 50,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"['setosa' 'versicolor' 'virginica']\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"print(iris.feature_names)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "h7cGttciVrEy",
"outputId": "71df74f2-66a4-4dcc-8413-32a9d2290476"
},
"execution_count": 51,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"data_df= pd.DataFrame(iris.data)"
],
"metadata": {
"id": "O4Mt1ORPVzVx"
},
"execution_count": 52,
"outputs": []
},
{
"cell_type": "code",
"source": [
"print(data_df.columns)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "LfXHFrkvWCKR",
"outputId": "61fe4ce9-abdb-4284-a683-31ac0e53e3d7"
},
"execution_count": 56,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"RangeIndex(start=0, stop=4, step=1)\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"print(data_df.index)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "OskcgBnHWJ58",
"outputId": "af4bc582-af6e-4f74-dac2-232b08f7f1c6"
},
"execution_count": 57,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"RangeIndex(start=0, stop=150, step=1)\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"data_df.columns = iris.feature_names"
],
"metadata": {
"id": "Y_NuzDPFWl4y"
},
"execution_count": 58,
"outputs": []
},
{
"cell_type": "code",
"source": [
"print(data_df.head())"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "hOZNSPyVW3cw",
"outputId": "1a012a97-3897-4c6a-cc71-8089daa053dd"
},
"execution_count": 59,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
" sepal length (cm) sepal width (cm) petal length (cm) petal width (cm)\n",
"0 5.1 3.5 1.4 0.2\n",
"1 4.9 3.0 1.4 0.2\n",
"2 4.7 3.2 1.3 0.2\n",
"3 4.6 3.1 1.5 0.2\n",
"4 5.0 3.6 1.4 0.2\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"# DataFrame.info() prints its summary itself and returns None, so it is called directly: wrapping it in print() would append a stray None line.\n",
"data_df.info()"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "o4qGV6SkW57X",
"outputId": "7247c705-736f-41dd-e21a-fd4e4589d029"
},
"execution_count": 62,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 150 entries, 0 to 149\n",
"Data columns (total 4 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 sepal length (cm) 150 non-null float64\n",
" 1 sepal width (cm) 150 non-null float64\n",
" 2 petal length (cm) 150 non-null float64\n",
" 3 petal width (cm) 150 non-null float64\n",
"dtypes: float64(4)\n",
"memory usage: 4.8 KB\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"print(data_df.columns)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "UWgaAfISXFoQ",
"outputId": "80cf4b80-036d-4f21-dcbc-d25fe553ffce"
},
"execution_count": 63,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Index(['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)',\n",
" 'petal width (cm)'],\n",
" dtype='object')\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"print(data_df.shape)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "iLvTkThQXmuZ",
"outputId": "39908695-9049-4610-ddb8-898738bb322b"
},
"execution_count": 64,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"(150, 4)\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"print(data_df.head(3))"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "m0b0yM_kXyb9",
"outputId": "d407d232-c65b-403c-d9c6-2c93004cf09d"
},
"execution_count": 65,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
" sepal length (cm) sepal width (cm) petal length (cm) petal width (cm)\n",
"0 5.1 3.5 1.4 0.2\n",
"1 4.9 3.0 1.4 0.2\n",
"2 4.7 3.2 1.3 0.2\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"print(data_df.tail(3))"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "7o1gIHD_X_bA",
"outputId": "b9ec0bee-fb8a-402f-f5ed-6b8ec5f48f8a"
},
"execution_count": 66,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
" sepal length (cm) sepal width (cm) petal length (cm) petal width (cm)\n",
"147 6.5 3.0 5.2 2.0\n",
"148 6.2 3.4 5.4 2.3\n",
"149 5.9 3.0 5.1 1.8\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"print(data_df[1:3])"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "I3eftiQSYInw",
"outputId": "a011cdc8-845b-4512-ed54-02b88edf5c3e"
},
"execution_count": 67,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
" sepal length (cm) sepal width (cm) petal length (cm) petal width (cm)\n",
"1 4.9 3.0 1.4 0.2\n",
"2 4.7 3.2 1.3 0.2\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"print(data_df['sepal length (cm)'])"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "WW3bReM3YYPo",
"outputId": "7e83151d-3c25-4c6f-93ee-1c665fe7c11e"
},
"execution_count": 68,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"0 5.1\n",
"1 4.9\n",
"2 4.7\n",
"3 4.6\n",
"4 5.0\n",
" ... \n",
"145 6.7\n",
"146 6.3\n",
"147 6.5\n",
"148 6.2\n",
"149 5.9\n",
"Name: sepal length (cm), Length: 150, dtype: float64\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"print(data_df.loc[1:3, 'sepal length (cm)'])"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "XpjCDmj4YikQ",
"outputId": "95c82555-459e-46ff-cae7-8ea8d9413ded"
},
"execution_count": 71,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"1 4.9\n",
"2 4.7\n",
"3 4.6\n",
"Name: sepal length (cm), dtype: float64\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"print(data_df.iloc[1:3, [0, 1]])"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "SDKXXVqEYrbK",
"outputId": "877bbb08-110a-49c1-ee6c-4fdf6db03c34"
},
"execution_count": 73,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
" sepal length (cm) sepal width (cm)\n",
"1 4.9 3.0\n",
"2 4.7 3.2\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"print(data_df.describe())"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "vT5YgK_cY7Xw",
"outputId": "8f3541e8-c6bb-462c-b853-92c0269a76a8"
},
"execution_count": 74,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
" sepal length (cm) sepal width (cm) petal length (cm) \\\n",
"count 150.000000 150.000000 150.000000 \n",
"mean 5.843333 3.057333 3.758000 \n",
"std 0.828066 0.435866 1.765298 \n",
"min 4.300000 2.000000 1.000000 \n",
"25% 5.100000 2.800000 1.600000 \n",
"50% 5.800000 3.000000 4.350000 \n",
"75% 6.400000 3.300000 5.100000 \n",
"max 7.900000 4.400000 6.900000 \n",
"\n",
" petal width (cm) \n",
"count 150.000000 \n",
"mean 1.199333 \n",
"std 0.762238 \n",
"min 0.100000 \n",
"25% 0.300000 \n",
"50% 1.300000 \n",
"75% 1.800000 \n",
"max 2.500000 \n"
]
}
]
},
{
"cell_type": "code",
"source": [],
"metadata": {
"id": "6TmEtLlkZFU1"
},
"execution_count": null,
"outputs": []
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment