Created
October 6, 2019 14:30
-
-
Save ChunML/f88601a76fdc9f70967b0d6de954cdb1 to your computer and use it in GitHub Desktop.
muffy_stata.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"nbformat": 4, | |
"nbformat_minor": 0, | |
"metadata": { | |
"colab": { | |
"name": "muffy_stata.ipynb", | |
"provenance": [], | |
"collapsed_sections": [], | |
"include_colab_link": true | |
}, | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3" | |
} | |
}, | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "view-in-github", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"<a href=\"https://colab.research.google.com/gist/ChunML/f88601a76fdc9f70967b0d6de954cdb1/muffy_stata.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "_kykMyQCPzAV", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"import pandas as pd\n", | |
"import os" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "pTGnYp_rRf36", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 102 | |
}, | |
"outputId": "6892ca75-217a-4d3c-f645-d965d0e48c1c" | |
}, | |
"source": [ | |
"# NOTE(review): pandas is already imported by the first cell, and the saved output of this\n", | |
"# cell reports it as already installed, so this install looks redundant. If it is needed,\n", | |
"# it should run before the import and pin a version.\n", | |
"!pip install pandas" | |
], | |
"execution_count": 2, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"Requirement already satisfied: pandas in /usr/local/lib/python3.6/dist-packages (0.25.1)\n", | |
"Requirement already satisfied: numpy>=1.13.3 in /usr/local/lib/python3.6/dist-packages (from pandas) (1.16.5)\n", | |
"Requirement already satisfied: python-dateutil>=2.6.1 in /usr/local/lib/python3.6/dist-packages (from pandas) (2.8.0)\n", | |
"Requirement already satisfied: pytz>=2017.2 in /usr/local/lib/python3.6/dist-packages (from pandas) (2018.9)\n", | |
"Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.6/dist-packages (from python-dateutil>=2.6.1->pandas) (1.12.0)\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "mbkkvNsnQeKk", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 34 | |
}, | |
"outputId": "ea0a1c09-a2d3-436a-965a-2ef217ecc25a" | |
}, | |
"source": [ | |
"# Mount Google Drive into the Colab runtime at /gdrive (Colab-only; prompts for authorisation\n", | |
"# on a fresh runtime).\n", | |
"from google.colab import drive\n", | |
"drive.mount('/gdrive')" | |
], | |
"execution_count": 3, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount(\"/gdrive\", force_remount=True).\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "yPMOc6BVQr_e", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"# Folder on the mounted Drive from which the Stata (.dta) files are read.\n", | |
"data_dir = '/gdrive/My Drive/stata'" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "mHXsVVr5Q6TR", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 119 | |
}, | |
"outputId": "cc703162-ecb0-4455-cd9a-7a1653f2504f" | |
}, | |
"source": [ | |
"def read_dta(filename):\n", | |
"    \"\"\"Load one Stata file from data_dir into a DataFrame.\"\"\"\n", | |
"    return pd.read_stata(os.path.join(data_dir, filename))\n", | |
"\n", | |
"\n", | |
"# NOTE(review): the saved stderr of this cell shows pandas falling back to latin-1 for at\n", | |
"# least one file, so the decoded string values should be verified.\n", | |
"data_1a = read_dta('Muc1A.dta')\n", | |
"data_2ab = read_dta('Muc2AB.dta')\n", | |
"data_3a = read_dta('Muc3A.dta')\n", | |
"data_3b = read_dta('Muc3B.dta')\n", | |
"data_4A = read_dta('Muc4a.dta')\n", | |
"data_16 = read_dta('wt16.dta')" | |
], | |
"execution_count": 41, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"/usr/local/lib/python3.6/dist-packages/pandas/io/stata.py:1453: UnicodeWarning: \n", | |
"One or more strings in the dta file could not be decoded using utf-8, and\n", | |
"so the fallback encoding of latin-1 is being used. This can happen when a file\n", | |
"has been incorrectly encoded by Stata or some other software. You should verify\n", | |
"the string values returned are correct.\n", | |
" warnings.warn(msg.format(encoding=self._encoding), UnicodeWarning)\n" | |
], | |
"name": "stderr" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "1yy0_Qm1ZYQm", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 629 | |
}, | |
"outputId": "b70bb8bd-400f-4b5f-b4c2-e83f1321f684" | |
}, | |
"source": [ | |
"# Column dtypes and non-null counts for Muc1A. In the saved output, matv is float64 and has\n", | |
"# one fewer non-null value than the row count, i.e. one row is missing matv.\n", | |
"data_1a.info()" | |
], | |
"execution_count": 48, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"<class 'pandas.core.frame.DataFrame'>\n", | |
"Int64Index: 35788 entries, 0 to 35787\n", | |
"Data columns (total 31 columns):\n", | |
"tinh 35788 non-null category\n", | |
"huyen 35788 non-null int16\n", | |
"xa 35788 non-null int32\n", | |
"diaban 35788 non-null int16\n", | |
"hoso 35788 non-null int16\n", | |
"matv 35787 non-null float64\n", | |
"m1ac2 35787 non-null category\n", | |
"m1ac3 35787 non-null category\n", | |
"m1ac4a 35787 non-null category\n", | |
"m1ac4b 35787 non-null float64\n", | |
"m1ac5 35787 non-null float64\n", | |
"m1ac6 9238 non-null category\n", | |
"m1ac7a 9238 non-null category\n", | |
"m1ac7b 9238 non-null category\n", | |
"m1ac7c 491 non-null category\n", | |
"m1ac8 28237 non-null category\n", | |
"m1ac9 35787 non-null category\n", | |
"m1ac10 1123 non-null category\n", | |
"m1ama1 35787 non-null float64\n", | |
"m1ac11 35787 non-null category\n", | |
"m1ac12 35787 non-null category\n", | |
"m1ac13 361 non-null category\n", | |
"m1ac14a 395 non-null category\n", | |
"m1ac14b 395 non-null float64\n", | |
"m1ac15a 27124 non-null category\n", | |
"m1ac15b 27124 non-null category\n", | |
"m1ac15c 27124 non-null category\n", | |
"m1ac15d 27124 non-null category\n", | |
"m1ac16 32350 non-null category\n", | |
"agecat 35787 non-null float32\n", | |
"allage 35787 non-null float32\n", | |
"dtypes: category(20), float32(2), float64(5), int16(3), int32(1)\n", | |
"memory usage: 2.9 MB\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "MW2Jl1ThZg_w", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 357 | |
}, | |
"outputId": "4cb5b5d7-64f1-4ba5-cce9-196d290e0cb4" | |
}, | |
"source": [ | |
"# Column dtypes and non-null counts for Muc2AB. In the saved output, matv is int8 here and\n", | |
"# has no missing values.\n", | |
"data_2ab.info()" | |
], | |
"execution_count": 53, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"<class 'pandas.core.frame.DataFrame'>\n", | |
"Int64Index: 35787 entries, 0 to 35786\n", | |
"Data columns (total 15 columns):\n", | |
"tinh 35787 non-null category\n", | |
"huyen 35787 non-null int16\n", | |
"xa 35787 non-null int32\n", | |
"diaban 35787 non-null int16\n", | |
"hoso 35787 non-null int16\n", | |
"matv 35787 non-null int8\n", | |
"m2ac1 35787 non-null object\n", | |
"m2ac2a 32050 non-null category\n", | |
"m2ac2b 32050 non-null category\n", | |
"m2ac3 32050 non-null category\n", | |
"m2ac4 32050 non-null category\n", | |
"m2ac5 23266 non-null category\n", | |
"m2ac6 9010 non-null category\n", | |
"m2ac7 6438 non-null category\n", | |
"m2ac8 9010 non-null category\n", | |
"dtypes: category(9), int16(3), int32(1), int8(1), object(1)\n", | |
"memory usage: 1.2+ MB\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "7_pSiZiwZpxL", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"# Columns used together as the join key for the tables merged in this notebook.\n", | |
"keys = ['tinh', 'huyen', 'xa', 'diaban', 'hoso', 'matv']\n", | |
"\n", | |
"# Default (inner) join: rows whose key is absent from either table are dropped.\n", | |
"data_1a_2ab = pd.merge(data_1a, data_2ab, on=keys)" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "nhmuQJ0ka0NU", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 479 | |
}, | |
"outputId": "710932f7-403a-4785-d188-deafce9e67ee" | |
}, | |
"source": [ | |
"# Add the Muc3A columns to the merged 1A + 2AB frame (default inner join on the same keys).\n", | |
"data_1a_3a = pd.merge(data_1a_2ab, data_3a, on=keys)\n", | |
"data_1a_3a.head()" | |
], | |
"execution_count": 58, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>tinh</th>\n", | |
" <th>huyen</th>\n", | |
" <th>xa</th>\n", | |
" <th>diaban</th>\n", | |
" <th>hoso</th>\n", | |
" <th>matv</th>\n", | |
" <th>m1ac2</th>\n", | |
" <th>m1ac3</th>\n", | |
" <th>m1ac4a</th>\n", | |
" <th>m1ac4b</th>\n", | |
" <th>m1ac5</th>\n", | |
" <th>m1ac6</th>\n", | |
" <th>m1ac7a</th>\n", | |
" <th>m1ac7b</th>\n", | |
" <th>m1ac7c</th>\n", | |
" <th>m1ac8</th>\n", | |
" <th>m1ac9</th>\n", | |
" <th>m1ac10</th>\n", | |
" <th>m1ama1</th>\n", | |
" <th>m1ac11</th>\n", | |
" <th>m1ac12</th>\n", | |
" <th>m1ac13</th>\n", | |
" <th>m1ac14a</th>\n", | |
" <th>m1ac14b</th>\n", | |
" <th>m1ac15a</th>\n", | |
" <th>m1ac15b</th>\n", | |
" <th>m1ac15c</th>\n", | |
" <th>m1ac15d</th>\n", | |
" <th>m1ac16</th>\n", | |
" <th>agecat</th>\n", | |
" <th>allage</th>\n", | |
" <th>m2ac1</th>\n", | |
" <th>m2ac2a</th>\n", | |
" <th>m2ac2b</th>\n", | |
" <th>m2ac3</th>\n", | |
" <th>m2ac4</th>\n", | |
" <th>m2ac5</th>\n", | |
" <th>m2ac6</th>\n", | |
" <th>m2ac7</th>\n", | |
" <th>m2ac8</th>\n", | |
" <th>m3c1a</th>\n", | |
" <th>m3c1b</th>\n", | |
" <th>m3c1c</th>\n", | |
" <th>m3c1d</th>\n", | |
" <th>m3c1e</th>\n", | |
" <th>m3c1fg</th>\n", | |
" <th>m3c1fp</th>\n", | |
" <th>m3ma1</th>\n", | |
" <th>m3c9</th>\n", | |
" <th>m3c10a</th>\n", | |
" <th>m3c10b</th>\n", | |
" <th>m3c11</th>\n", | |
" <th>m3c12a</th>\n", | |
" <th>m3c12b</th>\n", | |
" <th>m3c13</th>\n", | |
" <th>m3c14</th>\n", | |
" <th>m3c15</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <td>0</td>\n", | |
" <td>Thành ph? Hà N?i</td>\n", | |
" <td>1</td>\n", | |
" <td>4</td>\n", | |
" <td>8</td>\n", | |
" <td>14</td>\n", | |
" <td>1.0</td>\n", | |
" <td>N?</td>\n", | |
" <td>Ch? h?</td>\n", | |
" <td>4</td>\n", | |
" <td>1953.0</td>\n", | |
" <td>63.0</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>Ly hôn</td>\n", | |
" <td>12.0</td>\n", | |
" <td>NaN</td>\n", | |
" <td>1.0</td>\n", | |
" <td>Thành ph? Hà N?i</td>\n", | |
" <td>Trong xã ph??ng</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>Không</td>\n", | |
" <td>Không</td>\n", | |
" <td>Không</td>\n", | |
" <td>Không</td>\n", | |
" <td>Không</td>\n", | |
" <td>1.0</td>\n", | |
" <td>2.0</td>\n", | |
" <td>12</td>\n", | |
" <td>??i h?c</td>\n", | |
" <td>Không</td>\n", | |
" <td>Công l?p</td>\n", | |
" <td>Không</td>\n", | |
" <td>Không</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>Không</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>1.0</td>\n", | |
" <td>Có</td>\n", | |
" <td>Nhà n??c khác</td>\n", | |
" <td>Khong co</td>\n", | |
" <td>NaN</td>\n", | |
" <td>Có</td>\n", | |
" <td>Không</td>\n", | |
" <td>200.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>1</td>\n", | |
" <td>Thành ph? Hà N?i</td>\n", | |
" <td>1</td>\n", | |
" <td>4</td>\n", | |
" <td>8</td>\n", | |
" <td>14</td>\n", | |
" <td>2.0</td>\n", | |
" <td>Nam</td>\n", | |
" <td>Khác</td>\n", | |
" <td>7</td>\n", | |
" <td>1996.0</td>\n", | |
" <td>20.0</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>Ch?a VC</td>\n", | |
" <td>12.0</td>\n", | |
" <td>NaN</td>\n", | |
" <td>2.0</td>\n", | |
" <td>Thành ph? Hà N?i</td>\n", | |
" <td>Trong xã ph??ng</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>Không</td>\n", | |
" <td>Không</td>\n", | |
" <td>Không</td>\n", | |
" <td>Không</td>\n", | |
" <td>Có</td>\n", | |
" <td>0.0</td>\n", | |
" <td>1.0</td>\n", | |
" <td>12</td>\n", | |
" <td>THPT</td>\n", | |
" <td>Không</td>\n", | |
" <td>Công l?p</td>\n", | |
" <td>Có</td>\n", | |
" <td>NaN</td>\n", | |
" <td>Cao ??ng</td>\n", | |
" <td>NaN</td>\n", | |
" <td>Công l?p</td>\n", | |
" <td>Không</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>2.0</td>\n", | |
" <td>Có</td>\n", | |
" <td>H?c sinh t? nguy?n</td>\n", | |
" <td>Khong co</td>\n", | |
" <td>621</td>\n", | |
" <td>Không</td>\n", | |
" <td>Không</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>2</td>\n", | |
" <td>Thành ph? Hà N?i</td>\n", | |
" <td>1</td>\n", | |
" <td>4</td>\n", | |
" <td>8</td>\n", | |
" <td>15</td>\n", | |
" <td>1.0</td>\n", | |
" <td>Nam</td>\n", | |
" <td>Ch? h?</td>\n", | |
" <td>6</td>\n", | |
" <td>1979.0</td>\n", | |
" <td>37.0</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>?ang có VC</td>\n", | |
" <td>12.0</td>\n", | |
" <td>NaN</td>\n", | |
" <td>1.0</td>\n", | |
" <td>Thành ph? Hà N?i</td>\n", | |
" <td>Trong xã ph??ng</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>Không</td>\n", | |
" <td>Không</td>\n", | |
" <td>Không</td>\n", | |
" <td>Không</td>\n", | |
" <td>Có</td>\n", | |
" <td>0.0</td>\n", | |
" <td>1.0</td>\n", | |
" <td>12</td>\n", | |
" <td>??i h?c</td>\n", | |
" <td>Không</td>\n", | |
" <td>Công l?p</td>\n", | |
" <td>Không</td>\n", | |
" <td>Không</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>Không</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>1.0</td>\n", | |
" <td>Có</td>\n", | |
" <td>Ngoài nhà n??c</td>\n", | |
" <td>Khong co</td>\n", | |
" <td>NaN</td>\n", | |
" <td>Không</td>\n", | |
" <td>Không</td>\n", | |
" <td>670.0</td>\n", | |
" <td>200.0</td>\n", | |
" <td>0.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>3</td>\n", | |
" <td>Thành ph? Hà N?i</td>\n", | |
" <td>1</td>\n", | |
" <td>4</td>\n", | |
" <td>8</td>\n", | |
" <td>15</td>\n", | |
" <td>2.0</td>\n", | |
" <td>N?</td>\n", | |
" <td>V? ch?ng</td>\n", | |
" <td>11</td>\n", | |
" <td>1981.0</td>\n", | |
" <td>34.0</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>?ang có VC</td>\n", | |
" <td>12.0</td>\n", | |
" <td>NaN</td>\n", | |
" <td>2.0</td>\n", | |
" <td>Thành ph? Hà N?i</td>\n", | |
" <td>Trong xã ph??ng</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>Không</td>\n", | |
" <td>Có</td>\n", | |
" <td>Không</td>\n", | |
" <td>Không</td>\n", | |
" <td>Có</td>\n", | |
" <td>0.0</td>\n", | |
" <td>1.0</td>\n", | |
" <td>12</td>\n", | |
" <td>??i h?c</td>\n", | |
" <td>Không</td>\n", | |
" <td>Công l?p</td>\n", | |
" <td>Không</td>\n", | |
" <td>Không</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>Không</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>2.0</td>\n", | |
" <td>Có</td>\n", | |
" <td>Nhà n??c khác</td>\n", | |
" <td>Khong co</td>\n", | |
" <td>NaN</td>\n", | |
" <td>Không</td>\n", | |
" <td>Không</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>4</td>\n", | |
" <td>Thành ph? Hà N?i</td>\n", | |
" <td>1</td>\n", | |
" <td>4</td>\n", | |
" <td>8</td>\n", | |
" <td>15</td>\n", | |
" <td>3.0</td>\n", | |
" <td>N?</td>\n", | |
" <td>Con</td>\n", | |
" <td>1</td>\n", | |
" <td>2008.0</td>\n", | |
" <td>8.0</td>\n", | |
" <td>Có</td>\n", | |
" <td>1</td>\n", | |
" <td>2</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>12.0</td>\n", | |
" <td>NaN</td>\n", | |
" <td>3.0</td>\n", | |
" <td>Thành ph? Hà N?i</td>\n", | |
" <td>Trong xã ph??ng</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>Có</td>\n", | |
" <td>0.0</td>\n", | |
" <td>1.0</td>\n", | |
" <td>2</td>\n", | |
" <td>K0 b?ng c?p</td>\n", | |
" <td>Không</td>\n", | |
" <td>Công l?p</td>\n", | |
" <td>Có</td>\n", | |
" <td>NaN</td>\n", | |
" <td>Ti?u h?c</td>\n", | |
" <td>3.0</td>\n", | |
" <td>Công l?p</td>\n", | |
" <td>Không</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>3.0</td>\n", | |
" <td>Có</td>\n", | |
" <td>H?c sinh t? nguy?n</td>\n", | |
" <td>Khong co</td>\n", | |
" <td>457</td>\n", | |
" <td>Không</td>\n", | |
" <td>Không</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" tinh huyen xa diaban hoso ... m3c12a m3c12b m3c13 m3c14 m3c15\n", | |
"0 Thành ph? Hà N?i 1 4 8 14 ... Có Không 200.0 0.0 0.0\n", | |
"1 Thành ph? Hà N?i 1 4 8 14 ... Không Không NaN NaN NaN\n", | |
"2 Thành ph? Hà N?i 1 4 8 15 ... Không Không 670.0 200.0 0.0\n", | |
"3 Thành ph? Hà N?i 1 4 8 15 ... Không Không NaN NaN NaN\n", | |
"4 Thành ph? Hà N?i 1 4 8 15 ... Không Không NaN NaN NaN\n", | |
"\n", | |
"[5 rows x 57 columns]" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 58 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "QIz5OLgIbHte", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 419 | |
}, | |
"outputId": "af95338c-67ec-425f-ecc9-cdd2458fd308" | |
}, | |
"source": [ | |
"# Reshape Muc3B from wide to long: one output row per input row and melted column.\n", | |
"id_vars = ['tinh', 'huyen', 'xa', 'diaban', 'hoso', 'matv']\n", | |
"value_vars = ['m3c2', 'm3c3a', 'm3c3b', 'm3c4', 'm3c5a', 'm3c5b', 'm3c6a', 'm3c6b', 'm3c7', 'm3c8']\n", | |
"\n", | |
"# Pass value_vars explicitly so that exactly the listed columns are melted, rather than\n", | |
"# every non-id column that happens to be in the file.\n", | |
"data_3b.melt(id_vars=id_vars, value_vars=value_vars)" | |
], | |
"execution_count": 67, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>tinh</th>\n", | |
" <th>huyen</th>\n", | |
" <th>xa</th>\n", | |
" <th>diaban</th>\n", | |
" <th>hoso</th>\n", | |
" <th>matv</th>\n", | |
" <th>variable</th>\n", | |
" <th>value</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <td>0</td>\n", | |
" <td>Thành ph? Hà N?i</td>\n", | |
" <td>1</td>\n", | |
" <td>4</td>\n", | |
" <td>8</td>\n", | |
" <td>14</td>\n", | |
" <td>1</td>\n", | |
" <td>m3c2</td>\n", | |
" <td>h?nh</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>1</td>\n", | |
" <td>Thành ph? Hà N?i</td>\n", | |
" <td>1</td>\n", | |
" <td>4</td>\n", | |
" <td>8</td>\n", | |
" <td>19</td>\n", | |
" <td>1</td>\n", | |
" <td>m3c2</td>\n", | |
" <td>kh?i</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>2</td>\n", | |
" <td>Thành ph? Hà N?i</td>\n", | |
" <td>1</td>\n", | |
" <td>4</td>\n", | |
" <td>8</td>\n", | |
" <td>19</td>\n", | |
" <td>2</td>\n", | |
" <td>m3c2</td>\n", | |
" <td>phuong</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>3</td>\n", | |
" <td>Thành ph? Hà N?i</td>\n", | |
" <td>1</td>\n", | |
" <td>7</td>\n", | |
" <td>6</td>\n", | |
" <td>14</td>\n", | |
" <td>2</td>\n", | |
" <td>m3c2</td>\n", | |
" <td>nga</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>4</td>\n", | |
" <td>Thành ph? Hà N?i</td>\n", | |
" <td>1</td>\n", | |
" <td>7</td>\n", | |
" <td>6</td>\n", | |
" <td>14</td>\n", | |
" <td>5</td>\n", | |
" <td>m3c2</td>\n", | |
" <td>ng?c</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>179065</td>\n", | |
" <td>T?nh Cà Mau</td>\n", | |
" <td>973</td>\n", | |
" <td>32242</td>\n", | |
" <td>18</td>\n", | |
" <td>15</td>\n", | |
" <td>1</td>\n", | |
" <td>m3c8</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>179066</td>\n", | |
" <td>T?nh Cà Mau</td>\n", | |
" <td>973</td>\n", | |
" <td>32248</td>\n", | |
" <td>9</td>\n", | |
" <td>13</td>\n", | |
" <td>4</td>\n", | |
" <td>m3c8</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>179067</td>\n", | |
" <td>T?nh Cà Mau</td>\n", | |
" <td>973</td>\n", | |
" <td>32248</td>\n", | |
" <td>9</td>\n", | |
" <td>13</td>\n", | |
" <td>1</td>\n", | |
" <td>m3c8</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>179068</td>\n", | |
" <td>T?nh Cà Mau</td>\n", | |
" <td>973</td>\n", | |
" <td>32248</td>\n", | |
" <td>9</td>\n", | |
" <td>14</td>\n", | |
" <td>1</td>\n", | |
" <td>m3c8</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>179069</td>\n", | |
" <td>T?nh Cà Mau</td>\n", | |
" <td>973</td>\n", | |
" <td>32248</td>\n", | |
" <td>9</td>\n", | |
" <td>19</td>\n", | |
" <td>1</td>\n", | |
" <td>m3c8</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>179070 rows × 8 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" tinh huyen xa diaban hoso matv variable value\n", | |
"0 Thành ph? Hà N?i 1 4 8 14 1 m3c2 h?nh\n", | |
"1 Thành ph? Hà N?i 1 4 8 19 1 m3c2 kh?i\n", | |
"2 Thành ph? Hà N?i 1 4 8 19 2 m3c2 phuong\n", | |
"3 Thành ph? Hà N?i 1 7 6 14 2 m3c2 nga\n", | |
"4 Thành ph? Hà N?i 1 7 6 14 5 m3c2 ng?c\n", | |
"... ... ... ... ... ... ... ... ...\n", | |
"179065 T?nh Cà Mau 973 32242 18 15 1 m3c8 NaN\n", | |
"179066 T?nh Cà Mau 973 32248 9 13 4 m3c8 NaN\n", | |
"179067 T?nh Cà Mau 973 32248 9 13 1 m3c8 NaN\n", | |
"179068 T?nh Cà Mau 973 32248 9 14 1 m3c8 NaN\n", | |
"179069 T?nh Cà Mau 973 32248 9 19 1 m3c8 NaN\n", | |
"\n", | |
"[179070 rows x 8 columns]" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 67 | |
} | |
] | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment