Created
May 28, 2018 05:57
-
-
Save va2577/bc42ec5c8b4c0af92ad1594947712559 to your computer and use it in GitHub Desktop.
Python と NumPy と pandas のデータのやり取り
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Python と NumPy と pandas" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import numpy as np\n", | |
"import pandas as pd\n", | |
"import random" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## list → ndarray → Series → ndarray → list\n", | |
"\n", | |
"* [4.6. Sequence Types — list, tuple, range](https://docs.python.org/3.6/library/stdtypes.html#sequence-types-list-tuple-range)\n", | |
"* [class list([iterable])](https://docs.python.org/3.6/library/stdtypes.html#list)\n", | |
"* [class range(stop)](https://docs.python.org/3.6/library/stdtypes.html#range)\n", | |
"* [numpy.array](https://docs.scipy.org/doc/numpy/reference/generated/numpy.array.html)\n", | |
"* [numpy.ndarray.tolist](https://docs.scipy.org/doc/numpy/reference/generated/numpy.ndarray.tolist.html)\n", | |
"* [pandas.Series](https://pandas.pydata.org/pandas-docs/stable/generated/pandas.Series.html)\n", | |
"* [pandas.Series.values](https://pandas.pydata.org/pandas-docs/stable/generated/pandas.Series.values.html)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"<class 'list'>\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/plain": [ | |
"[7, 6, 6, 1, 1, 8, 8, 4, 1, 2]" | |
] | |
}, | |
"execution_count": 2, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"l = [random.randint(0, 9) for x in range(10)]\n", | |
"print(type(l))\n", | |
"l" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"<class 'numpy.ndarray'>\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/plain": [ | |
"array([7, 6, 6, 1, 1, 8, 8, 4, 1, 2])" | |
] | |
}, | |
"execution_count": 3, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"a = np.array(l)\n", | |
"print(type(a))\n", | |
"a" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"<class 'pandas.core.series.Series'>\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/plain": [ | |
"0 7\n", | |
"1 6\n", | |
"2 6\n", | |
"3 1\n", | |
"4 1\n", | |
"5 8\n", | |
"6 8\n", | |
"7 4\n", | |
"8 1\n", | |
"9 2\n", | |
"dtype: int64" | |
] | |
}, | |
"execution_count": 4, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"s = pd.Series(a)\n", | |
"print(type(s))\n", | |
"s" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"<class 'numpy.ndarray'>\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/plain": [ | |
"array([7, 6, 6, 1, 1, 8, 8, 4, 1, 2])" | |
] | |
}, | |
"execution_count": 5, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"a2 = s.values\n", | |
"print(type(a2))\n", | |
"a2" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"<class 'list'>\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/plain": [ | |
"[7, 6, 6, 1, 1, 8, 8, 4, 1, 2]" | |
] | |
}, | |
"execution_count": 6, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"l2 = a2.tolist()\n", | |
"print(type(l2))\n", | |
"l2" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## list → ndarray → DataFrame → ndarray → list\n", | |
"\n", | |
"* [pandas.DataFrame](https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.html)\n", | |
"* [pandas.DataFrame.values](https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.values.html)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"<class 'list'>\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/plain": [ | |
"[[0, 8],\n", | |
" [2, 2],\n", | |
" [0, 3],\n", | |
" [6, 8],\n", | |
" [6, 2],\n", | |
" [1, 1],\n", | |
" [3, 3],\n", | |
" [8, 8],\n", | |
" [0, 5],\n", | |
" [2, 9]]" | |
] | |
}, | |
"execution_count": 7, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"l3 = [[random.randint(0, 9), random.randint(0, 9)] for x in range(10)]\n", | |
"print(type(l3))\n", | |
"l3" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"<class 'numpy.ndarray'>\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/plain": [ | |
"array([[0, 8],\n", | |
" [2, 2],\n", | |
" [0, 3],\n", | |
" [6, 8],\n", | |
" [6, 2],\n", | |
" [1, 1],\n", | |
" [3, 3],\n", | |
" [8, 8],\n", | |
" [0, 5],\n", | |
" [2, 9]])" | |
] | |
}, | |
"execution_count": 8, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"a3 = np.array(l3)\n", | |
"print(type(a3))\n", | |
"a3" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"<class 'pandas.core.frame.DataFrame'>\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>0</th>\n", | |
" <th>1</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>0</td>\n", | |
" <td>8</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>2</td>\n", | |
" <td>2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>0</td>\n", | |
" <td>3</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>6</td>\n", | |
" <td>8</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>6</td>\n", | |
" <td>2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td>3</td>\n", | |
" <td>3</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7</th>\n", | |
" <td>8</td>\n", | |
" <td>8</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8</th>\n", | |
" <td>0</td>\n", | |
" <td>5</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9</th>\n", | |
" <td>2</td>\n", | |
" <td>9</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" 0 1\n", | |
"0 0 8\n", | |
"1 2 2\n", | |
"2 0 3\n", | |
"3 6 8\n", | |
"4 6 2\n", | |
"5 1 1\n", | |
"6 3 3\n", | |
"7 8 8\n", | |
"8 0 5\n", | |
"9 2 9" | |
] | |
}, | |
"execution_count": 9, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df3 = pd.DataFrame(a3)\n", | |
"print(type(df3))\n", | |
"df3" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"<class 'numpy.ndarray'>\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/plain": [ | |
"array([[0, 8],\n", | |
" [2, 2],\n", | |
" [0, 3],\n", | |
" [6, 8],\n", | |
" [6, 2],\n", | |
" [1, 1],\n", | |
" [3, 3],\n", | |
" [8, 8],\n", | |
" [0, 5],\n", | |
" [2, 9]])" | |
] | |
}, | |
"execution_count": 10, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"a4 = df3.values\n", | |
"print(type(a4))\n", | |
"a4" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"<class 'list'>\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/plain": [ | |
"[[0, 8],\n", | |
" [2, 2],\n", | |
" [0, 3],\n", | |
" [6, 8],\n", | |
" [6, 2],\n", | |
" [1, 1],\n", | |
" [3, 3],\n", | |
" [8, 8],\n", | |
" [0, 5],\n", | |
" [2, 9]]" | |
] | |
}, | |
"execution_count": 11, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"l4 = a4.tolist()\n", | |
"print(type(l4))\n", | |
"l4" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## NumPy と pandas" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### dropna\n", | |
"\n", | |
"* [python - Removing nan values from an array - Stack Overflow](https://stackoverflow.com/questions/11620914/removing-nan-values-from-an-array)\n", | |
"* [pandas.Series.dropna — pandas 0.23.0 documentation](https://pandas.pydata.org/pandas-docs/stable/generated/pandas.Series.dropna.html)\n", | |
"* [pandas.DataFrame.dropna — pandas 0.23.0 documentation](https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.dropna.html)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"array([ 1., 2., nan, 4., 5., nan, 7., 8., nan])" | |
] | |
}, | |
"execution_count": 12, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"a11 = np.array([1, 2, np.nan, 4, 5, np.nan, 7, 8, np.nan])\n", | |
"a11" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"array([1., 2., 4., 5., 7., 8.])" | |
] | |
}, | |
"execution_count": 13, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"a11[~np.isnan(a11)]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 14, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"0 1.0\n", | |
"1 2.0\n", | |
"2 NaN\n", | |
"3 4.0\n", | |
"4 5.0\n", | |
"5 NaN\n", | |
"6 7.0\n", | |
"7 8.0\n", | |
"8 NaN\n", | |
"dtype: float64" | |
] | |
}, | |
"execution_count": 14, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"s11 = pd.Series(a11)\n", | |
"s11" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 15, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"0 1.0\n", | |
"1 2.0\n", | |
"3 4.0\n", | |
"4 5.0\n", | |
"6 7.0\n", | |
"7 8.0\n", | |
"dtype: float64" | |
] | |
}, | |
"execution_count": 15, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"s11.dropna()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### shift\n", | |
"\n", | |
"* [python - Shift elements in a numpy array - Stack Overflow](https://stackoverflow.com/questions/30399534/shift-elements-in-a-numpy-array)\n", | |
"* [numpy.roll — NumPy v1.14 Manual](https://docs.scipy.org/doc/numpy/reference/generated/numpy.roll.html)\n", | |
"* [pandas.Series.shift — pandas 0.23.0 documentation](https://pandas.pydata.org/pandas-docs/stable/generated/pandas.Series.shift.html)\n", | |
"* [pandas.DataFrame.shift — pandas 0.23.0 documentation](https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.shift.html)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 16, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"array([nan, nan, 1., 2., nan, 4., 5., nan, 7.])" | |
] | |
}, | |
"execution_count": 16, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"shift = 2\n", | |
"np.concatenate((np.full(shift, np.nan), a11[:-shift])) if shift >= 0 else np.concatenate((a11[-shift:], np.full(-shift, np.nan)))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 17, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"0 NaN\n", | |
"1 NaN\n", | |
"2 1.0\n", | |
"3 2.0\n", | |
"4 NaN\n", | |
"5 4.0\n", | |
"6 5.0\n", | |
"7 NaN\n", | |
"8 7.0\n", | |
"dtype: float64" | |
] | |
}, | |
"execution_count": 17, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"pd.Series(a11).shift(2)" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.5.2" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment