Created
July 25, 2018 11:46
-
-
Save naviarh/c9aa9b9de36fe9ec553403a50c5b6b81 to your computer and use it in GitHub Desktop.
pandas.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"nbformat": 4, | |
"nbformat_minor": 0, | |
"metadata": { | |
"colab": { | |
"name": "pandas.ipynb", | |
"version": "0.3.2", | |
"provenance": [], | |
"collapsed_sections": [], | |
"include_colab_link": true | |
}, | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3" | |
} | |
}, | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "view-in-github", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"[View in Colaboratory](https://colab.research.google.com/gist/naviarh/c9aa9b9de36fe9ec553403a50c5b6b81/pandas.ipynb)" | |
] | |
}, | |
{ | |
"metadata": { | |
"id": "9XRMYCuLD8E5", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"cell_type": "code", | |
"source": [ | |
"#!/usr/bin/env python3\n", | |
"# -*- coding: utf-8 -*-" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"id": "8Lr1I6CzECCb", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 34 | |
}, | |
"outputId": "312d1731-67e0-43e1-dfe8-22a9da5eed0e" | |
}, | |
"cell_type": "code", | |
"source": [ | |
"# Put these at the top of every notebook, to get automatic reloading and inline plotting\n", | |
"%reload_ext autoreload\n", | |
"%autoreload 2\n", | |
"%matplotlib inline\n", | |
"!pwd" | |
], | |
"execution_count": 2, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"/content\r\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"id": "t4og7bFaEM61", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 59 | |
}, | |
"outputId": "47072aa4-cca6-445c-d1e3-885afd307336" | |
}, | |
"cell_type": "code", | |
"source": [ | |
"import subprocess, os\n", | |
"os.uname()" | |
], | |
"execution_count": 3, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"posix.uname_result(sysname='Linux', nodename='8911bc6be2c6', release='4.14.33+', version='#1 SMP Wed Jun 20 01:15:52 PDT 2018', machine='x86_64')" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 3 | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"id": "C6JFy_KHEXhT", | |
"colab_type": "text" | |
}, | |
"cell_type": "markdown", | |
"source": [ | |
"# Pandas\n", | |
"\n", | |
"\n", | |
"---\n", | |
"\n", | |
"**Series** - ряды\n", | |
"\n", | |
"**DataFrame** - таблицы\n", | |
"\n", | |
"\n", | |
"---\n", | |
"\n" | |
] | |
}, | |
{ | |
"metadata": { | |
"id": "amyzmBPJRIA9", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"cell_type": "code", | |
"source": [ | |
"import numpy as np\n", | |
"!pip install --upgrade pip\n", | |
"!pip3 install pytest moto\n", | |
"!pip3 show pytest moto | grep Name -A 1" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"id": "f8ndhOHMENct", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"cell_type": "code", | |
"source": [ | |
"import pandas as pd\n", | |
"pd.test() # Тестировать будет долго" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"id": "o4e7X6P4U49j", | |
"colab_type": "text" | |
}, | |
"cell_type": "markdown", | |
"source": [ | |
"## Series - создание" | |
] | |
}, | |
{ | |
"metadata": { | |
"id": "tyuKOkbMWsbM", | |
"colab_type": "text" | |
}, | |
"cell_type": "markdown", | |
"source": [ | |
"Конструктор класса Series выглядит следующим образом:\n", | |
"\n", | |
"**pandas.Series(data=None, index=None, dtype=None, name=None, copy=False, fastpath=False)**\n", | |
"\n", | |
"**data** – массив, словарь или скалярное значение, на базе которого будет построен Series;\n", | |
"\n", | |
"**index** – список меток, который будет использоваться для доступа к элементам Series. Длина списка должна быть равна длине data;\n", | |
"\n", | |
"**dtype** – объект numpy.dtype, определяющий тип данных;\n", | |
"\n", | |
"**copy** – создает копию массива данных, если параметр равен True, в ином случае ничего не делает." | |
] | |
}, | |
{ | |
"metadata": { | |
"id": "F63vqveTVr4k", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 238 | |
}, | |
"outputId": "69da264e-68de-4f40-b0e2-30e2dc1372ee" | |
}, | |
"cell_type": "code", | |
"source": [ | |
"# Создание Series из списка Python\n", | |
"print(pd.Series([1, 2, 3, 4, 5]), '\\n')\n", | |
"print(pd.Series([1, 2, 3, 4, 5], ['a', 'b', 'c', 'd', 'e'], dtype=float))" | |
], | |
"execution_count": 91, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"0 1\n", | |
"1 2\n", | |
"2 3\n", | |
"3 4\n", | |
"4 5\n", | |
"dtype: int64 \n", | |
"\n", | |
"a 1.0\n", | |
"b 2.0\n", | |
"c 3.0\n", | |
"d 4.0\n", | |
"e 5.0\n", | |
"dtype: float64\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"id": "KfKSA1MZaJBE", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 119 | |
}, | |
"outputId": "0b040458-d81c-46c6-ca1c-ab5478ca4317" | |
}, | |
"cell_type": "code", | |
"source": [ | |
"# Создание Series из ndarray массива numpy\n", | |
"ndarr = np.array([1, 2, 3, 4, 5])\n", | |
"type(ndarr)\n", | |
"print(pd.Series(ndarr, [.1, .2, .3, .4, .5]))" | |
], | |
"execution_count": 6, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"0.1 1\n", | |
"0.2 2\n", | |
"0.3 3\n", | |
"0.4 4\n", | |
"0.5 5\n", | |
"dtype: int64\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"id": "Lx7KhhY5auUJ", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 85 | |
}, | |
"outputId": "277d2664-51b5-4f71-da22-4ee6845f1b46" | |
}, | |
"cell_type": "code", | |
"source": [ | |
"# Создание Series из словаря (dict)\n", | |
"d = {'a0':1, 'a1':2, 'a3':3}\n", | |
"print(pd.Series(d))" | |
], | |
"execution_count": 9, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"a0 1\n", | |
"a1 2\n", | |
"a3 3\n", | |
"dtype: int64\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"id": "eDWUbKpjbWhO", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 119 | |
}, | |
"outputId": "febe5172-9b86-4391-9b6e-bdb12e0b2cb2" | |
}, | |
"cell_type": "code", | |
"source": [ | |
"# Создание Series с использованием константы\n", | |
"print(pd.Series('word', [0,1,2,3,4]))" | |
], | |
"execution_count": 10, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"0 word\n", | |
"1 word\n", | |
"2 word\n", | |
"3 word\n", | |
"4 word\n", | |
"dtype: object\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"id": "zQkYgUUHdv14", | |
"colab_type": "text" | |
}, | |
"cell_type": "markdown", | |
"source": [ | |
"## DataFrame - создание" | |
] | |
}, | |
{ | |
"metadata": { | |
"id": "HnMbE1VneCNM", | |
"colab_type": "text" | |
}, | |
"cell_type": "markdown", | |
"source": [ | |
"Конструктор класса DataFrame выглядит так:\n", | |
"\n", | |
"**class pandas.DataFrame(data=None, index=None, columns=None, dtype=None, copy=False)**\n", | |
"\n", | |
"**data** – массив ndarray, словарь (dict) или другой DataFrame;\n", | |
"\n", | |
"**index** – список меток для записей (имена строк таблицы);\n", | |
"\n", | |
"**columns** – список меток для полей (имена столбцов таблицы);\n", | |
"\n", | |
"**dtype** – объект numpy.dtype, определяющий тип данных;\n", | |
"\n", | |
"**copy** – создает копию массива данных, если параметр равен True, в ином случае ничего не делает." | |
] | |
}, | |
{ | |
"metadata": { | |
"id": "KEW_aaLDguoB", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 111 | |
}, | |
"outputId": "2ca07a9c-f80f-4bc6-86f2-29f2002e6c10" | |
}, | |
"cell_type": "code", | |
"source": [ | |
"# Создание DataFrame из списка словарей\n", | |
"d = [{\"price\": 3, \"count\":8}, {\"price\": 4, \"count\": 11}]\n", | |
"dd = pd.DataFrame(d, ['one','two'])\n", | |
"dd" | |
], | |
"execution_count": 34, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>count</th>\n", | |
" <th>price</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>one</th>\n", | |
" <td>8</td>\n", | |
" <td>3</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>two</th>\n", | |
" <td>11</td>\n", | |
" <td>4</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" count price\n", | |
"one 8 3\n", | |
"two 11 4" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 34 | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"id": "mM5q7nD5iyrX", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 204 | |
}, | |
"outputId": "b04f75bc-1f58-44e7-b85f-38acea1a95a3" | |
}, | |
"cell_type": "code", | |
"source": [ | |
"# Создание DataFrame из словаря списков\n", | |
"print(pd.DataFrame({0:['ax','ay'], 1:['bx','by'], 2:['cx','cy']}), '\\n')\n", | |
"print(pd.DataFrame({0:np.array(['ax','ay']), 1:np.array(['bx','by']), 2:np.array(['cx','cy'])}), '\\n')\n", | |
"print(pd.DataFrame({0:pd.Series(['ax','ay']), 1:pd.Series(['bx','by']), 2:pd.Series(['cx','cy'])}))" | |
], | |
"execution_count": 92, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
" 0 1 2\n", | |
"0 ax bx cx\n", | |
"1 ay by cy \n", | |
"\n", | |
" 0 1 2\n", | |
"0 ax bx cx\n", | |
"1 ay by cy \n", | |
"\n", | |
" 0 1 2\n", | |
"0 ax bx cx\n", | |
"1 ay by cy\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"id": "P0wkRUmAkyC2", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 85 | |
}, | |
"outputId": "f222fab2-d34c-4cf8-cea0-7d05a1c9e93c" | |
}, | |
"cell_type": "code", | |
"source": [ | |
"# Создание DataFrame из двумерного массива numpy\n", | |
"print(pd.DataFrame(np.zeros((3,5))))" | |
], | |
"execution_count": 14, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
" 0 1 2 3 4\n", | |
"0 0.0 0.0 0.0 0.0 0.0\n", | |
"1 0.0 0.0 0.0 0.0 0.0\n", | |
"2 0.0 0.0 0.0 0.0 0.0\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"id": "u7ALENH0lY87", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 179 | |
}, | |
"outputId": "5a6a6ef9-1489-4cdd-bc5d-df5bdd9d1bc3" | |
}, | |
"cell_type": "code", | |
"source": [ | |
"# Создание DataFrame из другого DataFrame\n", | |
"df1 = pd.DataFrame(np.zeros((2,3)))\n", | |
"df2 = pd.DataFrame(df1, dtype=int)\n", | |
"print(df1, '\\n')\n", | |
"df2" | |
], | |
"execution_count": 93, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
" 0 1 2\n", | |
"0 0.0 0.0 0.0\n", | |
"1 0.0 0.0 0.0 \n", | |
"\n" | |
], | |
"name": "stdout" | |
}, | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>0</th>\n", | |
" <th>1</th>\n", | |
" <th>2</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" 0 1 2\n", | |
"0 0 0 0\n", | |
"1 0 0 0" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 93 | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"id": "WsXRWxfpoHQH", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 111 | |
}, | |
"outputId": "f9bea09b-7d24-4cd8-a6f2-401c8f248fc5" | |
}, | |
"cell_type": "code", | |
"source": [ | |
"# Создание DataFrame с использованием константы\n", | |
"pd.DataFrame('word', [0,1], [0,1,2,3,4])" | |
], | |
"execution_count": 94, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>0</th>\n", | |
" <th>1</th>\n", | |
" <th>2</th>\n", | |
" <th>3</th>\n", | |
" <th>4</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>word</td>\n", | |
" <td>word</td>\n", | |
" <td>word</td>\n", | |
" <td>word</td>\n", | |
" <td>word</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>word</td>\n", | |
" <td>word</td>\n", | |
" <td>word</td>\n", | |
" <td>word</td>\n", | |
" <td>word</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" 0 1 2 3 4\n", | |
"0 word word word word word\n", | |
"1 word word word word word" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 94 | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"id": "c781a4HsnUsW", | |
"colab_type": "text" | |
}, | |
"cell_type": "markdown", | |
"source": [ | |
"## Доступ к данным в pandas" | |
] | |
}, | |
{ | |
"metadata": { | |
"id": "F1urdwLV3a0b", | |
"colab_type": "text" | |
}, | |
"cell_type": "markdown", | |
"source": [ | |
"При работе со структурами Series и DataFrame из библиотеки pandas, как правило, используют два основных способа получения значений элементов.\n", | |
"\n", | |
"- по меткам (.loc[ ])\n", | |
"- по индексам (.iloc[ ])\n", | |
"\n", | |
"Для Series .loc[ ] и .iloc[ ] можно не писать полностью — достаточно квадратных скобок [ ]; для DataFrame квадратные скобки [ ] с меткой выбирают столбец, а не строку." | |
] | |
}, | |
{ | |
"metadata": { | |
"id": "_o6PbAp539Nz", | |
"colab_type": "text" | |
}, | |
"cell_type": "markdown", | |
"source": [ | |
"**доступ по меткам ( .loc[ ] )**\n", | |
"- метки в виде отдельных символов ['a'] или чисел [5], числа используются в качестве меток, если при создании структуры не был указан список с метками;\n", | |
"- список меток ['a', 'b', 'c'];\n", | |
"- слайс меток ['a':'c'];\n", | |
"- массив логических переменных;\n", | |
"- callable функция с одним аргументом." | |
] | |
}, | |
{ | |
"metadata": { | |
"id": "NSECHKZU4WGC", | |
"colab_type": "text" | |
}, | |
"cell_type": "markdown", | |
"source": [ | |
"**доступ по индексам ( .iloc[ ] )**\n", | |
"- отдельные целые числа для доступа к элементам структуры;\n", | |
"- массивы целых чисел [0, 1, 2];\n", | |
"- слайсы целых чисел [1:4];\n", | |
"- массивы логических переменных;\n", | |
"- callable функция с одним аргументом." | |
] | |
}, | |
{ | |
"metadata": { | |
"id": "_psdV55k-AAt", | |
"colab_type": "text" | |
}, | |
"cell_type": "markdown", | |
"source": [ | |
"\n", | |
"\n", | |
"---\n", | |
"\n", | |
"\n", | |
"С помощью синтаксиса через точку:" | |
] | |
}, | |
{ | |
"metadata": { | |
"id": "IxYJ--L925pE", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 204 | |
}, | |
"outputId": "01042e72-3381-4eef-8db8-153c1e2d5c49" | |
}, | |
"cell_type": "code", | |
"source": [ | |
"print(dd, '\\n\\n')\n", | |
"print(dd.price, '\\n\\n')\n", | |
"print(dd.price.one)" | |
], | |
"execution_count": 64, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
" count price\n", | |
"one 8 3\n", | |
"two 11 4 \n", | |
"\n", | |
"\n", | |
"one 3\n", | |
"two 4\n", | |
"Name: price, dtype: int64 \n", | |
"\n", | |
"\n", | |
"3\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"id": "o3r3yexLBZXA", | |
"colab_type": "text" | |
}, | |
"cell_type": "markdown", | |
"source": [ | |
"### Получение случайного набора из структур pandas\n", | |
"метод **sample()**" | |
] | |
}, | |
{ | |
"metadata": { | |
"id": "E9MWDAD1-tXl", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 119 | |
}, | |
"outputId": "747ac519-a73a-4ca3-cf6a-aabcbfc7a428" | |
}, | |
"cell_type": "code", | |
"source": [ | |
"s = pd.Series([10, 20, 30, 40, 50], ['a', 'b', 'c', 'd', 'e'])\n", | |
"s" | |
], | |
"execution_count": 66, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"a 10\n", | |
"b 20\n", | |
"c 30\n", | |
"d 40\n", | |
"e 50\n", | |
"dtype: int64" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 66 | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"id": "YkekVhosBr3w", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 51 | |
}, | |
"outputId": "7b55c838-2c5b-4bb4-fc24-3e4283e0e34f" | |
}, | |
"cell_type": "code", | |
"source": [ | |
"# Выборка случайного элемента\n", | |
"s.sample()" | |
], | |
"execution_count": 76, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"d 40\n", | |
"dtype: int64" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 76 | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"id": "r2lYASaFBv29", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 85 | |
}, | |
"outputId": "696b5abf-1717-492d-e88b-701755fd6ee2" | |
}, | |
"cell_type": "code", | |
"source": [ | |
"# Выборка нескольких элементов\n", | |
"s.sample(n=3)" | |
], | |
"execution_count": 77, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"e 50\n", | |
"b 20\n", | |
"a 10\n", | |
"dtype: int64" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 77 | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"id": "FkcZ5DSSB5OK", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 68 | |
}, | |
"outputId": "ed91d3b6-5d3f-41fc-90c6-d202a2db457e" | |
}, | |
"cell_type": "code", | |
"source": [ | |
"# Выборка доли от общего числа объектов в структуре с помощью параметра frac\n", | |
"s.sample(frac=0.3)" | |
], | |
"execution_count": 78, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"c 30\n", | |
"a 10\n", | |
"dtype: int64" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 78 | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"id": "wJgHHG8RCR5Y", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 85 | |
}, | |
"outputId": "b2a4c588-e14d-424d-fd23-b94d1adf2de3" | |
}, | |
"cell_type": "code", | |
"source": [ | |
"# Мы можем передать вектор весов, длина которого должна быть равна количеству\n", | |
"# элементов в структуре. Вес в данном случае — это вероятность появления элемента\n", | |
"# в выборке; если сумма весов не равна единице, pandas нормализует их автоматически\n", | |
"w = [0.1, 0.2, 0.5, 0.1, 0.1]\n", | |
"s.sample(n = 3, weights=w)" | |
], | |
"execution_count": 79, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"c 30\n", | |
"a 10\n", | |
"d 40\n", | |
"dtype: int64" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 79 | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"id": "gWQOurfXCozi", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 216 | |
}, | |
"outputId": "b58112b1-4f05-4881-87dc-5c4a338d11a9" | |
}, | |
"cell_type": "code", | |
"source": [ | |
"# Для структуры DataFrame\n", | |
"df = pd.DataFrame({\"price\":[1, 2, 3, 5, 6], \n", | |
" \"count\": [10, 20, 30, 40, 50], \n", | |
" \"percent\": [24, 51, 71, 25, 42]})\n", | |
"print(df, '\\n\\n')\n", | |
"df.sample()" | |
], | |
"execution_count": 86, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
" count percent price\n", | |
"0 10 24 1\n", | |
"1 20 51 2\n", | |
"2 30 71 3\n", | |
"3 40 25 5\n", | |
"4 50 42 6 \n", | |
"\n", | |
"\n" | |
], | |
"name": "stdout" | |
}, | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>count</th>\n", | |
" <th>percent</th>\n", | |
" <th>price</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>10</td>\n", | |
" <td>24</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" count percent price\n", | |
"0 10 24 1" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 86 | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"id": "sTiDZ09VDbLj", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 204 | |
}, | |
"outputId": "5256ab16-4499-4c97-9fef-7a6870e06b5a" | |
}, | |
"cell_type": "code", | |
"source": [ | |
"# При работе с DataFrame можно указать ось\n", | |
"df.sample(axis=1)" | |
], | |
"execution_count": 87, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>percent</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>24</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>51</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>71</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>25</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>42</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" percent\n", | |
"0 24\n", | |
"1 51\n", | |
"2 71\n", | |
"3 25\n", | |
"4 42" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 87 | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"id": "z1-9qQj8Di3z", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 111 | |
}, | |
"outputId": "9694244d-dade-409f-f79c-40d95664428d" | |
}, | |
"cell_type": "code", | |
"source": [ | |
"df.sample(n=2, axis=0)" | |
], | |
"execution_count": 89, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>count</th>\n", | |
" <th>percent</th>\n", | |
" <th>price</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>30</td>\n", | |
" <td>71</td>\n", | |
" <td>3</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>10</td>\n", | |
" <td>24</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" count percent price\n", | |
"2 30 71 3\n", | |
"0 10 24 1" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 89 | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"id": "4_ZrYND1Ekrj", | |
"colab_type": "text" | |
}, | |
"cell_type": "markdown", | |
"source": [ | |
"### Добавление и изменение данных в pandas" | |
] | |
}, | |
{ | |
"metadata": { | |
"id": "Yw5VcWqjFqLq", | |
"colab_type": "text" | |
}, | |
"cell_type": "markdown", | |
"source": [ | |
"\n", | |
"\n", | |
"---\n", | |
"\n", | |
"\n", | |
"**Для структуры Series**" | |
] | |
}, | |
{ | |
"metadata": { | |
"id": "YZCeCsESElEk", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 119 | |
}, | |
"outputId": "4da875b6-db7f-46fd-d306-fe02016b40b7" | |
}, | |
"cell_type": "code", | |
"source": [ | |
"s = pd.Series([10, 20, 30, 40, 50], ['a', 'b', 'c', 'd', 'e'])\n", | |
"s" | |
], | |
"execution_count": 95, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"a 10\n", | |
"b 20\n", | |
"c 30\n", | |
"d 40\n", | |
"e 50\n", | |
"dtype: int64" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 95 | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"id": "vBVSJwnKEqlz", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 136 | |
}, | |
"outputId": "4ba934b7-c558-40a6-c3bf-fac5a46169c3" | |
}, | |
"cell_type": "code", | |
"source": [ | |
"# Добавление нового элемента\n", | |
"s['f'] = 60\n", | |
"s" | |
], | |
"execution_count": 96, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"a 10\n", | |
"b 20\n", | |
"c 30\n", | |
"d 40\n", | |
"e 50\n", | |
"f 60\n", | |
"dtype: int64" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 96 | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"id": "VKDUEg3vEu5u", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 136 | |
}, | |
"outputId": "58f27a6e-5f05-4116-d52d-4c030948af6b" | |
}, | |
"cell_type": "code", | |
"source": [ | |
"# Изменение существующего элемента\n", | |
"s['a'] = 100\n", | |
"s" | |
], | |
"execution_count": 97, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"a 100\n", | |
"b 20\n", | |
"c 30\n", | |
"d 40\n", | |
"e 50\n", | |
"f 60\n", | |
"dtype: int64" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 97 | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"id": "x3vyYn1oFjjI", | |
"colab_type": "text" | |
}, | |
"cell_type": "markdown", | |
"source": [ | |
"\n", | |
"\n", | |
"---\n", | |
"\n", | |
"\n", | |
"**Для структуры DataFrame**" | |
] | |
}, | |
{ | |
"metadata": { | |
"id": "PXXxsBsmFRW6", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 204 | |
}, | |
"outputId": "6ccbdc66-5e9f-4612-ae3e-ea602635a098" | |
}, | |
"cell_type": "code", | |
"source": [ | |
"# Для структуры DataFrame\n", | |
"df = pd.DataFrame({\"price\":[1, 2, 3, 5, 6], \n", | |
" \"count\": [10, 20, 30, 40, 50], \n", | |
" \"percent\": [24, 51, 71, 25, 42]})\n", | |
"df" | |
], | |
"execution_count": 155, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>count</th>\n", | |
" <th>percent</th>\n", | |
" <th>price</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>10</td>\n", | |
" <td>24</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>20</td>\n", | |
" <td>51</td>\n", | |
" <td>2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>30</td>\n", | |
" <td>71</td>\n", | |
" <td>3</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>40</td>\n", | |
" <td>25</td>\n", | |
" <td>5</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>50</td>\n", | |
" <td>42</td>\n", | |
" <td>6</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" count percent price\n", | |
"0 10 24 1\n", | |
"1 20 51 2\n", | |
"2 30 71 3\n", | |
"3 40 25 5\n", | |
"4 50 42 6" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 155 | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"id": "y_5L5lNAF6WR", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 204 | |
}, | |
"outputId": "effbd99d-2424-4128-b8c4-d4ffc19e8113" | |
}, | |
"cell_type": "code", | |
"source": [ | |
"# Добавление (столбец)\n", | |
"df['value'] = [3, 14, 7, 91, 5]\n", | |
"df" | |
], | |
"execution_count": 156, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>count</th>\n", | |
" <th>percent</th>\n", | |
" <th>price</th>\n", | |
" <th>value</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>10</td>\n", | |
" <td>24</td>\n", | |
" <td>1</td>\n", | |
" <td>3</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>20</td>\n", | |
" <td>51</td>\n", | |
" <td>2</td>\n", | |
" <td>14</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>30</td>\n", | |
" <td>71</td>\n", | |
" <td>3</td>\n", | |
" <td>7</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>40</td>\n", | |
" <td>25</td>\n", | |
" <td>5</td>\n", | |
" <td>91</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>50</td>\n", | |
" <td>42</td>\n", | |
" <td>6</td>\n", | |
" <td>5</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" count percent price value\n", | |
"0 10 24 1 3\n", | |
"1 20 51 2 14\n", | |
"2 30 71 3 7\n", | |
"3 40 25 5 91\n", | |
"4 50 42 6 5" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 156 | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"id": "5n1YaMpoGDAi", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 204 | |
}, | |
"outputId": "0089f050-5fc3-4a7d-be9b-96effd44d5d8" | |
}, | |
"cell_type": "code", | |
"source": [ | |
"# Изменение существующего столбца\n", | |
"df['count'] = [0, 0, 0, 0, 0]\n", | |
"df" | |
], | |
"execution_count": 157, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>count</th>\n", | |
" <th>percent</th>\n", | |
" <th>price</th>\n", | |
" <th>value</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>0</td>\n", | |
" <td>24</td>\n", | |
" <td>1</td>\n", | |
" <td>3</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>0</td>\n", | |
" <td>51</td>\n", | |
" <td>2</td>\n", | |
" <td>14</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>0</td>\n", | |
" <td>71</td>\n", | |
" <td>3</td>\n", | |
" <td>7</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>0</td>\n", | |
" <td>25</td>\n", | |
" <td>5</td>\n", | |
" <td>91</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>0</td>\n", | |
" <td>42</td>\n", | |
" <td>6</td>\n", | |
" <td>5</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" count percent price value\n", | |
"0 0 24 1 3\n", | |
"1 0 51 2 14\n", | |
"2 0 71 3 7\n", | |
"3 0 25 5 91\n", | |
"4 0 42 6 5" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 157 | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"id": "VXzvzaPyGP2F", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"cell_type": "code", | |
"source": [ | |
"# Добавление и изменение строк\n", | |
"# Получение размерностей и индексов" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"id": "6GBnl8YXQmO1", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"cell_type": "code", | |
"source": [ | |
"" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment