Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"df = pd.read_csv('https://vincentarelbundock.github.io/Rdatasets/csv/ggplot2/diamonds.csv')"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Unnamed: 0</th>\n",
" <th>carat</th>\n",
" <th>depth</th>\n",
" <th>table</th>\n",
" <th>price</th>\n",
" <th>x</th>\n",
" <th>y</th>\n",
" <th>z</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>53940.000000</td>\n",
" <td>53940.000000</td>\n",
" <td>53940.000000</td>\n",
" <td>53940.000000</td>\n",
" <td>53940.000000</td>\n",
" <td>53940.000000</td>\n",
" <td>53940.000000</td>\n",
" <td>53940.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>26970.500000</td>\n",
" <td>0.797940</td>\n",
" <td>61.749405</td>\n",
" <td>57.457184</td>\n",
" <td>3932.799722</td>\n",
" <td>5.731157</td>\n",
" <td>5.734526</td>\n",
" <td>3.538734</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>15571.281097</td>\n",
" <td>0.474011</td>\n",
" <td>1.432621</td>\n",
" <td>2.234491</td>\n",
" <td>3989.439738</td>\n",
" <td>1.121761</td>\n",
" <td>1.142135</td>\n",
" <td>0.705699</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>1.000000</td>\n",
" <td>0.200000</td>\n",
" <td>43.000000</td>\n",
" <td>43.000000</td>\n",
" <td>326.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>13485.750000</td>\n",
" <td>0.400000</td>\n",
" <td>61.000000</td>\n",
" <td>56.000000</td>\n",
" <td>950.000000</td>\n",
" <td>4.710000</td>\n",
" <td>4.720000</td>\n",
" <td>2.910000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>26970.500000</td>\n",
" <td>0.700000</td>\n",
" <td>61.800000</td>\n",
" <td>57.000000</td>\n",
" <td>2401.000000</td>\n",
" <td>5.700000</td>\n",
" <td>5.710000</td>\n",
" <td>3.530000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>40455.250000</td>\n",
" <td>1.040000</td>\n",
" <td>62.500000</td>\n",
" <td>59.000000</td>\n",
" <td>5324.250000</td>\n",
" <td>6.540000</td>\n",
" <td>6.540000</td>\n",
" <td>4.040000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>53940.000000</td>\n",
" <td>5.010000</td>\n",
" <td>79.000000</td>\n",
" <td>95.000000</td>\n",
" <td>18823.000000</td>\n",
" <td>10.740000</td>\n",
" <td>58.900000</td>\n",
" <td>31.800000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Unnamed: 0 carat depth table price \\\n",
"count 53940.000000 53940.000000 53940.000000 53940.000000 53940.000000 \n",
"mean 26970.500000 0.797940 61.749405 57.457184 3932.799722 \n",
"std 15571.281097 0.474011 1.432621 2.234491 3989.439738 \n",
"min 1.000000 0.200000 43.000000 43.000000 326.000000 \n",
"25% 13485.750000 0.400000 61.000000 56.000000 950.000000 \n",
"50% 26970.500000 0.700000 61.800000 57.000000 2401.000000 \n",
"75% 40455.250000 1.040000 62.500000 59.000000 5324.250000 \n",
"max 53940.000000 5.010000 79.000000 95.000000 18823.000000 \n",
"\n",
" x y z \n",
"count 53940.000000 53940.000000 53940.000000 \n",
"mean 5.731157 5.734526 3.538734 \n",
"std 1.121761 1.142135 0.705699 \n",
"min 0.000000 0.000000 0.000000 \n",
"25% 4.710000 4.720000 2.910000 \n",
"50% 5.700000 5.710000 3.530000 \n",
"75% 6.540000 6.540000 4.040000 \n",
"max 10.740000 58.900000 31.800000 "
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.describe()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"sub_df = df.loc[df.price > 10000]\n",
"sub_df_copy = sub_df.copy()"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"10 loops, best of 3: 23.1 ms per loop\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/jiffyclub/miniconda3/lib/python3.4/site-packages/pandas/core/indexing.py:415: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
" self.obj[item] = s\n"
]
}
],
"source": [
"%timeit sub_df.loc[sub_df.depth < 61, 'depth'] = 0"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1000 loops, best of 3: 715 µs per loop\n"
]
}
],
"source": [
"%timeit sub_df_copy.loc[sub_df_copy.depth < 61, 'depth'] = 0"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.4.3"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.