Last active
August 3, 2018 01:37
-
-
Save palewire/b18042a66cf40d9c1f9b2f61715dfc54 to your computer and use it in GitHub Desktop.
pandas-string-slice-example
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# pandas string slice example" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"/home/palewire/.local/share/virtualenvs/pandas-string-slice-example-CyHc_5BR/lib/python3.6/importlib/_bootstrap.py:219: RuntimeWarning: numpy.dtype size changed, may indicate binary incompatibility. Expected 96, got 88\n", | |
" return f(*args, **kwds)\n" | |
] | |
} | |
], | |
"source": [ | |
"import pandas as pd" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"df = pd.read_csv(\"ftp://ftp.cde.ca.gov/demo/acgr/cohort1617.txt\", delimiter=\"\\t\", dtype=str)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 15, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"trimmed_df = df[~pd.isnull(df.SchoolName)][[\n", | |
" 'CountyCode',\n", | |
" 'DistrictCode',\n", | |
" 'SchoolCode',\n", | |
" 'SchoolName'\n", | |
"]]" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Combining strings columns" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 17, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"trimmed_df['CDSCode'] = trimmed_df.CountyCode + trimmed_df.DistrictCode + trimmed_df.SchoolCode" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 32, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>CountyCode</th>\n", | |
" <th>DistrictCode</th>\n", | |
" <th>SchoolCode</th>\n", | |
" <th>SchoolName</th>\n", | |
" <th>CDSCode</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>7943</th>\n", | |
" <td>01</td>\n", | |
" <td>10017</td>\n", | |
" <td>0000000</td>\n", | |
" <td>District Office</td>\n", | |
" <td>01100170000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7944</th>\n", | |
" <td>01</td>\n", | |
" <td>10017</td>\n", | |
" <td>0000000</td>\n", | |
" <td>District Office</td>\n", | |
" <td>01100170000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7945</th>\n", | |
" <td>01</td>\n", | |
" <td>10017</td>\n", | |
" <td>0000000</td>\n", | |
" <td>District Office</td>\n", | |
" <td>01100170000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7946</th>\n", | |
" <td>01</td>\n", | |
" <td>10017</td>\n", | |
" <td>0000000</td>\n", | |
" <td>District Office</td>\n", | |
" <td>01100170000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7947</th>\n", | |
" <td>01</td>\n", | |
" <td>10017</td>\n", | |
" <td>0000000</td>\n", | |
" <td>District Office</td>\n", | |
" <td>01100170000000</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" CountyCode DistrictCode SchoolCode SchoolName CDSCode\n", | |
"7943 01 10017 0000000 District Office 01100170000000\n", | |
"7944 01 10017 0000000 District Office 01100170000000\n", | |
"7945 01 10017 0000000 District Office 01100170000000\n", | |
"7946 01 10017 0000000 District Office 01100170000000\n", | |
"7947 01 10017 0000000 District Office 01100170000000" | |
] | |
}, | |
"execution_count": 32, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"trimmed_df.head()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Splitting string columns" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 54, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"trimmed_df['NewSchoolCode'] = trimmed_df.CDSCode.str[7:]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 56, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"trimmed_df['NewDistrictCode'] = trimmed_df.CDSCode.str[2:7]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 57, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>CountyCode</th>\n", | |
" <th>DistrictCode</th>\n", | |
" <th>SchoolCode</th>\n", | |
" <th>SchoolName</th>\n", | |
" <th>CDSCode</th>\n", | |
" <th>foo</th>\n", | |
" <th>NewSchoolCode</th>\n", | |
" <th>NewDistrictCode</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>7943</th>\n", | |
" <td>01</td>\n", | |
" <td>10017</td>\n", | |
" <td>0000000</td>\n", | |
" <td>District Office</td>\n", | |
" <td>01100170000000</td>\n", | |
" <td></td>\n", | |
" <td>0000000</td>\n", | |
" <td>10017</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7944</th>\n", | |
" <td>01</td>\n", | |
" <td>10017</td>\n", | |
" <td>0000000</td>\n", | |
" <td>District Office</td>\n", | |
" <td>01100170000000</td>\n", | |
" <td></td>\n", | |
" <td>0000000</td>\n", | |
" <td>10017</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7945</th>\n", | |
" <td>01</td>\n", | |
" <td>10017</td>\n", | |
" <td>0000000</td>\n", | |
" <td>District Office</td>\n", | |
" <td>01100170000000</td>\n", | |
" <td></td>\n", | |
" <td>0000000</td>\n", | |
" <td>10017</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7946</th>\n", | |
" <td>01</td>\n", | |
" <td>10017</td>\n", | |
" <td>0000000</td>\n", | |
" <td>District Office</td>\n", | |
" <td>01100170000000</td>\n", | |
" <td></td>\n", | |
" <td>0000000</td>\n", | |
" <td>10017</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7947</th>\n", | |
" <td>01</td>\n", | |
" <td>10017</td>\n", | |
" <td>0000000</td>\n", | |
" <td>District Office</td>\n", | |
" <td>01100170000000</td>\n", | |
" <td></td>\n", | |
" <td>0000000</td>\n", | |
" <td>10017</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" CountyCode DistrictCode SchoolCode SchoolName CDSCode foo \\\n", | |
"7943 01 10017 0000000 District Office 01100170000000 \n", | |
"7944 01 10017 0000000 District Office 01100170000000 \n", | |
"7945 01 10017 0000000 District Office 01100170000000 \n", | |
"7946 01 10017 0000000 District Office 01100170000000 \n", | |
"7947 01 10017 0000000 District Office 01100170000000 \n", | |
"\n", | |
" NewSchoolCode NewDistrictCode \n", | |
"7943 0000000 10017 \n", | |
"7944 0000000 10017 \n", | |
"7945 0000000 10017 \n", | |
"7946 0000000 10017 \n", | |
"7947 0000000 10017 " | |
] | |
}, | |
"execution_count": 57, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"trimmed_df.head()" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.4" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment