Skip to content

Instantly share code, notes, and snippets.

@palewire
Last active August 3, 2018 01:37
Show Gist options
  • Save palewire/b18042a66cf40d9c1f9b2f61715dfc54 to your computer and use it in GitHub Desktop.
Save palewire/b18042a66cf40d9c1f9b2f61715dfc54 to your computer and use it in GitHub Desktop.
pandas-string-slice-example
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# pandas string slice example"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/palewire/.local/share/virtualenvs/pandas-string-slice-example-CyHc_5BR/lib/python3.6/importlib/_bootstrap.py:219: RuntimeWarning: numpy.dtype size changed, may indicate binary incompatibility. Expected 96, got 88\n",
" return f(*args, **kwds)\n"
]
}
],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"df = pd.read_csv(\"ftp://ftp.cde.ca.gov/demo/acgr/cohort1617.txt\", delimiter=\"\\t\", dtype=str)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"trimmed_df = df[~pd.isnull(df.SchoolName)][[\n",
" 'CountyCode',\n",
" 'DistrictCode',\n",
" 'SchoolCode',\n",
" 'SchoolName'\n",
"]]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Combining strings columns"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
"trimmed_df['CDSCode'] = trimmed_df.CountyCode + trimmed_df.DistrictCode + trimmed_df.SchoolCode"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>CountyCode</th>\n",
" <th>DistrictCode</th>\n",
" <th>SchoolCode</th>\n",
" <th>SchoolName</th>\n",
" <th>CDSCode</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>7943</th>\n",
" <td>01</td>\n",
" <td>10017</td>\n",
" <td>0000000</td>\n",
" <td>District Office</td>\n",
" <td>01100170000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7944</th>\n",
" <td>01</td>\n",
" <td>10017</td>\n",
" <td>0000000</td>\n",
" <td>District Office</td>\n",
" <td>01100170000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7945</th>\n",
" <td>01</td>\n",
" <td>10017</td>\n",
" <td>0000000</td>\n",
" <td>District Office</td>\n",
" <td>01100170000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7946</th>\n",
" <td>01</td>\n",
" <td>10017</td>\n",
" <td>0000000</td>\n",
" <td>District Office</td>\n",
" <td>01100170000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7947</th>\n",
" <td>01</td>\n",
" <td>10017</td>\n",
" <td>0000000</td>\n",
" <td>District Office</td>\n",
" <td>01100170000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" CountyCode DistrictCode SchoolCode SchoolName CDSCode\n",
"7943 01 10017 0000000 District Office 01100170000000\n",
"7944 01 10017 0000000 District Office 01100170000000\n",
"7945 01 10017 0000000 District Office 01100170000000\n",
"7946 01 10017 0000000 District Office 01100170000000\n",
"7947 01 10017 0000000 District Office 01100170000000"
]
},
"execution_count": 32,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"trimmed_df.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Splitting string columns"
]
},
{
"cell_type": "code",
"execution_count": 54,
"metadata": {},
"outputs": [],
"source": [
"trimmed_df['NewSchoolCode'] = trimmed_df.CDSCode.str[7:]"
]
},
{
"cell_type": "code",
"execution_count": 56,
"metadata": {},
"outputs": [],
"source": [
"trimmed_df['NewDistrictCode'] = trimmed_df.CDSCode.str[2:7]"
]
},
{
"cell_type": "code",
"execution_count": 57,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>CountyCode</th>\n",
" <th>DistrictCode</th>\n",
" <th>SchoolCode</th>\n",
" <th>SchoolName</th>\n",
" <th>CDSCode</th>\n",
" <th>foo</th>\n",
" <th>NewSchoolCode</th>\n",
" <th>NewDistrictCode</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>7943</th>\n",
" <td>01</td>\n",
" <td>10017</td>\n",
" <td>0000000</td>\n",
" <td>District Office</td>\n",
" <td>01100170000000</td>\n",
" <td></td>\n",
" <td>0000000</td>\n",
" <td>10017</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7944</th>\n",
" <td>01</td>\n",
" <td>10017</td>\n",
" <td>0000000</td>\n",
" <td>District Office</td>\n",
" <td>01100170000000</td>\n",
" <td></td>\n",
" <td>0000000</td>\n",
" <td>10017</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7945</th>\n",
" <td>01</td>\n",
" <td>10017</td>\n",
" <td>0000000</td>\n",
" <td>District Office</td>\n",
" <td>01100170000000</td>\n",
" <td></td>\n",
" <td>0000000</td>\n",
" <td>10017</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7946</th>\n",
" <td>01</td>\n",
" <td>10017</td>\n",
" <td>0000000</td>\n",
" <td>District Office</td>\n",
" <td>01100170000000</td>\n",
" <td></td>\n",
" <td>0000000</td>\n",
" <td>10017</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7947</th>\n",
" <td>01</td>\n",
" <td>10017</td>\n",
" <td>0000000</td>\n",
" <td>District Office</td>\n",
" <td>01100170000000</td>\n",
" <td></td>\n",
" <td>0000000</td>\n",
" <td>10017</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" CountyCode DistrictCode SchoolCode SchoolName CDSCode foo \\\n",
"7943 01 10017 0000000 District Office 01100170000000 \n",
"7944 01 10017 0000000 District Office 01100170000000 \n",
"7945 01 10017 0000000 District Office 01100170000000 \n",
"7946 01 10017 0000000 District Office 01100170000000 \n",
"7947 01 10017 0000000 District Office 01100170000000 \n",
"\n",
" NewSchoolCode NewDistrictCode \n",
"7943 0000000 10017 \n",
"7944 0000000 10017 \n",
"7945 0000000 10017 \n",
"7946 0000000 10017 \n",
"7947 0000000 10017 "
]
},
"execution_count": 57,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"trimmed_df.head()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment