Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save mattwigway/76083ed7d296cf781e043cd7132d4f01 to your computer and use it in GitHub Desktop.
Save mattwigway/76083ed7d296cf781e043cd7132d4f01 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Explore trips that have data for household members that don't exist\n",
"\n",
"Every trip in the 2017 NHTS has ONTD_P<i>n</i> variables, that indicate if person <i>n</i> is on the trip. If a household has young children, they will be represented as well, even if they aren't in the person file. So a household with 2 adults and 2 children under 5 should have values populated for ONTD_P{1-4}. However, sometimes, more ONTD_P<i>n</i> variables are populated than should be."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"trips17 = pd.read_csv('../data/trippub.csv')\n",
"hh17 = pd.read_csv('../data/hhpub.csv')\n",
"pers17 = pd.read_csv('../data/perpub.csv')"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Merge in HH and person variables\n",
"tripsyc = trips17.merge(\n",
" hh17.set_index('HOUSEID')[['YOUNGCHILD']],\n",
" how='left', left_on='HOUSEID', right_index=True, validate='m:1')\n",
"tripsyc = tripsyc.merge(\n",
" pd.DataFrame(pers17.groupby('HOUSEID').PERSONID.max().rename('maxPersonId')),\n",
" how='left', left_on='HOUSEID', right_index=True, validate='m:1')\n",
"tripsyc = tripsyc.merge(\n",
" pd.DataFrame(pers17.groupby('HOUSEID').size().rename('nPersonRecords')),\n",
" how='left', left_on='HOUSEID', right_index=True, validate='m:1')\n"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>HOUSEID</th>\n",
" <th>PERSONID</th>\n",
" <th>TDTRPNUM</th>\n",
" <th>maxPersonId</th>\n",
" <th>nPersonRecords</th>\n",
" <th>YOUNGCHILD</th>\n",
" <th>HHSIZE</th>\n",
" <th>ONTD_P1</th>\n",
" <th>ONTD_P2</th>\n",
" <th>ONTD_P3</th>\n",
" <th>ONTD_P4</th>\n",
" <th>ONTD_P5</th>\n",
" <th>ONTD_P6</th>\n",
" <th>ONTD_P7</th>\n",
" <th>ONTD_P8</th>\n",
" <th>ONTD_P9</th>\n",
" <th>ONTD_P10</th>\n",
" <th>ONTD_P11</th>\n",
" <th>ONTD_P12</th>\n",
" <th>ONTD_P13</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>33772</th>\n",
" <td>30037248</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>4</td>\n",
" <td>4</td>\n",
" <td>0</td>\n",
" <td>4</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>33775</th>\n",
" <td>30037248</td>\n",
" <td>1</td>\n",
" <td>4</td>\n",
" <td>4</td>\n",
" <td>4</td>\n",
" <td>0</td>\n",
" <td>4</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>33776</th>\n",
" <td>30037248</td>\n",
" <td>1</td>\n",
" <td>5</td>\n",
" <td>4</td>\n",
" <td>4</td>\n",
" <td>0</td>\n",
" <td>4</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>33779</th>\n",
" <td>30037248</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>4</td>\n",
" <td>4</td>\n",
" <td>0</td>\n",
" <td>4</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>33780</th>\n",
" <td>30037248</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>4</td>\n",
" <td>4</td>\n",
" <td>0</td>\n",
" <td>4</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" HOUSEID PERSONID TDTRPNUM maxPersonId nPersonRecords YOUNGCHILD \\\n",
"33772 30037248 1 1 4 4 0 \n",
"33775 30037248 1 4 4 4 0 \n",
"33776 30037248 1 5 4 4 0 \n",
"33779 30037248 3 1 4 4 0 \n",
"33780 30037248 3 2 4 4 0 \n",
"\n",
" HHSIZE ONTD_P1 ONTD_P2 ONTD_P3 ONTD_P4 ONTD_P5 ONTD_P6 ONTD_P7 \\\n",
"33772 4 1 2 1 1 1 -1 -1 \n",
"33775 4 1 2 1 1 1 -1 -1 \n",
"33776 4 1 2 1 1 1 -1 -1 \n",
"33779 4 1 2 1 1 1 -1 -1 \n",
"33780 4 1 2 1 1 1 -1 -1 \n",
"\n",
" ONTD_P8 ONTD_P9 ONTD_P10 ONTD_P11 ONTD_P12 ONTD_P13 \n",
"33772 -1 -1 -1 -1 -1 -1 \n",
"33775 -1 -1 -1 -1 -1 -1 \n",
"33776 -1 -1 -1 -1 -1 -1 \n",
"33779 -1 -1 -1 -1 -1 -1 \n",
"33780 -1 -1 -1 -1 -1 -1 "
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# create table\n",
"out = []\n",
"for i in range(1, 14):\n",
" # NOT (ontd_pn is appropriate skip OR n is less than the max person ID plus YOUNGCHILD (i.e. there are at least n people in the HH))\n",
" out.append(tripsyc.loc[~((tripsyc[f'ONTD_P{i}'] == -1) | ((tripsyc.maxPersonId + tripsyc.YOUNGCHILD) >= i)),\n",
" ['HOUSEID', 'PERSONID', 'TDTRPNUM', 'maxPersonId', 'nPersonRecords', 'YOUNGCHILD', 'HHSIZE', *[c for c in tripsyc.columns if c.startswith('ONTD_P')]]])\n",
" \n",
"tooMany = pd.concat(out).sort_values(['HOUSEID', 'PERSONID', 'TDTRPNUM'])\n",
"tooMany.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## What about if less ONTD_Pn variables are populated?\n",
"\n",
"There should never be a -1 in OTND_Pn if person n is in the household."
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>HOUSEID</th>\n",
" <th>PERSONID</th>\n",
" <th>TDTRPNUM</th>\n",
" <th>maxPersonId</th>\n",
" <th>nPersonRecords</th>\n",
" <th>YOUNGCHILD</th>\n",
" <th>HHSIZE</th>\n",
" <th>ONTD_P1</th>\n",
" <th>ONTD_P2</th>\n",
" <th>ONTD_P3</th>\n",
" <th>ONTD_P4</th>\n",
" <th>ONTD_P5</th>\n",
" <th>ONTD_P6</th>\n",
" <th>ONTD_P7</th>\n",
" <th>ONTD_P8</th>\n",
" <th>ONTD_P9</th>\n",
" <th>ONTD_P10</th>\n",
" <th>ONTD_P11</th>\n",
" <th>ONTD_P12</th>\n",
" <th>ONTD_P13</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2672</th>\n",
" <td>30003412</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>4</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>5</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2673</th>\n",
" <td>30003412</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>4</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>5</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2674</th>\n",
" <td>30003412</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>5</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2675</th>\n",
" <td>30003412</td>\n",
" <td>1</td>\n",
" <td>4</td>\n",
" <td>4</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>5</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2676</th>\n",
" <td>30003412</td>\n",
" <td>1</td>\n",
" <td>5</td>\n",
" <td>4</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>5</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2677</th>\n",
" <td>30003412</td>\n",
" <td>1</td>\n",
" <td>6</td>\n",
" <td>4</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>5</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2678</th>\n",
" <td>30003412</td>\n",
" <td>1</td>\n",
" <td>7</td>\n",
" <td>4</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>5</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2679</th>\n",
" <td>30003412</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>4</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>5</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2680</th>\n",
" <td>30003412</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>4</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>5</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2681</th>\n",
" <td>30003412</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>5</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2682</th>\n",
" <td>30003412</td>\n",
" <td>2</td>\n",
" <td>4</td>\n",
" <td>4</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>5</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2683</th>\n",
" <td>30003412</td>\n",
" <td>2</td>\n",
" <td>5</td>\n",
" <td>4</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>5</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2684</th>\n",
" <td>30003412</td>\n",
" <td>2</td>\n",
" <td>6</td>\n",
" <td>4</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>5</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2685</th>\n",
" <td>30003412</td>\n",
" <td>4</td>\n",
" <td>1</td>\n",
" <td>4</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>5</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2686</th>\n",
" <td>30003412</td>\n",
" <td>4</td>\n",
" <td>2</td>\n",
" <td>4</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>5</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2687</th>\n",
" <td>30003412</td>\n",
" <td>4</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>5</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2688</th>\n",
" <td>30003412</td>\n",
" <td>4</td>\n",
" <td>4</td>\n",
" <td>4</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>5</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2689</th>\n",
" <td>30003412</td>\n",
" <td>4</td>\n",
" <td>5</td>\n",
" <td>4</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>5</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4123</th>\n",
" <td>30004831</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>5</td>\n",
" <td>4</td>\n",
" <td>1</td>\n",
" <td>5</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4124</th>\n",
" <td>30004831</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>5</td>\n",
" <td>4</td>\n",
" <td>1</td>\n",
" <td>5</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" <td>-1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" HOUSEID PERSONID TDTRPNUM maxPersonId nPersonRecords YOUNGCHILD \\\n",
"2672 30003412 1 1 4 3 2 \n",
"2673 30003412 1 2 4 3 2 \n",
"2674 30003412 1 3 4 3 2 \n",
"2675 30003412 1 4 4 3 2 \n",
"2676 30003412 1 5 4 3 2 \n",
"2677 30003412 1 6 4 3 2 \n",
"2678 30003412 1 7 4 3 2 \n",
"2679 30003412 2 1 4 3 2 \n",
"2680 30003412 2 2 4 3 2 \n",
"2681 30003412 2 3 4 3 2 \n",
"2682 30003412 2 4 4 3 2 \n",
"2683 30003412 2 5 4 3 2 \n",
"2684 30003412 2 6 4 3 2 \n",
"2685 30003412 4 1 4 3 2 \n",
"2686 30003412 4 2 4 3 2 \n",
"2687 30003412 4 3 4 3 2 \n",
"2688 30003412 4 4 4 3 2 \n",
"2689 30003412 4 5 4 3 2 \n",
"4123 30004831 1 1 5 4 1 \n",
"4124 30004831 1 2 5 4 1 \n",
"\n",
" HHSIZE ONTD_P1 ONTD_P2 ONTD_P3 ONTD_P4 ONTD_P5 ONTD_P6 ONTD_P7 \\\n",
"2672 5 1 2 2 2 2 -1 -1 \n",
"2673 5 1 2 2 2 2 -1 -1 \n",
"2674 5 1 2 2 2 2 -1 -1 \n",
"2675 5 1 2 2 2 2 -1 -1 \n",
"2676 5 1 2 2 2 2 -1 -1 \n",
"2677 5 1 2 2 1 2 -1 -1 \n",
"2678 5 1 2 2 1 2 -1 -1 \n",
"2679 5 2 1 1 1 1 -1 -1 \n",
"2680 5 2 1 2 1 2 -1 -1 \n",
"2681 5 2 1 2 2 2 -1 -1 \n",
"2682 5 2 1 2 2 2 -1 -1 \n",
"2683 5 2 1 2 1 2 -1 -1 \n",
"2684 5 2 1 1 1 1 -1 -1 \n",
"2685 5 2 1 1 1 1 -1 -1 \n",
"2686 5 2 2 2 1 2 -1 -1 \n",
"2687 5 2 1 1 1 1 -1 -1 \n",
"2688 5 1 2 2 1 2 -1 -1 \n",
"2689 5 1 2 2 1 2 -1 -1 \n",
"4123 5 1 2 2 2 2 -1 -1 \n",
"4124 5 1 1 1 1 1 -1 -1 \n",
"\n",
" ONTD_P8 ONTD_P9 ONTD_P10 ONTD_P11 ONTD_P12 ONTD_P13 \n",
"2672 -1 -1 -1 -1 -1 -1 \n",
"2673 -1 -1 -1 -1 -1 -1 \n",
"2674 -1 -1 -1 -1 -1 -1 \n",
"2675 -1 -1 -1 -1 -1 -1 \n",
"2676 -1 -1 -1 -1 -1 -1 \n",
"2677 -1 -1 -1 -1 -1 -1 \n",
"2678 -1 -1 -1 -1 -1 -1 \n",
"2679 -1 -1 -1 -1 -1 -1 \n",
"2680 -1 -1 -1 -1 -1 -1 \n",
"2681 -1 -1 -1 -1 -1 -1 \n",
"2682 -1 -1 -1 -1 -1 -1 \n",
"2683 -1 -1 -1 -1 -1 -1 \n",
"2684 -1 -1 -1 -1 -1 -1 \n",
"2685 -1 -1 -1 -1 -1 -1 \n",
"2686 -1 -1 -1 -1 -1 -1 \n",
"2687 -1 -1 -1 -1 -1 -1 \n",
"2688 -1 -1 -1 -1 -1 -1 \n",
"2689 -1 -1 -1 -1 -1 -1 \n",
"4123 -1 -1 -1 -1 -1 -1 \n",
"4124 -1 -1 -1 -1 -1 -1 "
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# create table\n",
"out = []\n",
"for i in range(1, 14):\n",
" # NOT (ontd_pn is not appropriate skip OR n is more than the max person ID plus YOUNGCHILD (i.e. there are less than n people in the HH so it should be appropriate skip))\n",
" out.append(tripsyc.loc[~((tripsyc[f'ONTD_P{i}'] != -1) | ((tripsyc.maxPersonId + tripsyc.YOUNGCHILD) < i)),\n",
" ['HOUSEID', 'PERSONID', 'TDTRPNUM', 'maxPersonId', 'nPersonRecords', 'YOUNGCHILD', 'HHSIZE', *[c for c in tripsyc.columns if c.startswith('ONTD_P')]]])\n",
" \n",
"tooFew = pd.concat(out).sort_values(['HOUSEID', 'PERSONID', 'TDTRPNUM'])\n",
"tooFew.head(20)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>HOUSEID</th>\n",
" <th>PERSONID</th>\n",
" <th>TDTRPNUM</th>\n",
" <th>maxPersonId</th>\n",
" <th>nPersonRecords</th>\n",
" <th>YOUNGCHILD</th>\n",
" <th>HHSIZE</th>\n",
" <th>ONTD_P1</th>\n",
" <th>ONTD_P2</th>\n",
" <th>ONTD_P3</th>\n",
" <th>ONTD_P4</th>\n",
" <th>ONTD_P5</th>\n",
" <th>ONTD_P6</th>\n",
" <th>ONTD_P7</th>\n",
" <th>ONTD_P8</th>\n",
" <th>ONTD_P9</th>\n",
" <th>ONTD_P10</th>\n",
" <th>ONTD_P11</th>\n",
" <th>ONTD_P12</th>\n",
" <th>ONTD_P13</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"Empty DataFrame\n",
"Columns: [HOUSEID, PERSONID, TDTRPNUM, maxPersonId, nPersonRecords, YOUNGCHILD, HHSIZE, ONTD_P1, ONTD_P2, ONTD_P3, ONTD_P4, ONTD_P5, ONTD_P6, ONTD_P7, ONTD_P8, ONTD_P9, ONTD_P10, ONTD_P11, ONTD_P12, ONTD_P13]\n",
"Index: []"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# probably because people left the household\n",
"tooFew[tooFew.nPersonRecords >= tooFew.maxPersonId]"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"# https://xlsxwriter.readthedocs.io/example_pandas_multiple.html\n",
"out = pd.ExcelWriter('wrong_number_of_ontd_vars.xlsx', engine='xlsxwriter')\n",
"tooMany.to_excel(out, sheet_name='Too many ONTD vars populated')\n",
"tooFew.to_excel(out, sheet_name='Too few ONTD vars populated')\n",
"out.save()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment