Skip to content

Instantly share code, notes, and snippets.

@jminas
Created October 23, 2013 17:24
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jminas/7122910 to your computer and use it in GitHub Desktop.
Save jminas/7122910 to your computer and use it in GitHub Desktop.
LAPdata_jm
{
"metadata": {
"name": ""
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "code",
"collapsed": false,
"input": [
"import pandas as pd\n",
"import numpy as np\n",
"\n",
"#data = pd.read_csv('MASTER_ALL.csv')\n",
"data = pd.read_csv('LAP_Behavioral_MASTER_10232013.csv')\n",
"keys = pd.read_csv('ALL_DATA_KEY.csv')\n",
"\n",
"behav_index = data.keys().tolist().index('Music_exp_years')\n",
"\n",
"print \"Counting each column (number of participants with no missing data): \"\n",
"print zip(data.keys()[:(behav_index + 1)], data.ix[:, :(behav_index + 1)].count().values)\n",
"\n",
"print \"Number of participants left when removing any missing data: \", data.ix[:, :(behav_index + 1)].dropna().count().values.min()\n",
"\n",
"sortidx = np.argsort(data.ix[:, :(behav_index + 1)].count().values)\n",
"\n",
"ivkeys = data.keys()[:(behav_index + 1)].values[sortidx].tolist()\n",
"print \"\\nComputing participants remaining as columns are removed (starting with the columns with least data)\" \n",
"print \"No removed, key, number of participants remaining\"\n",
"for idx, key in enumerate(ivkeys[:-1]):\n",
" print idx + 1, key, data[ivkeys[idx+1:]].dropna().count().values.min()\n"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"Counting each column (number of participants with no missing data): \n",
"[('participant', 42), ('Gender', 42), ('DOB', 42), ('Age', 42), ('Yrs_Ed', 42), ('Date_S1', 42), ('Date_S2', 42), ('KBIT_verbalstd', 42), ('KBIT_nonverbalstd', 42), ('KBIT_overall', 42), ('WJ3_raw', 42), ('WJ3_std', 41), ('TONI_raw', 42), ('TONI_std', 42), ('CNREPtotalraw', 42), ('Caplan_ACC', 42), ('Caplan_RT', 42), ('CVMT_dPrime', 42), ('PCT_ACC', 42), ('PCT_Opt_Acc', 41), ('RSPAN_Load_%3', 42), ('RSPAN_Load_%4', 42), ('RSPAN_Load_%5', 42), ('RSPAN_Load_%6', 42), ('RSPAN_Load_%7', 42), ('RSPAN_LetterScore', 42), ('RSPAN_sent_acc', 42), ('MLAT_Spelling_Clues_Raw', 42), ('MLAT_WordsSent_Raw', 42), ('MLAT_PairedAssoc_Raw', 42), ('MLAT_Total_Raw', 42), ('MLAT_Total_PercAir', 42), ('MLAT_Total_Perc', 42), ('statcat_WS', 38), ('statcat_3words', 38), ('statcat_2words', 38), ('rotary_delta', 42), ('mirror_deltaE', 41), ('mirror_deltaT', 41), ('CVLT_ListATotal_raw', 42), ('CVLTListATotal_Scaled', 42), ('CVLT_ListA_ShortDelay_raw', 42), ('CVLT_ListA_ShortDelay_scaled', 42), ('CVLT_LongDelay_raw', 42), ('CVLT_LongDelayScaled', 42), ('CVLT_LongDelayHIT', 42), ('CVLT_Discriminability_Recognition', 42), ('Conscientiousness', 42), ('DWECK', 42), ('GRIT', 42), ('Music_exp_years', 42)]\n",
"Number of participants left when removing any missing data: 36\n",
"\n",
"Computing participants remaining as columns are removed (starting with the columns with least data)\n",
"No removed, key, number of participants remaining\n",
"1 statcat_WS 36\n",
"2 statcat_3words 36\n",
"3 statcat_2words 39\n",
"4 mirror_deltaT 39\n",
"5 PCT_Opt_Acc 40\n",
"6 WJ3_std 41\n",
"7 mirror_deltaE 42\n",
"8 MLAT_WordsSent_Raw 42\n",
"9 MLAT_PairedAssoc_Raw 42\n",
"10 MLAT_Total_Raw 42\n",
"11 MLAT_Total_PercAir "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"42\n",
"12 MLAT_Total_Perc 42\n",
"13 rotary_delta 42\n",
"14 participant 42\n",
"15 CVLT_ListATotal_raw 42\n",
"16 CVLTListATotal_Scaled 42\n",
"17 CVLT_ListA_ShortDelay_raw 42\n",
"18 CVLT_ListA_ShortDelay_scaled 42\n",
"19 CVLT_LongDelay_raw 42\n",
"20 CVLT_LongDelayScaled 42\n",
"21 CVLT_LongDelayHIT 42\n",
"22 CVLT_Discriminability_Recognition 42\n",
"23 Conscientiousness 42\n",
"24 DWECK 42\n",
"25 MLAT_Spelling_Clues_Raw 42\n",
"26 RSPAN_sent_acc 42\n",
"27 RSPAN_LetterScore 42\n",
"28 RSPAN_Load_%7 42\n",
"29 Gender "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"42\n",
"30 DOB 42\n",
"31 Age 42\n",
"32 Yrs_Ed 42\n",
"33 Date_S1 42\n",
"34 Date_S2 42\n",
"35 KBIT_verbalstd 42\n",
"36 KBIT_nonverbalstd 42\n",
"37 KBIT_overall 42\n",
"38 WJ3_raw 42\n",
"39 TONI_raw 42\n",
"40 TONI_std 42\n",
"41 CNREPtotalraw 42\n",
"42 Caplan_ACC 42\n",
"43 Caplan_RT 42\n",
"44 CVMT_dPrime 42\n",
"45 PCT_ACC 42\n",
"46 RSPAN_Load_%3 42\n",
"47 RSPAN_Load_%4 42\n",
"48 RSPAN_Load_%5 42\n",
"49 RSPAN_Load_%6 42\n",
"50 GRIT 42\n"
]
}
],
"prompt_number": 1
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": []
}
],
"metadata": {}
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment