Created
October 23, 2013 17:24
-
-
Save jminas/7122910 to your computer and use it in GitHub Desktop.
LAPdata_jm
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"metadata": { | |
"name": "" | |
}, | |
"nbformat": 3, | |
"nbformat_minor": 0, | |
"worksheets": [ | |
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"import pandas as pd\n", | |
"import numpy as np\n", | |
"\n", | |
"#data = pd.read_csv('MASTER_ALL.csv')\n", | |
"data = pd.read_csv('LAP_Behavioral_MASTER_10232013.csv')\n", | |
"keys = pd.read_csv('ALL_DATA_KEY.csv')\n", | |
"\n", | |
"behav_index = data.keys().tolist().index('Music_exp_years')\n", | |
"\n", | |
"print \"Counting each column (number of participants with no missing data): \"\n", | |
"print zip(data.keys()[:(behav_index + 1)], data.ix[:, :(behav_index + 1)].count().values)\n", | |
"\n", | |
"print \"Number of participants left when removing any missing data: \", data.ix[:, :(behav_index + 1)].dropna().count().values.min()\n", | |
"\n", | |
"sortidx = np.argsort(data.ix[:, :(behav_index + 1)].count().values)\n", | |
"\n", | |
"ivkeys = data.keys()[:(behav_index + 1)].values[sortidx].tolist()\n", | |
"print \"\\nComputing participants remaining as columns are removed (starting with the columns with least data)\" \n", | |
"print \"No removed, key, number of participants remaining\"\n", | |
"for idx, key in enumerate(ivkeys[:-1]):\n", | |
" print idx + 1, key, data[ivkeys[idx+1:]].dropna().count().values.min()\n" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"Counting each column (number of participants with no missing data): \n", | |
"[('participant', 42), ('Gender', 42), ('DOB', 42), ('Age', 42), ('Yrs_Ed', 42), ('Date_S1', 42), ('Date_S2', 42), ('KBIT_verbalstd', 42), ('KBIT_nonverbalstd', 42), ('KBIT_overall', 42), ('WJ3_raw', 42), ('WJ3_std', 41), ('TONI_raw', 42), ('TONI_std', 42), ('CNREPtotalraw', 42), ('Caplan_ACC', 42), ('Caplan_RT', 42), ('CVMT_dPrime', 42), ('PCT_ACC', 42), ('PCT_Opt_Acc', 41), ('RSPAN_Load_%3', 42), ('RSPAN_Load_%4', 42), ('RSPAN_Load_%5', 42), ('RSPAN_Load_%6', 42), ('RSPAN_Load_%7', 42), ('RSPAN_LetterScore', 42), ('RSPAN_sent_acc', 42), ('MLAT_Spelling_Clues_Raw', 42), ('MLAT_WordsSent_Raw', 42), ('MLAT_PairedAssoc_Raw', 42), ('MLAT_Total_Raw', 42), ('MLAT_Total_PercAir', 42), ('MLAT_Total_Perc', 42), ('statcat_WS', 38), ('statcat_3words', 38), ('statcat_2words', 38), ('rotary_delta', 42), ('mirror_deltaE', 41), ('mirror_deltaT', 41), ('CVLT_ListATotal_raw', 42), ('CVLTListATotal_Scaled', 42), ('CVLT_ListA_ShortDelay_raw', 42), ('CVLT_ListA_ShortDelay_scaled', 42), ('CVLT_LongDelay_raw', 42), ('CVLT_LongDelayScaled', 42), ('CVLT_LongDelayHIT', 42), ('CVLT_Discriminability_Recognition', 42), ('Conscientiousness', 42), ('DWECK', 42), ('GRIT', 42), ('Music_exp_years', 42)]\n", | |
"Number of participants left when removing any missing data: 36\n", | |
"\n", | |
"Computing participants remaining as columns are removed (starting with the columns with least data)\n", | |
"No removed, key, number of participants remaining\n", | |
"1 statcat_WS 36\n", | |
"2 statcat_3words 36\n", | |
"3 statcat_2words 39\n", | |
"4 mirror_deltaT 39\n", | |
"5 PCT_Opt_Acc 40\n", | |
"6 WJ3_std 41\n", | |
"7 mirror_deltaE 42\n", | |
"8 MLAT_WordsSent_Raw 42\n", | |
"9 MLAT_PairedAssoc_Raw 42\n", | |
"10 MLAT_Total_Raw 42\n", | |
"11 MLAT_Total_PercAir " | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"42\n", | |
"12 MLAT_Total_Perc 42\n", | |
"13 rotary_delta 42\n", | |
"14 participant 42\n", | |
"15 CVLT_ListATotal_raw 42\n", | |
"16 CVLTListATotal_Scaled 42\n", | |
"17 CVLT_ListA_ShortDelay_raw 42\n", | |
"18 CVLT_ListA_ShortDelay_scaled 42\n", | |
"19 CVLT_LongDelay_raw 42\n", | |
"20 CVLT_LongDelayScaled 42\n", | |
"21 CVLT_LongDelayHIT 42\n", | |
"22 CVLT_Discriminability_Recognition 42\n", | |
"23 Conscientiousness 42\n", | |
"24 DWECK 42\n", | |
"25 MLAT_Spelling_Clues_Raw 42\n", | |
"26 RSPAN_sent_acc 42\n", | |
"27 RSPAN_LetterScore 42\n", | |
"28 RSPAN_Load_%7 42\n", | |
"29 Gender " | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"42\n", | |
"30 DOB 42\n", | |
"31 Age 42\n", | |
"32 Yrs_Ed 42\n", | |
"33 Date_S1 42\n", | |
"34 Date_S2 42\n", | |
"35 KBIT_verbalstd 42\n", | |
"36 KBIT_nonverbalstd 42\n", | |
"37 KBIT_overall 42\n", | |
"38 WJ3_raw 42\n", | |
"39 TONI_raw 42\n", | |
"40 TONI_std 42\n", | |
"41 CNREPtotalraw 42\n", | |
"42 Caplan_ACC 42\n", | |
"43 Caplan_RT 42\n", | |
"44 CVMT_dPrime 42\n", | |
"45 PCT_ACC 42\n", | |
"46 RSPAN_Load_%3 42\n", | |
"47 RSPAN_Load_%4 42\n", | |
"48 RSPAN_Load_%5 42\n", | |
"49 RSPAN_Load_%6 42\n", | |
"50 GRIT 42\n" | |
] | |
} | |
], | |
"prompt_number": 1 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [] | |
} | |
], | |
"metadata": {} | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment