jminas/LAPdata

## LAPdata
{
 "metadata": {
  "name": ""
 },
 "nbformat": 3,
 "nbformat_minor": 0,
 "worksheets": [
  {
   "cells": [
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "import pandas as pd\n",
      "import numpy as np\n",
      "\n",
      "#data = pd.read_csv('MASTER_ALL.csv')\n",
      "data = pd.read_csv('LAP_Behavioral_MASTER_10232013.csv')\n",
      "keys = pd.read_csv('ALL_DATA_KEY.csv')\n",
      "\n",
      "behav_index = data.keys().tolist().index('Music_exp_years')\n",
      "\n",
      "print \"Counting each column (number of participants with no missing data): \"\n",
      "print zip(data.keys()[:(behav_index + 1)], data.ix[:, :(behav_index + 1)].count().values)\n",
      "\n",
      "print \"Number of participants left when removing any missing data: \", data.ix[:, :(behav_index + 1)].dropna().count().values.min()\n",
      "\n",
      "sortidx = np.argsort(data.ix[:, :(behav_index + 1)].count().values)\n",
      "\n",
      "ivkeys = data.keys()[:(behav_index + 1)].values[sortidx].tolist()\n",
      "print \"\\nComputing participants remaining as columns are removed (starting with the columns with least data)\" \n",
      "print \"No removed, key, number of participants remaining\"\n",
      "for idx, key in enumerate(ivkeys[:-1]):\n",
      "    print idx + 1, key, data[ivkeys[idx+1:]].dropna().count().values.min()\n"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "Counting each column (number of participants with no missing data): \n",
        "[('participant', 42), ('Gender', 42), ('DOB', 42), ('Age', 42), ('Yrs_Ed', 42), ('Date_S1', 42), ('Date_S2', 42), ('KBIT_verbalstd', 42), ('KBIT_nonverbalstd', 42), ('KBIT_overall', 42), ('WJ3_raw', 42), ('WJ3_std', 41), ('TONI_raw', 42), ('TONI_std', 42), ('CNREPtotalraw', 42), ('Caplan_ACC', 42), ('Caplan_RT', 42), ('CVMT_dPrime', 42), ('PCT_ACC', 42), ('PCT_Opt_Acc', 41), ('RSPAN_Load_%3', 42), ('RSPAN_Load_%4', 42), ('RSPAN_Load_%5', 42), ('RSPAN_Load_%6', 42), ('RSPAN_Load_%7', 42), ('RSPAN_LetterScore', 42), ('RSPAN_sent_acc', 42), ('MLAT_Spelling_Clues_Raw', 42), ('MLAT_WordsSent_Raw', 42), ('MLAT_PairedAssoc_Raw', 42), ('MLAT_Total_Raw', 42), ('MLAT_Total_PercAir', 42), ('MLAT_Total_Perc', 42), ('statcat_WS', 38), ('statcat_3words', 38), ('statcat_2words', 38), ('rotary_delta', 42), ('mirror_deltaE', 41), ('mirror_deltaT', 41), ('CVLT_ListATotal_raw', 42), ('CVLTListATotal_Scaled', 42), ('CVLT_ListA_ShortDelay_raw', 42), ('CVLT_ListA_ShortDelay_scaled', 42), ('CVLT_LongDelay_raw', 42), ('CVLT_LongDelayScaled', 42), ('CVLT_LongDelayHIT', 42), ('CVLT_Discriminability_Recognition', 42), ('Conscientiousness', 42), ('DWECK', 42), ('GRIT', 42), ('Music_exp_years', 42)]\n",
        "Number of participants left when removing any missing data:  36\n",
        "\n",
        "Computing participants remaining as columns are removed (starting with the columns with least data)\n",
        "No removed, key, number of participants remaining\n",
        "1 statcat_WS 36\n",
        "2 statcat_3words 36\n",
        "3 statcat_2words 39\n",
        "4 mirror_deltaT 39\n",
        "5 PCT_Opt_Acc 40\n",
        "6 WJ3_std 41\n",
        "7 mirror_deltaE 42\n",
        "8 MLAT_WordsSent_Raw 42\n",
        "9 MLAT_PairedAssoc_Raw 42\n",
        "10 MLAT_Total_Raw 42\n",
        "11 MLAT_Total_PercAir "
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "42\n",
        "12 MLAT_Total_Perc 42\n",
        "13 rotary_delta 42\n",
        "14 participant 42\n",
        "15 CVLT_ListATotal_raw 42\n",
        "16 CVLTListATotal_Scaled 42\n",
        "17 CVLT_ListA_ShortDelay_raw 42\n",
        "18 CVLT_ListA_ShortDelay_scaled 42\n",
        "19 CVLT_LongDelay_raw 42\n",
        "20 CVLT_LongDelayScaled 42\n",
        "21 CVLT_LongDelayHIT 42\n",
        "22 CVLT_Discriminability_Recognition 42\n",
        "23 Conscientiousness 42\n",
        "24 DWECK 42\n",
        "25 MLAT_Spelling_Clues_Raw 42\n",
        "26 RSPAN_sent_acc 42\n",
        "27 RSPAN_LetterScore 42\n",
        "28 RSPAN_Load_%7 42\n",
        "29 Gender "
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "42\n",
        "30 DOB 42\n",
        "31 Age 42\n",
        "32 Yrs_Ed 42\n",
        "33 Date_S1 42\n",
        "34 Date_S2 42\n",
        "35 KBIT_verbalstd 42\n",
        "36 KBIT_nonverbalstd 42\n",
        "37 KBIT_overall 42\n",
        "38 WJ3_raw 42\n",
        "39 TONI_raw 42\n",
        "40 TONI_std 42\n",
        "41 CNREPtotalraw 42\n",
        "42 Caplan_ACC 42\n",
        "43 Caplan_RT 42\n",
        "44 CVMT_dPrime 42\n",
        "45 PCT_ACC 42\n",
        "46 RSPAN_Load_%3 42\n",
        "47 RSPAN_Load_%4 42\n",
        "48 RSPAN_Load_%5 42\n",
        "49 RSPAN_Load_%6 42\n",
        "50 GRIT 42\n"
       ]
      }
     ],
     "prompt_number": 1
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [],
     "language": "python",
     "metadata": {},
     "outputs": []
    }
   ],
   "metadata": {}
  }
 ]
}
	{
	"metadata": {
	"name": ""
	},
	"nbformat": 3,
	"nbformat_minor": 0,
	"worksheets": [
	{
	"cells": [
	{
	"cell_type": "code",
	"collapsed": false,
	"input": [
	"import pandas as pd\n",
	"import numpy as np\n",
	"\n",
	"#data = pd.read_csv('MASTER_ALL.csv')\n",
	"data = pd.read_csv('LAP_Behavioral_MASTER_10232013.csv')\n",
	"keys = pd.read_csv('ALL_DATA_KEY.csv')\n",
	"\n",
	"behav_index = data.keys().tolist().index('Music_exp_years')\n",
	"\n",
	"print \"Counting each column (number of participants with no missing data): \"\n",
	"print zip(data.keys()[:(behav_index + 1)], data.ix[:, :(behav_index + 1)].count().values)\n",
	"\n",
	"print \"Number of participants left when removing any missing data: \", data.ix[:, :(behav_index + 1)].dropna().count().values.min()\n",
	"\n",
	"sortidx = np.argsort(data.ix[:, :(behav_index + 1)].count().values)\n",
	"\n",
	"ivkeys = data.keys()[:(behav_index + 1)].values[sortidx].tolist()\n",
	"print \"\\nComputing participants remaining as columns are removed (starting with the columns with least data)\" \n",
	"print \"No removed, key, number of participants remaining\"\n",
	"for idx, key in enumerate(ivkeys[:-1]):\n",
	" print idx + 1, key, data[ivkeys[idx+1:]].dropna().count().values.min()\n"
	],
	"language": "python",
	"metadata": {},
	"outputs": [
	{
	"output_type": "stream",
	"stream": "stdout",
	"text": [
	"Counting each column (number of participants with no missing data): \n",
	"[('participant', 42), ('Gender', 42), ('DOB', 42), ('Age', 42), ('Yrs_Ed', 42), ('Date_S1', 42), ('Date_S2', 42), ('KBIT_verbalstd', 42), ('KBIT_nonverbalstd', 42), ('KBIT_overall', 42), ('WJ3_raw', 42), ('WJ3_std', 41), ('TONI_raw', 42), ('TONI_std', 42), ('CNREPtotalraw', 42), ('Caplan_ACC', 42), ('Caplan_RT', 42), ('CVMT_dPrime', 42), ('PCT_ACC', 42), ('PCT_Opt_Acc', 41), ('RSPAN_Load_%3', 42), ('RSPAN_Load_%4', 42), ('RSPAN_Load_%5', 42), ('RSPAN_Load_%6', 42), ('RSPAN_Load_%7', 42), ('RSPAN_LetterScore', 42), ('RSPAN_sent_acc', 42), ('MLAT_Spelling_Clues_Raw', 42), ('MLAT_WordsSent_Raw', 42), ('MLAT_PairedAssoc_Raw', 42), ('MLAT_Total_Raw', 42), ('MLAT_Total_PercAir', 42), ('MLAT_Total_Perc', 42), ('statcat_WS', 38), ('statcat_3words', 38), ('statcat_2words', 38), ('rotary_delta', 42), ('mirror_deltaE', 41), ('mirror_deltaT', 41), ('CVLT_ListATotal_raw', 42), ('CVLTListATotal_Scaled', 42), ('CVLT_ListA_ShortDelay_raw', 42), ('CVLT_ListA_ShortDelay_scaled', 42), ('CVLT_LongDelay_raw', 42), ('CVLT_LongDelayScaled', 42), ('CVLT_LongDelayHIT', 42), ('CVLT_Discriminability_Recognition', 42), ('Conscientiousness', 42), ('DWECK', 42), ('GRIT', 42), ('Music_exp_years', 42)]\n",
	"Number of participants left when removing any missing data: 36\n",
	"\n",
	"Computing participants remaining as columns are removed (starting with the columns with least data)\n",
	"No removed, key, number of participants remaining\n",
	"1 statcat_WS 36\n",
	"2 statcat_3words 36\n",
	"3 statcat_2words 39\n",
	"4 mirror_deltaT 39\n",
	"5 PCT_Opt_Acc 40\n",
	"6 WJ3_std 41\n",
	"7 mirror_deltaE 42\n",
	"8 MLAT_WordsSent_Raw 42\n",
	"9 MLAT_PairedAssoc_Raw 42\n",
	"10 MLAT_Total_Raw 42\n",
	"11 MLAT_Total_PercAir "
	]
	},
	{
	"output_type": "stream",
	"stream": "stdout",
	"text": [
	"42\n",
	"12 MLAT_Total_Perc 42\n",
	"13 rotary_delta 42\n",
	"14 participant 42\n",
	"15 CVLT_ListATotal_raw 42\n",
	"16 CVLTListATotal_Scaled 42\n",
	"17 CVLT_ListA_ShortDelay_raw 42\n",
	"18 CVLT_ListA_ShortDelay_scaled 42\n",
	"19 CVLT_LongDelay_raw 42\n",
	"20 CVLT_LongDelayScaled 42\n",
	"21 CVLT_LongDelayHIT 42\n",
	"22 CVLT_Discriminability_Recognition 42\n",
	"23 Conscientiousness 42\n",
	"24 DWECK 42\n",
	"25 MLAT_Spelling_Clues_Raw 42\n",
	"26 RSPAN_sent_acc 42\n",
	"27 RSPAN_LetterScore 42\n",
	"28 RSPAN_Load_%7 42\n",
	"29 Gender "
	]
	},
	{
	"output_type": "stream",
	"stream": "stdout",
	"text": [
	"42\n",
	"30 DOB 42\n",
	"31 Age 42\n",
	"32 Yrs_Ed 42\n",
	"33 Date_S1 42\n",
	"34 Date_S2 42\n",
	"35 KBIT_verbalstd 42\n",
	"36 KBIT_nonverbalstd 42\n",
	"37 KBIT_overall 42\n",
	"38 WJ3_raw 42\n",
	"39 TONI_raw 42\n",
	"40 TONI_std 42\n",
	"41 CNREPtotalraw 42\n",
	"42 Caplan_ACC 42\n",
	"43 Caplan_RT 42\n",
	"44 CVMT_dPrime 42\n",
	"45 PCT_ACC 42\n",
	"46 RSPAN_Load_%3 42\n",
	"47 RSPAN_Load_%4 42\n",
	"48 RSPAN_Load_%5 42\n",
	"49 RSPAN_Load_%6 42\n",
	"50 GRIT 42\n"
	]
	}
	],
	"prompt_number": 1
	},
	{
	"cell_type": "code",
	"collapsed": false,
	"input": [],
	"language": "python",
	"metadata": {},
	"outputs": []
	}
	],
	"metadata": {}
	}
	]
	}