Skip to content

Instantly share code, notes, and snippets.

@chrisgorgo
Created February 13, 2018 00:27
Show Gist options
  • Save chrisgorgo/6ca1357f8077f7243d6605faeb1cdfb7 to your computer and use it in GitHub Desktop.
Save chrisgorgo/6ca1357f8077f7243d6605faeb1cdfb7 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import boto3\n",
"from botocore import UNSIGNED\n",
"from botocore.client import Config\n",
"from distutils.version import LooseVersion, StrictVersion\n",
"import pandas as pd\n",
"import io\n",
"import numpy as np\n",
"s3 = boto3.client(\"s3\", config=Config(signature_version=UNSIGNED))"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"ds000001\n",
"1.0.0/ 2.0.4/\n",
"{'participant_count': 16, 'below_18': 0, 'above_18': 16, 'female_count': 10, 'male_count': 6}\n",
"ds000002\n",
"1.0.0/ 2.0.5/\n",
"{'participant_count': 17, 'below_18': 0, 'above_18': 17, 'female_count': 10, 'male_count': 7}\n",
"ds000003\n",
"1.0.0/ 2.0.2/\n",
"{'participant_count': 13, 'below_18': 0, 'above_18': 13, 'female_count': 5, 'male_count': 8}\n",
"ds000005\n",
"1.0.0/ 2.0.1/\n",
"{'participant_count': 16, 'below_18': 0, 'above_18': 16, 'female_count': 8, 'male_count': 8}\n",
"ds000006\n",
"1.0.0/ 2.0.1/\n",
"{'participant_count': 14, 'below_18': 0, 'above_18': 14, 'female_count': 9, 'male_count': 5}\n",
"ds000007\n",
"1.0.1/ 2.0.1/\n",
"{'participant_count': 20, 'below_18': 0, 'above_18': 20, 'female_count': 9, 'male_count': 11}\n",
"ds000008\n",
"1.0.1/ 2.0.0/\n",
"{'participant_count': 14, 'below_18': 0, 'above_18': 14, 'female_count': 4, 'male_count': 10}\n",
"ds000009\n",
"1.0.0/ 2.0.3/\n",
"{'participant_count': 24, 'below_18': 0, 'above_18': 24, 'female_count': 10, 'male_count': 14}\n",
"ds000011\n",
"1.0.0/ 2.0.1/\n",
"{'participant_count': 14, 'below_18': 0, 'above_18': 14, 'female_count': 9, 'male_count': 5}\n",
"ds000017\n",
"1.0.0/ 2.0.1/\n",
"{'participant_count': 8, 'below_18': 0, 'above_18': 5, 'female_count': 1, 'male_count': 4}\n",
"ds000030\n",
"1.0.2/ 1.0.5/\n",
"{'participant_count': 272, 'below_18': 0, 'above_18': 272, 'female_count': 117, 'male_count': 155}\n",
"ds000031\n",
"1.0.2/ 1.0.4/\n",
"no participants.tsv file\n",
"ds000051\n",
"no participants.tsv file\n",
"ds000052\n",
"1.0.0/ 2.0.0/\n",
"no participants.tsv file\n",
"ds000053\n",
"1.0.0/ 1.0.2/\n",
"{'participant_count': 59, 'below_18': 0, 'above_18': 59, 'female_count': 31, 'male_count': 28}\n",
"ds000101\n",
"1.0.0/ 2.0.0/\n",
"{'participant_count': 21, 'below_18': 0, 'above_18': 21, 'female_count': 9, 'male_count': 12}\n",
"ds000102\n",
"1.0.0/ 2.0.0/\n",
"{'participant_count': 26, 'below_18': 0, 'above_18': 26, 'female_count': 1, 'male_count': 16}\n",
"ds000105\n",
"1.0.1/ 2.0.2/\n",
"no participants.tsv file\n",
"ds000107\n",
"1.0.0/ 2.0.2/\n",
"no participants.tsv file\n",
"ds000108\n",
"1.0.1/ 2.0.0/\n",
"{'participant_count': 34, 'below_18': 0, 'above_18': 34, 'female_count': 17, 'male_count': 17}\n",
"ds000110\n",
"1.0.0/ 2.0.1/\n",
"{'participant_count': 18, 'below_18': 0, 'above_18': 18, 'female_count': 9, 'male_count': 9}\n",
"ds000113b\n",
"2.0.0/ 2.0.1/\n",
"no participants.tsv file\n",
"ds000113c\n",
"1.0.0/ 1.0.0/\n",
"{'participant_count': 7, 'below_18': 0, 'above_18': 7, 'female_count': 2, 'male_count': 5}\n",
"ds000113d\n",
"1.0.0/ 2.0.0/\n",
"{'participant_count': 30, 'below_18': 2, 'above_18': 28, 'female_count': 16, 'male_count': 14}\n",
"ds000114\n",
"2.0.0/ 2.0.1/\n",
"no participants.tsv file\n",
"ds000115\n",
"1.0.2/ 2.0.0/\n",
"{'participant_count': 99, 'below_18': 12, 'above_18': 87, 'female_count': 40, 'male_count': 59}\n",
"ds000116\n",
"1.0.0/ 2.0.0/\n",
"{'participant_count': 17, 'below_18': 0, 'above_18': 17, 'female_count': 6, 'male_count': 11}\n",
"ds000117\n",
"0.1.0/ 1.0.0/\n",
"{'participant_count': 16, 'below_18': 0, 'above_18': 16, 'female_count': 7, 'male_count': 9}\n",
"ds000119\n",
"1.0.0/ 2.0.1/\n",
"{'participant_count': 73, 'below_18': 46, 'above_18': 27, 'female_count': 43, 'male_count': 30}\n",
"ds000120\n",
"1.0.0/ 1.0.1/\n",
"{'participant_count': 26, 'below_18': 16, 'above_18': 10, 'female_count': 15, 'male_count': 11}\n",
"ds000121\n",
"1.0.0/ 2.0.2/\n",
"{'participant_count': 28, 'below_18': 12, 'above_18': 16, 'female_count': 16, 'male_count': 12}\n",
"ds000122\n",
"1.0.0/ 1.0.0/\n",
"{'participant_count': 17, 'below_18': 0, 'above_18': 17, 'female_count': 8, 'male_count': 9}\n",
"ds000133\n",
"1.0.0/ 1.0.0/\n",
"Index(['participant_id', 'study_group', 'handedness', 'gender'], dtype='object')\n",
"{'participant_count': 26}\n",
"ds000138\n",
"1.0.0/ 2.0.0/\n",
"{'participant_count': 16, 'below_18': 0, 'above_18': 16, 'female_count': 7, 'male_count': 9}\n",
"ds000140\n",
"2.0.0/ 2.0.0/\n",
"{'participant_count': 33, 'below_18': 0, 'above_18': 33, 'female_count': 22, 'male_count': 11}\n",
"ds000144\n",
"{'participant_count': 45, 'below_18': 45, 'above_18': 0, 'female_count': 29, 'male_count': 16}\n",
"ds000148\n",
"1.0.0/ 1.0.0/\n",
"{'participant_count': 49, 'below_18': 0, 'above_18': 49, 'female_count': 26, 'male_count': 23}\n",
"ds000157\n",
"1.0.0/ 1.0.3/\n",
"{'participant_count': 30, 'below_18': 0, 'above_18': 30, 'female_count': 30, 'male_count': 0}\n",
"ds000158\n",
"1.0.0/ 1.0.1/\n",
"no participants.tsv file\n",
"ds000164\n",
"1.0.0/ 1.0.1/\n",
"no participants.tsv file\n",
"ds000168\n",
"1.0.0/ 1.0.1/\n",
"{'participant_count': 35, 'below_18': 0, 'above_18': 25, 'female_count': 15, 'male_count': 20}\n",
"ds000170\n",
"1.0.0/ 1.0.1/\n",
"{'participant_count': 15, 'below_18': 0, 'above_18': 15, 'female_count': 3, 'male_count': 12}\n",
"ds000171\n",
"1.0.0/ 1.0.0/\n",
"{'participant_count': 39, 'below_18': 0, 'above_18': 39, 'female_count': 22, 'male_count': 17}\n",
"ds000172\n",
"1.0.0/ 1.0.1/\n",
"{'participant_count': 13, 'below_18': 0, 'above_18': 13, 'female_count': 7, 'male_count': 6}\n",
"ds000174\n",
"1.0.0/ 1.0.0/\n",
"{'participant_count': 42, 'below_18': 2, 'above_18': 40, 'female_count': 13, 'male_count': 29}\n",
"ds000177\n",
"1.0.0/ 1.0.1/\n",
"{'participant_count': 11, 'below_18': 0, 'above_18': 11, 'female_count': 4, 'male_count': 7}\n",
"ds000200\n",
"1.0.0/ 1.0.0/\n",
"{'participant_count': 14, 'below_18': 12, 'above_18': 2, 'female_count': 6, 'male_count': 8}\n",
"ds000201\n",
"1.0.0/ 1.0.5/\n",
"{'participant_count': 86, 'below_18': 0, 'above_18': 86, 'female_count': 44, 'male_count': 42}\n",
"ds000202\n",
"{'participant_count': 95, 'below_18': 0, 'above_18': 95, 'female_count': 95, 'male_count': 0}\n",
"ds000203\n",
"1.0.0/ 1.0.2/\n",
"{'participant_count': 26, 'below_18': 0, 'above_18': 26, 'female_count': 10, 'male_count': 16}\n",
"ds000204\n",
"1.0.0/ 1.0.2/\n",
"no participants.tsv file\n",
"ds000205\n",
"1.0.0/ 1.0.0/\n",
"Index(['participant_id', 'handedness'], dtype='object')\n",
"{'participant_count': 11}\n",
"ds000206\n",
"1.0.0/ 1.0.0/\n",
"no participants.tsv file\n",
"ds000208\n",
"1.0.0/ 1.0.0/\n",
"{'participant_count': 76, 'below_18': 0, 'above_18': 76, 'female_count': 40, 'male_count': 36}\n",
"ds000210\n",
"1.0.0/ 1.0.1/\n",
"{'participant_count': 31, 'below_18': 0, 'above_18': 31, 'female_count': 16, 'male_count': 15}\n",
"ds000212\n",
"1.0.0/ 1.0.1/\n",
"{'participant_count': 39, 'below_18': 0, 'above_18': 39, 'female_count': 10, 'male_count': 29}\n",
"ds000213\n",
"1.0.0/ 1.0.2/\n",
"Index(['participant_id', 'gender', 'group'], dtype='object')\n",
"{'participant_count': 26}\n",
"ds000214\n",
"1.0.0/ 1.0.0/\n",
"{'participant_count': 36, 'below_18': 0, 'above_18': 36, 'female_count': 30, 'male_count': 6}\n",
"ds000216\n",
"1.0.0/ 1.0.1/\n",
"no participants.tsv file\n",
"ds000217\n",
"1.0.1/ 1.0.1/\n",
"{'participant_count': 41, 'below_18': 0, 'above_18': 41, 'female_count': 23, 'male_count': 18}\n",
"ds000218\n",
"1.0.0/ 1.0.1/\n",
"{'participant_count': 19, 'below_18': 0, 'above_18': 19, 'female_count': 0, 'male_count': 19}\n",
"ds000219\n",
"1.0.0/ 1.0.0/\n",
"{'participant_count': 21, 'below_18': 0, 'above_18': 21, 'female_count': 0, 'male_count': 21}\n",
"ds000220\n",
"1.0.0/ 1.0.0/\n",
"{'participant_count': 26, 'below_18': 0, 'above_18': 26, 'female_count': 13, 'male_count': 13}\n",
"ds000221\n",
"1.0.0/ 1.0.0/\n",
"{'participant_count': 318, 'below_18': 0, 'above_18': 316, 'female_count': 129, 'male_count': 189}\n",
"ds000222\n",
"1.0.0/ 1.0.0/\n",
"{'participant_count': 79, 'below_18': 0, 'above_18': 79, 'female_count': 41, 'male_count': 38}\n",
"ds000223\n",
"1.0.0/ 2.0.0/\n",
"Index(['participant_id'], dtype='object')\n",
"{'participant_count': 19}\n",
"ds000224\n",
"1.0.0/ 1.0.2/\n",
"{'participant_count': 10, 'below_18': 0, 'above_18': 10, 'female_count': 5, 'male_count': 5}\n",
"ds000228\n",
"1.0.0/ 1.0.1/\n",
"{'participant_count': 155, 'below_18': 122, 'above_18': 33, 'female_count': 84, 'male_count': 71}\n",
"ds000229\n",
"1.0.0/ 1.0.0/\n",
"{'participant_count': 15, 'below_18': 0, 'above_18': 15, 'female_count': 7, 'male_count': 8}\n",
"ds000231\n",
"1.0.0/ 1.0.0/\n",
"{'participant_count': 9, 'below_18': 0, 'above_18': 9, 'female_count': 7, 'male_count': 2}\n",
"ds000232\n",
"1.0.0/ 1.0.1/\n",
"{'participant_count': 10, 'below_18': 0, 'above_18': 10, 'female_count': 7, 'male_count': 3}\n",
"ds000233\n",
"1.0.0/ 1.0.0/\n",
"{'participant_count': 12, 'below_18': 0, 'above_18': 12, 'female_count': 7, 'male_count': 5}\n",
"ds000234\n",
"1.0.0/ 1.0.0/\n",
"{'participant_count': 5, 'below_18': 0, 'above_18': 5, 'female_count': 1, 'male_count': 4}\n",
"ds000235\n",
"1.0.0/ 1.0.0/\n",
"{'participant_count': 4, 'below_18': 0, 'above_18': 4, 'female_count': 2, 'male_count': 2}\n",
"ds000236\n",
"1.0.0/ 1.0.0/\n",
"{'participant_count': 18, 'below_18': 0, 'above_18': 18, 'female_count': 12, 'male_count': 6}\n",
"ds000237\n",
"no participants.tsv file\n",
"ds000238\n",
"1.0.0/ 1.0.1/\n",
"{'participant_count': 35, 'below_18': 0, 'above_18': 35, 'female_count': 17, 'male_count': 18}\n",
"ds000239\n",
"1.0.0/ 1.0.1/\n",
"{'participant_count': 3, 'below_18': 0, 'above_18': 3, 'female_count': 1, 'male_count': 2}\n",
"ds000240\n",
"1.0.0/ 1.0.0/\n",
"{'participant_count': 63, 'below_18': 0, 'above_18': 63, 'female_count': 35, 'male_count': 28}\n",
"ds000241\n",
"1.0.0/ 1.0.0/\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"no participants.tsv file\n",
"ds000243\n",
"1.0.0/ 1.0.0/\n",
"{'participant_count': 120, 'below_18': 0, 'above_18': 120, 'female_count': 61, 'male_count': 59}\n",
"ds000244\n",
"{'participant_count': 12, 'below_18': 0, 'above_18': 12, 'female_count': 9, 'male_count': 3}\n",
"ds000245\n",
"1.0.0/ 1.0.0/\n",
"{'participant_count': 45, 'below_18': 0, 'above_18': 45, 'female_count': 25, 'male_count': 20}\n",
"ds000246\n",
"1.0.0/ 1.0.0/\n",
"{'participant_count': 1, 'below_18': 0, 'above_18': 1, 'female_count': 0, 'male_count': 1}\n",
"ds000247\n",
"1.0.0/ 1.0.0/\n",
"{'participant_count': 6, 'below_18': 0, 'above_18': 5, 'female_count': 2, 'male_count': 3}\n",
"ds000248\n",
"no participants.tsv file\n",
"ds000249\n",
"1.0.0/ 1.0.0/\n",
"{'participant_count': 26, 'below_18': 0, 'above_18': 26, 'female_count': 13, 'male_count': 13}\n",
"ds000253\n",
"1.0.0/ 1.0.0/\n",
"{'participant_count': 20, 'below_18': 0, 'above_18': 20, 'female_count': 20, 'male_count': 0}\n",
"ds000254\n",
"1.0.0/ 1.0.0/\n",
"{'participant_count': 13, 'below_18': 0, 'above_18': 13, 'female_count': 7, 'male_count': 6}\n",
"ds000255\n",
"1.0.0/ 1.0.0/\n",
"no participants.tsv file\n",
"ds000256\n",
"1.0.0/ 1.0.0/\n",
"{'participant_count': 24, 'below_18': 24, 'above_18': 0, 'female_count': 10, 'male_count': 14}\n",
"ds000258\n",
"1.0.0/ 1.0.0/\n",
"no participants.tsv file\n"
]
}
],
"source": [
"def sanitize(x):\n",
"    \"\"\"Convert one raw age entry from a participants.tsv file to a number.\n",
"\n",
"    Returns None for missing values, the lower bound for ranges such as\n",
"    '20-25', the bare number for open-ended values such as '65+', 20 for\n",
"    'Young', 65 for 'Old', and float(x) for everything else (so text that\n",
"    is not a number still raises ValueError).\n",
"    \"\"\"\n",
"    if pd.isnull(x):\n",
"        return None\n",
"    # Object columns can mix real numbers with strings; work on stripped\n",
"    # text so the substring checks below cannot raise TypeError.\n",
"    text = str(x).strip()\n",
"    if text == 'Young':\n",
"        return 20\n",
"    if text == 'Old':\n",
"        return 65\n",
"    lower, dash, _ = text.partition('-')\n",
"    if dash and lower:\n",
"        # Range: keep the lower bound. A leading '-' is a sign, not a range.\n",
"        return float(lower)\n",
"    if '+' in text:\n",
"        return float(text.replace('+', ''))\n",
"    return float(text)\n",
"\n",
"swap_age_sex = ['ds000119', 'ds000121']\n",
"no_header = ['ds000109']\n",
"skip = ['ds000006A', 'ds000113', 'ds000149', 'ds000109']\n",
"\n",
"# Datasets whose participants.tsv is not under the usual\n",
"# 'accession/accession_R{latest version}/uncompressed/' prefix.\n",
"key_prefix_overrides = {\n",
"    'ds000202': 'ds000202/ds000202_R1.0.2/uncompressed/ds202_R1.0.2/',\n",
"    'ds000248': 'ds000248/ds000248_R1.0.0/ds000248_R1.0.0/uncompressed/',\n",
"    'ds000051': 'ds000051/ds000051_R2.0.2/uncompressed/ds000051_R2.0.2/',\n",
"}\n",
"# Candidate column names, checked in this order; the first one present wins.\n",
"age_columns = ['age', 'Age', 'ageAtFirstScanYears', 'age at baseline ', 'AgeGroup',\n",
"               'age (5-year bins)', 'ScanAge', 'Age (years)']\n",
"sex_columns = ['sex', 'Sex', 'gender', 'Gender', 'jsex', 'gender_F']\n",
"# Datasets that code sex numerically instead of with labels.\n",
"numeric_sex_codes = {'ds000203': {1: 'M', 2: 'F'}, 'ds000249': {1: 'F', 0: 'M'}}\n",
"# Labels after stripping whitespace, a trailing comma and upper-casing.\n",
"sex_labels = {'M': 'M', 'MALE': 'M', 'F': 'F', 'FEMALE': 'F'}\n",
"\n",
"\n",
"def list_prefixes(prefix):\n",
"    \"\"\"Return the 'directory' prefixes directly below `prefix` in the openneuro bucket.\n",
"\n",
"    Uses a paginator because a single list_objects call returns at most\n",
"    1000 entries, and tolerates responses without 'CommonPrefixes' (nothing\n",
"    matched) by returning an empty list instead of raising KeyError.\n",
"    \"\"\"\n",
"    pages = s3.get_paginator('list_objects').paginate(\n",
"        Bucket='openneuro', Prefix=prefix, Delimiter='/')\n",
"    return [entry['Prefix'] for page in pages for entry in page.get('CommonPrefixes', [])]\n",
"\n",
"\n",
"def first_present(df, candidates):\n",
"    \"\"\"Return the first column of `df` whose name is in `candidates`, or None if none is.\"\"\"\n",
"    for name in candidates:\n",
"        if name in df.columns:\n",
"            return df[name]\n",
"    return None\n",
"\n",
"\n",
"def normalise_sex(value):\n",
"    \"\"\"Map a raw sex label to 'M' or 'F'; missing stays NaN, unknown labels are returned unchanged.\"\"\"\n",
"    if pd.isnull(value):\n",
"        return np.nan\n",
"    label = str(value).strip().rstrip(',').upper()\n",
"    return sex_labels.get(label, value)\n",
"\n",
"\n",
"ds_meta = {}\n",
"for dataset_prefix in list_prefixes(''):\n",
"    accession_number = dataset_prefix[0:-1]\n",
"    if accession_number in skip:\n",
"        continue\n",
"    print(accession_number)\n",
"    ds_meta[accession_number] = {}\n",
"\n",
"    release_prefixes = [p for p in list_prefixes(dataset_prefix) if '_R' in p]\n",
"    if accession_number in key_prefix_overrides:\n",
"        key_prefix = key_prefix_overrides[accession_number]\n",
"    elif release_prefixes:\n",
"        # The text after '_R' keeps the prefix's trailing '/', e.g. '2.0.4/',\n",
"        # which is why 'uncompressed/' is appended without a separator.\n",
"        lv = sorted(LooseVersion(p.split('_R')[-1]) for p in release_prefixes)\n",
"        print(str(lv[0]), str(lv[-1]))\n",
"        key_prefix = accession_number + '/' + accession_number + '_R' + str(lv[-1]) + 'uncompressed/'\n",
"    else:\n",
"        key_prefix = accession_number + '/uncompressed/'\n",
"\n",
"    # Participants are counted from the sub-* folders, not from the table.\n",
"    participant_count = len(list_prefixes(key_prefix + 'sub-'))\n",
"    ds_meta[accession_number]['participant_count'] = participant_count\n",
"    try:\n",
"        obj = s3.get_object(Bucket='openneuro', Key=key_prefix + 'participants.tsv')\n",
"    except s3.exceptions.ClientError:\n",
"        # Only S3 service errors (typically a missing key) mean 'no file';\n",
"        # anything else, including KeyboardInterrupt, now propagates.\n",
"        print('no participants.tsv file')\n",
"        continue\n",
"\n",
"    if accession_number in no_header:\n",
"        args = {'names': ['participant_id', 'sex', 'age'], 'header': None, 'delimiter': r'\\s+'}\n",
"    else:\n",
"        args = {'delimiter': '\\t'}\n",
"    df = pd.read_csv(io.BytesIO(obj['Body'].read()), na_values=['n/a', 'D'], **args)\n",
"\n",
"    if accession_number in swap_age_sex:\n",
"        # These datasets have the two column headers swapped.\n",
"        age, sex = df.sex, df.age\n",
"    else:\n",
"        age = first_present(df, age_columns)\n",
"        sex = first_present(df, sex_columns)\n",
"\n",
"    if age is not None:\n",
"        if age.dtype == 'O':\n",
"            # Text ages ('20-25', '65+', 'Young', ...) become numbers; None becomes NaN.\n",
"            age = pd.to_numeric(age.apply(sanitize))\n",
"        ds_meta[accession_number]['below_18'] = (age < 18).sum()\n",
"        ds_meta[accession_number]['above_18'] = (age >= 18).sum()\n",
"\n",
"    # Sex counts depend only on a sex column being present (previously they\n",
"    # were skipped whenever the *age* column was missing).\n",
"    if sex is not None:\n",
"        if accession_number in numeric_sex_codes:\n",
"            sex = sex.map(numeric_sex_codes[accession_number])\n",
"        sex = sex.apply(normalise_sex)\n",
"        unexpected = set(sex.dropna().unique()) - {'M', 'F'}\n",
"        if unexpected:\n",
"            # Report and carry on; such rows end up counted as missing sex.\n",
"            print('unrecognised sex labels:', sorted(unexpected, key=str))\n",
"        ds_meta[accession_number]['female_count'] = (sex == 'F').sum()\n",
"        ds_meta[accession_number]['male_count'] = (sex == 'M').sum()\n",
"\n",
"    if age is None or sex is None:\n",
"        # Show the available columns so a new alias can be added above.\n",
"        print(df.columns)\n",
"\n",
"    print(ds_meta[accession_number])"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>participant_count</th>\n",
" <th>below_18</th>\n",
" <th>above_18</th>\n",
" <th>female_count</th>\n",
" <th>male_count</th>\n",
" <th>missing_age</th>\n",
" <th>missing_sex</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>ds000001</th>\n",
" <td>16</td>\n",
" <td>0.0</td>\n",
" <td>16.0</td>\n",
" <td>10.0</td>\n",
" <td>6.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ds000002</th>\n",
" <td>17</td>\n",
" <td>0.0</td>\n",
" <td>17.0</td>\n",
" <td>10.0</td>\n",
" <td>7.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ds000003</th>\n",
" <td>13</td>\n",
" <td>0.0</td>\n",
" <td>13.0</td>\n",
" <td>5.0</td>\n",
" <td>8.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ds000005</th>\n",
" <td>16</td>\n",
" <td>0.0</td>\n",
" <td>16.0</td>\n",
" <td>8.0</td>\n",
" <td>8.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ds000006</th>\n",
" <td>14</td>\n",
" <td>0.0</td>\n",
" <td>14.0</td>\n",
" <td>9.0</td>\n",
" <td>5.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ds000007</th>\n",
" <td>20</td>\n",
" <td>0.0</td>\n",
" <td>20.0</td>\n",
" <td>9.0</td>\n",
" <td>11.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ds000008</th>\n",
" <td>14</td>\n",
" <td>0.0</td>\n",
" <td>14.0</td>\n",
" <td>4.0</td>\n",
" <td>10.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ds000009</th>\n",
" <td>24</td>\n",
" <td>0.0</td>\n",
" <td>24.0</td>\n",
" <td>10.0</td>\n",
" <td>14.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ds000011</th>\n",
" <td>14</td>\n",
" <td>0.0</td>\n",
" <td>14.0</td>\n",
" <td>9.0</td>\n",
" <td>5.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ds000017</th>\n",
" <td>8</td>\n",
" <td>0.0</td>\n",
" <td>5.0</td>\n",
" <td>1.0</td>\n",
" <td>4.0</td>\n",
" <td>3.0</td>\n",
" <td>3.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ds000030</th>\n",
" <td>272</td>\n",
" <td>0.0</td>\n",
" <td>272.0</td>\n",
" <td>117.0</td>\n",
" <td>155.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ds000031</th>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ds000051</th>\n",
" <td>13</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>13.0</td>\n",
" <td>13.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ds000052</th>\n",
" <td>13</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>13.0</td>\n",
" <td>13.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ds000053</th>\n",
" <td>59</td>\n",
" <td>0.0</td>\n",
" <td>59.0</td>\n",
" <td>31.0</td>\n",
" <td>28.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ds000101</th>\n",
" <td>21</td>\n",
" <td>0.0</td>\n",
" <td>21.0</td>\n",
" <td>9.0</td>\n",
" <td>12.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ds000102</th>\n",
" <td>26</td>\n",
" <td>0.0</td>\n",
" <td>26.0</td>\n",
" <td>1.0</td>\n",
" <td>16.0</td>\n",
" <td>0.0</td>\n",
" <td>9.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ds000105</th>\n",
" <td>6</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>6.0</td>\n",
" <td>6.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ds000107</th>\n",
" <td>49</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>49.0</td>\n",
" <td>49.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ds000108</th>\n",
" <td>34</td>\n",
" <td>0.0</td>\n",
" <td>34.0</td>\n",
" <td>17.0</td>\n",
" <td>17.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ds000110</th>\n",
" <td>18</td>\n",
" <td>0.0</td>\n",
" <td>18.0</td>\n",
" <td>9.0</td>\n",
" <td>9.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ds000113b</th>\n",
" <td>16</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>16.0</td>\n",
" <td>16.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ds000113c</th>\n",
" <td>7</td>\n",
" <td>0.0</td>\n",
" <td>7.0</td>\n",
" <td>2.0</td>\n",
" <td>5.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ds000113d</th>\n",
" <td>30</td>\n",
" <td>2.0</td>\n",
" <td>28.0</td>\n",
" <td>16.0</td>\n",
" <td>14.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ds000114</th>\n",
" <td>10</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>10.0</td>\n",
" <td>10.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ds000115</th>\n",
" <td>99</td>\n",
" <td>12.0</td>\n",
" <td>87.0</td>\n",
" <td>40.0</td>\n",
" <td>59.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ds000116</th>\n",
" <td>17</td>\n",
" <td>0.0</td>\n",
" <td>17.0</td>\n",
" <td>6.0</td>\n",
" <td>11.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ds000117</th>\n",
" <td>16</td>\n",
" <td>0.0</td>\n",
" <td>16.0</td>\n",
" <td>7.0</td>\n",
" <td>9.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ds000119</th>\n",
" <td>73</td>\n",
" <td>46.0</td>\n",
" <td>27.0</td>\n",
" <td>43.0</td>\n",
" <td>30.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ds000120</th>\n",
" <td>26</td>\n",
" <td>16.0</td>\n",
" <td>10.0</td>\n",
" <td>15.0</td>\n",
" <td>11.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ds000220</th>\n",
" <td>26</td>\n",
" <td>0.0</td>\n",
" <td>26.0</td>\n",
" <td>13.0</td>\n",
" <td>13.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ds000221</th>\n",
" <td>318</td>\n",
" <td>0.0</td>\n",
" <td>316.0</td>\n",
" <td>129.0</td>\n",
" <td>189.0</td>\n",
" <td>2.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ds000222</th>\n",
" <td>79</td>\n",
" <td>0.0</td>\n",
" <td>79.0</td>\n",
" <td>41.0</td>\n",
" <td>38.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ds000223</th>\n",
" <td>19</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>19.0</td>\n",
" <td>19.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ds000224</th>\n",
" <td>10</td>\n",
" <td>0.0</td>\n",
" <td>10.0</td>\n",
" <td>5.0</td>\n",
" <td>5.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ds000228</th>\n",
" <td>155</td>\n",
" <td>122.0</td>\n",
" <td>33.0</td>\n",
" <td>84.0</td>\n",
" <td>71.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ds000229</th>\n",
" <td>15</td>\n",
" <td>0.0</td>\n",
" <td>15.0</td>\n",
" <td>7.0</td>\n",
" <td>8.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ds000231</th>\n",
" <td>9</td>\n",
" <td>0.0</td>\n",
" <td>9.0</td>\n",
" <td>7.0</td>\n",
" <td>2.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ds000232</th>\n",
" <td>10</td>\n",
" <td>0.0</td>\n",
" <td>10.0</td>\n",
" <td>7.0</td>\n",
" <td>3.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ds000233</th>\n",
" <td>12</td>\n",
" <td>0.0</td>\n",
" <td>12.0</td>\n",
" <td>7.0</td>\n",
" <td>5.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ds000234</th>\n",
" <td>5</td>\n",
" <td>0.0</td>\n",
" <td>5.0</td>\n",
" <td>1.0</td>\n",
" <td>4.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ds000235</th>\n",
" <td>4</td>\n",
" <td>0.0</td>\n",
" <td>4.0</td>\n",
" <td>2.0</td>\n",
" <td>2.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ds000236</th>\n",
" <td>18</td>\n",
" <td>0.0</td>\n",
" <td>18.0</td>\n",
" <td>12.0</td>\n",
" <td>6.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ds000237</th>\n",
" <td>13</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>13.0</td>\n",
" <td>13.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ds000238</th>\n",
" <td>35</td>\n",
" <td>0.0</td>\n",
" <td>35.0</td>\n",
" <td>17.0</td>\n",
" <td>18.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ds000239</th>\n",
" <td>3</td>\n",
" <td>0.0</td>\n",
" <td>3.0</td>\n",
" <td>1.0</td>\n",
" <td>2.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ds000240</th>\n",
" <td>63</td>\n",
" <td>0.0</td>\n",
" <td>63.0</td>\n",
" <td>35.0</td>\n",
" <td>28.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ds000241</th>\n",
" <td>12</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>12.0</td>\n",
" <td>12.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ds000243</th>\n",
" <td>120</td>\n",
" <td>0.0</td>\n",
" <td>120.0</td>\n",
" <td>61.0</td>\n",
" <td>59.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ds000244</th>\n",
" <td>12</td>\n",
" <td>0.0</td>\n",
" <td>12.0</td>\n",
" <td>9.0</td>\n",
" <td>3.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ds000245</th>\n",
" <td>45</td>\n",
" <td>0.0</td>\n",
" <td>45.0</td>\n",
" <td>25.0</td>\n",
" <td>20.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ds000246</th>\n",
" <td>1</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ds000247</th>\n",
" <td>6</td>\n",
" <td>0.0</td>\n",
" <td>5.0</td>\n",
" <td>2.0</td>\n",
" <td>3.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ds000248</th>\n",
" <td>2</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2.0</td>\n",
" <td>2.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ds000249</th>\n",
" <td>26</td>\n",
" <td>0.0</td>\n",
" <td>26.0</td>\n",
" <td>13.0</td>\n",
" <td>13.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ds000253</th>\n",
" <td>20</td>\n",
" <td>0.0</td>\n",
" <td>20.0</td>\n",
" <td>20.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ds000254</th>\n",
" <td>13</td>\n",
" <td>0.0</td>\n",
" <td>13.0</td>\n",
" <td>7.0</td>\n",
" <td>6.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ds000255</th>\n",
" <td>2</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2.0</td>\n",
" <td>2.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ds000256</th>\n",
" <td>24</td>\n",
" <td>24.0</td>\n",
" <td>0.0</td>\n",
" <td>10.0</td>\n",
" <td>14.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ds000258</th>\n",
" <td>89</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>89.0</td>\n",
" <td>89.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>92 rows × 7 columns</p>\n",
"</div>"
],
"text/plain": [
" participant_count below_18 above_18 female_count male_count \\\n",
"ds000001 16 0.0 16.0 10.0 6.0 \n",
"ds000002 17 0.0 17.0 10.0 7.0 \n",
"ds000003 13 0.0 13.0 5.0 8.0 \n",
"ds000005 16 0.0 16.0 8.0 8.0 \n",
"ds000006 14 0.0 14.0 9.0 5.0 \n",
"ds000007 20 0.0 20.0 9.0 11.0 \n",
"ds000008 14 0.0 14.0 4.0 10.0 \n",
"ds000009 24 0.0 24.0 10.0 14.0 \n",
"ds000011 14 0.0 14.0 9.0 5.0 \n",
"ds000017 8 0.0 5.0 1.0 4.0 \n",
"ds000030 272 0.0 272.0 117.0 155.0 \n",
"ds000031 1 NaN NaN NaN NaN \n",
"ds000051 13 NaN NaN NaN NaN \n",
"ds000052 13 NaN NaN NaN NaN \n",
"ds000053 59 0.0 59.0 31.0 28.0 \n",
"ds000101 21 0.0 21.0 9.0 12.0 \n",
"ds000102 26 0.0 26.0 1.0 16.0 \n",
"ds000105 6 NaN NaN NaN NaN \n",
"ds000107 49 NaN NaN NaN NaN \n",
"ds000108 34 0.0 34.0 17.0 17.0 \n",
"ds000110 18 0.0 18.0 9.0 9.0 \n",
"ds000113b 16 NaN NaN NaN NaN \n",
"ds000113c 7 0.0 7.0 2.0 5.0 \n",
"ds000113d 30 2.0 28.0 16.0 14.0 \n",
"ds000114 10 NaN NaN NaN NaN \n",
"ds000115 99 12.0 87.0 40.0 59.0 \n",
"ds000116 17 0.0 17.0 6.0 11.0 \n",
"ds000117 16 0.0 16.0 7.0 9.0 \n",
"ds000119 73 46.0 27.0 43.0 30.0 \n",
"ds000120 26 16.0 10.0 15.0 11.0 \n",
"... ... ... ... ... ... \n",
"ds000220 26 0.0 26.0 13.0 13.0 \n",
"ds000221 318 0.0 316.0 129.0 189.0 \n",
"ds000222 79 0.0 79.0 41.0 38.0 \n",
"ds000223 19 NaN NaN NaN NaN \n",
"ds000224 10 0.0 10.0 5.0 5.0 \n",
"ds000228 155 122.0 33.0 84.0 71.0 \n",
"ds000229 15 0.0 15.0 7.0 8.0 \n",
"ds000231 9 0.0 9.0 7.0 2.0 \n",
"ds000232 10 0.0 10.0 7.0 3.0 \n",
"ds000233 12 0.0 12.0 7.0 5.0 \n",
"ds000234 5 0.0 5.0 1.0 4.0 \n",
"ds000235 4 0.0 4.0 2.0 2.0 \n",
"ds000236 18 0.0 18.0 12.0 6.0 \n",
"ds000237 13 NaN NaN NaN NaN \n",
"ds000238 35 0.0 35.0 17.0 18.0 \n",
"ds000239 3 0.0 3.0 1.0 2.0 \n",
"ds000240 63 0.0 63.0 35.0 28.0 \n",
"ds000241 12 NaN NaN NaN NaN \n",
"ds000243 120 0.0 120.0 61.0 59.0 \n",
"ds000244 12 0.0 12.0 9.0 3.0 \n",
"ds000245 45 0.0 45.0 25.0 20.0 \n",
"ds000246 1 0.0 1.0 0.0 1.0 \n",
"ds000247 6 0.0 5.0 2.0 3.0 \n",
"ds000248 2 NaN NaN NaN NaN \n",
"ds000249 26 0.0 26.0 13.0 13.0 \n",
"ds000253 20 0.0 20.0 20.0 0.0 \n",
"ds000254 13 0.0 13.0 7.0 6.0 \n",
"ds000255 2 NaN NaN NaN NaN \n",
"ds000256 24 24.0 0.0 10.0 14.0 \n",
"ds000258 89 NaN NaN NaN NaN \n",
"\n",
" missing_age missing_sex \n",
"ds000001 0.0 0.0 \n",
"ds000002 0.0 0.0 \n",
"ds000003 0.0 0.0 \n",
"ds000005 0.0 0.0 \n",
"ds000006 0.0 0.0 \n",
"ds000007 0.0 0.0 \n",
"ds000008 0.0 0.0 \n",
"ds000009 0.0 0.0 \n",
"ds000011 0.0 0.0 \n",
"ds000017 3.0 3.0 \n",
"ds000030 0.0 0.0 \n",
"ds000031 1.0 1.0 \n",
"ds000051 13.0 13.0 \n",
"ds000052 13.0 13.0 \n",
"ds000053 0.0 0.0 \n",
"ds000101 0.0 0.0 \n",
"ds000102 0.0 9.0 \n",
"ds000105 6.0 6.0 \n",
"ds000107 49.0 49.0 \n",
"ds000108 0.0 0.0 \n",
"ds000110 0.0 0.0 \n",
"ds000113b 16.0 16.0 \n",
"ds000113c 0.0 0.0 \n",
"ds000113d 0.0 0.0 \n",
"ds000114 10.0 10.0 \n",
"ds000115 0.0 0.0 \n",
"ds000116 0.0 0.0 \n",
"ds000117 0.0 0.0 \n",
"ds000119 0.0 0.0 \n",
"ds000120 0.0 0.0 \n",
"... ... ... \n",
"ds000220 0.0 0.0 \n",
"ds000221 2.0 0.0 \n",
"ds000222 0.0 0.0 \n",
"ds000223 19.0 19.0 \n",
"ds000224 0.0 0.0 \n",
"ds000228 0.0 0.0 \n",
"ds000229 0.0 0.0 \n",
"ds000231 0.0 0.0 \n",
"ds000232 0.0 0.0 \n",
"ds000233 0.0 0.0 \n",
"ds000234 0.0 0.0 \n",
"ds000235 0.0 0.0 \n",
"ds000236 0.0 0.0 \n",
"ds000237 13.0 13.0 \n",
"ds000238 0.0 0.0 \n",
"ds000239 0.0 0.0 \n",
"ds000240 0.0 0.0 \n",
"ds000241 12.0 12.0 \n",
"ds000243 0.0 0.0 \n",
"ds000244 0.0 0.0 \n",
"ds000245 0.0 0.0 \n",
"ds000246 0.0 0.0 \n",
"ds000247 1.0 1.0 \n",
"ds000248 2.0 2.0 \n",
"ds000249 0.0 0.0 \n",
"ds000253 0.0 0.0 \n",
"ds000254 0.0 0.0 \n",
"ds000255 2.0 2.0 \n",
"ds000256 0.0 0.0 \n",
"ds000258 89.0 89.0 \n",
"\n",
"[92 rows x 7 columns]"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# One row per dataset, one column per key collected in ds_meta; the two\n",
"# derived columns count participants not covered by the age / sex tallies\n",
"# (absent tallies are treated as 0, so such datasets are entirely missing).\n",
"openfmri_df = pd.DataFrame.from_dict(ds_meta, orient='index').assign(\n",
"    missing_age=lambda d: d['participant_count'] - (d['below_18'].fillna(0) + d['above_18'].fillna(0)),\n",
"    missing_sex=lambda d: d['participant_count'] - (d['female_count'].fillna(0) + d['male_count'].fillna(0)),\n",
")\n",
"openfmri_df"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>participant_count</th>\n",
" <th>below_18</th>\n",
" <th>above_18</th>\n",
" <th>female_count</th>\n",
" <th>male_count</th>\n",
" <th>missing_age</th>\n",
" <th>missing_sex</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>ds000017</th>\n",
" <td>8</td>\n",
" <td>0.0</td>\n",
" <td>5.0</td>\n",
" <td>1.0</td>\n",
" <td>4.0</td>\n",
" <td>3.0</td>\n",
" <td>3.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ds000031</th>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ds000051</th>\n",
" <td>13</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>13.0</td>\n",
" <td>13.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ds000052</th>\n",
" <td>13</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>13.0</td>\n",
" <td>13.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ds000105</th>\n",
" <td>6</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>6.0</td>\n",
" <td>6.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ds000107</th>\n",
" <td>49</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>49.0</td>\n",
" <td>49.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ds000113b</th>\n",
" <td>16</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>16.0</td>\n",
" <td>16.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ds000114</th>\n",
" <td>10</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>10.0</td>\n",
" <td>10.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ds000133</th>\n",
" <td>26</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>26.0</td>\n",
" <td>26.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ds000158</th>\n",
" <td>217</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>217.0</td>\n",
" <td>217.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ds000164</th>\n",
" <td>28</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>28.0</td>\n",
" <td>28.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ds000168</th>\n",
" <td>35</td>\n",
" <td>0.0</td>\n",
" <td>25.0</td>\n",
" <td>15.0</td>\n",
" <td>20.0</td>\n",
" <td>10.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ds000204</th>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ds000205</th>\n",
" <td>11</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>11.0</td>\n",
" <td>11.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ds000206</th>\n",
" <td>6</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>6.0</td>\n",
" <td>6.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ds000213</th>\n",
" <td>26</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>26.0</td>\n",
" <td>26.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ds000216</th>\n",
" <td>7</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>7.0</td>\n",
" <td>7.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ds000221</th>\n",
" <td>318</td>\n",
" <td>0.0</td>\n",
" <td>316.0</td>\n",
" <td>129.0</td>\n",
" <td>189.0</td>\n",
" <td>2.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ds000223</th>\n",
" <td>19</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>19.0</td>\n",
" <td>19.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ds000237</th>\n",
" <td>13</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>13.0</td>\n",
" <td>13.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ds000241</th>\n",
" <td>12</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>12.0</td>\n",
" <td>12.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ds000247</th>\n",
" <td>6</td>\n",
" <td>0.0</td>\n",
" <td>5.0</td>\n",
" <td>2.0</td>\n",
" <td>3.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ds000248</th>\n",
" <td>2</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2.0</td>\n",
" <td>2.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ds000255</th>\n",
" <td>2</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2.0</td>\n",
" <td>2.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ds000258</th>\n",
" <td>89</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>89.0</td>\n",
" <td>89.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" participant_count below_18 above_18 female_count male_count \\\n",
"ds000017 8 0.0 5.0 1.0 4.0 \n",
"ds000031 1 NaN NaN NaN NaN \n",
"ds000051 13 NaN NaN NaN NaN \n",
"ds000052 13 NaN NaN NaN NaN \n",
"ds000105 6 NaN NaN NaN NaN \n",
"ds000107 49 NaN NaN NaN NaN \n",
"ds000113b 16 NaN NaN NaN NaN \n",
"ds000114 10 NaN NaN NaN NaN \n",
"ds000133 26 NaN NaN NaN NaN \n",
"ds000158 217 NaN NaN NaN NaN \n",
"ds000164 28 NaN NaN NaN NaN \n",
"ds000168 35 0.0 25.0 15.0 20.0 \n",
"ds000204 1 NaN NaN NaN NaN \n",
"ds000205 11 NaN NaN NaN NaN \n",
"ds000206 6 NaN NaN NaN NaN \n",
"ds000213 26 NaN NaN NaN NaN \n",
"ds000216 7 NaN NaN NaN NaN \n",
"ds000221 318 0.0 316.0 129.0 189.0 \n",
"ds000223 19 NaN NaN NaN NaN \n",
"ds000237 13 NaN NaN NaN NaN \n",
"ds000241 12 NaN NaN NaN NaN \n",
"ds000247 6 0.0 5.0 2.0 3.0 \n",
"ds000248 2 NaN NaN NaN NaN \n",
"ds000255 2 NaN NaN NaN NaN \n",
"ds000258 89 NaN NaN NaN NaN \n",
"\n",
" missing_age missing_sex \n",
"ds000017 3.0 3.0 \n",
"ds000031 1.0 1.0 \n",
"ds000051 13.0 13.0 \n",
"ds000052 13.0 13.0 \n",
"ds000105 6.0 6.0 \n",
"ds000107 49.0 49.0 \n",
"ds000113b 16.0 16.0 \n",
"ds000114 10.0 10.0 \n",
"ds000133 26.0 26.0 \n",
"ds000158 217.0 217.0 \n",
"ds000164 28.0 28.0 \n",
"ds000168 10.0 0.0 \n",
"ds000204 1.0 1.0 \n",
"ds000205 11.0 11.0 \n",
"ds000206 6.0 6.0 \n",
"ds000213 26.0 26.0 \n",
"ds000216 7.0 7.0 \n",
"ds000221 2.0 0.0 \n",
"ds000223 19.0 19.0 \n",
"ds000237 13.0 13.0 \n",
"ds000241 12.0 12.0 \n",
"ds000247 1.0 1.0 \n",
"ds000248 2.0 2.0 \n",
"ds000255 2.0 2.0 \n",
"ds000258 89.0 89.0 "
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Keep only the datasets whose missing_age count is positive.\n",
"openfmri_df.loc[openfmri_df['missing_age'] > 0]"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"3307"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Sum of participant_count over all datasets.\n",
"openfmri_df['participant_count'].sum()"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"583.0"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Sum of missing_age over all datasets.\n",
"openfmri_df['missing_age'].sum()"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"580.0"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Sum of missing_sex over all datasets.\n",
"openfmri_df['missing_sex'].sum()"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"293.0"
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Sum of below_18 over all datasets (Series.sum skips NaN entries by default).\n",
"openfmri_df['below_18'].sum()"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"2431.0"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Sum of above_18 over all datasets (Series.sum skips NaN entries by default).\n",
"openfmri_df['above_18'].sum()"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1379.0"
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Sum of female_count over all datasets (Series.sum skips NaN entries by default).\n",
"openfmri_df['female_count'].sum()"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1348.0"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Sum of male_count over all datasets (Series.sum skips NaN entries by default).\n",
"openfmri_df['male_count'].sum()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment