Created
February 13, 2018 00:27
-
-
Save chrisgorgo/6ca1357f8077f7243d6605faeb1cdfb7 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import boto3\n", | |
"from botocore import UNSIGNED\n", | |
"from botocore.client import Config\n", | |
"from distutils.version import LooseVersion, StrictVersion\n", | |
"import pandas as pd\n", | |
"import io\n", | |
"import numpy as np\n", | |
"s3 = boto3.client(\"s3\", config=Config(signature_version=UNSIGNED))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 19, | |
"metadata": { | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"ds000001\n", | |
"1.0.0/ 2.0.4/\n", | |
"{'participant_count': 16, 'below_18': 0, 'above_18': 16, 'female_count': 10, 'male_count': 6}\n", | |
"ds000002\n", | |
"1.0.0/ 2.0.5/\n", | |
"{'participant_count': 17, 'below_18': 0, 'above_18': 17, 'female_count': 10, 'male_count': 7}\n", | |
"ds000003\n", | |
"1.0.0/ 2.0.2/\n", | |
"{'participant_count': 13, 'below_18': 0, 'above_18': 13, 'female_count': 5, 'male_count': 8}\n", | |
"ds000005\n", | |
"1.0.0/ 2.0.1/\n", | |
"{'participant_count': 16, 'below_18': 0, 'above_18': 16, 'female_count': 8, 'male_count': 8}\n", | |
"ds000006\n", | |
"1.0.0/ 2.0.1/\n", | |
"{'participant_count': 14, 'below_18': 0, 'above_18': 14, 'female_count': 9, 'male_count': 5}\n", | |
"ds000007\n", | |
"1.0.1/ 2.0.1/\n", | |
"{'participant_count': 20, 'below_18': 0, 'above_18': 20, 'female_count': 9, 'male_count': 11}\n", | |
"ds000008\n", | |
"1.0.1/ 2.0.0/\n", | |
"{'participant_count': 14, 'below_18': 0, 'above_18': 14, 'female_count': 4, 'male_count': 10}\n", | |
"ds000009\n", | |
"1.0.0/ 2.0.3/\n", | |
"{'participant_count': 24, 'below_18': 0, 'above_18': 24, 'female_count': 10, 'male_count': 14}\n", | |
"ds000011\n", | |
"1.0.0/ 2.0.1/\n", | |
"{'participant_count': 14, 'below_18': 0, 'above_18': 14, 'female_count': 9, 'male_count': 5}\n", | |
"ds000017\n", | |
"1.0.0/ 2.0.1/\n", | |
"{'participant_count': 8, 'below_18': 0, 'above_18': 5, 'female_count': 1, 'male_count': 4}\n", | |
"ds000030\n", | |
"1.0.2/ 1.0.5/\n", | |
"{'participant_count': 272, 'below_18': 0, 'above_18': 272, 'female_count': 117, 'male_count': 155}\n", | |
"ds000031\n", | |
"1.0.2/ 1.0.4/\n", | |
"no participants.tsv file\n", | |
"ds000051\n", | |
"no participants.tsv file\n", | |
"ds000052\n", | |
"1.0.0/ 2.0.0/\n", | |
"no participants.tsv file\n", | |
"ds000053\n", | |
"1.0.0/ 1.0.2/\n", | |
"{'participant_count': 59, 'below_18': 0, 'above_18': 59, 'female_count': 31, 'male_count': 28}\n", | |
"ds000101\n", | |
"1.0.0/ 2.0.0/\n", | |
"{'participant_count': 21, 'below_18': 0, 'above_18': 21, 'female_count': 9, 'male_count': 12}\n", | |
"ds000102\n", | |
"1.0.0/ 2.0.0/\n", | |
"{'participant_count': 26, 'below_18': 0, 'above_18': 26, 'female_count': 1, 'male_count': 16}\n", | |
"ds000105\n", | |
"1.0.1/ 2.0.2/\n", | |
"no participants.tsv file\n", | |
"ds000107\n", | |
"1.0.0/ 2.0.2/\n", | |
"no participants.tsv file\n", | |
"ds000108\n", | |
"1.0.1/ 2.0.0/\n", | |
"{'participant_count': 34, 'below_18': 0, 'above_18': 34, 'female_count': 17, 'male_count': 17}\n", | |
"ds000110\n", | |
"1.0.0/ 2.0.1/\n", | |
"{'participant_count': 18, 'below_18': 0, 'above_18': 18, 'female_count': 9, 'male_count': 9}\n", | |
"ds000113b\n", | |
"2.0.0/ 2.0.1/\n", | |
"no participants.tsv file\n", | |
"ds000113c\n", | |
"1.0.0/ 1.0.0/\n", | |
"{'participant_count': 7, 'below_18': 0, 'above_18': 7, 'female_count': 2, 'male_count': 5}\n", | |
"ds000113d\n", | |
"1.0.0/ 2.0.0/\n", | |
"{'participant_count': 30, 'below_18': 2, 'above_18': 28, 'female_count': 16, 'male_count': 14}\n", | |
"ds000114\n", | |
"2.0.0/ 2.0.1/\n", | |
"no participants.tsv file\n", | |
"ds000115\n", | |
"1.0.2/ 2.0.0/\n", | |
"{'participant_count': 99, 'below_18': 12, 'above_18': 87, 'female_count': 40, 'male_count': 59}\n", | |
"ds000116\n", | |
"1.0.0/ 2.0.0/\n", | |
"{'participant_count': 17, 'below_18': 0, 'above_18': 17, 'female_count': 6, 'male_count': 11}\n", | |
"ds000117\n", | |
"0.1.0/ 1.0.0/\n", | |
"{'participant_count': 16, 'below_18': 0, 'above_18': 16, 'female_count': 7, 'male_count': 9}\n", | |
"ds000119\n", | |
"1.0.0/ 2.0.1/\n", | |
"{'participant_count': 73, 'below_18': 46, 'above_18': 27, 'female_count': 43, 'male_count': 30}\n", | |
"ds000120\n", | |
"1.0.0/ 1.0.1/\n", | |
"{'participant_count': 26, 'below_18': 16, 'above_18': 10, 'female_count': 15, 'male_count': 11}\n", | |
"ds000121\n", | |
"1.0.0/ 2.0.2/\n", | |
"{'participant_count': 28, 'below_18': 12, 'above_18': 16, 'female_count': 16, 'male_count': 12}\n", | |
"ds000122\n", | |
"1.0.0/ 1.0.0/\n", | |
"{'participant_count': 17, 'below_18': 0, 'above_18': 17, 'female_count': 8, 'male_count': 9}\n", | |
"ds000133\n", | |
"1.0.0/ 1.0.0/\n", | |
"Index(['participant_id', 'study_group', 'handedness', 'gender'], dtype='object')\n", | |
"{'participant_count': 26}\n", | |
"ds000138\n", | |
"1.0.0/ 2.0.0/\n", | |
"{'participant_count': 16, 'below_18': 0, 'above_18': 16, 'female_count': 7, 'male_count': 9}\n", | |
"ds000140\n", | |
"2.0.0/ 2.0.0/\n", | |
"{'participant_count': 33, 'below_18': 0, 'above_18': 33, 'female_count': 22, 'male_count': 11}\n", | |
"ds000144\n", | |
"{'participant_count': 45, 'below_18': 45, 'above_18': 0, 'female_count': 29, 'male_count': 16}\n", | |
"ds000148\n", | |
"1.0.0/ 1.0.0/\n", | |
"{'participant_count': 49, 'below_18': 0, 'above_18': 49, 'female_count': 26, 'male_count': 23}\n", | |
"ds000157\n", | |
"1.0.0/ 1.0.3/\n", | |
"{'participant_count': 30, 'below_18': 0, 'above_18': 30, 'female_count': 30, 'male_count': 0}\n", | |
"ds000158\n", | |
"1.0.0/ 1.0.1/\n", | |
"no participants.tsv file\n", | |
"ds000164\n", | |
"1.0.0/ 1.0.1/\n", | |
"no participants.tsv file\n", | |
"ds000168\n", | |
"1.0.0/ 1.0.1/\n", | |
"{'participant_count': 35, 'below_18': 0, 'above_18': 25, 'female_count': 15, 'male_count': 20}\n", | |
"ds000170\n", | |
"1.0.0/ 1.0.1/\n", | |
"{'participant_count': 15, 'below_18': 0, 'above_18': 15, 'female_count': 3, 'male_count': 12}\n", | |
"ds000171\n", | |
"1.0.0/ 1.0.0/\n", | |
"{'participant_count': 39, 'below_18': 0, 'above_18': 39, 'female_count': 22, 'male_count': 17}\n", | |
"ds000172\n", | |
"1.0.0/ 1.0.1/\n", | |
"{'participant_count': 13, 'below_18': 0, 'above_18': 13, 'female_count': 7, 'male_count': 6}\n", | |
"ds000174\n", | |
"1.0.0/ 1.0.0/\n", | |
"{'participant_count': 42, 'below_18': 2, 'above_18': 40, 'female_count': 13, 'male_count': 29}\n", | |
"ds000177\n", | |
"1.0.0/ 1.0.1/\n", | |
"{'participant_count': 11, 'below_18': 0, 'above_18': 11, 'female_count': 4, 'male_count': 7}\n", | |
"ds000200\n", | |
"1.0.0/ 1.0.0/\n", | |
"{'participant_count': 14, 'below_18': 12, 'above_18': 2, 'female_count': 6, 'male_count': 8}\n", | |
"ds000201\n", | |
"1.0.0/ 1.0.5/\n", | |
"{'participant_count': 86, 'below_18': 0, 'above_18': 86, 'female_count': 44, 'male_count': 42}\n", | |
"ds000202\n", | |
"{'participant_count': 95, 'below_18': 0, 'above_18': 95, 'female_count': 95, 'male_count': 0}\n", | |
"ds000203\n", | |
"1.0.0/ 1.0.2/\n", | |
"{'participant_count': 26, 'below_18': 0, 'above_18': 26, 'female_count': 10, 'male_count': 16}\n", | |
"ds000204\n", | |
"1.0.0/ 1.0.2/\n", | |
"no participants.tsv file\n", | |
"ds000205\n", | |
"1.0.0/ 1.0.0/\n", | |
"Index(['participant_id', 'handedness'], dtype='object')\n", | |
"{'participant_count': 11}\n", | |
"ds000206\n", | |
"1.0.0/ 1.0.0/\n", | |
"no participants.tsv file\n", | |
"ds000208\n", | |
"1.0.0/ 1.0.0/\n", | |
"{'participant_count': 76, 'below_18': 0, 'above_18': 76, 'female_count': 40, 'male_count': 36}\n", | |
"ds000210\n", | |
"1.0.0/ 1.0.1/\n", | |
"{'participant_count': 31, 'below_18': 0, 'above_18': 31, 'female_count': 16, 'male_count': 15}\n", | |
"ds000212\n", | |
"1.0.0/ 1.0.1/\n", | |
"{'participant_count': 39, 'below_18': 0, 'above_18': 39, 'female_count': 10, 'male_count': 29}\n", | |
"ds000213\n", | |
"1.0.0/ 1.0.2/\n", | |
"Index(['participant_id', 'gender', 'group'], dtype='object')\n", | |
"{'participant_count': 26}\n", | |
"ds000214\n", | |
"1.0.0/ 1.0.0/\n", | |
"{'participant_count': 36, 'below_18': 0, 'above_18': 36, 'female_count': 30, 'male_count': 6}\n", | |
"ds000216\n", | |
"1.0.0/ 1.0.1/\n", | |
"no participants.tsv file\n", | |
"ds000217\n", | |
"1.0.1/ 1.0.1/\n", | |
"{'participant_count': 41, 'below_18': 0, 'above_18': 41, 'female_count': 23, 'male_count': 18}\n", | |
"ds000218\n", | |
"1.0.0/ 1.0.1/\n", | |
"{'participant_count': 19, 'below_18': 0, 'above_18': 19, 'female_count': 0, 'male_count': 19}\n", | |
"ds000219\n", | |
"1.0.0/ 1.0.0/\n", | |
"{'participant_count': 21, 'below_18': 0, 'above_18': 21, 'female_count': 0, 'male_count': 21}\n", | |
"ds000220\n", | |
"1.0.0/ 1.0.0/\n", | |
"{'participant_count': 26, 'below_18': 0, 'above_18': 26, 'female_count': 13, 'male_count': 13}\n", | |
"ds000221\n", | |
"1.0.0/ 1.0.0/\n", | |
"{'participant_count': 318, 'below_18': 0, 'above_18': 316, 'female_count': 129, 'male_count': 189}\n", | |
"ds000222\n", | |
"1.0.0/ 1.0.0/\n", | |
"{'participant_count': 79, 'below_18': 0, 'above_18': 79, 'female_count': 41, 'male_count': 38}\n", | |
"ds000223\n", | |
"1.0.0/ 2.0.0/\n", | |
"Index(['participant_id'], dtype='object')\n", | |
"{'participant_count': 19}\n", | |
"ds000224\n", | |
"1.0.0/ 1.0.2/\n", | |
"{'participant_count': 10, 'below_18': 0, 'above_18': 10, 'female_count': 5, 'male_count': 5}\n", | |
"ds000228\n", | |
"1.0.0/ 1.0.1/\n", | |
"{'participant_count': 155, 'below_18': 122, 'above_18': 33, 'female_count': 84, 'male_count': 71}\n", | |
"ds000229\n", | |
"1.0.0/ 1.0.0/\n", | |
"{'participant_count': 15, 'below_18': 0, 'above_18': 15, 'female_count': 7, 'male_count': 8}\n", | |
"ds000231\n", | |
"1.0.0/ 1.0.0/\n", | |
"{'participant_count': 9, 'below_18': 0, 'above_18': 9, 'female_count': 7, 'male_count': 2}\n", | |
"ds000232\n", | |
"1.0.0/ 1.0.1/\n", | |
"{'participant_count': 10, 'below_18': 0, 'above_18': 10, 'female_count': 7, 'male_count': 3}\n", | |
"ds000233\n", | |
"1.0.0/ 1.0.0/\n", | |
"{'participant_count': 12, 'below_18': 0, 'above_18': 12, 'female_count': 7, 'male_count': 5}\n", | |
"ds000234\n", | |
"1.0.0/ 1.0.0/\n", | |
"{'participant_count': 5, 'below_18': 0, 'above_18': 5, 'female_count': 1, 'male_count': 4}\n", | |
"ds000235\n", | |
"1.0.0/ 1.0.0/\n", | |
"{'participant_count': 4, 'below_18': 0, 'above_18': 4, 'female_count': 2, 'male_count': 2}\n", | |
"ds000236\n", | |
"1.0.0/ 1.0.0/\n", | |
"{'participant_count': 18, 'below_18': 0, 'above_18': 18, 'female_count': 12, 'male_count': 6}\n", | |
"ds000237\n", | |
"no participants.tsv file\n", | |
"ds000238\n", | |
"1.0.0/ 1.0.1/\n", | |
"{'participant_count': 35, 'below_18': 0, 'above_18': 35, 'female_count': 17, 'male_count': 18}\n", | |
"ds000239\n", | |
"1.0.0/ 1.0.1/\n", | |
"{'participant_count': 3, 'below_18': 0, 'above_18': 3, 'female_count': 1, 'male_count': 2}\n", | |
"ds000240\n", | |
"1.0.0/ 1.0.0/\n", | |
"{'participant_count': 63, 'below_18': 0, 'above_18': 63, 'female_count': 35, 'male_count': 28}\n", | |
"ds000241\n", | |
"1.0.0/ 1.0.0/\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"no participants.tsv file\n", | |
"ds000243\n", | |
"1.0.0/ 1.0.0/\n", | |
"{'participant_count': 120, 'below_18': 0, 'above_18': 120, 'female_count': 61, 'male_count': 59}\n", | |
"ds000244\n", | |
"{'participant_count': 12, 'below_18': 0, 'above_18': 12, 'female_count': 9, 'male_count': 3}\n", | |
"ds000245\n", | |
"1.0.0/ 1.0.0/\n", | |
"{'participant_count': 45, 'below_18': 0, 'above_18': 45, 'female_count': 25, 'male_count': 20}\n", | |
"ds000246\n", | |
"1.0.0/ 1.0.0/\n", | |
"{'participant_count': 1, 'below_18': 0, 'above_18': 1, 'female_count': 0, 'male_count': 1}\n", | |
"ds000247\n", | |
"1.0.0/ 1.0.0/\n", | |
"{'participant_count': 6, 'below_18': 0, 'above_18': 5, 'female_count': 2, 'male_count': 3}\n", | |
"ds000248\n", | |
"no participants.tsv file\n", | |
"ds000249\n", | |
"1.0.0/ 1.0.0/\n", | |
"{'participant_count': 26, 'below_18': 0, 'above_18': 26, 'female_count': 13, 'male_count': 13}\n", | |
"ds000253\n", | |
"1.0.0/ 1.0.0/\n", | |
"{'participant_count': 20, 'below_18': 0, 'above_18': 20, 'female_count': 20, 'male_count': 0}\n", | |
"ds000254\n", | |
"1.0.0/ 1.0.0/\n", | |
"{'participant_count': 13, 'below_18': 0, 'above_18': 13, 'female_count': 7, 'male_count': 6}\n", | |
"ds000255\n", | |
"1.0.0/ 1.0.0/\n", | |
"no participants.tsv file\n", | |
"ds000256\n", | |
"1.0.0/ 1.0.0/\n", | |
"{'participant_count': 24, 'below_18': 24, 'above_18': 0, 'female_count': 10, 'male_count': 14}\n", | |
"ds000258\n", | |
"1.0.0/ 1.0.0/\n", | |
"no participants.tsv file\n" | |
] | |
} | |
], | |
"source": [ | |
"def sanitize(x):\n",
"    \"\"\"Convert one raw age entry from a participants.tsv column into a number.\n",
"\n",
"    Understands plain numbers, ranges such as '20-25' (the lower bound is\n",
"    used), open-ended values such as '90+', and the group labels 'Young'\n",
"    (mapped to 20) and 'Old' (mapped to 65).\n",
"\n",
"    Returns None for missing values. Raises ValueError when the text is none\n",
"    of the notations above.\n",
"    \"\"\"\n",
"    if pd.isnull(x):\n",
"        return None\n",
"    # Object columns can mix strings with real numbers; numbers support neither\n",
"    # `in` nor `split`, and need no parsing anyway.\n",
"    if not isinstance(x, str):\n",
"        return float(x)\n",
"    text = x.strip()\n",
"    lower, dash, _ = text.partition('-')\n",
"    # Only treat '-' as a range separator when something precedes it, so a\n",
"    # leading minus sign falls through to the plain float() parse below.\n",
"    if dash and lower.strip():\n",
"        return float(lower)\n",
"    if '+' in text:\n",
"        return float(text.replace('+', ''))\n",
"    if text == 'Young':\n",
"        return 20\n",
"    if text == 'Old':\n",
"        return 65\n",
"    return float(text)\n",
"\n", | |
"BUCKET = 'openneuro'\n",
"\n",
"# Datasets whose participants.tsv is read with the 'age' and 'sex' columns swapped.\n",
"swap_age_sex = ['ds000119', 'ds000121']\n",
"# Datasets whose participants.tsv is parsed as header-less, whitespace-separated text.\n",
"no_header = ['ds000109']\n",
"# Accession numbers left out of the scan entirely.\n",
"skip = ['ds000006A', 'ds000113', 'ds000149', 'ds000109']\n",
"# Datasets whose files are not under '<accession>/<accession>_R<version>/uncompressed/'.\n",
"key_prefix_overrides = {\n",
"    'ds000202': 'ds000202/ds000202_R1.0.2/uncompressed/ds202_R1.0.2/',\n",
"    'ds000248': 'ds000248/ds000248_R1.0.0/ds000248_R1.0.0/uncompressed/',\n",
"    'ds000051': 'ds000051/ds000051_R2.0.2/uncompressed/ds000051_R2.0.2/',\n",
"}\n",
"# Column names tried, in order of preference, when looking for age and sex.\n",
"age_columns = ['age', 'Age', 'ageAtFirstScanYears', 'age at baseline ', 'AgeGroup',\n",
"               'age (5-year bins)', 'ScanAge', 'Age (years)']\n",
"sex_columns = ['sex', 'Sex', 'gender', 'Gender', 'jsex', 'gender_F']\n",
"# Datasets that encode sex as integers instead of text labels.\n",
"sex_codes = {'ds000203': {1: 'M', 2: 'F'}, 'ds000249': {1: 'F', 0: 'M'}}\n",
"# Text labels accepted after trimming whitespace / a trailing comma and upper-casing.\n",
"sex_labels = {'M': 'M', 'MALE': 'M', 'F': 'F', 'FEMALE': 'F'}\n",
"\n",
"\n",
"def _list_prefixes(prefix):\n",
"    \"\"\"Return every immediate sub-prefix (\"folder\") found below ``prefix``.\n",
"\n",
"    The listing is paginated so that long listings are not cut off after the\n",
"    first page; an empty list is returned when nothing matches.\n",
"    \"\"\"\n",
"    pages = s3.get_paginator('list_objects').paginate(\n",
"        Bucket=BUCKET, Prefix=prefix, Delimiter='/')\n",
"    return [cp['Prefix'] for page in pages for cp in page.get('CommonPrefixes', [])]\n",
"\n",
"\n",
"def _version_key(version):\n",
"    \"\"\"Sort key for a release string such as '2.0.4/'.\n",
"\n",
"    Numeric components compare as integers (so 1.0.10 sorts after 1.0.9);\n",
"    non-numeric components sort after numeric ones instead of raising.\n",
"    \"\"\"\n",
"    return [(0, int(part), '') if part.isdigit() else (1, 0, part)\n",
"            for part in version.strip('/').split('.')]\n",
"\n",
"\n",
"def _first_column(df, candidates):\n",
"    \"\"\"Return the first column of ``df`` whose name is in ``candidates``, else None.\"\"\"\n",
"    for name in candidates:\n",
"        if name in df.columns:\n",
"            return df[name]\n",
"    return None\n",
"\n",
"\n",
"def _normalize_sex(value, codes=None):\n",
"    \"\"\"Map one raw sex/gender entry to 'M', 'F' or NaN.\n",
"\n",
"    ``codes`` is an optional dataset-specific lookup (e.g. ``{1: 'M', 2: 'F'}``).\n",
"    Entries that cannot be recognised are returned unchanged so that the\n",
"    caller can report them.\n",
"    \"\"\"\n",
"    if pd.isnull(value):\n",
"        return np.nan\n",
"    if codes is not None:\n",
"        return codes.get(value, value)\n",
"    label = str(value).strip().rstrip(',').strip().upper()\n",
"    return sex_labels.get(label, value)\n",
"\n",
"\n",
"ds_meta = {}\n",
"for dataset_prefix in _list_prefixes(''):\n",
"    accession_number = dataset_prefix[0:-1]\n",
"    if accession_number in skip:\n",
"        continue\n",
"    print(accession_number)\n",
"    ds_meta[accession_number] = {}\n",
"\n",
"    # Work out where the newest release of the dataset lives.\n",
"    if accession_number in key_prefix_overrides:\n",
"        Key_prefix = key_prefix_overrides[accession_number]\n",
"    else:\n",
"        release_prefixes = [p for p in _list_prefixes(dataset_prefix) if '_R' in p]\n",
"        if release_prefixes:\n",
"            release_prefixes.sort(key=lambda p: _version_key(p.split('_R')[-1]))\n",
"            print(release_prefixes[0].split('_R')[-1], release_prefixes[-1].split('_R')[-1])\n",
"            Key_prefix = release_prefixes[-1] + 'uncompressed/'\n",
"        else:\n",
"            Key_prefix = accession_number + '/uncompressed/'\n",
"\n",
"    # Every 'sub-*' folder counts as one participant.\n",
"    participant_count = len(_list_prefixes(Key_prefix + 'sub-'))\n",
"    ds_meta[accession_number]['participant_count'] = participant_count\n",
"\n",
"    try:\n",
"        obj = s3.get_object(Bucket=BUCKET, Key=Key_prefix + 'participants.tsv')\n",
"    except s3.exceptions.ClientError as err:\n",
"        # Only a missing key means \"no file\"; report anything else as what it is\n",
"        # instead of hiding it behind the same message.\n",
"        code = err.response.get('Error', {}).get('Code')\n",
"        if code in ('NoSuchKey', '404'):\n",
"            print('no participants.tsv file')\n",
"        else:\n",
"            print('could not fetch participants.tsv ({})'.format(code))\n",
"        continue\n",
"\n",
"    if accession_number in no_header:\n",
"        args = {'names': ['participant_id', 'sex', 'age'], 'header': None, 'delimiter': r'\\s+'}\n",
"    else:\n",
"        args = {'delimiter': '\\t'}\n",
"    df = pd.read_csv(io.BytesIO(obj['Body'].read()), na_values=['n/a', 'D'], **args)\n",
"\n",
"    # Start from a clean slate on every dataset so a missing column can never\n",
"    # pick up the Series left over from the previous iteration.\n",
"    if accession_number in swap_age_sex:\n",
"        age, sex = df.sex, df.age\n",
"    else:\n",
"        age = _first_column(df, age_columns)\n",
"        sex = _first_column(df, sex_columns)\n",
"\n",
"    if age is not None:\n",
"        if age.dtype == 'O':\n",
"            age = age.apply(sanitize)\n",
"        # Guarantees a numeric Series (None -> NaN) so the comparisons below work.\n",
"        age = pd.to_numeric(age, errors='coerce')\n",
"        ds_meta[accession_number]['below_18'] = int((age < 18).sum())\n",
"        ds_meta[accession_number]['above_18'] = int((age >= 18).sum())\n",
"\n",
"    if sex is not None:\n",
"        codes = sex_codes.get(accession_number)\n",
"        sex = sex.apply(lambda value: _normalize_sex(value, codes))\n",
"        unexpected = set(sex.dropna().unique()) - {'M', 'F'}\n",
"        if unexpected:\n",
"            # Report and carry on; unknown labels are simply not counted.\n",
"            print('unrecognised sex values:', sorted(unexpected, key=str))\n",
"        ds_meta[accession_number]['female_count'] = int((sex == 'F').sum())\n",
"        ds_meta[accession_number]['male_count'] = int((sex == 'M').sum())\n",
"\n",
"    if age is None or sex is None:\n",
"        # Show the available columns to help extend age_columns / sex_columns.\n",
"        print(df.columns)\n",
"\n",
"    print(ds_meta[accession_number])"
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 20, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>participant_count</th>\n", | |
" <th>below_18</th>\n", | |
" <th>above_18</th>\n", | |
" <th>female_count</th>\n", | |
" <th>male_count</th>\n", | |
" <th>missing_age</th>\n", | |
" <th>missing_sex</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>ds000001</th>\n", | |
" <td>16</td>\n", | |
" <td>0.0</td>\n", | |
" <td>16.0</td>\n", | |
" <td>10.0</td>\n", | |
" <td>6.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>ds000002</th>\n", | |
" <td>17</td>\n", | |
" <td>0.0</td>\n", | |
" <td>17.0</td>\n", | |
" <td>10.0</td>\n", | |
" <td>7.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>ds000003</th>\n", | |
" <td>13</td>\n", | |
" <td>0.0</td>\n", | |
" <td>13.0</td>\n", | |
" <td>5.0</td>\n", | |
" <td>8.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>ds000005</th>\n", | |
" <td>16</td>\n", | |
" <td>0.0</td>\n", | |
" <td>16.0</td>\n", | |
" <td>8.0</td>\n", | |
" <td>8.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>ds000006</th>\n", | |
" <td>14</td>\n", | |
" <td>0.0</td>\n", | |
" <td>14.0</td>\n", | |
" <td>9.0</td>\n", | |
" <td>5.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>ds000007</th>\n", | |
" <td>20</td>\n", | |
" <td>0.0</td>\n", | |
" <td>20.0</td>\n", | |
" <td>9.0</td>\n", | |
" <td>11.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>ds000008</th>\n", | |
" <td>14</td>\n", | |
" <td>0.0</td>\n", | |
" <td>14.0</td>\n", | |
" <td>4.0</td>\n", | |
" <td>10.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>ds000009</th>\n", | |
" <td>24</td>\n", | |
" <td>0.0</td>\n", | |
" <td>24.0</td>\n", | |
" <td>10.0</td>\n", | |
" <td>14.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>ds000011</th>\n", | |
" <td>14</td>\n", | |
" <td>0.0</td>\n", | |
" <td>14.0</td>\n", | |
" <td>9.0</td>\n", | |
" <td>5.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>ds000017</th>\n", | |
" <td>8</td>\n", | |
" <td>0.0</td>\n", | |
" <td>5.0</td>\n", | |
" <td>1.0</td>\n", | |
" <td>4.0</td>\n", | |
" <td>3.0</td>\n", | |
" <td>3.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>ds000030</th>\n", | |
" <td>272</td>\n", | |
" <td>0.0</td>\n", | |
" <td>272.0</td>\n", | |
" <td>117.0</td>\n", | |
" <td>155.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>ds000031</th>\n", | |
" <td>1</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>1.0</td>\n", | |
" <td>1.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>ds000051</th>\n", | |
" <td>13</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>13.0</td>\n", | |
" <td>13.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>ds000052</th>\n", | |
" <td>13</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>13.0</td>\n", | |
" <td>13.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>ds000053</th>\n", | |
" <td>59</td>\n", | |
" <td>0.0</td>\n", | |
" <td>59.0</td>\n", | |
" <td>31.0</td>\n", | |
" <td>28.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>ds000101</th>\n", | |
" <td>21</td>\n", | |
" <td>0.0</td>\n", | |
" <td>21.0</td>\n", | |
" <td>9.0</td>\n", | |
" <td>12.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>ds000102</th>\n", | |
" <td>26</td>\n", | |
" <td>0.0</td>\n", | |
" <td>26.0</td>\n", | |
" <td>1.0</td>\n", | |
" <td>16.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>9.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>ds000105</th>\n", | |
" <td>6</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>6.0</td>\n", | |
" <td>6.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>ds000107</th>\n", | |
" <td>49</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>49.0</td>\n", | |
" <td>49.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>ds000108</th>\n", | |
" <td>34</td>\n", | |
" <td>0.0</td>\n", | |
" <td>34.0</td>\n", | |
" <td>17.0</td>\n", | |
" <td>17.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>ds000110</th>\n", | |
" <td>18</td>\n", | |
" <td>0.0</td>\n", | |
" <td>18.0</td>\n", | |
" <td>9.0</td>\n", | |
" <td>9.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>ds000113b</th>\n", | |
" <td>16</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>16.0</td>\n", | |
" <td>16.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>ds000113c</th>\n", | |
" <td>7</td>\n", | |
" <td>0.0</td>\n", | |
" <td>7.0</td>\n", | |
" <td>2.0</td>\n", | |
" <td>5.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>ds000113d</th>\n", | |
" <td>30</td>\n", | |
" <td>2.0</td>\n", | |
" <td>28.0</td>\n", | |
" <td>16.0</td>\n", | |
" <td>14.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>ds000114</th>\n", | |
" <td>10</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>10.0</td>\n", | |
" <td>10.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>ds000115</th>\n", | |
" <td>99</td>\n", | |
" <td>12.0</td>\n", | |
" <td>87.0</td>\n", | |
" <td>40.0</td>\n", | |
" <td>59.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>ds000116</th>\n", | |
" <td>17</td>\n", | |
" <td>0.0</td>\n", | |
" <td>17.0</td>\n", | |
" <td>6.0</td>\n", | |
" <td>11.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>ds000117</th>\n", | |
" <td>16</td>\n", | |
" <td>0.0</td>\n", | |
" <td>16.0</td>\n", | |
" <td>7.0</td>\n", | |
" <td>9.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>ds000119</th>\n", | |
" <td>73</td>\n", | |
" <td>46.0</td>\n", | |
" <td>27.0</td>\n", | |
" <td>43.0</td>\n", | |
" <td>30.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>ds000120</th>\n", | |
" <td>26</td>\n", | |
" <td>16.0</td>\n", | |
" <td>10.0</td>\n", | |
" <td>15.0</td>\n", | |
" <td>11.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>...</th>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>ds000220</th>\n", | |
" <td>26</td>\n", | |
" <td>0.0</td>\n", | |
" <td>26.0</td>\n", | |
" <td>13.0</td>\n", | |
" <td>13.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>ds000221</th>\n", | |
" <td>318</td>\n", | |
" <td>0.0</td>\n", | |
" <td>316.0</td>\n", | |
" <td>129.0</td>\n", | |
" <td>189.0</td>\n", | |
" <td>2.0</td>\n", | |
" <td>0.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>ds000222</th>\n", | |
" <td>79</td>\n", | |
" <td>0.0</td>\n", | |
" <td>79.0</td>\n", | |
" <td>41.0</td>\n", | |
" <td>38.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>ds000223</th>\n", | |
" <td>19</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>19.0</td>\n", | |
" <td>19.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>ds000224</th>\n", | |
" <td>10</td>\n", | |
" <td>0.0</td>\n", | |
" <td>10.0</td>\n", | |
" <td>5.0</td>\n", | |
" <td>5.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>ds000228</th>\n", | |
" <td>155</td>\n", | |
" <td>122.0</td>\n", | |
" <td>33.0</td>\n", | |
" <td>84.0</td>\n", | |
" <td>71.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>ds000229</th>\n", | |
" <td>15</td>\n", | |
" <td>0.0</td>\n", | |
" <td>15.0</td>\n", | |
" <td>7.0</td>\n", | |
" <td>8.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>ds000231</th>\n", | |
" <td>9</td>\n", | |
" <td>0.0</td>\n", | |
" <td>9.0</td>\n", | |
" <td>7.0</td>\n", | |
" <td>2.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>ds000232</th>\n", | |
" <td>10</td>\n", | |
" <td>0.0</td>\n", | |
" <td>10.0</td>\n", | |
" <td>7.0</td>\n", | |
" <td>3.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>ds000233</th>\n", | |
" <td>12</td>\n", | |
" <td>0.0</td>\n", | |
" <td>12.0</td>\n", | |
" <td>7.0</td>\n", | |
" <td>5.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>ds000234</th>\n", | |
" <td>5</td>\n", | |
" <td>0.0</td>\n", | |
" <td>5.0</td>\n", | |
" <td>1.0</td>\n", | |
" <td>4.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>ds000235</th>\n", | |
" <td>4</td>\n", | |
" <td>0.0</td>\n", | |
" <td>4.0</td>\n", | |
" <td>2.0</td>\n", | |
" <td>2.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>ds000236</th>\n", | |
" <td>18</td>\n", | |
" <td>0.0</td>\n", | |
" <td>18.0</td>\n", | |
" <td>12.0</td>\n", | |
" <td>6.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>ds000237</th>\n", | |
" <td>13</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>13.0</td>\n", | |
" <td>13.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>ds000238</th>\n", | |
" <td>35</td>\n", | |
" <td>0.0</td>\n", | |
" <td>35.0</td>\n", | |
" <td>17.0</td>\n", | |
" <td>18.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>ds000239</th>\n", | |
" <td>3</td>\n", | |
" <td>0.0</td>\n", | |
" <td>3.0</td>\n", | |
" <td>1.0</td>\n", | |
" <td>2.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>ds000240</th>\n", | |
" <td>63</td>\n", | |
" <td>0.0</td>\n", | |
" <td>63.0</td>\n", | |
" <td>35.0</td>\n", | |
" <td>28.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>ds000241</th>\n", | |
" <td>12</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>12.0</td>\n", | |
" <td>12.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>ds000243</th>\n", | |
" <td>120</td>\n", | |
" <td>0.0</td>\n", | |
" <td>120.0</td>\n", | |
" <td>61.0</td>\n", | |
" <td>59.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>ds000244</th>\n", | |
" <td>12</td>\n", | |
" <td>0.0</td>\n", | |
" <td>12.0</td>\n", | |
" <td>9.0</td>\n", | |
" <td>3.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>ds000245</th>\n", | |
" <td>45</td>\n", | |
" <td>0.0</td>\n", | |
" <td>45.0</td>\n", | |
" <td>25.0</td>\n", | |
" <td>20.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>ds000246</th>\n", | |
" <td>1</td>\n", | |
" <td>0.0</td>\n", | |
" <td>1.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>1.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>ds000247</th>\n", | |
" <td>6</td>\n", | |
" <td>0.0</td>\n", | |
" <td>5.0</td>\n", | |
" <td>2.0</td>\n", | |
" <td>3.0</td>\n", | |
" <td>1.0</td>\n", | |
" <td>1.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>ds000248</th>\n", | |
" <td>2</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>2.0</td>\n", | |
" <td>2.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>ds000249</th>\n", | |
" <td>26</td>\n", | |
" <td>0.0</td>\n", | |
" <td>26.0</td>\n", | |
" <td>13.0</td>\n", | |
" <td>13.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>ds000253</th>\n", | |
" <td>20</td>\n", | |
" <td>0.0</td>\n", | |
" <td>20.0</td>\n", | |
" <td>20.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>ds000254</th>\n", | |
" <td>13</td>\n", | |
" <td>0.0</td>\n", | |
" <td>13.0</td>\n", | |
" <td>7.0</td>\n", | |
" <td>6.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>ds000255</th>\n", | |
" <td>2</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>2.0</td>\n", | |
" <td>2.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>ds000256</th>\n", | |
" <td>24</td>\n", | |
" <td>24.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>10.0</td>\n", | |
" <td>14.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>ds000258</th>\n", | |
" <td>89</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>89.0</td>\n", | |
" <td>89.0</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>92 rows × 7 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" participant_count below_18 above_18 female_count male_count \\\n", | |
"ds000001 16 0.0 16.0 10.0 6.0 \n", | |
"ds000002 17 0.0 17.0 10.0 7.0 \n", | |
"ds000003 13 0.0 13.0 5.0 8.0 \n", | |
"ds000005 16 0.0 16.0 8.0 8.0 \n", | |
"ds000006 14 0.0 14.0 9.0 5.0 \n", | |
"ds000007 20 0.0 20.0 9.0 11.0 \n", | |
"ds000008 14 0.0 14.0 4.0 10.0 \n", | |
"ds000009 24 0.0 24.0 10.0 14.0 \n", | |
"ds000011 14 0.0 14.0 9.0 5.0 \n", | |
"ds000017 8 0.0 5.0 1.0 4.0 \n", | |
"ds000030 272 0.0 272.0 117.0 155.0 \n", | |
"ds000031 1 NaN NaN NaN NaN \n", | |
"ds000051 13 NaN NaN NaN NaN \n", | |
"ds000052 13 NaN NaN NaN NaN \n", | |
"ds000053 59 0.0 59.0 31.0 28.0 \n", | |
"ds000101 21 0.0 21.0 9.0 12.0 \n", | |
"ds000102 26 0.0 26.0 1.0 16.0 \n", | |
"ds000105 6 NaN NaN NaN NaN \n", | |
"ds000107 49 NaN NaN NaN NaN \n", | |
"ds000108 34 0.0 34.0 17.0 17.0 \n", | |
"ds000110 18 0.0 18.0 9.0 9.0 \n", | |
"ds000113b 16 NaN NaN NaN NaN \n", | |
"ds000113c 7 0.0 7.0 2.0 5.0 \n", | |
"ds000113d 30 2.0 28.0 16.0 14.0 \n", | |
"ds000114 10 NaN NaN NaN NaN \n", | |
"ds000115 99 12.0 87.0 40.0 59.0 \n", | |
"ds000116 17 0.0 17.0 6.0 11.0 \n", | |
"ds000117 16 0.0 16.0 7.0 9.0 \n", | |
"ds000119 73 46.0 27.0 43.0 30.0 \n", | |
"ds000120 26 16.0 10.0 15.0 11.0 \n", | |
"... ... ... ... ... ... \n", | |
"ds000220 26 0.0 26.0 13.0 13.0 \n", | |
"ds000221 318 0.0 316.0 129.0 189.0 \n", | |
"ds000222 79 0.0 79.0 41.0 38.0 \n", | |
"ds000223 19 NaN NaN NaN NaN \n", | |
"ds000224 10 0.0 10.0 5.0 5.0 \n", | |
"ds000228 155 122.0 33.0 84.0 71.0 \n", | |
"ds000229 15 0.0 15.0 7.0 8.0 \n", | |
"ds000231 9 0.0 9.0 7.0 2.0 \n", | |
"ds000232 10 0.0 10.0 7.0 3.0 \n", | |
"ds000233 12 0.0 12.0 7.0 5.0 \n", | |
"ds000234 5 0.0 5.0 1.0 4.0 \n", | |
"ds000235 4 0.0 4.0 2.0 2.0 \n", | |
"ds000236 18 0.0 18.0 12.0 6.0 \n", | |
"ds000237 13 NaN NaN NaN NaN \n", | |
"ds000238 35 0.0 35.0 17.0 18.0 \n", | |
"ds000239 3 0.0 3.0 1.0 2.0 \n", | |
"ds000240 63 0.0 63.0 35.0 28.0 \n", | |
"ds000241 12 NaN NaN NaN NaN \n", | |
"ds000243 120 0.0 120.0 61.0 59.0 \n", | |
"ds000244 12 0.0 12.0 9.0 3.0 \n", | |
"ds000245 45 0.0 45.0 25.0 20.0 \n", | |
"ds000246 1 0.0 1.0 0.0 1.0 \n", | |
"ds000247 6 0.0 5.0 2.0 3.0 \n", | |
"ds000248 2 NaN NaN NaN NaN \n", | |
"ds000249 26 0.0 26.0 13.0 13.0 \n", | |
"ds000253 20 0.0 20.0 20.0 0.0 \n", | |
"ds000254 13 0.0 13.0 7.0 6.0 \n", | |
"ds000255 2 NaN NaN NaN NaN \n", | |
"ds000256 24 24.0 0.0 10.0 14.0 \n", | |
"ds000258 89 NaN NaN NaN NaN \n", | |
"\n", | |
" missing_age missing_sex \n", | |
"ds000001 0.0 0.0 \n", | |
"ds000002 0.0 0.0 \n", | |
"ds000003 0.0 0.0 \n", | |
"ds000005 0.0 0.0 \n", | |
"ds000006 0.0 0.0 \n", | |
"ds000007 0.0 0.0 \n", | |
"ds000008 0.0 0.0 \n", | |
"ds000009 0.0 0.0 \n", | |
"ds000011 0.0 0.0 \n", | |
"ds000017 3.0 3.0 \n", | |
"ds000030 0.0 0.0 \n", | |
"ds000031 1.0 1.0 \n", | |
"ds000051 13.0 13.0 \n", | |
"ds000052 13.0 13.0 \n", | |
"ds000053 0.0 0.0 \n", | |
"ds000101 0.0 0.0 \n", | |
"ds000102 0.0 9.0 \n", | |
"ds000105 6.0 6.0 \n", | |
"ds000107 49.0 49.0 \n", | |
"ds000108 0.0 0.0 \n", | |
"ds000110 0.0 0.0 \n", | |
"ds000113b 16.0 16.0 \n", | |
"ds000113c 0.0 0.0 \n", | |
"ds000113d 0.0 0.0 \n", | |
"ds000114 10.0 10.0 \n", | |
"ds000115 0.0 0.0 \n", | |
"ds000116 0.0 0.0 \n", | |
"ds000117 0.0 0.0 \n", | |
"ds000119 0.0 0.0 \n", | |
"ds000120 0.0 0.0 \n", | |
"... ... ... \n", | |
"ds000220 0.0 0.0 \n", | |
"ds000221 2.0 0.0 \n", | |
"ds000222 0.0 0.0 \n", | |
"ds000223 19.0 19.0 \n", | |
"ds000224 0.0 0.0 \n", | |
"ds000228 0.0 0.0 \n", | |
"ds000229 0.0 0.0 \n", | |
"ds000231 0.0 0.0 \n", | |
"ds000232 0.0 0.0 \n", | |
"ds000233 0.0 0.0 \n", | |
"ds000234 0.0 0.0 \n", | |
"ds000235 0.0 0.0 \n", | |
"ds000236 0.0 0.0 \n", | |
"ds000237 13.0 13.0 \n", | |
"ds000238 0.0 0.0 \n", | |
"ds000239 0.0 0.0 \n", | |
"ds000240 0.0 0.0 \n", | |
"ds000241 12.0 12.0 \n", | |
"ds000243 0.0 0.0 \n", | |
"ds000244 0.0 0.0 \n", | |
"ds000245 0.0 0.0 \n", | |
"ds000246 0.0 0.0 \n", | |
"ds000247 1.0 1.0 \n", | |
"ds000248 2.0 2.0 \n", | |
"ds000249 0.0 0.0 \n", | |
"ds000253 0.0 0.0 \n", | |
"ds000254 0.0 0.0 \n", | |
"ds000255 2.0 2.0 \n", | |
"ds000256 0.0 0.0 \n", | |
"ds000258 89.0 89.0 \n", | |
"\n", | |
"[92 rows x 7 columns]" | |
] | |
}, | |
"execution_count": 20, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"openfmri_df = pd.DataFrame.from_dict(ds_meta, orient='index')\n", | |
"openfmri_df['missing_age'] = openfmri_df.participant_count - (openfmri_df.below_18.fillna(0) + openfmri_df.above_18.fillna(0))\n", | |
"openfmri_df['missing_sex'] = openfmri_df.participant_count - (openfmri_df.female_count.fillna(0) + openfmri_df.male_count.fillna(0))\n", | |
"openfmri_df" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 22, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>participant_count</th>\n", | |
" <th>below_18</th>\n", | |
" <th>above_18</th>\n", | |
" <th>female_count</th>\n", | |
" <th>male_count</th>\n", | |
" <th>missing_age</th>\n", | |
" <th>missing_sex</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>ds000017</th>\n", | |
" <td>8</td>\n", | |
" <td>0.0</td>\n", | |
" <td>5.0</td>\n", | |
" <td>1.0</td>\n", | |
" <td>4.0</td>\n", | |
" <td>3.0</td>\n", | |
" <td>3.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>ds000031</th>\n", | |
" <td>1</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>1.0</td>\n", | |
" <td>1.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>ds000051</th>\n", | |
" <td>13</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>13.0</td>\n", | |
" <td>13.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>ds000052</th>\n", | |
" <td>13</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>13.0</td>\n", | |
" <td>13.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>ds000105</th>\n", | |
" <td>6</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>6.0</td>\n", | |
" <td>6.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>ds000107</th>\n", | |
" <td>49</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>49.0</td>\n", | |
" <td>49.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>ds000113b</th>\n", | |
" <td>16</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>16.0</td>\n", | |
" <td>16.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>ds000114</th>\n", | |
" <td>10</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>10.0</td>\n", | |
" <td>10.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>ds000133</th>\n", | |
" <td>26</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>26.0</td>\n", | |
" <td>26.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>ds000158</th>\n", | |
" <td>217</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>217.0</td>\n", | |
" <td>217.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>ds000164</th>\n", | |
" <td>28</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>28.0</td>\n", | |
" <td>28.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>ds000168</th>\n", | |
" <td>35</td>\n", | |
" <td>0.0</td>\n", | |
" <td>25.0</td>\n", | |
" <td>15.0</td>\n", | |
" <td>20.0</td>\n", | |
" <td>10.0</td>\n", | |
" <td>0.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>ds000204</th>\n", | |
" <td>1</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>1.0</td>\n", | |
" <td>1.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>ds000205</th>\n", | |
" <td>11</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>11.0</td>\n", | |
" <td>11.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>ds000206</th>\n", | |
" <td>6</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>6.0</td>\n", | |
" <td>6.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>ds000213</th>\n", | |
" <td>26</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>26.0</td>\n", | |
" <td>26.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>ds000216</th>\n", | |
" <td>7</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>7.0</td>\n", | |
" <td>7.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>ds000221</th>\n", | |
" <td>318</td>\n", | |
" <td>0.0</td>\n", | |
" <td>316.0</td>\n", | |
" <td>129.0</td>\n", | |
" <td>189.0</td>\n", | |
" <td>2.0</td>\n", | |
" <td>0.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>ds000223</th>\n", | |
" <td>19</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>19.0</td>\n", | |
" <td>19.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>ds000237</th>\n", | |
" <td>13</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>13.0</td>\n", | |
" <td>13.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>ds000241</th>\n", | |
" <td>12</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>12.0</td>\n", | |
" <td>12.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>ds000247</th>\n", | |
" <td>6</td>\n", | |
" <td>0.0</td>\n", | |
" <td>5.0</td>\n", | |
" <td>2.0</td>\n", | |
" <td>3.0</td>\n", | |
" <td>1.0</td>\n", | |
" <td>1.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>ds000248</th>\n", | |
" <td>2</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>2.0</td>\n", | |
" <td>2.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>ds000255</th>\n", | |
" <td>2</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>2.0</td>\n", | |
" <td>2.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>ds000258</th>\n", | |
" <td>89</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>89.0</td>\n", | |
" <td>89.0</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" participant_count below_18 above_18 female_count male_count \\\n", | |
"ds000017 8 0.0 5.0 1.0 4.0 \n", | |
"ds000031 1 NaN NaN NaN NaN \n", | |
"ds000051 13 NaN NaN NaN NaN \n", | |
"ds000052 13 NaN NaN NaN NaN \n", | |
"ds000105 6 NaN NaN NaN NaN \n", | |
"ds000107 49 NaN NaN NaN NaN \n", | |
"ds000113b 16 NaN NaN NaN NaN \n", | |
"ds000114 10 NaN NaN NaN NaN \n", | |
"ds000133 26 NaN NaN NaN NaN \n", | |
"ds000158 217 NaN NaN NaN NaN \n", | |
"ds000164 28 NaN NaN NaN NaN \n", | |
"ds000168 35 0.0 25.0 15.0 20.0 \n", | |
"ds000204 1 NaN NaN NaN NaN \n", | |
"ds000205 11 NaN NaN NaN NaN \n", | |
"ds000206 6 NaN NaN NaN NaN \n", | |
"ds000213 26 NaN NaN NaN NaN \n", | |
"ds000216 7 NaN NaN NaN NaN \n", | |
"ds000221 318 0.0 316.0 129.0 189.0 \n", | |
"ds000223 19 NaN NaN NaN NaN \n", | |
"ds000237 13 NaN NaN NaN NaN \n", | |
"ds000241 12 NaN NaN NaN NaN \n", | |
"ds000247 6 0.0 5.0 2.0 3.0 \n", | |
"ds000248 2 NaN NaN NaN NaN \n", | |
"ds000255 2 NaN NaN NaN NaN \n", | |
"ds000258 89 NaN NaN NaN NaN \n", | |
"\n", | |
" missing_age missing_sex \n", | |
"ds000017 3.0 3.0 \n", | |
"ds000031 1.0 1.0 \n", | |
"ds000051 13.0 13.0 \n", | |
"ds000052 13.0 13.0 \n", | |
"ds000105 6.0 6.0 \n", | |
"ds000107 49.0 49.0 \n", | |
"ds000113b 16.0 16.0 \n", | |
"ds000114 10.0 10.0 \n", | |
"ds000133 26.0 26.0 \n", | |
"ds000158 217.0 217.0 \n", | |
"ds000164 28.0 28.0 \n", | |
"ds000168 10.0 0.0 \n", | |
"ds000204 1.0 1.0 \n", | |
"ds000205 11.0 11.0 \n", | |
"ds000206 6.0 6.0 \n", | |
"ds000213 26.0 26.0 \n", | |
"ds000216 7.0 7.0 \n", | |
"ds000221 2.0 0.0 \n", | |
"ds000223 19.0 19.0 \n", | |
"ds000237 13.0 13.0 \n", | |
"ds000241 12.0 12.0 \n", | |
"ds000247 1.0 1.0 \n", | |
"ds000248 2.0 2.0 \n", | |
"ds000255 2.0 2.0 \n", | |
"ds000258 89.0 89.0 " | |
] | |
}, | |
"execution_count": 22, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"openfmri_df[openfmri_df.missing_age > 0]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 25, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"3307" | |
] | |
}, | |
"execution_count": 25, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"openfmri_df.participant_count.sum()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 23, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"583.0" | |
] | |
}, | |
"execution_count": 23, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"openfmri_df.missing_age.sum()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 24, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"580.0" | |
] | |
}, | |
"execution_count": 24, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"openfmri_df.missing_sex.sum()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 26, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"293.0" | |
] | |
}, | |
"execution_count": 26, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"openfmri_df.below_18.sum()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 27, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"2431.0" | |
] | |
}, | |
"execution_count": 27, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"openfmri_df.above_18.sum()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 29, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"1379.0" | |
] | |
}, | |
"execution_count": 29, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"openfmri_df.female_count.sum()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 30, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"1348.0" | |
] | |
}, | |
"execution_count": 30, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"openfmri_df.male_count.sum()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.3" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment