Last active
May 29, 2017 11:03
-
-
Save adek05/1dbf5f35f8396059388eeda777378285 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 509, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"import numpy as np\n", | |
"import pandas as pd\n", | |
"import plotly\n", | |
"import cufflinks as cf\n", | |
"import seaborn as sns\n", | |
"import matplotlib.pyplot as plt" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 510, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"import io\n", | |
"import requests\n", | |
"\n", | |
"EXAMS_URL = 'https://gist.githubusercontent.com/adek05/c2218eaf0d3e04717667a3a2dcc256a0/raw/0395dbb19354d6997c9f6d592e2e9e6138caec08/exams.csv'\n", | |
"PATIENTS_URL = 'https://gist.githubusercontent.com/adek05/c2218eaf0d3e04717667a3a2dcc256a0/raw/0395dbb19354d6997c9f6d592e2e9e6138caec08/patients.csv'\n", | |
"\n", | |
"patients_stream = requests.get(PATIENTS_URL).content\n", | |
"patients = pd.read_csv(io.StringIO(patients_stream.decode('utf-8')))\n", | |
"# patients = pd.read_csv('patients.csv', delimiter=',')\n", | |
"\n", | |
"exams_stream = requests.get(EXAMS_URL).content\n", | |
"exams = pd.read_csv(io.StringIO(exams_stream.decode('utf-8')))\n", | |
"# exams = pd.read_csv('exams.csv', delimiter=',')\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 511, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"cities = ['Szczecin', 'Wrocław', 'Lublin', 'Katowice', 'Gdańsk', 'Olsztyn', 'Kielce', 'Rzeszów']\n", | |
"\n", | |
"def extract_city_from_regional_id(regional_id):\n", | |
"    global cities\n", | |
"    matched_cities = [city for city in cities if city in regional_id]\n", | |
"    if len(matched_cities) == 0:\n", | |
"        return 'Unknown'\n", | |
"    return matched_cities[0]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 512, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"import re\n", | |
"\n", | |
"def extract_id_from_regional_id(regional_id):\n", | |
"    match = re.match(r'.*?(\\d+$)', regional_id)\n", | |
"    if not match:\n", | |
"        return '0'\n", | |
"    return match.group(1)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 513, | |
"metadata": { | |
"collapsed": false, | |
"scrolled": false | |
}, | |
"outputs": [], | |
"source": [ | |
"# Unify Regional id\n", | |
"patients['unique_ids'] = patients['regional_id'].apply(\n", | |
" lambda id: extract_city_from_regional_id(id) + extract_id_from_regional_id(id))\n", | |
"exams['unique_ids'] = exams['regional_id'].apply(\n", | |
" lambda id: extract_city_from_regional_id(id) + extract_id_from_regional_id(id))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 514, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# Extract city for each patient\n", | |
"patients['city'] = patients['regional_id'].apply(lambda x: extract_city_from_regional_id(x))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 515, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"ename": "AttributeError", | |
"evalue": "(\"'Series' object has no attribute 'height'\", 'occurred at index 0')", | |
"output_type": "error", | |
"traceback": [ | |
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", | |
"\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", | |
"\u001b[0;32m<ipython-input-515-c06a44a88da5>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;31m# Calculate BMI for each patient\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mpatients\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'bmi'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpatients\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;32mlambda\u001b[0m \u001b[0mrow\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mrow\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mweight\u001b[0m \u001b[0;34m/\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mrow\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mheight\u001b[0m\u001b[0;34m/\u001b[0m\u001b[0;36m100\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m**\u001b[0m\u001b[0;36m2\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mrow\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mheight\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0;36m0\u001b[0m \u001b[0;32melse\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0mbmi_buckets\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m'wychudzenie'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'niedowaga'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'prawidłowe'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'nadwaga'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'otyłość I'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'otyłość II'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'otyłość III'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m/Users/adek/anaconda3/lib/python3.6/site-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36mapply\u001b[0;34m(self, func, axis, broadcast, raw, reduce, args, **kwds)\u001b[0m\n\u001b[1;32m 4150\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mreduce\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4151\u001b[0m \u001b[0mreduce\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 4152\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_apply_standard\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mreduce\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mreduce\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 4153\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4154\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_apply_broadcast\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m/Users/adek/anaconda3/lib/python3.6/site-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36m_apply_standard\u001b[0;34m(self, func, axis, ignore_failures, reduce)\u001b[0m\n\u001b[1;32m 4246\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4247\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mv\u001b[0m \u001b[0;32min\u001b[0m \u001b[0menumerate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mseries_gen\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 4248\u001b[0;31m \u001b[0mresults\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mv\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 4249\u001b[0m \u001b[0mkeys\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mv\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4250\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mException\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m<ipython-input-515-c06a44a88da5>\u001b[0m in \u001b[0;36m<lambda>\u001b[0;34m(row)\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;31m# Calculate BMI for each patient\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mpatients\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'bmi'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpatients\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;32mlambda\u001b[0m \u001b[0mrow\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mrow\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mweight\u001b[0m \u001b[0;34m/\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mrow\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mheight\u001b[0m\u001b[0;34m/\u001b[0m\u001b[0;36m100\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m**\u001b[0m\u001b[0;36m2\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mrow\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mheight\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0;36m0\u001b[0m \u001b[0;32melse\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0mbmi_buckets\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m'wychudzenie'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'niedowaga'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'prawidłowe'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'nadwaga'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'otyłość I'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'otyłość II'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'otyłość III'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m/Users/adek/anaconda3/lib/python3.6/site-packages/pandas/core/generic.py\u001b[0m in \u001b[0;36m__getattr__\u001b[0;34m(self, name)\u001b[0m\n\u001b[1;32m 2742\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mname\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_info_axis\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2743\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2744\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mobject\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__getattribute__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2745\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2746\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__setattr__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;31mAttributeError\u001b[0m: (\"'Series' object has no attribute 'height'\", 'occurred at index 0')" | |
] | |
} | |
], | |
"source": [ | |
"# Calculate BMI for each patient\n", | |
"patients['bmi'] = patients.apply(lambda row: row.weight / (row.height/100)**2 if row.height != 0 else 0, axis=1)\n", | |
"\n", | |
"bmi_buckets = ['wychudzenie', 'niedowaga', 'prawidłowe', 'nadwaga', 'otyłość I', 'otyłość II', 'otyłość III']\n", | |
"\n", | |
"patients['bmi_bucket'] = pd.cut(patients['bmi'], bins=[0, 17, 18.5, 25, 30, 35, 40, 100], right=True, labels=bmi_buckets)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"# Filter incomplete data rows\n", | |
"patients = patients[~patients.unique_ids.str.contains('Unknown')]\n", | |
"exams = exams[~exams.unique_ids.str.contains('Unknown')]\n", | |
"exams = exams[~exams.regional_id.str.contains('test')]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"# Select columns which matter\n", | |
"patients = pd.DataFrame(patients, columns=['unique_ids', 'city', 'age', 'sex', 'smoker', 'bmi', 'bmi_bucket'])\n", | |
"exams = pd.DataFrame(exams, columns=['exam_name', 'result', 'unique_ids'])\n", | |
"\n", | |
"# Select unique patients\n", | |
"patients = patients.drop_duplicates(subset=['unique_ids'])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# Prepare Glucose Results data\n", | |
"glucose = pd.DataFrame(exams[exams.exam_name.str.contains('GlucoseLevel')])\n", | |
"glucose['glucose_level'] = glucose['result'].apply(lambda x: int(x))\n", | |
"glucose = pd.DataFrame(glucose, columns=['unique_ids', 'glucose_level', 'exam_name'])\n", | |
"\n", | |
"glucose_2h_after_meal = pd.DataFrame(glucose[glucose.exam_name == 'GlucoseLevelYes'])\n", | |
"glucose_less_than_2h_after_meal = pd.DataFrame(glucose[glucose.exam_name == 'GlucoseLevelNo'])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"# Prepare Blood Pressure Results data\n", | |
"def extract_systolic(data):\n", | |
"    return int(re.match(r'(\\d+)/\\d+', data).group(1))\n", | |
"def extract_diastolic(data):\n", | |
"    return int(re.match(r'\\d+/(\\d+)', data).group(1))\n", | |
"\n", | |
"blood_pressure = pd.DataFrame(exams[exams.exam_name == 'BloodPressureTest'])\n", | |
"blood_pressure['systolic'] = blood_pressure['result'].apply(extract_systolic)\n", | |
"blood_pressure['diastolic'] = blood_pressure['result'].apply(extract_diastolic)\n", | |
"blood_pressure = pd.DataFrame(blood_pressure, columns=['unique_ids', 'systolic', 'diastolic'])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"# Join datasets\n", | |
"patients_blood_pressure = pd.merge(blood_pressure, patients, on='unique_ids')\n", | |
"patients_glucose_2h_after_meal = pd.merge(glucose_2h_after_meal, patients, on='unique_ids')\n", | |
"patients_glucose_less_than_2h_after_meal = pd.merge(glucose_less_than_2h_after_meal, patients, on='unique_ids')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"total_number_of_patients = len(patients)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## W sumie przebadano" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"total_number_of_patients" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"patients.groupby('city').size()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"cf.set_config_file(world_readable=True,offline=True)\n", | |
"city_count = pd.DataFrame({'count': patients.groupby('city', as_index=False).size()}).reset_index()\n", | |
"city_count\n", | |
"city_count.iplot(kind='pie', labels='city', values='count', textinfo='value', sort=True, colorscale='blues')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"## Ciśnienie zmierzono\n", | |
"len(patients_blood_pressure)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"## Cukier 2h po jedzeniu zmierzono\n", | |
"len(patients_glucose_2h_after_meal)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false, | |
"scrolled": true | |
}, | |
"outputs": [], | |
"source": [ | |
"## Cukier mniej niż 2h po jedzeniu zmierzono\n", | |
"len(patients_glucose_less_than_2h_after_meal)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"%matplotlib inline\n", | |
"sns.jointplot(\"diastolic\", \"systolic\", data=patients_blood_pressure, kind=\"scatter\", ylim=(80, 200), xlim=(30,140))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false, | |
"scrolled": true | |
}, | |
"outputs": [], | |
"source": [ | |
"## Smoker vs. Nonsmoker\n", | |
"%matplotlib inline\n", | |
"\n", | |
"sns.pairplot(patients_blood_pressure, x_vars=['diastolic'], y_vars=['systolic'], hue='smoker', size=5, markers='o', palette=\"PuOr\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"patients_blood_pressure['age_bucket'] = pd.cut(patients_blood_pressure['age'], bins=[18, 29, 39, 49, 65, 100], right=False)\n", | |
"sns.pairplot(patients_blood_pressure, x_vars=['diastolic'], y_vars=['systolic'], hue='age_bucket', size=5, markers='o', palette=\"PuOr\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"patients_glucose_2h_after_meal['age_bucket'] = \\\n", | |
" pd.cut(patients_glucose_2h_after_meal['age'], bins=[0, 18, 29, 39, 49, 65, 100], right=False)\n", | |
"\n", | |
"# sugar_2h_after_rows.sort_values(by='glucose_yes', ascending=False)\n", | |
"sns.stripplot(x=\"age_bucket\", y=\"glucose_level\", data=patients_glucose_2h_after_meal, jitter=0.15)\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"patients_glucose_less_than_2h_after_meal['age_bucket'] = \\\n", | |
" pd.cut(patients_glucose_less_than_2h_after_meal['age'], bins=[0, 18, 29, 39, 49, 65, 100], right=False)\n", | |
"\n", | |
"sns.stripplot(x=\"age_bucket\", y=\"glucose_level\", data=patients_glucose_less_than_2h_after_meal, jitter=0.15)\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"f, ax = plt.subplots(figsize=(9, 9))\n", | |
"\n", | |
"# Log scale and manually picked labels\n", | |
"ax.set(yscale='log')\n", | |
"\n", | |
"y=[30, 50, 80, 100, 120, 200, 300, 400, 500]\n", | |
"plt.semilogy(y, y)\n", | |
"plt.yticks(y, y)\n", | |
"\n", | |
"# Horizontal line for 200\n", | |
"plt.plot([-10, 10], [200, 200], '--', color=\"red\")\n", | |
"\n", | |
"sns.stripplot(x=\"age_bucket\", y=\"glucose_level\", data=patients_glucose_2h_after_meal, jitter=0.25, hue='smoker', split=True, ax = ax)\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"f, ax = plt.subplots(figsize=(9, 9))\n", | |
"\n", | |
"# Log scale and manually picked labels\n", | |
"ax.set(yscale='log')\n", | |
"\n", | |
"y=[30, 50, 80, 100, 140, 200, 300, 400, 500]\n", | |
"plt.semilogy(y, y)\n", | |
"plt.yticks(y, y)\n", | |
"\n", | |
"# Horizontal line for 200\n", | |
"plt.plot([-10, 10], [200, 200], '--', color=\"red\")\n", | |
"\n", | |
"sns.stripplot(x=\"age_bucket\", y=\"glucose_level\", data=patients_glucose_less_than_2h_after_meal, jitter=0.25, hue='smoker', split=True, ax=ax)\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"f, ax = plt.subplots(figsize=(9, 9))\n", | |
"\n", | |
"# Log scale and hand pick labels\n", | |
"ax.set(yscale='log')\n", | |
"\n", | |
"y=[30, 50, 80, 100, 140, 200, 300, 400, 500]\n", | |
"plt.semilogy(y, y)\n", | |
"plt.yticks(y, y)\n", | |
"\n", | |
"# Horizontal line for 200\n", | |
"plt.plot([-10, 10], [200, 200], '--', color=\"red\")\n", | |
"\n", | |
"sns.boxplot(x=\"age_bucket\", y=\"glucose_level\", data=patients_glucose_2h_after_meal, hue='city', fliersize=10, whis=0.9)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"## Ideas:\n", | |
"# Wrocław vs. Szczecin" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"## BMI\n", | |
"f, ax = plt.subplots(figsize=(9, 9))\n", | |
"\n", | |
"# Log scale and manually picked labels\n", | |
"ax.set(yscale='log')\n", | |
"\n", | |
"y=[30, 50, 80, 100, 140, 200, 300, 400, 500]\n", | |
"plt.semilogy(y, y)\n", | |
"plt.yticks(y, y)\n", | |
"\n", | |
"# Horizontal line for 200\n", | |
"plt.plot([-10, 10], [200, 200], '--', color=\"red\")\n", | |
"\n", | |
"sns.stripplot(x=\"bmi_bucket\", y=\"glucose_level\", \n", | |
" data=patients_glucose_less_than_2h_after_meal, jitter=0.25, hue='smoker',\n", | |
" split=True, ax=ax,\n", | |
" order=['niedowaga', 'prawidłowe', 'nadwaga', 'otyłość I', 'otyłość II', 'otyłość III'])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"colors = ['blue', 'green', 'yellow', 'orange', 'red', 'dark red']\n", | |
"# colors = ['blue', 'green', 'red', 'red', 'red', 'red']\n", | |
"\n", | |
"sns.pairplot(patients_blood_pressure, x_vars=['diastolic'], y_vars=['systolic'],\n", | |
" hue='bmi_bucket',\n", | |
" size=8, markers='o', palette=sns.xkcd_palette(colors),\n", | |
" hue_order=['niedowaga', 'prawidłowe', 'nadwaga', 'otyłość I', 'otyłość II', 'otyłość III'])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.0" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment