Skip to content

Instantly share code, notes, and snippets.

@adek05
Last active May 29, 2017 11:03
Show Gist options
  • Save adek05/1dbf5f35f8396059388eeda777378285 to your computer and use it in GitHub Desktop.
Save adek05/1dbf5f35f8396059388eeda777378285 to your computer and use it in GitHub Desktop.
{
"cells": [
{
"cell_type": "code",
"execution_count": 509,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"import plotly\n",
"import cufflinks as cf\n",
"import seaborn as sns\n",
"import matplotlib.pyplot as plt"
]
},
{
"cell_type": "code",
"execution_count": 510,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"import io\n",
"import requests\n",
"\n",
"EXAMS_URL = 'https://gist.githubusercontent.com/adek05/c2218eaf0d3e04717667a3a2dcc256a0/raw/0395dbb19354d6997c9f6d592e2e9e6138caec08/exams.csv'\n",
"PATIENTS_URL = 'https://gist.githubusercontent.com/adek05/c2218eaf0d3e04717667a3a2dcc256a0/raw/0395dbb19354d6997c9f6d592e2e9e6138caec08/patients.csv'\n",
"\n",
"# Download both CSV exports and parse each one into its own DataFrame.\n",
"# raise_for_status() makes an HTTP error fail here instead of the error body\n",
"# being parsed as if it were CSV.\n",
"patients_response = requests.get(PATIENTS_URL)\n",
"patients_response.raise_for_status()\n",
"patients_stream = patients_response.content\n",
"patients = pd.read_csv(io.StringIO(patients_stream.decode('utf-8')))\n",
"# patients = pd.read_csv('patients.csv', delimiter=',')\n",
"\n",
"exams_response = requests.get(EXAMS_URL)\n",
"exams_response.raise_for_status()\n",
"exams_stream = exams_response.content\n",
"# The exams table must be bound to `exams`: binding it to `patients` replaces\n",
"# the patient table (later cells then find no 'height' column) and leaves\n",
"# `exams` undefined.\n",
"exams = pd.read_csv(io.StringIO(exams_stream.decode('utf-8')))\n",
"# exams = pd.read_csv('exams.csv', delimiter=',')\n"
]
},
{
"cell_type": "code",
"execution_count": 511,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"cities = ['Szczecin', 'Wrocław', 'Lublin', 'Katowice', 'Gdańsk', 'Olsztyn', 'Kielce', 'Rzeszów']\n",
"\n",
"\n",
"def extract_city_from_regional_id(regional_id):\n",
"    \"\"\"Return the first entry of `cities` that occurs in `regional_id`.\n",
"\n",
"    Returns 'Unknown' when no listed city occurs in the id, or when the id is\n",
"    not a string (e.g. the float NaN pandas produces for an empty CSV field).\n",
"    \"\"\"\n",
"    # Guard first: `city in regional_id` raises TypeError for a non-string id.\n",
"    if not isinstance(regional_id, str):\n",
"        return 'Unknown'\n",
"    # `cities` is only read here, so no `global` declaration is needed.\n",
"    return next((city for city in cities if city in regional_id), 'Unknown')"
]
},
{
"cell_type": "code",
"execution_count": 512,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import re\n",
"\n",
"\n",
"def extract_id_from_regional_id(regional_id):\n",
"    \"\"\"Return the digits that end `regional_id`, as a string.\n",
"\n",
"    Returns '0' when the id does not end in a digit, or when it is not a\n",
"    string (e.g. the float NaN pandas produces for an empty CSV field).\n",
"    \"\"\"\n",
"    # Guard first: re.match raises TypeError for a non-string id.\n",
"    if not isinstance(regional_id, str):\n",
"        return '0'\n",
"    # Raw string, so the regex escape is not also treated as a (deprecated,\n",
"    # unrecognised) Python string escape.\n",
"    match = re.match(r'.*?(\\d+$)', regional_id)\n",
"    if not match:\n",
"        return '0'\n",
"    return match.group(1)"
]
},
{
"cell_type": "code",
"execution_count": 513,
"metadata": {
"collapsed": false,
"scrolled": false
},
"outputs": [],
"source": [
"# Unify Regional id\n",
"def to_unique_id(regional_id):\n",
"    \"\"\"Concatenate the city and the id extracted from `regional_id`.\"\"\"\n",
"    city = extract_city_from_regional_id(regional_id)\n",
"    number = extract_id_from_regional_id(regional_id)\n",
"    return city + number\n",
"\n",
"\n",
"# Both tables get the same derived key so they can be joined on it later.\n",
"patients['unique_ids'] = patients['regional_id'].apply(to_unique_id)\n",
"exams['unique_ids'] = exams['regional_id'].apply(to_unique_id)"
]
},
{
"cell_type": "code",
"execution_count": 514,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Extract city for each patient\n",
"# The extractor already takes a single id, so it is applied directly.\n",
"patients['city'] = patients['regional_id'].apply(extract_city_from_regional_id)"
]
},
{
"cell_type": "code",
"execution_count": 515,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Calculate BMI for each patient\n",
"# NOTE(review): dividing height by 100 suggests height in cm (and weight in kg) -- confirm against the data.\n",
"height_m = patients['height'] / 100\n",
"# Vectorised instead of a row-wise apply. Rows with height == 0 get the\n",
"# sentinel BMI 0; the division result for those rows is discarded by where().\n",
"patients['bmi'] = (patients['weight'] / height_m ** 2).where(patients['height'] != 0, 0)\n",
"\n",
"bmi_buckets = ['wychudzenie', 'niedowaga', 'prawidłowe', 'nadwaga', 'otyłość I', 'otyłość II', 'otyłość III']\n",
"# Right-closed bin edges, one label per interval: (0, 17], (17, 18.5], ..., (40, 100].\n",
"bmi_bins = [0, 17, 18.5, 25, 30, 35, 40, 100]\n",
"\n",
"# A BMI outside (0, 100], including the 0 sentinel, gets no bucket (NaN).\n",
"patients['bmi_bucket'] = pd.cut(patients['bmi'], bins=bmi_bins, right=True, labels=bmi_buckets)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Filter incomplete data rows\n",
"# Drop every row whose unified id contains the 'Unknown' city placeholder.\n",
"patient_city_unknown = patients['unique_ids'].str.contains('Unknown')\n",
"patients = patients[~patient_city_unknown]\n",
"\n",
"exam_city_unknown = exams['unique_ids'].str.contains('Unknown')\n",
"exams = exams[~exam_city_unknown]\n",
"\n",
"# Then drop the exams whose regional id contains 'test'.\n",
"exam_is_test = exams['regional_id'].str.contains('test')\n",
"exams = exams[~exam_is_test]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Select columns which matter\n",
"patient_columns = ['unique_ids', 'city', 'age', 'sex', 'smoker', 'bmi', 'bmi_bucket']\n",
"exam_columns = ['exam_name', 'result', 'unique_ids']\n",
"\n",
"# reindex keeps exactly these columns, in this order.\n",
"patients = patients.reindex(columns=patient_columns)\n",
"exams = exams.reindex(columns=exam_columns)\n",
"\n",
"# Select unique patients\n",
"# The first row seen for each unified id is the one kept.\n",
"patients = patients.drop_duplicates(subset='unique_ids')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Prepare Glucose Results data\n",
"is_glucose_exam = exams['exam_name'].str.contains('GlucoseLevel')\n",
"glucose = exams[is_glucose_exam].copy()\n",
"\n",
"# Glucose results are converted to integers for plotting.\n",
"glucose['glucose_level'] = glucose['result'].apply(int)\n",
"glucose = glucose.reindex(columns=['unique_ids', 'glucose_level', 'exam_name'])\n",
"\n",
"# Split by exam name: 'GlucoseLevelYes' vs. 'GlucoseLevelNo'.\n",
"measured_2h_after_meal = glucose['exam_name'] == 'GlucoseLevelYes'\n",
"measured_less_than_2h_after_meal = glucose['exam_name'] == 'GlucoseLevelNo'\n",
"glucose_2h_after_meal = glucose[measured_2h_after_meal].copy()\n",
"glucose_less_than_2h_after_meal = glucose[measured_less_than_2h_after_meal].copy()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Prepare Blood Pressure Results data\n",
"# A result is matched from its start as '<digits>/<digits>'. Raw string, so\n",
"# the regex escapes are not also treated as Python string escapes.\n",
"BLOOD_PRESSURE_PATTERN = re.compile(r'(\\d+)/(\\d+)')\n",
"\n",
"\n",
"def _parse_blood_pressure(data):\n",
"    \"\"\"Parse a '<digits>/<digits>' result into a pair of ints.\n",
"\n",
"    Raises ValueError, naming the offending value, when `data` does not\n",
"    start with that pattern.\n",
"    \"\"\"\n",
"    match = BLOOD_PRESSURE_PATTERN.match(data)\n",
"    if match is None:\n",
"        raise ValueError('Malformed blood pressure result: {!r}'.format(data))\n",
"    return int(match.group(1)), int(match.group(2))\n",
"\n",
"\n",
"def extract_systolic(data):\n",
"    \"\"\"Return the number before the slash as an int (120 for '120/80').\"\"\"\n",
"    return _parse_blood_pressure(data)[0]\n",
"\n",
"\n",
"def extract_diastolic(data):\n",
"    \"\"\"Return the number after the slash as an int (80 for '120/80').\"\"\"\n",
"    return _parse_blood_pressure(data)[1]\n",
"\n",
"\n",
"blood_pressure = pd.DataFrame(exams[exams.exam_name == 'BloodPressureTest'])\n",
"blood_pressure['systolic'] = blood_pressure['result'].apply(extract_systolic)\n",
"blood_pressure['diastolic'] = blood_pressure['result'].apply(extract_diastolic)\n",
"blood_pressure = pd.DataFrame(blood_pressure, columns=['unique_ids', 'systolic', 'diastolic'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Join datasets\n",
"# Each exam table is joined to the patient table on the unified id.\n",
"patients_blood_pressure = blood_pressure.merge(patients, on='unique_ids')\n",
"patients_glucose_2h_after_meal = glucose_2h_after_meal.merge(patients, on='unique_ids')\n",
"patients_glucose_less_than_2h_after_meal = glucose_less_than_2h_after_meal.merge(\n",
"    patients, on='unique_ids')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"total_number_of_patients = len(patients)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## W sumie przebadano"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"total_number_of_patients"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"patients.groupby('city').size()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"cf.set_config_file(world_readable=True,offline=True)\n",
"# One row per city, with the number of patients in a 'count' column.\n",
"# Series.reset_index(name=...) builds that frame directly; wrapping\n",
"# groupby(..., as_index=False).size() in a dict only works while size()\n",
"# returns a Series, which newer pandas releases no longer do.\n",
"city_count = patients.groupby('city').size().reset_index(name='count')\n",
"city_count.iplot(kind='pie', labels='city', values='count', textinfo='value', sort=True, colorscale='blues')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"## Ciśnienie zmierzono\n",
"len(patients_blood_pressure)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"## Cukier 2h po jedzeniu zmierzono\n",
"len(patients_glucose_2h_after_meal)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false,
"scrolled": true
},
"outputs": [],
"source": [
"## Cukier mniej niż 2h po jedzeniu zmierzono\n",
"len(patients_glucose_less_than_2h_after_meal)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"%matplotlib inline\n",
"# x and y are passed by keyword: newer seaborn releases reject them as\n",
"# positional arguments, while older releases accept both forms.\n",
"sns.jointplot(x=\"diastolic\", y=\"systolic\", data=patients_blood_pressure, kind=\"scatter\", ylim=(80, 200), xlim=(30,140))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false,
"scrolled": true
},
"outputs": [],
"source": [
"## Smoker vs. Nonsmoker\n",
"%matplotlib inline\n",
"\n",
"# Diastolic vs. systolic pressure, coloured by the `smoker` column.\n",
"sns.pairplot(\n",
"    patients_blood_pressure,\n",
"    x_vars=['diastolic'],\n",
"    y_vars=['systolic'],\n",
"    hue='smoker',\n",
"    size=5,\n",
"    markers='o',\n",
"    palette=\"PuOr\",\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Left-closed age intervals: [18, 29), [29, 39), [39, 49), [49, 65), [65, 100).\n",
"# Ages outside these intervals get no bucket (NaN).\n",
"blood_pressure_age_bins = [18, 29, 39, 49, 65, 100]\n",
"patients_blood_pressure['age_bucket'] = pd.cut(\n",
"    patients_blood_pressure['age'], bins=blood_pressure_age_bins, right=False)\n",
"\n",
"# Diastolic vs. systolic pressure, coloured by age bucket.\n",
"sns.pairplot(\n",
"    patients_blood_pressure,\n",
"    x_vars=['diastolic'],\n",
"    y_vars=['systolic'],\n",
"    hue='age_bucket',\n",
"    size=5,\n",
"    markers='o',\n",
"    palette=\"PuOr\",\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Left-closed age intervals: [0, 18), [18, 29), [29, 39), [39, 49), [49, 65), [65, 100).\n",
"age_bins = [0, 18, 29, 39, 49, 65, 100]\n",
"patients_glucose_2h_after_meal['age_bucket'] = pd.cut(\n",
"    patients_glucose_2h_after_meal['age'], bins=age_bins, right=False)\n",
"\n",
"# Glucose level per age bucket, one jittered point per measurement.\n",
"sns.stripplot(\n",
"    x=\"age_bucket\",\n",
"    y=\"glucose_level\",\n",
"    data=patients_glucose_2h_after_meal,\n",
"    jitter=0.15,\n",
")\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Left-closed age intervals: [0, 18), [18, 29), [29, 39), [39, 49), [49, 65), [65, 100).\n",
"age_bins = [0, 18, 29, 39, 49, 65, 100]\n",
"patients_glucose_less_than_2h_after_meal['age_bucket'] = pd.cut(\n",
"    patients_glucose_less_than_2h_after_meal['age'], bins=age_bins, right=False)\n",
"\n",
"# Glucose level per age bucket, one jittered point per measurement.\n",
"sns.stripplot(\n",
"    x=\"age_bucket\",\n",
"    y=\"glucose_level\",\n",
"    data=patients_glucose_less_than_2h_after_meal,\n",
"    jitter=0.15,\n",
")\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"f, ax = plt.subplots(figsize=(9, 9))\n",
"\n",
"# Log scale and manually picked labels\n",
"ax.set_yscale('log')\n",
"\n",
"y = [30, 50, 80, 100, 120, 200, 300, 400, 500]\n",
"# NOTE(review): this plots the tick values against themselves (a y = x line);\n",
"# presumably kept only for its effect on the log axis -- confirm.\n",
"ax.semilogy(y, y)\n",
"ax.set_yticks(y)\n",
"ax.set_yticklabels(y)\n",
"\n",
"# Horizontal line for 200\n",
"ax.plot([-10, 10], [200, 200], '--', color=\"red\")\n",
"\n",
"# Glucose level per age bucket, smokers and non-smokers side by side.\n",
"sns.stripplot(\n",
"    x=\"age_bucket\", y=\"glucose_level\", data=patients_glucose_2h_after_meal,\n",
"    jitter=0.25, hue='smoker', split=True, ax=ax)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"f, ax = plt.subplots(figsize=(9, 9))\n",
"\n",
"# Log scale and manually picked labels\n",
"ax.set_yscale('log')\n",
"\n",
"y = [30, 50, 80, 100, 140, 200, 300, 400, 500]\n",
"# NOTE(review): this plots the tick values against themselves (a y = x line);\n",
"# presumably kept only for its effect on the log axis -- confirm.\n",
"ax.semilogy(y, y)\n",
"ax.set_yticks(y)\n",
"ax.set_yticklabels(y)\n",
"\n",
"# Horizontal line for 200\n",
"ax.plot([-10, 10], [200, 200], '--', color=\"red\")\n",
"\n",
"# Glucose level per age bucket, smokers and non-smokers side by side.\n",
"sns.stripplot(\n",
"    x=\"age_bucket\", y=\"glucose_level\", data=patients_glucose_less_than_2h_after_meal,\n",
"    jitter=0.25, hue='smoker', split=True, ax=ax)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"f, ax = plt.subplots(figsize=(9, 9))\n",
"\n",
"# Log scale and hand pick labels\n",
"ax.set_yscale('log')\n",
"\n",
"y = [30, 50, 80, 100, 140, 200, 300, 400, 500]\n",
"# NOTE(review): this plots the tick values against themselves (a y = x line);\n",
"# presumably kept only for its effect on the log axis -- confirm.\n",
"ax.semilogy(y, y)\n",
"ax.set_yticks(y)\n",
"ax.set_yticklabels(y)\n",
"\n",
"# Horizontal line for 200\n",
"ax.plot([-10, 10], [200, 200], '--', color=\"red\")\n",
"\n",
"# Glucose level per age bucket, one box per city. `ax` is the current axes,\n",
"# so passing it explicitly draws on the same axes as before.\n",
"sns.boxplot(\n",
"    x=\"age_bucket\", y=\"glucose_level\", data=patients_glucose_2h_after_meal,\n",
"    hue='city', fliersize=10, whis=0.9, ax=ax)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"## Ideas:\n",
"# Wrocław vs. Szczecin"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"## BMI\n",
"f, ax = plt.subplots(figsize=(9, 9))\n",
"\n",
"# Log scale and manually picked labels\n",
"ax.set_yscale('log')\n",
"\n",
"y = [30, 50, 80, 100, 140, 200, 300, 400, 500]\n",
"# NOTE(review): this plots the tick values against themselves (a y = x line);\n",
"# presumably kept only for its effect on the log axis -- confirm.\n",
"ax.semilogy(y, y)\n",
"ax.set_yticks(y)\n",
"ax.set_yticklabels(y)\n",
"\n",
"# Horizontal line for 200\n",
"ax.plot([-10, 10], [200, 200], '--', color=\"red\")\n",
"\n",
"# BMI buckets shown on the x axis, in this order ('wychudzenie' is not listed).\n",
"bmi_bucket_order = ['niedowaga', 'prawidłowe', 'nadwaga', 'otyłość I', 'otyłość II', 'otyłość III']\n",
"\n",
"sns.stripplot(\n",
"    x=\"bmi_bucket\", y=\"glucose_level\",\n",
"    data=patients_glucose_less_than_2h_after_meal,\n",
"    jitter=0.25, hue='smoker', split=True, ax=ax,\n",
"    order=bmi_bucket_order)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# xkcd colour names, one per BMI bucket listed in `bmi_hue_order`.\n",
"colors = ['blue', 'green', 'yellow', 'orange', 'red', 'dark red']\n",
"# Alternative palette:\n",
"# colors = ['blue', 'green', 'red', 'red', 'red', 'red']\n",
"bmi_hue_order = ['niedowaga', 'prawidłowe', 'nadwaga', 'otyłość I', 'otyłość II', 'otyłość III']\n",
"\n",
"# Diastolic vs. systolic pressure, coloured by BMI bucket.\n",
"sns.pairplot(\n",
"    patients_blood_pressure,\n",
"    x_vars=['diastolic'],\n",
"    y_vars=['systolic'],\n",
"    hue='bmi_bucket',\n",
"    size=8,\n",
"    markers='o',\n",
"    palette=sns.xkcd_palette(colors),\n",
"    hue_order=bmi_hue_order,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.0"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Display the source blob
Display the rendered blob
Raw
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Display the source blob
Display the rendered blob
Raw
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment