Skip to content

Instantly share code, notes, and snippets.

@TaylorBurnham
Created November 29, 2021 11:55
Show Gist options
  • Save TaylorBurnham/3261dbc93b85f9f5f5d4e330e97af108 to your computer and use it in GitHub Desktop.
Save TaylorBurnham/3261dbc93b85f9f5f5d4e330e97af108 to your computer and use it in GitHub Desktop.
Fitbit Heart Rate data to Parquet
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"id": "586e02f8-0a48-4584-bdda-46c3bf509e4d",
"metadata": {
"tags": []
},
"source": [
"# Fitbit Heart Rate to Parquet\n",
"\n",
"The Fitbit heartrate data is in structured JSON files that can take a\n",
"pretty long time to parse. An example of a sampling is below.\n",
"\n",
"```json\n",
"{\n",
" \"dateTime\": \"11/28/21 05:00:03\",\n",
" \"value\": {\n",
" \"bpm\": 65,\n",
" \"confidence\": 3\n",
" }\n",
"}\n",
"```\n",
"\n",
"The JSON structure tells me at some point there may be more data added,\n",
"but for now it needs to be flattened and leads to the output below.\n",
"\n",
"```\n",
" bpm confidence\n",
"dateTime \n",
"2016-05-06 13:34:30+00:00 99 1\n",
"2021-07-25 03:32:55+00:00 65 3\n",
"2021-09-12 05:26:08+00:00 59 3\n",
"```\n",
"\n",
"The \"confidence\" value indicates how _confident_ the device was at the\n",
"time of sampling the heart rate. This can also mean there are duplicate\n",
"values for the same time with different confidence codes. There's not a\n",
"lot of documentation on this, so as part of the `import_heartrate_file`\n",
"function it will sort by the \"confidence\" value, group by \"dateTime\",\n",
"and then grab the first value which will always be the highest\n",
"confidence.\n",
"\n",
"The Parquet files are split by month but you can adjust the rule in the\n",
"last code cell. The files are much faster to load into memory and work\n",
"with as needed and saves a lot of time when resuming work."
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "b4bb9299-7faf-472f-8296-e0c008547a42",
"metadata": {
"execution": {
"iopub.execute_input": "2021-11-29T11:19:08.612448Z",
"iopub.status.busy": "2021-11-29T11:19:08.611948Z",
"iopub.status.idle": "2021-11-29T11:19:09.116946Z",
"shell.execute_reply": "2021-11-29T11:19:09.116444Z",
"shell.execute_reply.started": "2021-11-29T11:19:08.612448Z"
},
"tags": []
},
"outputs": [],
"source": [
"import os\n",
"import pandas as pd\n",
"from glob import glob\n",
"from tqdm import tqdm\n",
"\n",
"\n",
"def import_heartrate_file(file):\n",
" df = pd.read_json(file)\n",
" df = df.join(df['value'].apply(pd.Series)).drop(columns=['value'])\n",
" df = df.sort_values('confidence', ascending=False).groupby('dateTime').first()\n",
" df = df.tz_localize('UTC')\n",
" return df"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "8382ec0d-b69c-41d1-b322-96a0403c5393",
"metadata": {
"execution": {
"iopub.execute_input": "2021-11-29T11:19:09.117945Z",
"iopub.status.busy": "2021-11-29T11:19:09.117447Z",
"iopub.status.idle": "2021-11-29T11:19:09.126946Z",
"shell.execute_reply": "2021-11-29T11:19:09.126446Z",
"shell.execute_reply.started": "2021-11-29T11:19:09.117945Z"
},
"tags": []
},
"outputs": [],
"source": [
"# Set these based off your input/output.\n",
"# This should be the relative or full path to your Fitbit Export.\n",
"data_path = 'Data/Physical Activity'\n",
"\n",
"# Relative or full path to the output folder.\n",
"output_path = 'Processed_Data'\n",
"\n",
"# The date glob to filter on. Set to None for all.\n",
"file_date = None\n",
"\n",
"# Build the glob pattern\n",
"if file_date:\n",
" glob_patt = f'{data_path}/heart_rate-{file_date}*'\n",
"else:\n",
" glob_patt = f'{data_path}/heart_rate-*'\n",
"\n",
"# Get the filelist\n",
"filelist = glob(glob_patt)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "b94f4586-c242-497c-bcf5-b655d8a54230",
"metadata": {
"execution": {
"iopub.execute_input": "2021-11-29T11:19:09.127949Z",
"iopub.status.busy": "2021-11-29T11:19:09.127949Z",
"iopub.status.idle": "2021-11-29T11:44:08.516445Z",
"shell.execute_reply": "2021-11-29T11:44:08.515946Z",
"shell.execute_reply.started": "2021-11-29T11:19:09.127949Z"
},
"tags": []
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|████████████████████████████████████████████████████████████████████████████████| 502/502 [24:59<00:00, 2.99s/it]\n"
]
}
],
"source": [
"dfs = []\n",
"for file in tqdm(filelist):\n",
" dfs.append(import_heartrate_file(file))"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "bdf28018-a653-45c3-b032-3d782d66b1eb",
"metadata": {
"execution": {
"iopub.execute_input": "2021-11-29T11:44:08.517446Z",
"iopub.status.busy": "2021-11-29T11:44:08.516945Z",
"iopub.status.idle": "2021-11-29T11:44:08.602447Z",
"shell.execute_reply": "2021-11-29T11:44:08.601944Z",
"shell.execute_reply.started": "2021-11-29T11:44:08.517446Z"
},
"tags": []
},
"outputs": [],
"source": [
"# Concat and sort data.\n",
"df = pd.concat(dfs).sort_index()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "59a70874-e939-4a1f-8bbf-ce3d025785a7",
"metadata": {
"execution": {
"iopub.execute_input": "2021-11-29T11:44:08.603449Z",
"iopub.status.busy": "2021-11-29T11:44:08.602948Z",
"iopub.status.idle": "2021-11-29T11:44:08.607950Z",
"shell.execute_reply": "2021-11-29T11:44:08.607447Z",
"shell.execute_reply.started": "2021-11-29T11:44:08.603449Z"
},
"tags": []
},
"outputs": [],
"source": [
"# Write the file out, grouping by month.\n",
"if not os.path.exists(output_path):\n",
" os.makedirs(output_path, exist_ok=True)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "27801777-cd32-416f-b7c2-c3b6a0f79aa6",
"metadata": {
"execution": {
"iopub.execute_input": "2021-11-29T11:44:08.609454Z",
"iopub.status.busy": "2021-11-29T11:44:08.608947Z",
"iopub.status.idle": "2021-11-29T11:44:12.204446Z",
"shell.execute_reply": "2021-11-29T11:44:12.203951Z",
"shell.execute_reply.started": "2021-11-29T11:44:08.609454Z"
},
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Writing to 'Heart Rate Data - 2016-02-20 to 2016-02-29.parquet.gzip'\n",
"Writing to 'Heart Rate Data - 2016-03-01 to 2016-03-31.parquet.gzip'\n",
"Writing to 'Heart Rate Data - 2016-04-01 to 2016-04-30.parquet.gzip'\n",
"Writing to 'Heart Rate Data - 2016-05-01 to 2016-05-31.parquet.gzip'\n",
"Writing to 'Heart Rate Data - 2016-06-01 to 2016-06-30.parquet.gzip'\n",
"Writing to 'Heart Rate Data - 2016-07-01 to 2016-07-31.parquet.gzip'\n",
"Writing to 'Heart Rate Data - 2016-08-01 to 2016-08-05.parquet.gzip'\n",
"Writing to 'Heart Rate Data - 2020-12-11 to 2020-12-31.parquet.gzip'\n",
"Writing to 'Heart Rate Data - 2021-01-01 to 2021-01-31.parquet.gzip'\n",
"Writing to 'Heart Rate Data - 2021-02-01 to 2021-02-28.parquet.gzip'\n",
"Writing to 'Heart Rate Data - 2021-03-01 to 2021-03-31.parquet.gzip'\n",
"Writing to 'Heart Rate Data - 2021-04-01 to 2021-04-30.parquet.gzip'\n",
"Writing to 'Heart Rate Data - 2021-05-01 to 2021-05-31.parquet.gzip'\n",
"Writing to 'Heart Rate Data - 2021-06-01 to 2021-06-30.parquet.gzip'\n",
"Writing to 'Heart Rate Data - 2021-07-01 to 2021-07-31.parquet.gzip'\n",
"Writing to 'Heart Rate Data - 2021-08-01 to 2021-08-31.parquet.gzip'\n",
"Writing to 'Heart Rate Data - 2021-09-01 to 2021-09-30.parquet.gzip'\n",
"Writing to 'Heart Rate Data - 2021-10-01 to 2021-10-31.parquet.gzip'\n",
"Writing to 'Heart Rate Data - 2021-11-01 to 2021-11-29.parquet.gzip'\n"
]
}
],
"source": [
"# Resample by month. Adjust per the docs.\n",
"rs_rule = 'M'\n",
"for month, df_m in df.resample(rule=rs_rule):\n",
" if df_m.empty:\n",
" continue\n",
" ix_min = df_m.index.min().strftime(\"%F\")\n",
" ix_max = df_m.index.max().strftime(\"%F\")\n",
" output_file = f\"Heart Rate Data - {ix_min} to {ix_max}.parquet.gzip\"\n",
" output_filepath = os.path.join(output_path, output_file)\n",
" print(f\"Writing to '{output_file}'\")\n",
" df_m.to_parquet(output_filepath, compression='gzip', index=True)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Merlion (3.9)",
"language": "python",
"name": "merlion"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.7"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment