ShawnHymel/ei_json_to_csv.ipynb

## ei_json_to_csv.ipynb
{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": [],
      "collapsed_sections": []
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "source": [
        "# Convert Edge Impulse JSON files to CSV\n",
        "\n",
        "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/gist/ShawnHymel/35e38ce42d790704bcf6b2511039d4dc/ei_json_to_csv.ipynb)\n",
        "\n",
        "Create a folder named *dataset* in */content*. Upload your JSON dataset to */content/dataset*. Run this Notebook. Your CSV data will be stored in */content/out.zip* (right-click to download it).\n",
        "\n",
        "Link to this gist: https://gist.github.com/ShawnHymel/35e38ce42d790704bcf6b2511039d4dc"
      ],
      "metadata": {
        "id": "YU7SAqSo2Bc9"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "### Install ujson (a much faster JSON parser, used to load the samples)\n",
        "# %pip installs into the environment of the running kernel, whereas\n",
        "# `!python -m pip` uses whichever `python` the shell happens to find first\n",
        "%pip install -q ujson"
      ],
      "metadata": {
        "id": "yUSSCK1q_AOS"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "fJfqV60H18ri"
      },
      "outputs": [],
      "source": [
        "import ujson\n",
        "import csv\n",
        "import os\n",
        "import shutil"
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "### Settings\n",
        "# Every other path is derived from HOME_PATH, so changing the working\n",
        "# directory in one place keeps all of them consistent\n",
        "HOME_PATH = \"/content\"                             # Location of the working directory\n",
        "DATASET_PATH = os.path.join(HOME_PATH, \"dataset\")  # Upload your JSON samples to this directory\n",
        "OUT_PATH = os.path.join(HOME_PATH, \"out\")          # Where output files go (will be deleted and recreated)\n",
        "OUT_ZIP = os.path.join(HOME_PATH, \"out.zip\")       # Where to store the zipped output files"
      ],
      "metadata": {
        "id": "TFkfo8zp2E80"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "### Start from an empty output directory\n",
        "\n",
        "# Throw away whatever an earlier run left behind...\n",
        "if os.path.exists(OUT_PATH):\n",
        "  shutil.rmtree(OUT_PATH)\n",
        "\n",
        "# ...then create the directory afresh (it must not exist at this point)\n",
        "os.makedirs(OUT_PATH, exist_ok=False)"
      ],
      "metadata": {
        "id": "eLuisOsYF9EN"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "### Convert each JSON sample to a CSV file\n",
        "\n",
        "# Go through each entry in the input directory\n",
        "for filename in os.listdir(DATASET_PATH):\n",
        "\n",
        "  # Skip anything that is not a regular file (e.g. a .ipynb_checkpoints\n",
        "  # folder), since open() cannot read a directory\n",
        "  in_filepath = os.path.join(DATASET_PATH, filename)\n",
        "  if not os.path.isfile(in_filepath):\n",
        "    continue\n",
        "\n",
        "  # Load JSON (the input file is closed as soon as parsing is done)\n",
        "  with open(in_filepath, 'r') as json_file:\n",
        "    payload = ujson.load(json_file)['payload']\n",
        "\n",
        "  # Parse the sample interval (in ms), header, and data\n",
        "  interval_ms = payload['interval_ms']\n",
        "  header = [\"timestamp\"] + [sensor['name'] for sensor in payload['sensors']]\n",
        "  values = payload['values']\n",
        "\n",
        "  # Write header to CSV file. The csv module expects files opened with\n",
        "  # newline='' so that it alone controls the line endings\n",
        "  out_filepath = os.path.join(OUT_PATH, os.path.splitext(filename)[0] + \".csv\")\n",
        "  with open(out_filepath, 'w', newline='') as csv_file:\n",
        "    writer = csv.writer(csv_file)\n",
        "    writer.writerow(header)\n",
        "\n",
        "    # Prepend timestamp (row index * interval) and write data rows to CSV\n",
        "    # file, building a new row instead of mutating the parsed data\n",
        "    for t, line in enumerate(values):\n",
        "      writer.writerow([t * interval_ms, *line])"
      ],
      "metadata": {
        "id": "9UpLqfAf2HHT"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "### Zip output directory\n",
        "# shutil.make_archive appends the \".zip\" extension itself, so OUT_ZIP is\n",
        "# passed without it. Compared with `%cd` + `!zip`, this needs no external zip\n",
        "# binary and leaves the kernel's working directory unchanged.\n",
        "# The path of the archive that was written is shown as the cell output.\n",
        "shutil.make_archive(os.path.splitext(OUT_ZIP)[0], \"zip\", root_dir=OUT_PATH)"
      ],
      "metadata": {
        "id": "fbikuHB2GTUq"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [],
      "metadata": {
        "id": "BN38FIQbGwE2"
      },
      "execution_count": null,
      "outputs": []
    }
  ]
}
	{
	"nbformat": 4,
	"nbformat_minor": 0,
	"metadata": {
	"colab": {
	"provenance": [],
	"collapsed_sections": []
	},
	"kernelspec": {
	"name": "python3",
	"display_name": "Python 3"
	},
	"language_info": {
	"name": "python"
	}
	},
	"cells": [
	{
	"cell_type": "markdown",
	"source": [
	"# Convert Edge Impulse JSON files to CSV\n",
	"\n",
	"[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/gist/ShawnHymel/35e38ce42d790704bcf6b2511039d4dc/ei_json_to_csv.ipynb)\n",
	"\n",
	"Create a folder named dataset in /content. Upload your JSON dataset to /content/dataset. Run this Notebook. Your CSV data will be stored in /content/out.zip (right-click to download it).\n",
	"\n",
	"Link to this gist: https://gist.github.com/ShawnHymel/35e38ce42d790704bcf6b2511039d4dc"
	],
	"metadata": {
	"id": "YU7SAqSo2Bc9"
	}
	},
	{
	"cell_type": "code",
	"source": [
	"### Install ujson (a much faster JSON parser, used to load the samples)\n",
	"# %pip installs into the environment of the running kernel, whereas\n",
	"# `!python -m pip` uses whichever `python` the shell happens to find first\n",
	"%pip install -q ujson"
	],
	"metadata": {
	"id": "yUSSCK1q_AOS"
	},
	"execution_count": null,
	"outputs": []
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"id": "fJfqV60H18ri"
	},
	"outputs": [],
	"source": [
	"import ujson\n",
	"import csv\n",
	"import os\n",
	"import shutil"
	]
	},
	{
	"cell_type": "code",
	"source": [
	"### Settings\n",
	"# Every other path is derived from HOME_PATH, so changing the working\n",
	"# directory in one place keeps all of them consistent\n",
	"HOME_PATH = \"/content\"                             # Location of the working directory\n",
	"DATASET_PATH = os.path.join(HOME_PATH, \"dataset\")  # Upload your JSON samples to this directory\n",
	"OUT_PATH = os.path.join(HOME_PATH, \"out\")          # Where output files go (will be deleted and recreated)\n",
	"OUT_ZIP = os.path.join(HOME_PATH, \"out.zip\")       # Where to store the zipped output files"
	],
	"metadata": {
	"id": "TFkfo8zp2E80"
	},
	"execution_count": null,
	"outputs": []
	},
	{
	"cell_type": "code",
	"source": [
	"### Start from an empty output directory\n",
	"\n",
	"# Throw away whatever an earlier run left behind...\n",
	"if os.path.exists(OUT_PATH):\n",
	"  shutil.rmtree(OUT_PATH)\n",
	"\n",
	"# ...then create the directory afresh (it must not exist at this point)\n",
	"os.makedirs(OUT_PATH, exist_ok=False)"
	],
	"metadata": {
	"id": "eLuisOsYF9EN"
	},
	"execution_count": null,
	"outputs": []
	},
	{
	"cell_type": "code",
	"source": [
	"### Convert each JSON sample to a CSV file\n",
	"\n",
	"# Go through each entry in the input directory\n",
	"for filename in os.listdir(DATASET_PATH):\n",
	"\n",
	"  # Skip anything that is not a regular file (e.g. a .ipynb_checkpoints\n",
	"  # folder), since open() cannot read a directory\n",
	"  in_filepath = os.path.join(DATASET_PATH, filename)\n",
	"  if not os.path.isfile(in_filepath):\n",
	"    continue\n",
	"\n",
	"  # Load JSON (the input file is closed as soon as parsing is done)\n",
	"  with open(in_filepath, 'r') as json_file:\n",
	"    payload = ujson.load(json_file)['payload']\n",
	"\n",
	"  # Parse the sample interval (in ms), header, and data\n",
	"  interval_ms = payload['interval_ms']\n",
	"  header = [\"timestamp\"] + [sensor['name'] for sensor in payload['sensors']]\n",
	"  values = payload['values']\n",
	"\n",
	"  # Write header to CSV file. The csv module expects files opened with\n",
	"  # newline='' so that it alone controls the line endings\n",
	"  out_filepath = os.path.join(OUT_PATH, os.path.splitext(filename)[0] + \".csv\")\n",
	"  with open(out_filepath, 'w', newline='') as csv_file:\n",
	"    writer = csv.writer(csv_file)\n",
	"    writer.writerow(header)\n",
	"\n",
	"    # Prepend timestamp (row index * interval) and write data rows to CSV\n",
	"    # file, building a new row instead of mutating the parsed data\n",
	"    for t, line in enumerate(values):\n",
	"      writer.writerow([t * interval_ms, *line])"
	],
	"metadata": {
	"id": "9UpLqfAf2HHT"
	},
	"execution_count": null,
	"outputs": []
	},
	{
	"cell_type": "code",
	"source": [
	"### Zip output directory\n",
	"# shutil.make_archive appends the \".zip\" extension itself, so OUT_ZIP is\n",
	"# passed without it. Compared with `%cd` + `!zip`, this needs no external zip\n",
	"# binary and leaves the kernel's working directory unchanged.\n",
	"# The path of the archive that was written is shown as the cell output.\n",
	"shutil.make_archive(os.path.splitext(OUT_ZIP)[0], \"zip\", root_dir=OUT_PATH)"
	],
	"metadata": {
	"id": "fbikuHB2GTUq"
	},
	"execution_count": null,
	"outputs": []
	},
	{
	"cell_type": "code",
	"source": [],
	"metadata": {
	"id": "BN38FIQbGwE2"
	},
	"execution_count": null,
	"outputs": []
	}
	]
	}