{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# QBO Transform Script #\n",
"This transformation script converts P&L report into an actuals.json to that has categorized transactions by department/project."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import gluestick as gs\n",
"import pandas as pd\n",
"import os\n",
"import json\n",
"import ast\n",
"from datetime import datetime\n",
"from lib import util"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's establish the standard hotglue input/output directories"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# standard directory for hotglue\n",
"ROOT_DIR = os.environ.get(\"ROOT_DIR\", \".\")\n",
"INPUT_DIR = f\"{ROOT_DIR}/sync-output\"\n",
"OUTPUT_DIR = f\"{ROOT_DIR}/etl-output\"\n",
"SNAPSHOT_DIR = f\"{ROOT_DIR}/snapshots\""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's start by reading the data. \n",
"\n",
"We will use the [gluestick](https://pypi.org/project/gluestick/) package to read the raw data in the input folder into a dictionary of pandas dataframes using the `read_csv_folder` function."
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Read input data\n",
"input_data = gs.read_csv_folder(INPUT_DIR)"
]
},
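{
"cell_type": "markdown",
"metadata": {},
"source": [
"`input_data` is simply a dict of DataFrames keyed by stream name. As an optional sanity check (not required by the pipeline), you can list which streams gluestick found:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Optional: confirm which streams were read from INPUT_DIR\n",
"print(sorted(input_data.keys()))"
]
},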
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Process the snapshots"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# Accounts\n",
"if input_data.get(\"Account\") is not None:\n",
" util.update_snapshot(\n",
" SNAPSHOT_DIR, \n",
" \"Account\",\n",
" ['Id'], \n",
" input_data[\"Account\"]\n",
" )\n",
"\n",
"# Invoices\n",
"if input_data.get(\"Invoice\") is not None:\n",
" util.update_snapshot(\n",
" SNAPSHOT_DIR, \n",
" \"Invoice\",\n",
" ['Id'], \n",
" input_data[\"Invoice\"]\n",
" )\n",
"\n",
"# Item\n",
"if input_data.get(\"Item\") is not None:\n",
" util.update_snapshot(\n",
" SNAPSHOT_DIR, \n",
" \"Item\",\n",
" ['Id'], \n",
" input_data[\"Item\"]\n",
" )\n",
"\n",
"# Purchase\n",
"if input_data.get(\"Purchase\") is not None:\n",
" util.update_snapshot(\n",
" SNAPSHOT_DIR, \n",
" \"Purchase\",\n",
" ['Id'], \n",
" input_data[\"Purchase\"]\n",
" )"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"input_data[\"Account\"] = util.get_snapshot(SNAPSHOT_DIR, \"Account\")\n",
"input_data[\"Invoice\"] = util.get_snapshot(SNAPSHOT_DIR, \"Invoice\")\n",
"input_data[\"Item\"] = util.get_snapshot(SNAPSHOT_DIR, \"Item\")\n",
"input_data[\"Purchase\"] = util.get_snapshot(SNAPSHOT_DIR, \"Purchase\")"
]
},
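{
"cell_type": "markdown",
"metadata": {},
"source": [
"Optionally, check how many records each merged snapshot now holds (a quick sanity check, not required by the pipeline):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Optional: row counts per snapshot after merging\n",
"print({name: len(input_data[name]) for name in ['Account', 'Invoice', 'Item', 'Purchase']})"
]
},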
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Process the P&L report"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"pl_df = input_data['ProfitAndLossDetailReport']"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Split the class names into Department, Project, Worker Type, and Worker Role, if applicable"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"split_pl = pl_df.Class.str.split(pat=\":\", expand=True)\n",
"\n",
"if len(split_pl.columns) == 4:\n",
" pl_df[['Department', 'Project', 'Worker Type', 'Worker Role']] = split_pl\n",
"else:\n",
" pl_df[['Department', 'Project']] = split_pl"
]
},
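{
"cell_type": "markdown",
"metadata": {},
"source": [
"For reference, `str.split(pat=\":\", expand=True)` turns each class name into one column per segment. A small illustration with a made-up class name:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Illustrative only: this class name is hypothetical\n",
"pd.Series(['Engineering:Project A:Employee:Engineer']).str.split(pat=\":\", expand=True)"
]
},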
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Create the dictionary where all new data is saved (actuals)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"actuals = {}\n",
"\n",
"def handle_defaults(obj, args):\n",
" if len(args) == 0:\n",
" return\n",
"\n",
" arg = args[0]\n",
" obj.setdefault(arg, {})\n",
" args.pop(0)\n",
" handle_defaults(obj[arg], args) "
]
},
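{
"cell_type": "markdown",
"metadata": {},
"source": [
"As a quick illustration (not part of the pipeline), `handle_defaults` creates each level of the nested dictionary on demand:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Illustrative only: these keys are made up; the real keys come from the P&L rows\n",
"example = {}\n",
"handle_defaults(example, ['Sales', 'Project A', 'revenues', '2021-11'])\n",
"print(example)  # {'Sales': {'Project A': {'revenues': {'2021-11': {}}}}}"
]
},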
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Revenues"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"ename": "ValueError",
"evalue": "time data '3/17/21' does not match format '%Y-%m-%d'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-9-2d1c67bb90fd>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[0mproject\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mrow\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'Project'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 9\u001b[0m \u001b[0mdocno\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mrow\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'Num'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 10\u001b[0;31m \u001b[0mdate\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdatetime\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstrptime\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrow\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'Date'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'%Y-%m-%d'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 11\u001b[0m \u001b[0mdate\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdate\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0misocalendar\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 12\u001b[0m \u001b[0mperiod\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34mf\"{date[0]}-{date[1]}\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/lib64/python3.7/_strptime.py\u001b[0m in \u001b[0;36m_strptime_datetime\u001b[0;34m(cls, data_string, format)\u001b[0m\n\u001b[1;32m 575\u001b[0m \"\"\"Return a class cls instance based on the input string and the\n\u001b[1;32m 576\u001b[0m format string.\"\"\"\n\u001b[0;32m--> 577\u001b[0;31m \u001b[0mtt\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfraction\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mgmtoff_fraction\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_strptime\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata_string\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mformat\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 578\u001b[0m \u001b[0mtzname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mgmtoff\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtt\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 579\u001b[0m \u001b[0margs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtt\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;36m6\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mfraction\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/lib64/python3.7/_strptime.py\u001b[0m in \u001b[0;36m_strptime\u001b[0;34m(data_string, format)\u001b[0m\n\u001b[1;32m 357\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mfound\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 358\u001b[0m raise ValueError(\"time data %r does not match format %r\" %\n\u001b[0;32m--> 359\u001b[0;31m (data_string, format))\n\u001b[0m\u001b[1;32m 360\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata_string\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0mfound\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 361\u001b[0m raise ValueError(\"unconverted data remains: %s\" %\n",
"\u001b[0;31mValueError\u001b[0m: time data '3/17/21' does not match format '%Y-%m-%d'"
]
}
],
"source": [
"invoices_df = pl_df[pl_df['TransactionType'] == 'Invoice']\n",
"invoices = input_data['Invoice']\n",
"items = input_data['Item']\n",
"\n",
"for index, row in invoices_df.iterrows():\n",
" categories = ast.literal_eval(row['Categories'])\n",
" department = row['Department']\n",
" project = row['Project']\n",
" docno = row['Num']\n",
" date = datetime.strptime(row['Date'], '%Y-%m-%d')\n",
" date = date.isocalendar()\n",
" period = f\"{date[0]}-{date[1]}\"\n",
" # Get item name\n",
" cat = categories[2]\n",
"\n",
" invoice = invoices[invoices['DocNumber'] == docno].iloc[0]\n",
" invoice_line = ast.literal_eval(invoice['Line'])\n",
" \n",
" for l in invoice_line:\n",
" if l['DetailType'] == 'SalesItemLineDetail':\n",
" item = ast.literal_eval(l['SalesItemLineDetail']['ItemRef'])\n",
" item_name = item['name']\n",
" item_data = items[items['FullyQualifiedName'] == item_name].iloc[0]\n",
" item_name = item_name.split(\":\")\n",
" item_category = item_name[0]\n",
" item_name = item_name[1]\n",
" \n",
" handle_defaults(actuals, [department, project, 'revenues', period, cat, item_category, item_name])\n",
"\n",
" actuals[department][project]['revenues'][period][cat][item_category][item_name] = {\n",
" 'total': l['Amount'],\n",
" 'qty': item['value'],\n",
" 'rate': item_data['UnitPrice']\n",
" }"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Expenses (Purchases)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"purchases = input_data['Purchase']\n",
"purchases = gs.explode_json_to_cols(purchases, \"Line\", max_level=1)\n",
"purchases = purchases[purchases['Line.ItemBasedExpenseLineDetail'].notnull()]\n",
"purchases = gs.explode_json_to_cols(purchases, 'Line.ItemBasedExpenseLineDetail', max_level=1)\n",
"\n",
"purchases = gs.explode_json_to_cols(purchases, 'Line.ItemBasedExpenseLineDetail.ClassRef', max_level=1)\n",
"purchases = gs.explode_json_to_cols(purchases, 'Line.ItemBasedExpenseLineDetail.ItemRef', max_level=1)\n",
"\n",
"purchases = purchases.rename(columns={\n",
" 'Line.ItemBasedExpenseLineDetail.ClassRef.name': 'ClassName',\n",
" 'Line.ItemBasedExpenseLineDetail.UnitPrice': 'ItemRate',\n",
" 'Line.ItemBasedExpenseLineDetail.Qty': 'ItemQty',\n",
" 'Line.ItemBasedExpenseLineDetail.ItemRef.name': 'ItemName'\n",
"})\n",
"\n",
"purchases = purchases[purchases['ClassName'].notnull()]"
]
},
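{
"cell_type": "markdown",
"metadata": {},
"source": [
"Optionally, preview the flattened purchase lines. The column names below come from the renames above; `Line.Amount` is the exploded line amount used later for the COGS totals:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Optional preview of the columns the expense loop relies on\n",
"purchases[['ClassName', 'ItemName', 'ItemQty', 'ItemRate', 'Line.Amount']].head()"
]
},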
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"expenses_df = pl_df[pl_df['TransactionType'] == 'Expense']\n",
"\n",
"for index, row in expenses_df.iterrows():\n",
" categories = ast.literal_eval(row['Categories'])\n",
" department = row['Department']\n",
" project = row['Project']\n",
" date = datetime.strptime(row['Date'], '%Y-%m-%d')\n",
" date = date.isocalendar()\n",
" period = f\"{date[0]}-{date[1]}\"\n",
" \n",
" # COGS\n",
" if categories[1] == 'Cost of Goods Sold':\n",
" item = categories[2]\n",
" \n",
" # Get items\n",
" relevant = purchases[purchases['ClassName'] == f'{department}:{project}']\n",
"\n",
" for index, item_row in relevant.iterrows(): \n",
" item_name = item_row['ItemName']\n",
" item_name = item_name.split(\":\")\n",
" item_category = item_name[0]\n",
" item_name = item_name[1]\n",
" \n",
" handle_defaults(actuals, [department, project, 'cogs', period, item, item_category, item_name])\n",
" actuals[department][project]['cogs'][period][item][item_category][item_name] = {\n",
" 'total': item_row['Line.Amount'],\n",
" 'qty': item_row['ItemQty'],\n",
" 'rate': item_row['ItemRate']\n",
" }\n",
"\n",
" # Costs\n",
" if categories[1] == 'Expenses' and row['Worker Type'] is None:\n",
" print(categories)\n",
" expense_type = categories[2]\n",
" item_cat = categories[3]\n",
" item = categories[4]\n",
" handle_defaults(actuals, [department, project, 'costs', period, expense_type, item_cat, item])\n",
" actuals[department][project]['costs'][period][expense_type][item_cat][item] = row['Amount']\n",
" \n",
" # Labor\n",
" if categories[1] == 'Expenses' and row['Worker Type'] is not None:\n",
" worker_type = row['Worker Type']\n",
" worker_role = row['Worker Role']\n",
" item_cat = categories[2]\n",
"\n",
" handle_defaults(actuals, [department, project, 'labor', period, worker_type, worker_role, item_cat])\n",
" actuals[department][project]['labor'][period][worker_type][worker_role][item_cat] = row['Amount'] "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Preview the result"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(json.dumps(actuals, indent=4))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Export the data"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"with open(f'{OUTPUT_DIR}/actuals.json', 'w') as outfile:\n",
" json.dump(actuals, outfile)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.9"
}
},
"nbformat": 4,
"nbformat_minor": 4
}