-
-
Save hsyyid/a863a13c2c1aae985ccd3d0a0d362924 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# QBO Transform Script #\n", | |
"This transformation script converts a P&L report into an actuals.json that contains transactions categorized by department/project." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import gluestick as gs\n", | |
"import pandas as pd\n", | |
"import os\n", | |
"import json\n", | |
"import ast\n", | |
"from datetime import datetime\n", | |
"from lib import util" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Let's establish the standard hotglue input/output directories" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Standard hotglue directories; ROOT_DIR is overridable via env var\n", | |
"ROOT_DIR = os.environ.get(\"ROOT_DIR\", \".\")\n", | |
"INPUT_DIR = f\"{ROOT_DIR}/sync-output\"\n", | |
"OUTPUT_DIR = f\"{ROOT_DIR}/etl-output\"\n", | |
"SNAPSHOT_DIR = f\"{ROOT_DIR}/snapshots\"" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Let's start by reading the data. \n", | |
"\n", | |
"We will use the [gluestick](https://pypi.org/project/gluestick/) package to read the raw data in the input folder into a dictionary of pandas dataframes using the `read_csv_folder` function." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Read every CSV in the sync output into a dict of {stream name: DataFrame}\n", | |
"input_data = gs.read_csv_folder(INPUT_DIR)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Process the snapshots" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Merge newly synced records into the on-disk snapshots, keyed by Id.\n", | |
"# One loop replaces four copy-pasted per-stream blocks.\n", | |
"for stream in [\"Account\", \"Invoice\", \"Item\", \"Purchase\"]:\n", | |
"    if input_data.get(stream) is not None:\n", | |
"        util.update_snapshot(\n", | |
"            SNAPSHOT_DIR,\n", | |
"            stream,\n", | |
"            ['Id'],\n", | |
"            input_data[stream]\n", | |
"        )" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Replace the (possibly incremental) sync data with the full snapshot,\n", | |
"# so downstream lookups see historical records as well as new ones\n", | |
"for stream in [\"Account\", \"Invoice\", \"Item\", \"Purchase\"]:\n", | |
"    input_data[stream] = util.get_snapshot(SNAPSHOT_DIR, stream)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Process the P&L report" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# The Profit & Loss detail report: one transaction line per row\n", | |
"pl_df = input_data['ProfitAndLossDetailReport']" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Split the class names into Department, Project, Worker Type, and Worker Role, if applicable" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Class names look like 'Department:Project[:Worker Type[:Worker Role]]'.\n", | |
"# Assign whichever levels are present; missing levels become None so later\n", | |
"# cells can test them without crashing. (The old 2-or-4 assumption raised\n", | |
"# on any other column count and left the Worker columns undefined,\n", | |
"# which made row['Worker Type'] a KeyError downstream.)\n", | |
"split_pl = pl_df.Class.str.split(pat=\":\", expand=True)\n", | |
"\n", | |
"class_levels = ['Department', 'Project', 'Worker Type', 'Worker Role']\n", | |
"for i, level in enumerate(class_levels):\n", | |
"    pl_df[level] = split_pl[i] if i in split_pl.columns else None" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Create the dictionary where all new data is saved (actuals)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"actuals = {}\n", | |
"\n", | |
"def handle_defaults(obj, args):\n", | |
"    \"\"\"Ensure the nested-dict path named by `args` exists under `obj`.\n", | |
"\n", | |
"    e.g. handle_defaults(d, ['a', 'b']) guarantees d['a']['b'] is a dict,\n", | |
"    creating empty dicts along the way. Unlike the previous recursive\n", | |
"    version, this no longer pops `args`, so the caller's list is intact.\n", | |
"    \"\"\"\n", | |
"    for key in args:\n", | |
"        obj = obj.setdefault(key, {})" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Revenues" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Build the revenue side of `actuals` from invoice rows in the P&L report.\n", | |
"invoices_df = pl_df[pl_df['TransactionType'] == 'Invoice']\n", | |
"invoices = input_data['Invoice']\n", | |
"items = input_data['Item']\n", | |
"\n", | |
"for index, row in invoices_df.iterrows():\n", | |
"    categories = ast.literal_eval(row['Categories'])\n", | |
"    department = row['Department']\n", | |
"    project = row['Project']\n", | |
"    docno = row['Num']\n", | |
"    # Report dates are US short form (e.g. '3/17/21'); the previous\n", | |
"    # '%Y-%m-%d' format raised ValueError on real data\n", | |
"    date = datetime.strptime(row['Date'], '%m/%d/%y')\n", | |
"    date = date.isocalendar()\n", | |
"    # Bucket by ISO year-week, e.g. '2021-11'\n", | |
"    period = f\"{date[0]}-{date[1]}\"\n", | |
"    # Get item name\n", | |
"    cat = categories[2]\n", | |
"\n", | |
"    # Match the report row back to the full invoice to read its line items\n", | |
"    invoice = invoices[invoices['DocNumber'] == docno].iloc[0]\n", | |
"    invoice_line = ast.literal_eval(invoice['Line'])\n", | |
"\n", | |
"    for l in invoice_line:\n", | |
"        if l['DetailType'] == 'SalesItemLineDetail':\n", | |
"            item = ast.literal_eval(l['SalesItemLineDetail']['ItemRef'])\n", | |
"            item_name = item['name']\n", | |
"            item_data = items[items['FullyQualifiedName'] == item_name].iloc[0]\n", | |
"            # Item names are 'Category:Name'\n", | |
"            item_name = item_name.split(\":\")\n", | |
"            item_category = item_name[0]\n", | |
"            item_name = item_name[1]\n", | |
"\n", | |
"            handle_defaults(actuals, [department, project, 'revenues', period, cat, item_category, item_name])\n", | |
"\n", | |
"            # NOTE(review): item['value'] is the QBO ItemRef id, not an\n", | |
"            # obvious quantity -- confirm 'qty' is meant to hold it\n", | |
"            actuals[department][project]['revenues'][period][cat][item_category][item_name] = {\n", | |
"                'total': l['Amount'],\n", | |
"                'qty': item['value'],\n", | |
"                'rate': item_data['UnitPrice']\n", | |
"            }" | |
] | |
}, | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Expenses (Purchases)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Flatten the nested JSON columns on Purchase records so class, item,\n", | |
"# qty, and rate become plain columns we can filter on.\n", | |
"purchases = input_data['Purchase']\n", | |
"purchases = gs.explode_json_to_cols(purchases, \"Line\", max_level=1)\n", | |
"# Keep only item-based expense lines (drops account-based lines)\n", | |
"purchases = purchases[purchases['Line.ItemBasedExpenseLineDetail'].notnull()]\n", | |
"purchases = gs.explode_json_to_cols(purchases, 'Line.ItemBasedExpenseLineDetail', max_level=1)\n", | |
"\n", | |
"purchases = gs.explode_json_to_cols(purchases, 'Line.ItemBasedExpenseLineDetail.ClassRef', max_level=1)\n", | |
"purchases = gs.explode_json_to_cols(purchases, 'Line.ItemBasedExpenseLineDetail.ItemRef', max_level=1)\n", | |
"\n", | |
"# Shorten the exploded column names for readability downstream\n", | |
"purchases = purchases.rename(columns={\n", | |
"    'Line.ItemBasedExpenseLineDetail.ClassRef.name': 'ClassName',\n", | |
"    'Line.ItemBasedExpenseLineDetail.UnitPrice': 'ItemRate',\n", | |
"    'Line.ItemBasedExpenseLineDetail.Qty': 'ItemQty',\n", | |
"    'Line.ItemBasedExpenseLineDetail.ItemRef.name': 'ItemName'\n", | |
"})\n", | |
"\n", | |
"# Unclassed purchases can't be attributed to a department/project\n", | |
"purchases = purchases[purchases['ClassName'].notnull()]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Build the expense side of `actuals`: COGS, general costs, and labor.\n", | |
"expenses_df = pl_df[pl_df['TransactionType'] == 'Expense']\n", | |
"\n", | |
"for index, row in expenses_df.iterrows():\n", | |
"    categories = ast.literal_eval(row['Categories'])\n", | |
"    department = row['Department']\n", | |
"    project = row['Project']\n", | |
"    # Report dates are US short form (e.g. '3/17/21'), not ISO '%Y-%m-%d'\n", | |
"    date = datetime.strptime(row['Date'], '%m/%d/%y')\n", | |
"    date = date.isocalendar()\n", | |
"    period = f\"{date[0]}-{date[1]}\"\n", | |
"    # .get() -> None when the class had no worker levels, in which case\n", | |
"    # the columns don't exist and row['Worker Type'] would raise KeyError\n", | |
"    worker_type = row.get('Worker Type')\n", | |
"    worker_role = row.get('Worker Role')\n", | |
"\n", | |
"    # COGS\n", | |
"    if categories[1] == 'Cost of Goods Sold':\n", | |
"        item = categories[2]\n", | |
"\n", | |
"        # Purchase lines booked against this department/project class\n", | |
"        relevant = purchases[purchases['ClassName'] == f'{department}:{project}']\n", | |
"\n", | |
"        for item_index, item_row in relevant.iterrows():\n", | |
"            # Item names are 'Category:Name'\n", | |
"            item_name = item_row['ItemName']\n", | |
"            item_name = item_name.split(\":\")\n", | |
"            item_category = item_name[0]\n", | |
"            item_name = item_name[1]\n", | |
"\n", | |
"            handle_defaults(actuals, [department, project, 'cogs', period, item, item_category, item_name])\n", | |
"            actuals[department][project]['cogs'][period][item][item_category][item_name] = {\n", | |
"                'total': item_row['Line.Amount'],\n", | |
"                'qty': item_row['ItemQty'],\n", | |
"                'rate': item_row['ItemRate']\n", | |
"            }\n", | |
"\n", | |
"    # Non-labor operating costs\n", | |
"    if categories[1] == 'Expenses' and worker_type is None:\n", | |
"        expense_type = categories[2]\n", | |
"        item_cat = categories[3]\n", | |
"        item = categories[4]\n", | |
"        handle_defaults(actuals, [department, project, 'costs', period, expense_type, item_cat, item])\n", | |
"        actuals[department][project]['costs'][period][expense_type][item_cat][item] = row['Amount']\n", | |
"\n", | |
"    # Labor costs, bucketed by worker type/role\n", | |
"    if categories[1] == 'Expenses' and worker_type is not None:\n", | |
"        item_cat = categories[2]\n", | |
"\n", | |
"        handle_defaults(actuals, [department, project, 'labor', period, worker_type, worker_role, item_cat])\n", | |
"        actuals[department][project]['labor'][period][worker_type][worker_role][item_cat] = row['Amount']" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Preview the result" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Preview the nested actuals structure before exporting\n", | |
"print(json.dumps(actuals, indent=4))" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Export the data" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Write the categorized actuals where hotglue collects ETL output\n", | |
"with open(f'{OUTPUT_DIR}/actuals.json', 'w') as outfile:\n", | |
"    json.dump(actuals, outfile)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.7.9" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 4 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment