{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# QBO Transform Script #\n",
"This transformation script converts P&L report into an actuals.json to that has categorized transactions by department/project."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import gluestick as gs\n",
"import pandas as pd\n",
"import os\n",
"import json\n",
"import ast\n",
"from datetime import datetime\n",
"from lib import util"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's establish the standard hotglue input/output directories"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# standard directory for hotglue\n",
"ROOT_DIR = os.environ.get(\"ROOT_DIR\", \".\")\n",
"INPUT_DIR = f\"{ROOT_DIR}/sync-output\"\n",
"OUTPUT_DIR = f\"{ROOT_DIR}/etl-output\"\n",
"SNAPSHOT_DIR = f\"{ROOT_DIR}/snapshots\""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's start by reading the data. \n",
"\n",
"We will use the [gluestick](https://pypi.org/project/gluestick/) package to read the raw data in the input folder into a dictionary of pandas dataframes using the `read_csv_folder` function."
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Read input data\n",
"input_data = gs.read_csv_folder(INPUT_DIR)"
]
},
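{
"cell_type": "markdown",
"metadata": {},
"source": [
"`input_data` is simply a dict of DataFrames keyed by stream name. As an optional sanity check (not required by the pipeline), you can list which streams gluestick found:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Optional: confirm which streams were read from INPUT_DIR\n",
"print(sorted(input_data.keys()))"
]
},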
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Process the snapshots"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# Accounts\n",
"if input_data.get(\"Account\") is not None:\n",
" util.update_snapshot(\n",
" SNAPSHOT_DIR, \n",
" \"Account\",\n",
" ['Id'], \n",
" input_data[\"Account\"]\n",
" )\n",
"\n",
"# Invoices\n",
"if input_data.get(\"Invoice\") is not None:\n",
" util.update_snapshot(\n",
" SNAPSHOT_DIR, \n",
" \"Invoice\",\n",
" ['Id'], \n",
" input_data[\"Invoice\"]\n",
" )\n",
"\n",
"# Item\n",
"if input_data.get(\"Item\") is not None:\n",
" util.update_snapshot(\n",
" SNAPSHOT_DIR, \n",
" \"Item\",\n",
" ['Id'], \n",
" input_data[\"Item\"]\n",
" )\n",
"\n",
"# Purchase\n",
"if input_data.get(\"Purchase\") is not None:\n",
" util.update_snapshot(\n",
" SNAPSHOT_DIR, \n",
" \"Purchase\",\n",
" ['Id'], \n",
" input_data[\"Purchase\"]\n",
" )"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"input_data[\"Account\"] = util.get_snapshot(SNAPSHOT_DIR, \"Account\")\n",
"input_data[\"Invoice\"] = util.get_snapshot(SNAPSHOT_DIR, \"Invoice\")\n",
"input_data[\"Item\"] = util.get_snapshot(SNAPSHOT_DIR, \"Item\")\n",
"input_data[\"Purchase\"] = util.get_snapshot(SNAPSHOT_DIR, \"Purchase\")"
]
},
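{
"cell_type": "markdown",
"metadata": {},
"source": [
"Optionally, check how many records each merged snapshot now holds (a quick sanity check, not required by the pipeline):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Optional: row counts per snapshot after merging\n",
"print({name: len(input_data[name]) for name in ['Account', 'Invoice', 'Item', 'Purchase']})"
]
},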
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Process the P&L report"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"pl_df = input_data['ProfitAndLossDetailReport']"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Split the class names into Department, Project, Worker Type, and Worker Role, if applicable"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"split_pl = pl_df.Class.str.split(pat=\":\", expand=True)\n",
"\n",
"if len(split_pl.columns) == 4:\n",
" pl_df[['Department', 'Project', 'Worker Type', 'Worker Role']] = split_pl\n",
"else:\n",
" pl_df[['Department', 'Project']] = split_pl"
]
},
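{
"cell_type": "markdown",
"metadata": {},
"source": [
"For reference, `str.split(pat=\":\", expand=True)` turns each class name into one column per segment. A small illustration with a made-up class name:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Illustrative only: this class name is hypothetical\n",
"pd.Series(['Engineering:Project A:Employee:Engineer']).str.split(pat=\":\", expand=True)"
]
},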
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Create the dictionary where all new data is saved (actuals)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"actuals = {}\n",
"\n",
"def handle_defaults(obj, args):\n",
" if len(args) == 0:\n",
" return\n",
"\n",
" arg = args[0]\n",
" obj.setdefault(arg, {})\n",
" args.pop(0)\n",
" handle_defaults(obj[arg], args) "
]
},
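{
"cell_type": "markdown",
"metadata": {},
"source": [
"As a quick illustration (not part of the pipeline), `handle_defaults` creates each level of the nested dictionary on demand:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Illustrative only: these keys are made up; the real keys come from the P&L rows\n",
"example = {}\n",
"handle_defaults(example, ['Sales', 'Project A', 'revenues', '2021-11'])\n",
"print(example)  # {'Sales': {'Project A': {'revenues': {'2021-11': {}}}}}"
]
},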
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Revenues"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"ename": "ValueError",
"evalue": "time data '3/17/21' does not match format '%Y-%m-%d'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-9-2d1c67bb90fd>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[0mproject\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mrow\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'Project'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 9\u001b[0m \u001b[0mdocno\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mrow\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'Num'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 10\u001b[0;31m \u001b[0mdate\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdatetime\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstrptime\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrow\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'Date'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'%Y-%m-%d'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 11\u001b[0m \u001b[0mdate\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdate\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0misocalendar\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 12\u001b[0m \u001b[0mperiod\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34mf\"{date[0]}-{date[1]}\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/lib64/python3.7/_strptime.py\u001b[0m in \u001b[0;36m_strptime_datetime\u001b[0;34m(cls, data_string, format)\u001b[0m\n\u001b[1;32m 575\u001b[0m \"\"\"Return a class cls instance based on the input string and the\n\u001b[1;32m 576\u001b[0m format string.\"\"\"\n\u001b[0;32m--> 577\u001b[0;31m \u001b[0mtt\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfraction\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mgmtoff_fraction\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_strptime\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata_string\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mformat\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 578\u001b[0m \u001b[0mtzname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mgmtoff\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtt\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 579\u001b[0m \u001b[0margs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtt\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;36m6\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mfraction\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/lib64/python3.7/_strptime.py\u001b[0m in \u001b[0;36m_strptime\u001b[0;34m(data_string, format)\u001b[0m\n\u001b[1;32m 357\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mfound\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 358\u001b[0m raise ValueError(\"time data %r does not match format %r\" %\n\u001b[0;32m--> 359\u001b[0;31m (data_string, format))\n\u001b[0m\u001b[1;32m 360\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata_string\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0mfound\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 361\u001b[0m raise ValueError(\"unconverted data remains: %s\" %\n",
"\u001b[0;31mValueError\u001b[0m: time data '3/17/21' does not match format '%Y-%m-%d'"
]
}
],
"source": [
"invoices_df = pl_df[pl_df['TransactionType'] == 'Invoice']\n",
"invoices = input_data['Invoice']\n",
"items = input_data['Item']\n",
"\n",
"for index, row in invoices_df.iterrows():\n",
" categories = ast.literal_eval(row['Categories'])\n",
" department = row['Department']\n",
" project = row['Project']\n",
" docno = row['Num']\n",
" date = datetime.strptime(row['Date'], '%Y-%m-%d')\n",
" date = date.isocalendar()\n",
" period = f\"{date[0]}-{date[1]}\"\n",
" # Get item name\n",
" cat = categories[2]\n",
"\n",
" invoice = invoices[invoices['DocNumber'] == docno].iloc[0]\n",
" invoice_line = ast.literal_eval(invoice['Line'])\n",
" \n",
" for l in invoice_line:\n",
" if l['DetailType'] == 'SalesItemLineDetail':\n",
" item = ast.literal_eval(l['SalesItemLineDetail']['ItemRef'])\n",
" item_name = item['name']\n",
" item_data = items[items['FullyQualifiedName'] == item_name].iloc[0]\n",
" item_name = item_name.split(\":\")\n",
" item_category = item_name[0]\n",
" item_name = item_name[1]\n",
" \n",
" handle_defaults(actuals, [department, project, 'revenues', period, cat, item_category, item_name])\n",
"\n",
" actuals[department][project]['revenues'][period][cat][item_category][item_name] = {\n",
" 'total': l['Amount'],\n",
" 'qty': item['value'],\n",
" 'rate': item_data['UnitPrice']\n",
" }"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Expenses (Purchases)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"purchases = input_data['Purchase']\n",
"purchases = gs.explode_json_to_cols(purchases, \"Line\", max_level=1)\n",
"purchases = purchases[purchases['Line.ItemBasedExpenseLineDetail'].notnull()]\n",
"purchases = gs.explode_json_to_cols(purchases, 'Line.ItemBasedExpenseLineDetail', max_level=1)\n",
"\n",
"purchases = gs.explode_json_to_cols(purchases, 'Line.ItemBasedExpenseLineDetail.ClassRef', max_level=1)\n",
"purchases = gs.explode_json_to_cols(purchases, 'Line.ItemBasedExpenseLineDetail.ItemRef', max_level=1)\n",
"\n",
"purchases = purchases.rename(columns={\n",
" 'Line.ItemBasedExpenseLineDetail.ClassRef.name': 'ClassName',\n",
" 'Line.ItemBasedExpenseLineDetail.UnitPrice': 'ItemRate',\n",
" 'Line.ItemBasedExpenseLineDetail.Qty': 'ItemQty',\n",
" 'Line.ItemBasedExpenseLineDetail.ItemRef.name': 'ItemName'\n",
"})\n",
"\n",
"purchases = purchases[purchases['ClassName'].notnull()]"
]
},
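{
"cell_type": "markdown",
"metadata": {},
"source": [
"Optionally, preview the flattened purchase lines. The column names below come from the renames above; `Line.Amount` is the exploded line amount used later for the COGS totals:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Optional preview of the columns the expense loop relies on\n",
"purchases[['ClassName', 'ItemName', 'ItemQty', 'ItemRate', 'Line.Amount']].head()"
]
},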
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"expenses_df = pl_df[pl_df['TransactionType'] == 'Expense']\n",
"\n",
"for index, row in expenses_df.iterrows():\n",
" categories = ast.literal_eval(row['Categories'])\n",
" department = row['Department']\n",
" project = row['Project']\n",
" date = datetime.strptime(row['Date'], '%Y-%m-%d')\n",
" date = date.isocalendar()\n",
" period = f\"{date[0]}-{date[1]}\"\n",
" \n",
" # COGS\n",
" if categories[1] == 'Cost of Goods Sold':\n",
" item = categories[2]\n",
" \n",
" # Get items\n",
" relevant = purchases[purchases['ClassName'] == f'{department}:{project}']\n",
"\n",
" for index, item_row in relevant.iterrows(): \n",
" item_name = item_row['ItemName']\n",
" item_name = item_name.split(\":\")\n",
" item_category = item_name[0]\n",
" item_name = item_name[1]\n",
" \n",
" handle_defaults(actuals, [department, project, 'cogs', period, item, item_category, item_name])\n",
" actuals[department][project]['cogs'][period][item][item_category][item_name] = {\n",
" 'total': item_row['Line.Amount'],\n",
" 'qty': item_row['ItemQty'],\n",
" 'rate': item_row['ItemRate']\n",
" }\n",
"\n",
" # Costs\n",
" if categories[1] == 'Expenses' and row['Worker Type'] is None:\n",
" print(categories)\n",
" expense_type = categories[2]\n",
" item_cat = categories[3]\n",
" item = categories[4]\n",
" handle_defaults(actuals, [department, project, 'costs', period, expense_type, item_cat, item])\n",
" actuals[department][project]['costs'][period][expense_type][item_cat][item] = row['Amount']\n",
" \n",
" # Labor\n",
" if categories[1] == 'Expenses' and row['Worker Type'] is not None:\n",
" worker_type = row['Worker Type']\n",
" worker_role = row['Worker Role']\n",
" item_cat = categories[2]\n",
"\n",
" handle_defaults(actuals, [department, project, 'labor', period, worker_type, worker_role, item_cat])\n",
" actuals[department][project]['labor'][period][worker_type][worker_role][item_cat] = row['Amount'] "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Preview the result"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(json.dumps(actuals, indent=4))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Export the data"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"with open(f'{OUTPUT_DIR}/actuals.json', 'w') as outfile:\n",
" json.dump(actuals, outfile)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.9"
}
},
"nbformat": 4,
"nbformat_minor": 4
}