Last active
November 16, 2022 22:32
-
-
Save nevsan/337184160b130fa7cba56ef4de906eeb to your computer and use it in GitHub Desktop.
Export data from Asana
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "wireless-custom", | |
"metadata": { | |
"code_folding": [ | |
0 | |
] | |
}, | |
"outputs": [], | |
"source": [ | |
"# ==== Imports ====\n", | |
"\n", | |
"import csv\n", | |
"import datetime\n", | |
"import functools\n", | |
"import getpass\n", | |
"import json\n", | |
"import os\n", | |
"from multiprocessing import Pool\n", | |
"from pathlib import Path\n", | |
"\n", | |
"# pip install asana requests tqdm\n", | |
"import asana\n", | |
"import requests\n", | |
"from tqdm.auto import tqdm" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "southeast-aging", | |
"metadata": {}, | |
"source": [ | |
"# Download Raw Data" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "informed-trick", | |
"metadata": {}, | |
"source": [ | |
"## Setup" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "complimentary-vatican", | |
"metadata": { | |
"code_folding": [ | |
0 | |
] | |
}, | |
"outputs": [], | |
"source": [ | |
"# ==== Functions ====\n", | |
"\n", | |
"TASK_POOL_SIZE = 32\n", | |
"\n", | |
"\n", | |
"def get_client(personal_access_token):\n", | |
" \"\"\"Create an Asana client with the given personal access token.\"\"\"\n", | |
" # Construct an Asana client\n", | |
" client = asana.Client.access_token(personal_access_token)\n", | |
"\n", | |
" # Silence new project templates depreciation warning\n", | |
" client.headers[\"Asana-Disable\"] = \"new_project_templates,new_user_task_lists\"\n", | |
"\n", | |
" return client\n", | |
"\n", | |
"\n", | |
"def get_task(*, client, task_gid, attachment_dir):\n", | |
" \"\"\"Get all information about a task and its subtasks, including:\n", | |
" - Task metadata\n", | |
" - Stories\n", | |
" - Attachments\n", | |
" \"\"\"\n", | |
" # Get detailed task data\n", | |
" output = client.tasks.get_task(task_gid)\n", | |
"\n", | |
" # Get all stories\n", | |
" output[\"stories\"] = list(client.stories.get_stories_for_task(task_gid))\n", | |
"\n", | |
" # Download all attachments\n", | |
" attachments = []\n", | |
" for attachment in client.attachments.find_by_task(task_gid):\n", | |
" attachment_data = client.attachments.find_by_id(attachment[\"gid\"])\n", | |
" attachments.append(attachment_data)\n", | |
" url = attachment_data[\"download_url\"]\n", | |
" if attachment_dir and url:\n", | |
" out_dir = Path(attachment_dir) / attachment[\"gid\"]\n", | |
" out_dir.mkdir(parents=True, exist_ok=True)\n", | |
" \n", | |
" # Sanitize filenames that may have slashes/urls\n", | |
" attachment_filename = attachment_data[\"name\"].split('\\n')[0].split('/')[-1]\n", | |
" filename = out_dir / attachment_filename\n", | |
" attachment_data[\"local_path\"] = str(filename)\n", | |
" try:\n", | |
" resp = requests.get(url)\n", | |
" resp.raise_for_status()\n", | |
" except Exception as exc:\n", | |
" print(\n", | |
" f\"Could not download attachment '{attachment_data['gid']}' from '{url}'\"\n", | |
" )\n", | |
" continue\n", | |
"\n", | |
" with filename.open(\"wb\") as f:\n", | |
" f.write(resp.content)\n", | |
" output[\"attachments\"] = attachments\n", | |
"\n", | |
" # Get subtasks\n", | |
" output[\"subtasks\"] = [\n", | |
" get_task(client=client, task_gid=t[\"gid\"], attachment_dir=attachment_dir)\n", | |
" for t in client.tasks.get_subtasks_for_task(task_gid)\n", | |
" ]\n", | |
"\n", | |
" return output\n", | |
"\n", | |
"\n", | |
"def task_helper(task_gid, attachment_dir):\n", | |
" # When using a process pool, each process will get its own copy of this function.\n", | |
" # Each process has to login independently, so we'll use a \"static\" variable to store\n", | |
" # the client.\n", | |
" if not hasattr(task_helper, \"client\"):\n", | |
" task_helper.client = get_client(personal_access_token)\n", | |
" client = task_helper.client\n", | |
" return get_task(client=client, task_gid=task_gid, attachment_dir=attachment_dir)\n", | |
"\n", | |
"\n", | |
"def get_project(*, client, project_gid, attachment_dir, mt=TASK_POOL_SIZE):\n", | |
" output_data = client.projects.get_project(project_gid)\n", | |
" tasks = client.tasks.get_tasks_for_project(project_gid)\n", | |
"\n", | |
" results = []\n", | |
" task_data = []\n", | |
" if mt == 1:\n", | |
" task_data = [\n", | |
" get_task(client=client, task_gid=task[\"gid\"], attachment_dir=attachment_dir)\n", | |
" for task in tqdm(list(tasks))\n", | |
" ]\n", | |
" else:\n", | |
" with Pool(TASK_POOL_SIZE) as p:\n", | |
" for task in tasks:\n", | |
" results.append(\n", | |
" p.apply_async(task_helper, (task[\"gid\"], attachment_dir))\n", | |
" )\n", | |
"\n", | |
" for res in tqdm(results):\n", | |
" task_data.append(res.get())\n", | |
" output_data[\"tasks\"] = task_data\n", | |
"\n", | |
" return output_data\n", | |
"\n", | |
"\n", | |
"def export_project(\n", | |
" *, client, project, output_dir, skip_existing=False, mt=TASK_POOL_SIZE\n", | |
"):\n", | |
" project_name = project['name'].replace('/', '-')\n", | |
" project_dir = output_dir / f\"{project['gid']} - {project_name}\"\n", | |
" attachment_dir = project_dir / \"attachments\"\n", | |
" data_path = project_dir / \"data.json\"\n", | |
" \n", | |
" if skip_existing and data_path.exists():\n", | |
" return project_dir\n", | |
"\n", | |
" project_data = get_project(\n", | |
" client=client, project_gid=project[\"gid\"], attachment_dir=attachment_dir, mt=mt\n", | |
" )\n", | |
"\n", | |
" project_dir.mkdir(parents=True, exist_ok=True)\n", | |
" with data_path.open(\"wt\") as f:\n", | |
" json.dump(project_data, f, indent=4)\n", | |
" return project_dir" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "finite-lindsay", | |
"metadata": { | |
"code_folding": [] | |
}, | |
"outputs": [], | |
"source": [ | |
"# ==== Setup ====\n", | |
"\n", | |
"# Note: Replace this value with your own personal access token\n", | |
"personal_access_token = \"<INSERT TOKEN>\"\n", | |
"\n", | |
"# Construct an Asana client\n", | |
"client = get_client(personal_access_token)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "comic-particle", | |
"metadata": { | |
"code_folding": [ | |
0 | |
] | |
}, | |
"outputs": [], | |
"source": [ | |
"# ==== Find all projects to download ===\n", | |
"\n", | |
"# Directory for saving all data\n", | |
"root_dir = Path(\"asana_data\")\n", | |
"\n", | |
"# Find all projects\n", | |
"project_cache = root_dir / \"projects.json\"\n", | |
"if project_cache.exists():\n", | |
" with project_cache.open('rt') as f:\n", | |
" projects = json.load(f)\n", | |
"else:\n", | |
" # Find the BNI Workspace\n", | |
" bni_workspace = next(\n", | |
" w for w in client.workspaces.get_workspaces() if w[\"name\"] == \"butterflynetinc.com\"\n", | |
" )\n", | |
" \n", | |
" projects = list(client.projects.find_by_workspace(bni_workspace[\"gid\"]))\n", | |
" with project_cache.open('wt') as f:\n", | |
" json.dump(projects, f, indent=4)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "undefined-antigua", | |
"metadata": {}, | |
"source": [ | |
"## Download" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "golden-flexibility", | |
"metadata": { | |
"code_folding": [ | |
0 | |
] | |
}, | |
"outputs": [], | |
"source": [ | |
"# ==== Download all projects ====\n", | |
"dirs = []\n", | |
"\n", | |
"for project in tqdm(projects):\n", | |
" dirs.append(\n", | |
" export_project(\n", | |
" client=client, project=project, output_dir=root_dir, skip_existing=True\n", | |
" )\n", | |
" )\n", | |
" print(dirs[-1])" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "hourly-budget", | |
"metadata": {}, | |
"source": [ | |
"# Summarize Data" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "modern-shanghai", | |
"metadata": { | |
"code_folding": [ | |
0 | |
] | |
}, | |
"outputs": [], | |
"source": [ | |
"# ==== Functions ====\n", | |
"def summarize_task(task, parent=None):\n", | |
" name = (parent + \"/\" if parent else \"\") + task[\"name\"]\n", | |
"\n", | |
" def get_attachment(story):\n", | |
" if \"attachment_added\" not in story[\"resource_subtype\"]:\n", | |
" return\n", | |
" asset_id = story[\"text\"].split(\"attached \")[-1].split(\"asset_id=\")[-1]\n", | |
" try:\n", | |
" attachment = next(a for a in task[\"attachments\"] if a[\"gid\"] == asset_id)\n", | |
" except StopIteration:\n", | |
" print(f\"No attachment metadata for {asset_id}\")\n", | |
" print(story)\n", | |
" return\n", | |
" if \"local_path\" not in attachment:\n", | |
" return attachment[\"permanent_url\"]\n", | |
" return attachment[\"local_path\"]\n", | |
"\n", | |
" summary = [\n", | |
" {\n", | |
" \"name\": name,\n", | |
" \"user\": (task[\"assignee\"] or {}).get(\"name\"),\n", | |
" \"time\": task[\"created_at\"],\n", | |
" \"text\": task[\"notes\"],\n", | |
" \"attachment\": None,\n", | |
" }\n", | |
" ]\n", | |
" summary.extend([\n", | |
" {\n", | |
" \"name\": name,\n", | |
" \"user\": (story[\"created_by\"] or {}).get(\"name\"),\n", | |
" \"time\": story[\"created_at\"],\n", | |
" \"text\": story.get(\"text\"),\n", | |
" \"attachment\": get_attachment(story),\n", | |
" }\n", | |
" for story in task[\"stories\"]\n", | |
" ])\n", | |
" for subtask in task[\"subtasks\"]:\n", | |
" summary.extend(summarize_task(subtask, parent=name))\n", | |
" return summary" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "harmful-premises", | |
"metadata": { | |
"code_folding": [ | |
0 | |
] | |
}, | |
"outputs": [], | |
"source": [ | |
"# ==== Summarize Projects ====\n", | |
"\n", | |
"root_dir = Path(\"asana_data\")\n", | |
"for path in tqdm(list(root_dir.iterdir())):\n", | |
" if not path.is_dir():\n", | |
" continue\n", | |
"\n", | |
" with open(path / \"data.json\") as f:\n", | |
" project_data = json.load(f)\n", | |
"\n", | |
" summary = sum((summarize_task(task) for task in project_data[\"tasks\"]), [])\n", | |
"\n", | |
" if not summary:\n", | |
" continue\n", | |
"\n", | |
" with open(path / \"summary.csv\", \"wt\") as f:\n", | |
" writer = csv.DictWriter(f, summary[0].keys())\n", | |
" writer.writeheader()\n", | |
" writer.writerows(summary)" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.9.1" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 5 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment