Skip to content

Instantly share code, notes, and snippets.

@nevsan
Last active November 16, 2022 22:32
Show Gist options
  • Save nevsan/337184160b130fa7cba56ef4de906eeb to your computer and use it in GitHub Desktop.
Save nevsan/337184160b130fa7cba56ef4de906eeb to your computer and use it in GitHub Desktop.
Export data from Asana
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "wireless-custom",
"metadata": {
"code_folding": [
0
]
},
"outputs": [],
"source": [
"# ==== Imports ====\n",
"\n",
"import csv\n",
"import datetime\n",
"import functools\n",
"import json\n",
"from multiprocessing import Pool\n",
"from pathlib import Path\n",
"\n",
"# pip install asana requests tqdm\n",
"import asana\n",
"import requests\n",
"from tqdm.auto import tqdm"
]
},
{
"cell_type": "markdown",
"id": "southeast-aging",
"metadata": {},
"source": [
"# Download Raw Data"
]
},
{
"cell_type": "markdown",
"id": "informed-trick",
"metadata": {},
"source": [
"## Setup"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "complimentary-vatican",
"metadata": {
"code_folding": [
0
]
},
"outputs": [],
"source": [
"# ==== Functions ====\n",
"\n",
"TASK_POOL_SIZE = 32\n",
"\n",
"\n",
"def get_client(personal_access_token):\n",
"    \"\"\"Return an Asana client authenticated with the given personal access token.\n",
"\n",
"    The client is returned with its ``Asana-Disable`` header already set.\n",
"    \"\"\"\n",
"    asana_client = asana.Client.access_token(personal_access_token)\n",
"\n",
"    # Silence the deprecation warnings about new project templates and new\n",
"    # user task lists.\n",
"    asana_client.headers[\"Asana-Disable\"] = \"new_project_templates,new_user_task_lists\"\n",
"\n",
"    return asana_client\n",
"\n",
"\n",
"def get_task(*, client, task_gid, attachment_dir):\n",
"    \"\"\"Get all information about a task and its subtasks, including:\n",
"    - Task metadata\n",
"    - Stories (stored under \"stories\")\n",
"    - Attachments (stored under \"attachments\")\n",
"    - Subtasks, fetched recursively (stored under \"subtasks\")\n",
"\n",
"    Args:\n",
"        client: Asana client used for every API call.\n",
"        task_gid: GID of the task to fetch.\n",
"        attachment_dir: Directory that attachments are downloaded into, one\n",
"            sub-directory per attachment GID. Pass a falsy value to skip\n",
"            downloading files.\n",
"\n",
"    Returns:\n",
"        The task record returned by the API with the extra keys listed above.\n",
"        Every attachment whose file was downloaded successfully also carries\n",
"        a \"local_path\" entry.\n",
"    \"\"\"\n",
"    # Get detailed task data\n",
"    output = client.tasks.get_task(task_gid)\n",
"\n",
"    # Get all stories\n",
"    output[\"stories\"] = list(client.stories.get_stories_for_task(task_gid))\n",
"\n",
"    # Download all attachments\n",
"    attachments = []\n",
"    for attachment in client.attachments.find_by_task(task_gid):\n",
"        attachment_data = client.attachments.find_by_id(attachment[\"gid\"])\n",
"        attachments.append(attachment_data)\n",
"        url = attachment_data[\"download_url\"]\n",
"        if not (attachment_dir and url):\n",
"            continue\n",
"\n",
"        out_dir = Path(attachment_dir) / attachment[\"gid\"]\n",
"        out_dir.mkdir(parents=True, exist_ok=True)\n",
"\n",
"        # Sanitize filenames that may have slashes/urls. Fall back to the GID\n",
"        # when nothing usable is left (e.g. a missing name or one ending in \"/\"),\n",
"        # otherwise the path would point at the directory itself.\n",
"        raw_name = attachment_data.get(\"name\") or \"\"\n",
"        attachment_filename = raw_name.split('\\n')[0].split('/')[-1] or attachment[\"gid\"]\n",
"        filename = out_dir / attachment_filename\n",
"        try:\n",
"            # A timeout keeps one stalled download from hanging the worker forever.\n",
"            resp = requests.get(url, timeout=60)\n",
"            resp.raise_for_status()\n",
"        except Exception as exc:\n",
"            # Deliberately broad: downloads are best-effort and a single bad\n",
"            # attachment must not abort the export of the whole project.\n",
"            print(\n",
"                f\"Could not download attachment '{attachment_data['gid']}' from '{url}': {exc}\"\n",
"            )\n",
"            continue\n",
"\n",
"        with filename.open(\"wb\") as f:\n",
"            f.write(resp.content)\n",
"        # Record the local copy only once it exists, so a failed download never\n",
"        # leaves a \"local_path\" that points at a missing file.\n",
"        attachment_data[\"local_path\"] = str(filename)\n",
"    output[\"attachments\"] = attachments\n",
"\n",
"    # Get subtasks\n",
"    output[\"subtasks\"] = [\n",
"        get_task(client=client, task_gid=t[\"gid\"], attachment_dir=attachment_dir)\n",
"        for t in client.tasks.get_subtasks_for_task(task_gid)\n",
"    ]\n",
"\n",
"    return output\n",
"\n",
"\n",
"def task_helper(task_gid, attachment_dir):\n",
"    \"\"\"Worker entry point for the process pool: fetch a single task.\n",
"\n",
"    When using a process pool, each process gets its own copy of this function\n",
"    and has to log in independently. The client is therefore kept as a \"static\"\n",
"    attribute on the function and created on first use from the module-level\n",
"    ``personal_access_token``.\n",
"    \"\"\"\n",
"    try:\n",
"        worker_client = task_helper.client\n",
"    except AttributeError:\n",
"        worker_client = task_helper.client = get_client(personal_access_token)\n",
"    return get_task(\n",
"        client=worker_client, task_gid=task_gid, attachment_dir=attachment_dir\n",
"    )\n",
"\n",
"\n",
"def get_project(*, client, project_gid, attachment_dir, mt=TASK_POOL_SIZE):\n",
"    \"\"\"Fetch a project record together with the full data of all its tasks.\n",
"\n",
"    Args:\n",
"        client: Asana client used for the project and task-list lookups and,\n",
"            when ``mt == 1``, for the task downloads as well.\n",
"        project_gid: GID of the project to fetch.\n",
"        attachment_dir: Directory passed through to ``get_task`` for attachments.\n",
"        mt: Number of worker processes. ``1`` downloads the tasks sequentially\n",
"            in the current process; a falsy value falls back to\n",
"            ``TASK_POOL_SIZE``.\n",
"\n",
"    Returns:\n",
"        The project record returned by the API with an added \"tasks\" list.\n",
"    \"\"\"\n",
"    output_data = client.projects.get_project(project_gid)\n",
"    tasks = client.tasks.get_tasks_for_project(project_gid)\n",
"\n",
"    if mt == 1:\n",
"        task_data = [\n",
"            get_task(client=client, task_gid=task[\"gid\"], attachment_dir=attachment_dir)\n",
"            for task in tqdm(list(tasks))\n",
"        ]\n",
"    else:\n",
"        # Size the pool from ``mt`` so callers really control the parallelism.\n",
"        with Pool(mt or TASK_POOL_SIZE) as p:\n",
"            results = [\n",
"                p.apply_async(task_helper, (task[\"gid\"], attachment_dir))\n",
"                for task in tasks\n",
"            ]\n",
"\n",
"            # Collect inside the ``with`` block: leaving it terminates the pool.\n",
"            task_data = [res.get() for res in tqdm(results)]\n",
"    output_data[\"tasks\"] = task_data\n",
"\n",
"    return output_data\n",
"\n",
"\n",
"def export_project(\n",
"    *, client, project, output_dir, skip_existing=False, mt=TASK_POOL_SIZE\n",
"):\n",
"    \"\"\"Download one project and store it as ``data.json`` in its own directory.\n",
"\n",
"    The directory is named ``\"<gid> - <name>\"`` (slashes in the name replaced\n",
"    by dashes) and is created below ``output_dir``; attachments are placed in\n",
"    its ``attachments`` sub-directory. With ``skip_existing`` set, a project\n",
"    whose ``data.json`` already exists is not downloaded again.\n",
"\n",
"    Returns the project directory.\n",
"    \"\"\"\n",
"    safe_name = project[\"name\"].replace(\"/\", \"-\")\n",
"    project_dir = output_dir / f\"{project['gid']} - {safe_name}\"\n",
"    data_path = project_dir / \"data.json\"\n",
"\n",
"    if not (skip_existing and data_path.exists()):\n",
"        project_data = get_project(\n",
"            client=client,\n",
"            project_gid=project[\"gid\"],\n",
"            attachment_dir=project_dir / \"attachments\",\n",
"            mt=mt,\n",
"        )\n",
"\n",
"        project_dir.mkdir(parents=True, exist_ok=True)\n",
"        with data_path.open(\"wt\") as handle:\n",
"            json.dump(project_data, handle, indent=4)\n",
"\n",
"    return project_dir"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "finite-lindsay",
"metadata": {
"code_folding": []
},
"outputs": [],
"source": [
"# ==== Setup ====\n",
"\n",
"import os\n",
"\n",
"# Note: Set the ASANA_PERSONAL_ACCESS_TOKEN environment variable, or replace the\n",
"# placeholder below with your own personal access token. The environment\n",
"# variable is preferred so the token is not saved inside the notebook.\n",
"personal_access_token = os.environ.get(\"ASANA_PERSONAL_ACCESS_TOKEN\", \"<INSERT TOKEN>\")\n",
"\n",
"# Construct an Asana client\n",
"client = get_client(personal_access_token)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "comic-particle",
"metadata": {
"code_folding": [
0
]
},
"outputs": [],
"source": [
"# ==== Find all projects to download ====\n",
"\n",
"# Directory for saving all data\n",
"root_dir = Path(\"asana_data\")\n",
"# The project cache is written inside this directory, so it has to exist\n",
"# before the first run can save anything.\n",
"root_dir.mkdir(parents=True, exist_ok=True)\n",
"\n",
"# Find all projects\n",
"project_cache = root_dir / \"projects.json\"\n",
"if project_cache.exists():\n",
"    with project_cache.open('rt') as f:\n",
"        projects = json.load(f)\n",
"else:\n",
"    # Find the BNI Workspace\n",
"    workspace_name = \"butterflynetinc.com\"\n",
"    bni_workspace = next(\n",
"        (w for w in client.workspaces.get_workspaces() if w[\"name\"] == workspace_name),\n",
"        None,\n",
"    )\n",
"    if bni_workspace is None:\n",
"        # Fail with a readable message instead of a bare StopIteration\n",
"        raise RuntimeError(f\"Asana workspace '{workspace_name}' was not found\")\n",
"\n",
"    projects = list(client.projects.find_by_workspace(bni_workspace[\"gid\"]))\n",
"    with project_cache.open('wt') as f:\n",
"        json.dump(projects, f, indent=4)"
]
},
{
"cell_type": "markdown",
"id": "undefined-antigua",
"metadata": {},
"source": [
"## Download"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "golden-flexibility",
"metadata": {
"code_folding": [
0
]
},
"outputs": [],
"source": [
"# ==== Download all projects ====\n",
"dirs = []\n",
"\n",
"# Export every project in turn, skipping the ones already on disk, and print\n",
"# each project directory as soon as it is done.\n",
"for project in tqdm(projects):\n",
"    exported_dir = export_project(\n",
"        client=client, project=project, output_dir=root_dir, skip_existing=True\n",
"    )\n",
"    dirs.append(exported_dir)\n",
"    print(exported_dir)"
]
},
{
"cell_type": "markdown",
"id": "hourly-budget",
"metadata": {},
"source": [
"# Summarize Data"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "modern-shanghai",
"metadata": {
"code_folding": [
0
]
},
"outputs": [],
"source": [
"# ==== Functions ====\n",
"def summarize_task(task, parent=None):\n",
"    \"\"\"Flatten a task, its stories and (recursively) its subtasks into rows.\n",
"\n",
"    Every row is a dict with the keys \"name\", \"user\", \"time\", \"text\" and\n",
"    \"attachment\". The first row describes the task itself, followed by one row\n",
"    per story and then the rows of each subtask. When ``parent`` is given, the\n",
"    task name is prefixed with ``\"<parent>/\"``.\n",
"    \"\"\"\n",
"    prefix = parent + \"/\" if parent else \"\"\n",
"    name = prefix + task[\"name\"]\n",
"\n",
"    def resolve_attachment(story):\n",
"        # Only \"attachment added\" stories refer to an attachment\n",
"        if \"attachment_added\" not in story[\"resource_subtype\"]:\n",
"            return None\n",
"\n",
"        # The asset id is whatever follows the last \"asset_id=\" in the story text\n",
"        asset_id = story[\"text\"].split(\"attached \")[-1].split(\"asset_id=\")[-1]\n",
"        for candidate in task[\"attachments\"]:\n",
"            if candidate[\"gid\"] == asset_id:\n",
"                break\n",
"        else:\n",
"            print(f\"No attachment metadata for {asset_id}\")\n",
"            print(story)\n",
"            return None\n",
"\n",
"        # Prefer the downloaded copy; otherwise point at the permanent URL\n",
"        if \"local_path\" in candidate:\n",
"            return candidate[\"local_path\"]\n",
"        return candidate[\"permanent_url\"]\n",
"\n",
"    rows = [\n",
"        {\n",
"            \"name\": name,\n",
"            \"user\": (task[\"assignee\"] or {}).get(\"name\"),\n",
"            \"time\": task[\"created_at\"],\n",
"            \"text\": task[\"notes\"],\n",
"            \"attachment\": None,\n",
"        }\n",
"    ]\n",
"    for story in task[\"stories\"]:\n",
"        rows.append(\n",
"            {\n",
"                \"name\": name,\n",
"                \"user\": (story[\"created_by\"] or {}).get(\"name\"),\n",
"                \"time\": story[\"created_at\"],\n",
"                \"text\": story.get(\"text\"),\n",
"                \"attachment\": resolve_attachment(story),\n",
"            }\n",
"        )\n",
"    for subtask in task[\"subtasks\"]:\n",
"        rows += summarize_task(subtask, parent=name)\n",
"    return rows"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "harmful-premises",
"metadata": {
"code_folding": [
0
]
},
"outputs": [],
"source": [
"# ==== Summarize Projects ====\n",
"\n",
"root_dir = Path(\"asana_data\")\n",
"for path in tqdm(list(root_dir.iterdir())):\n",
"    data_path = path / \"data.json\"\n",
"    # Skip stray files (e.g. projects.json) and project directories whose\n",
"    # export never finished: attachments can exist without a data.json.\n",
"    if not path.is_dir() or not data_path.exists():\n",
"        continue\n",
"\n",
"    with data_path.open() as f:\n",
"        project_data = json.load(f)\n",
"\n",
"    # Flatten the per-task row lists in one pass\n",
"    summary = [\n",
"        row for task in project_data[\"tasks\"] for row in summarize_task(task)\n",
"    ]\n",
"\n",
"    if not summary:\n",
"        continue\n",
"\n",
"    # The csv module expects files opened with newline=\"\" so that line endings\n",
"    # and newlines embedded in notes/comments are written correctly.\n",
"    with open(path / \"summary.csv\", \"wt\", newline=\"\", encoding=\"utf-8\") as f:\n",
"        writer = csv.DictWriter(f, summary[0].keys())\n",
"        writer.writeheader()\n",
"        writer.writerows(summary)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.1"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment