Skip to content

Instantly share code, notes, and snippets.

@AJP4
Created April 6, 2018 16:52
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save AJP4/ef6234776424e2a28e47a35d4fb916cc to your computer and use it in GitHub Desktop.
Save AJP4/ef6234776424e2a28e47a35d4fb916cc to your computer and use it in GitHub Desktop.
Flatten Project into Pandas Table
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [],
"source": [
"# Imports\n",
"\n",
"import win32com.client\n",
"import datetime as dt"
]
},
{
"cell_type": "code",
"execution_count": 54,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Building definitions from type library...\n",
"Generating...\n"
]
}
],
"source": [
"# Generate Constants\n",
"\n",
"MSP= \"Microsoft Project 14.0 Object Library\"\n",
"where = \"./MSPRJ.py\"\n",
"from win32com.client import makepy\n",
"import sys\n",
"sys.argv=['',\"-o\",where, MSP]\n",
"makepy.main()\n",
"\n",
"import MSPRJ\n",
"g = globals()\n",
"for c in dir(MSPRJ.constants):\n",
" g[c] = getattr(MSPRJ.constants,c)"
]
},
{
"cell_type": "code",
"execution_count": 55,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<win32com.gen_py.Microsoft Project 14.0 Object Library._MSProject instance at 0x24292560>"
]
},
"execution_count": 55,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Create application variable\n",
"\n",
"msp = win32com.client.gencache.EnsureDispatch(\"MSProject.Application\")\n",
"msp"
]
},
{
"cell_type": "code",
"execution_count": 65,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 65,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# \n",
"\n",
"msp.FileOpen(r\"MyBlogProject.mpp\")"
]
},
{
"cell_type": "code",
"execution_count": 66,
"metadata": {},
"outputs": [],
"source": [
"project=msp.ActiveProject"
]
},
{
"cell_type": "code",
"execution_count": 68,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"\n",
"'''\n",
"This function will return a pandas dataframe containing the task from an the supplied \n",
"win32com referenced microsoft project \"project\".\n",
"\n",
"Pandas dataframes are great for manipulating table of data and can readily output excel spreadsheets\n",
"which is our ultimate aim.\n",
"\n",
"\n",
"'''\n",
"\n",
"\n",
"def create_project_data_frame(project, UniqueIDs_to_Ignore, mspApplication, headers):\n",
" '''\n",
" Keywords: \n",
" * project = reference to the win32com microsoft project object.\n",
" * UniqueIDs_to_Ignore = this is a list of MSProject unique IDs of tasks we wish to exclude from the \n",
" dataframe. This feature is added as occassionally I come across projects where the PM\n",
" doesnt want to remove a task even though it is no longer valid, prefereing to keep it\n",
" in for future reference.\n",
" * mspApplication = reference to the win32com MSProject applications being used.\n",
" * headers = list of headers to output in the pandas dataframe\n",
" '''\n",
" \n",
" # Create reference to empty Pandas dataframe with headers\n",
" projectDataFrame = pd.DataFrame(columns=headers)\n",
" \n",
" # reference to initially empty list to be used to collect summary tasks\n",
" summary_tasks_to_task = []\n",
" \n",
" # reference to collection of Tasks in MS Project using the Project \"Tasks\" collection object\n",
" task_collection= project.Tasks\n",
"\n",
" \n",
" # iterate through the collection of tasks\n",
" for t in task_collection:\n",
" # determine is the task is not a summary task and is not a task to be ignored\n",
" if (not t.Summary) & ~(t.UniqueID in UniqueIDs_to_Ignore): # i.e. it is a task line not a Summary Task\n",
" # find dependent task\n",
" dep = [] # an empty list to add dependent task id\n",
" for d in t.TaskDependencies:\n",
" if int(d.From) != t.UniqueID: # a task can have multiple references to itself, not sure why, but this removes them\n",
" dep.append(str(d.From) + \"-\" + str(d.From.Name))\n",
"\n",
" # collect resource names \n",
" res = [] # an empty list to add resources\n",
" for r in t.Assignments:\n",
" res.append(r.ResourceName) \n",
" \n",
" ''' \n",
" it is not good practic but it is possible to have project tasks at the top level (outline level 1)\n",
" So this if statement catches those occurances and empties summary_tasks_to_task list \n",
" '''\n",
" if t.OutlineLevel ==1:\n",
" summary_tasks_to_task = []\n",
" # create a temporary \"temp\" list variable holding the entries for the dataframe row.\n",
" sum_task = \">\".join(summary_tasks_to_task)\n",
" dependencies = [\", \".join(dep)]\n",
" resources = [\", \".join(res)]\n",
"\n",
" temp = [t.UniqueID, sum_task]\n",
" \n",
" # iterate over the list of headers (excluding UniqueID and SummaryTask as these are covered)\n",
" # add the value of the header to the list\n",
" for head_title in headers:\n",
" if head_title != \"UniqueID\" and head_title != \"SummaryTask\":\n",
" '''\n",
" note that dependencies and resources have been created by iterating over their\n",
" respective collection objects and are therefore not found via Task.GetField\n",
" '''\n",
" if head_title == \"Predecessors\":\n",
" temp = temp+dependencies\n",
" elif head_title==\"Resource Names\":\n",
" temp = temp+resources\n",
" \n",
" #Other headers can, in the main, be found by access the field value of the header\n",
" #So it is important that the name of header is correct. We can make a function\n",
" #to check whether the headers list contains valid headers\n",
" \n",
" else:\n",
" temp = temp+[t.GetField(mspApplication.FieldNameToFieldConstant(head_title))]\n",
"\n",
" # Append the task to pandas dataframe \n",
" projectDataFrame = projectDataFrame.append(pd.Series(temp, index=headers), ignore_index=True)\n",
"\n",
" elif t.Summary & (t.OutlineLevel > len(summary_tasks_to_task)):\n",
" '''\n",
" if tasks is a summary task and its outline level is greater than number of summary tasks in the list\n",
" that is it is lower level summary task\n",
" then add that summary task to the list summary_tasks_to_task\n",
" '''\n",
" summary_tasks_to_task.append(t.Name)\n",
"\n",
" else:\n",
" while not len(summary_tasks_to_task) == t.OutlineLevel - 1:\n",
" '''\n",
" if tasks is a summary task and its outline level is less than number of summary tasks in the list\n",
" summaryTasksToTask then remove last summary task from list and add new summary task to the list\n",
" (Basically we have gone up a summary task level)\n",
" '''\n",
" summary_tasks_to_task.pop()\n",
" summary_tasks_to_task.append(t.Name)\n",
"\n",
" # finally, set the index of the dataframe to the unique MS Project Task ID\n",
" projectDataFrame = projectDataFrame.set_index(\"UniqueID\")\n",
" \n",
" # set the type of the Finish and Start columns to datatime types \n",
" projectDataFrame[\"Finish\"] = pd.to_datetime(projectDataFrame[\"Finish\"],dayfirst=True)\n",
" projectDataFrame[\"Start\"] = pd.to_datetime(projectDataFrame[\"Start\"],dayfirst=True)\n",
" \n",
" return projectDataFrame "
]
},
{
"cell_type": "code",
"execution_count": 69,
"metadata": {},
"outputs": [],
"source": [
"const_header = [\"UniqueID\", \"SummaryTask\", \"Name\", \"Start\", \"Finish\", \"% Complete\"]\n",
"additional_header=[\"Resource Names\", \"Notes\", \"Predecessors\", \"Text1\"]\n",
"headers=const_header+additional_header\n",
"ignoreID = [44]\n",
"frame = create_project_data_frame(project, ignoreID, msp, headers)\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 70,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>SummaryTask</th>\n",
" <th>Name</th>\n",
" <th>Start</th>\n",
" <th>Finish</th>\n",
" <th>% Complete</th>\n",
" <th>Resource Names</th>\n",
" <th>Notes</th>\n",
" <th>Predecessors</th>\n",
" <th>Text1</th>\n",
" </tr>\n",
" <tr>\n",
" <th>UniqueID</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Big Project 1&gt;First Phase</td>\n",
" <td>First Task</td>\n",
" <td>2018-04-02</td>\n",
" <td>2018-04-10</td>\n",
" <td>100%</td>\n",
" <td>Arnie</td>\n",
" <td></td>\n",
" <td></td>\n",
" <td></td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Big Project 1&gt;First Phase&gt;L2 Summary</td>\n",
" <td>Second Task</td>\n",
" <td>2018-04-09</td>\n",
" <td>2018-04-11</td>\n",
" <td>100%</td>\n",
" <td>Billy, Charlie</td>\n",
" <td></td>\n",
" <td>3-First Task</td>\n",
" <td></td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Big Project 1&gt;First Phase&gt;L2 Summary</td>\n",
" <td>Third Task</td>\n",
" <td>2018-04-11</td>\n",
" <td>2018-04-11</td>\n",
" <td>0%</td>\n",
" <td></td>\n",
" <td></td>\n",
" <td>4-Second Task</td>\n",
" <td>D2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>Big Project 1&gt;Second Phase</td>\n",
" <td>A Task</td>\n",
" <td>2018-04-12</td>\n",
" <td>2018-04-25</td>\n",
" <td>50%</td>\n",
" <td>Billy</td>\n",
" <td></td>\n",
" <td>5-Third Task</td>\n",
" <td></td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>Big Project 1&gt;Second Phase</td>\n",
" <td>B Task</td>\n",
" <td>2018-04-26</td>\n",
" <td>2018-05-02</td>\n",
" <td>0%</td>\n",
" <td>Charle</td>\n",
" <td></td>\n",
" <td>7-A Task</td>\n",
" <td></td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>Big Project 1&gt;Second Phase</td>\n",
" <td>C Task</td>\n",
" <td>2018-05-02</td>\n",
" <td>2018-05-02</td>\n",
" <td>0%</td>\n",
" <td>Arnie</td>\n",
" <td></td>\n",
" <td>8-B Task</td>\n",
" <td>D3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>Big Project 1&gt;Third Phase</td>\n",
" <td>X Task</td>\n",
" <td>2018-04-26</td>\n",
" <td>2018-04-30</td>\n",
" <td>0%</td>\n",
" <td>50% Arnie</td>\n",
" <td></td>\n",
" <td>7-A Task</td>\n",
" <td></td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>Big Project 1&gt;Third Phase</td>\n",
" <td>Y Task</td>\n",
" <td>2018-05-01</td>\n",
" <td>2018-05-03</td>\n",
" <td>0%</td>\n",
" <td>75% Billy</td>\n",
" <td></td>\n",
" <td>11-X Task</td>\n",
" <td></td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>Big Project 1&gt;Third Phase</td>\n",
" <td>Z Task</td>\n",
" <td>2018-05-03</td>\n",
" <td>2018-05-03</td>\n",
" <td>0%</td>\n",
" <td>Charlie</td>\n",
" <td></td>\n",
" <td>12-Y Task</td>\n",
" <td>D4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td></td>\n",
" <td>Top Level Task</td>\n",
" <td>2018-04-02</td>\n",
" <td>2018-04-02</td>\n",
" <td>0%</td>\n",
" <td>George</td>\n",
" <td></td>\n",
" <td></td>\n",
" <td></td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>Another Project</td>\n",
" <td>Task in Another Project</td>\n",
" <td>2018-04-03</td>\n",
" <td>2018-04-04</td>\n",
" <td>0%</td>\n",
" <td>George</td>\n",
" <td></td>\n",
" <td></td>\n",
" <td></td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" SummaryTask Name \\\n",
"UniqueID \n",
"3 Big Project 1>First Phase First Task \n",
"4 Big Project 1>First Phase>L2 Summary Second Task \n",
"5 Big Project 1>First Phase>L2 Summary Third Task \n",
"7 Big Project 1>Second Phase A Task \n",
"8 Big Project 1>Second Phase B Task \n",
"9 Big Project 1>Second Phase C Task \n",
"11 Big Project 1>Third Phase X Task \n",
"12 Big Project 1>Third Phase Y Task \n",
"13 Big Project 1>Third Phase Z Task \n",
"14 Top Level Task \n",
"17 Another Project Task in Another Project \n",
"\n",
" Start Finish % Complete Resource Names Notes \\\n",
"UniqueID \n",
"3 2018-04-02 2018-04-10 100% Arnie \n",
"4 2018-04-09 2018-04-11 100% Billy, Charlie \n",
"5 2018-04-11 2018-04-11 0% \n",
"7 2018-04-12 2018-04-25 50% Billy \n",
"8 2018-04-26 2018-05-02 0% Charle \n",
"9 2018-05-02 2018-05-02 0% Arnie \n",
"11 2018-04-26 2018-04-30 0% 50% Arnie \n",
"12 2018-05-01 2018-05-03 0% 75% Billy \n",
"13 2018-05-03 2018-05-03 0% Charlie \n",
"14 2018-04-02 2018-04-02 0% George \n",
"17 2018-04-03 2018-04-04 0% George \n",
"\n",
" Predecessors Text1 \n",
"UniqueID \n",
"3 \n",
"4 3-First Task \n",
"5 4-Second Task D2 \n",
"7 5-Third Task \n",
"8 7-A Task \n",
"9 8-B Task D3 \n",
"11 7-A Task \n",
"12 11-X Task \n",
"13 12-Y Task D4 \n",
"14 \n",
"17 "
]
},
"execution_count": 70,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"frame"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python (jupyterLab)",
"language": "python",
"name": "jupyterlab"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
@Scylla2020
Copy link

Scylla2020 commented Jun 12, 2020

Hi Im getting these errors
Could not locate a type library matching 'Microsoft Project 16.0 Object Library'
and
ModuleNotFoundError: No module named 'MSPRJ'

However the second error is different when testing on jupyter notebook:
AttributeError: module 'MSPRJ' has no attribute 'constants'

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment