AJP4/FlattenProjectIntoPandasTable.ipynb

## FlattenProjectIntoPandasTable.ipynb
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Imports\n",
    "\n",
    "import win32com.client\n",
    "import datetime as dt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Building definitions from type library...\n",
      "Generating...\n"
     ]
    }
   ],
   "source": [
    "# Generate Constants\n",
    "\n",
    "MSP= \"Microsoft Project 14.0 Object Library\"\n",
    "where = \"./MSPRJ.py\"\n",
    "from win32com.client import makepy\n",
    "import sys\n",
    "sys.argv=['',\"-o\",where, MSP]\n",
    "makepy.main()\n",
    "\n",
    "import MSPRJ\n",
    "g = globals()\n",
    "for c in dir(MSPRJ.constants):\n",
    "    g[c] = getattr(MSPRJ.constants,c)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<win32com.gen_py.Microsoft Project 14.0 Object Library._MSProject instance at 0x24292560>"
      ]
     },
     "execution_count": 55,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Create application variable\n",
    "\n",
    "msp = win32com.client.gencache.EnsureDispatch(\"MSProject.Application\")\n",
    "msp"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 65,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# \n",
    "\n",
    "msp.FileOpen(r\"MyBlogProject.mpp\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "metadata": {},
   "outputs": [],
   "source": [
    "project=msp.ActiveProject"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "'''\n",
    "This function will return a pandas dataframe containing the task from an the supplied \n",
    "win32com referenced microsoft project \"project\".\n",
    "\n",
    "Pandas dataframes are great for manipulating table of data and can readily output excel spreadsheets\n",
    "which is our ultimate aim.\n",
    "\n",
    "\n",
    "'''\n",
    "\n",
    "\n",
    "def create_project_data_frame(project, UniqueIDs_to_Ignore, mspApplication, headers):\n",
    "    '''\n",
    "    Keywords:  \n",
    "        * project = reference to the win32com microsoft project object.\n",
    "        * UniqueIDs_to_Ignore = this is a list of MSProject unique IDs of tasks we wish to exclude from the \n",
    "        dataframe.  This feature is added as occassionally I come across projects where the PM\n",
    "        doesnt want to remove a task even though it is no longer valid, prefereing to keep it\n",
    "        in for future reference.\n",
    "        * mspApplication = reference to the win32com MSProject applications being used.\n",
    "        * headers = list of headers to output in the pandas dataframe\n",
    "    '''\n",
    "    \n",
    "    # Create reference to empty Pandas dataframe with headers\n",
    "    projectDataFrame = pd.DataFrame(columns=headers)\n",
    "    \n",
    "    # reference to initially empty list to be used to collect summary tasks\n",
    "    summary_tasks_to_task = []\n",
    "    \n",
    "    # reference to collection of Tasks in MS Project using the Project \"Tasks\" collection object\n",
    "    task_collection= project.Tasks\n",
    "\n",
    "    \n",
    "    # iterate through the collection of tasks\n",
    "    for t in task_collection:\n",
    "        # determine is the task is not a summary task and is not a task to be ignored\n",
    "        if (not t.Summary) & ~(t.UniqueID in UniqueIDs_to_Ignore):  # i.e. it is a task line not a Summary Task\n",
    "            # find dependent task\n",
    "            dep = []  # an empty list to add dependent task id\n",
    "            for d in t.TaskDependencies:\n",
    "                if int(d.From) != t.UniqueID:  # a task can have multiple references to itself, not sure why, but this removes them\n",
    "                    dep.append(str(d.From) + \"-\" + str(d.From.Name))\n",
    "\n",
    "            # collect resource names        \n",
    "            res = []  # an empty list to add resources\n",
    "            for r in t.Assignments:\n",
    "                res.append(r.ResourceName)    \n",
    "            \n",
    "            ''' \n",
    "            it is not good practic but it is possible to have project tasks at the top level (outline level 1)\n",
    "            So this if statement catches those occurances and empties summary_tasks_to_task list \n",
    "            '''\n",
    "            if t.OutlineLevel ==1:\n",
    "                summary_tasks_to_task = []\n",
    "            # create a temporary \"temp\" list variable holding the entries for the dataframe row.\n",
    "            sum_task = \">\".join(summary_tasks_to_task)\n",
    "            dependencies = [\", \".join(dep)]\n",
    "            resources = [\", \".join(res)]\n",
    "\n",
    "            temp = [t.UniqueID, sum_task]\n",
    "            \n",
    "            # iterate over the list of headers (excluding UniqueID and SummaryTask as these are covered)\n",
    "            # add the value of the header to the list\n",
    "            for head_title in headers:\n",
    "                if head_title != \"UniqueID\" and head_title != \"SummaryTask\":\n",
    "                    '''\n",
    "                    note that dependencies and resources have been created by iterating over their\n",
    "                    respective collection objects and are therefore not found via Task.GetField\n",
    "                    '''\n",
    "                    if head_title == \"Predecessors\":\n",
    "                        temp = temp+dependencies\n",
    "                    elif head_title==\"Resource Names\":\n",
    "                        temp = temp+resources\n",
    "                    \n",
    "                    #Other headers can, in the main, be found by access the field value of the header\n",
    "                    #So it is important that the name of header is correct.  We can make a function\n",
    "                    #to check whether the headers list contains valid headers\n",
    "                        \n",
    "                    else:\n",
    "                        temp = temp+[t.GetField(mspApplication.FieldNameToFieldConstant(head_title))]\n",
    "\n",
    "            # Append the task to pandas dataframe            \n",
    "            projectDataFrame = projectDataFrame.append(pd.Series(temp, index=headers), ignore_index=True)\n",
    "\n",
    "        elif t.Summary & (t.OutlineLevel > len(summary_tasks_to_task)):\n",
    "            '''\n",
    "            if tasks is a summary task and its outline level is greater than number of summary tasks in the list\n",
    "            that is it is lower level summary task\n",
    "            then add that summary task to the list summary_tasks_to_task\n",
    "            '''\n",
    "            summary_tasks_to_task.append(t.Name)\n",
    "\n",
    "        else:\n",
    "            while not len(summary_tasks_to_task) == t.OutlineLevel - 1:\n",
    "                '''\n",
    "                if tasks is a summary task and its outline level is less than number of summary tasks in the list\n",
    "                summaryTasksToTask then remove last summary task from list and add new summary task to the list\n",
    "                (Basically we have gone up a summary task level)\n",
    "                '''\n",
    "                summary_tasks_to_task.pop()\n",
    "            summary_tasks_to_task.append(t.Name)\n",
    "\n",
    "    # finally, set the index of the dataframe to the unique MS Project Task ID\n",
    "    projectDataFrame = projectDataFrame.set_index(\"UniqueID\")\n",
    "    \n",
    "    # set the type of the Finish and Start columns to datatime types    \n",
    "    projectDataFrame[\"Finish\"] = pd.to_datetime(projectDataFrame[\"Finish\"],dayfirst=True)\n",
    "    projectDataFrame[\"Start\"] = pd.to_datetime(projectDataFrame[\"Start\"],dayfirst=True)\n",
    "    \n",
    "    return projectDataFrame       "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 69,
   "metadata": {},
   "outputs": [],
   "source": [
    "const_header = [\"UniqueID\", \"SummaryTask\", \"Name\", \"Start\", \"Finish\", \"% Complete\"]\n",
    "additional_header=[\"Resource Names\", \"Notes\", \"Predecessors\", \"Text1\"]\n",
    "headers=const_header+additional_header\n",
    "ignoreID = [44]\n",
    "frame = create_project_data_frame(project, ignoreID, msp, headers)\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 70,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>SummaryTask</th>\n",
       "      <th>Name</th>\n",
       "      <th>Start</th>\n",
       "      <th>Finish</th>\n",
       "      <th>% Complete</th>\n",
       "      <th>Resource Names</th>\n",
       "      <th>Notes</th>\n",
       "      <th>Predecessors</th>\n",
       "      <th>Text1</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>UniqueID</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Big Project 1&gt;First Phase</td>\n",
       "      <td>First Task</td>\n",
       "      <td>2018-04-02</td>\n",
       "      <td>2018-04-10</td>\n",
       "      <td>100%</td>\n",
       "      <td>Arnie</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Big Project 1&gt;First Phase&gt;L2 Summary</td>\n",
       "      <td>Second Task</td>\n",
       "      <td>2018-04-09</td>\n",
       "      <td>2018-04-11</td>\n",
       "      <td>100%</td>\n",
       "      <td>Billy, Charlie</td>\n",
       "      <td></td>\n",
       "      <td>3-First Task</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>Big Project 1&gt;First Phase&gt;L2 Summary</td>\n",
       "      <td>Third Task</td>\n",
       "      <td>2018-04-11</td>\n",
       "      <td>2018-04-11</td>\n",
       "      <td>0%</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>4-Second Task</td>\n",
       "      <td>D2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>Big Project 1&gt;Second Phase</td>\n",
       "      <td>A Task</td>\n",
       "      <td>2018-04-12</td>\n",
       "      <td>2018-04-25</td>\n",
       "      <td>50%</td>\n",
       "      <td>Billy</td>\n",
       "      <td></td>\n",
       "      <td>5-Third Task</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>Big Project 1&gt;Second Phase</td>\n",
       "      <td>B Task</td>\n",
       "      <td>2018-04-26</td>\n",
       "      <td>2018-05-02</td>\n",
       "      <td>0%</td>\n",
       "      <td>Charle</td>\n",
       "      <td></td>\n",
       "      <td>7-A Task</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>Big Project 1&gt;Second Phase</td>\n",
       "      <td>C Task</td>\n",
       "      <td>2018-05-02</td>\n",
       "      <td>2018-05-02</td>\n",
       "      <td>0%</td>\n",
       "      <td>Arnie</td>\n",
       "      <td></td>\n",
       "      <td>8-B Task</td>\n",
       "      <td>D3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>Big Project 1&gt;Third Phase</td>\n",
       "      <td>X Task</td>\n",
       "      <td>2018-04-26</td>\n",
       "      <td>2018-04-30</td>\n",
       "      <td>0%</td>\n",
       "      <td>50% Arnie</td>\n",
       "      <td></td>\n",
       "      <td>7-A Task</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>Big Project 1&gt;Third Phase</td>\n",
       "      <td>Y Task</td>\n",
       "      <td>2018-05-01</td>\n",
       "      <td>2018-05-03</td>\n",
       "      <td>0%</td>\n",
       "      <td>75% Billy</td>\n",
       "      <td></td>\n",
       "      <td>11-X Task</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>Big Project 1&gt;Third Phase</td>\n",
       "      <td>Z Task</td>\n",
       "      <td>2018-05-03</td>\n",
       "      <td>2018-05-03</td>\n",
       "      <td>0%</td>\n",
       "      <td>Charlie</td>\n",
       "      <td></td>\n",
       "      <td>12-Y Task</td>\n",
       "      <td>D4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td></td>\n",
       "      <td>Top Level Task</td>\n",
       "      <td>2018-04-02</td>\n",
       "      <td>2018-04-02</td>\n",
       "      <td>0%</td>\n",
       "      <td>George</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>Another Project</td>\n",
       "      <td>Task in Another Project</td>\n",
       "      <td>2018-04-03</td>\n",
       "      <td>2018-04-04</td>\n",
       "      <td>0%</td>\n",
       "      <td>George</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                   SummaryTask                     Name  \\\n",
       "UniqueID                                                                  \n",
       "3                    Big Project 1>First Phase               First Task   \n",
       "4         Big Project 1>First Phase>L2 Summary              Second Task   \n",
       "5         Big Project 1>First Phase>L2 Summary               Third Task   \n",
       "7                   Big Project 1>Second Phase                   A Task   \n",
       "8                   Big Project 1>Second Phase                   B Task   \n",
       "9                   Big Project 1>Second Phase                   C Task   \n",
       "11                   Big Project 1>Third Phase                   X Task   \n",
       "12                   Big Project 1>Third Phase                   Y Task   \n",
       "13                   Big Project 1>Third Phase                   Z Task   \n",
       "14                                                       Top Level Task   \n",
       "17                             Another Project  Task in Another Project   \n",
       "\n",
       "              Start     Finish % Complete  Resource Names Notes  \\\n",
       "UniqueID                                                          \n",
       "3        2018-04-02 2018-04-10       100%           Arnie         \n",
       "4        2018-04-09 2018-04-11       100%  Billy, Charlie         \n",
       "5        2018-04-11 2018-04-11         0%                         \n",
       "7        2018-04-12 2018-04-25        50%           Billy         \n",
       "8        2018-04-26 2018-05-02         0%          Charle         \n",
       "9        2018-05-02 2018-05-02         0%           Arnie         \n",
       "11       2018-04-26 2018-04-30         0%       50% Arnie         \n",
       "12       2018-05-01 2018-05-03         0%       75% Billy         \n",
       "13       2018-05-03 2018-05-03         0%         Charlie         \n",
       "14       2018-04-02 2018-04-02         0%          George         \n",
       "17       2018-04-03 2018-04-04         0%          George         \n",
       "\n",
       "           Predecessors Text1  \n",
       "UniqueID                       \n",
       "3                              \n",
       "4          3-First Task        \n",
       "5         4-Second Task    D2  \n",
       "7          5-Third Task        \n",
       "8              7-A Task        \n",
       "9              8-B Task    D3  \n",
       "11             7-A Task        \n",
       "12            11-X Task        \n",
       "13            12-Y Task    D4  \n",
       "14                             \n",
       "17                             "
      ]
     },
     "execution_count": 70,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "frame"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python (jupyterLab)",
   "language": "python",
   "name": "jupyterlab"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
	{
	"cells": [
	{
	"cell_type": "code",
	"execution_count": 22,
	"metadata": {},
	"outputs": [],
	"source": [
	"# Imports\n",
	"\n",
	"import win32com.client\n",
	"import datetime as dt"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 54,
	"metadata": {},
	"outputs": [
	{
	"name": "stderr",
	"output_type": "stream",
	"text": [
	"Building definitions from type library...\n",
	"Generating...\n"
	]
	}
	],
	"source": [
	"# Generate Constants\n",
	"\n",
	"MSP= \"Microsoft Project 14.0 Object Library\"\n",
	"where = \"./MSPRJ.py\"\n",
	"from win32com.client import makepy\n",
	"import sys\n",
	"sys.argv=['',\"-o\",where, MSP]\n",
	"makepy.main()\n",
	"\n",
	"import MSPRJ\n",
	"g = globals()\n",
	"for c in dir(MSPRJ.constants):\n",
	" g[c] = getattr(MSPRJ.constants,c)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 55,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"<win32com.gen_py.Microsoft Project 14.0 Object Library._MSProject instance at 0x24292560>"
	]
	},
	"execution_count": 55,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"# Create application variable\n",
	"\n",
	"msp = win32com.client.gencache.EnsureDispatch(\"MSProject.Application\")\n",
	"msp"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 65,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"True"
	]
	},
	"execution_count": 65,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"# \n",
	"\n",
	"msp.FileOpen(r\"MyBlogProject.mpp\")"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 66,
	"metadata": {},
	"outputs": [],
	"source": [
	"project=msp.ActiveProject"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 68,
	"metadata": {},
	"outputs": [],
	"source": [
	"import pandas as pd\n",
	"\n",
	"'''\n",
	"This function will return a pandas dataframe containing the task from an the supplied \n",
	"win32com referenced microsoft project \"project\".\n",
	"\n",
	"Pandas dataframes are great for manipulating table of data and can readily output excel spreadsheets\n",
	"which is our ultimate aim.\n",
	"\n",
	"\n",
	"'''\n",
	"\n",
	"\n",
	"def create_project_data_frame(project, UniqueIDs_to_Ignore, mspApplication, headers):\n",
	" '''\n",
	" Keywords: \n",
	" * project = reference to the win32com microsoft project object.\n",
	" * UniqueIDs_to_Ignore = this is a list of MSProject unique IDs of tasks we wish to exclude from the \n",
	" dataframe. This feature is added as occassionally I come across projects where the PM\n",
	" doesnt want to remove a task even though it is no longer valid, prefereing to keep it\n",
	" in for future reference.\n",
	" * mspApplication = reference to the win32com MSProject applications being used.\n",
	" * headers = list of headers to output in the pandas dataframe\n",
	" '''\n",
	" \n",
	" # Create reference to empty Pandas dataframe with headers\n",
	" projectDataFrame = pd.DataFrame(columns=headers)\n",
	" \n",
	" # reference to initially empty list to be used to collect summary tasks\n",
	" summary_tasks_to_task = []\n",
	" \n",
	" # reference to collection of Tasks in MS Project using the Project \"Tasks\" collection object\n",
	" task_collection= project.Tasks\n",
	"\n",
	" \n",
	" # iterate through the collection of tasks\n",
	" for t in task_collection:\n",
	" # determine is the task is not a summary task and is not a task to be ignored\n",
	" if (not t.Summary) & ~(t.UniqueID in UniqueIDs_to_Ignore): # i.e. it is a task line not a Summary Task\n",
	" # find dependent task\n",
	" dep = [] # an empty list to add dependent task id\n",
	" for d in t.TaskDependencies:\n",
	" if int(d.From) != t.UniqueID: # a task can have multiple references to itself, not sure why, but this removes them\n",
	" dep.append(str(d.From) + \"-\" + str(d.From.Name))\n",
	"\n",
	" # collect resource names \n",
	" res = [] # an empty list to add resources\n",
	" for r in t.Assignments:\n",
	" res.append(r.ResourceName) \n",
	" \n",
	" ''' \n",
	" it is not good practic but it is possible to have project tasks at the top level (outline level 1)\n",
	" So this if statement catches those occurances and empties summary_tasks_to_task list \n",
	" '''\n",
	" if t.OutlineLevel ==1:\n",
	" summary_tasks_to_task = []\n",
	" # create a temporary \"temp\" list variable holding the entries for the dataframe row.\n",
	" sum_task = \">\".join(summary_tasks_to_task)\n",
	" dependencies = [\", \".join(dep)]\n",
	" resources = [\", \".join(res)]\n",
	"\n",
	" temp = [t.UniqueID, sum_task]\n",
	" \n",
	" # iterate over the list of headers (excluding UniqueID and SummaryTask as these are covered)\n",
	" # add the value of the header to the list\n",
	" for head_title in headers:\n",
	" if head_title != \"UniqueID\" and head_title != \"SummaryTask\":\n",
	" '''\n",
	" note that dependencies and resources have been created by iterating over their\n",
	" respective collection objects and are therefore not found via Task.GetField\n",
	" '''\n",
	" if head_title == \"Predecessors\":\n",
	" temp = temp+dependencies\n",
	" elif head_title==\"Resource Names\":\n",
	" temp = temp+resources\n",
	" \n",
	" #Other headers can, in the main, be found by access the field value of the header\n",
	" #So it is important that the name of header is correct. We can make a function\n",
	" #to check whether the headers list contains valid headers\n",
	" \n",
	" else:\n",
	" temp = temp+[t.GetField(mspApplication.FieldNameToFieldConstant(head_title))]\n",
	"\n",
	" # Append the task to pandas dataframe \n",
	" projectDataFrame = projectDataFrame.append(pd.Series(temp, index=headers), ignore_index=True)\n",
	"\n",
	" elif t.Summary & (t.OutlineLevel > len(summary_tasks_to_task)):\n",
	" '''\n",
	" if tasks is a summary task and its outline level is greater than number of summary tasks in the list\n",
	" that is it is lower level summary task\n",
	" then add that summary task to the list summary_tasks_to_task\n",
	" '''\n",
	" summary_tasks_to_task.append(t.Name)\n",
	"\n",
	" else:\n",
	" while not len(summary_tasks_to_task) == t.OutlineLevel - 1:\n",
	" '''\n",
	" if tasks is a summary task and its outline level is less than number of summary tasks in the list\n",
	" summaryTasksToTask then remove last summary task from list and add new summary task to the list\n",
	" (Basically we have gone up a summary task level)\n",
	" '''\n",
	" summary_tasks_to_task.pop()\n",
	" summary_tasks_to_task.append(t.Name)\n",
	"\n",
	" # finally, set the index of the dataframe to the unique MS Project Task ID\n",
	" projectDataFrame = projectDataFrame.set_index(\"UniqueID\")\n",
	" \n",
	" # set the type of the Finish and Start columns to datatime types \n",
	" projectDataFrame[\"Finish\"] = pd.to_datetime(projectDataFrame[\"Finish\"],dayfirst=True)\n",
	" projectDataFrame[\"Start\"] = pd.to_datetime(projectDataFrame[\"Start\"],dayfirst=True)\n",
	" \n",
	" return projectDataFrame "
	]
	},
	{
	"cell_type": "code",
	"execution_count": 69,
	"metadata": {},
	"outputs": [],
	"source": [
	"const_header = [\"UniqueID\", \"SummaryTask\", \"Name\", \"Start\", \"Finish\", \"% Complete\"]\n",
	"additional_header=[\"Resource Names\", \"Notes\", \"Predecessors\", \"Text1\"]\n",
	"headers=const_header+additional_header\n",
	"ignoreID = [44]\n",
	"frame = create_project_data_frame(project, ignoreID, msp, headers)\n",
	"\n"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 70,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/html": [
	"<div>\n",
	"<style scoped>\n",
	" .dataframe tbody tr th:only-of-type {\n",
	" vertical-align: middle;\n",
	" }\n",
	"\n",
	" .dataframe tbody tr th {\n",
	" vertical-align: top;\n",
	" }\n",
	"\n",
	" .dataframe thead th {\n",
	" text-align: right;\n",
	" }\n",
	"</style>\n",
	"<table border=\"1\" class=\"dataframe\">\n",
	" <thead>\n",
	" <tr style=\"text-align: right;\">\n",
	" <th></th>\n",
	" <th>SummaryTask</th>\n",
	" <th>Name</th>\n",
	" <th>Start</th>\n",
	" <th>Finish</th>\n",
	" <th>% Complete</th>\n",
	" <th>Resource Names</th>\n",
	" <th>Notes</th>\n",
	" <th>Predecessors</th>\n",
	" <th>Text1</th>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>UniqueID</th>\n",
	" <th></th>\n",
	" <th></th>\n",
	" <th></th>\n",
	" <th></th>\n",
	" <th></th>\n",
	" <th></th>\n",
	" <th></th>\n",
	" <th></th>\n",
	" <th></th>\n",
	" </tr>\n",
	" </thead>\n",
	" <tbody>\n",
	" <tr>\n",
	" <th>3</th>\n",
	" <td>Big Project 1>First Phase</td>\n",
	" <td>First Task</td>\n",
	" <td>2018-04-02</td>\n",
	" <td>2018-04-10</td>\n",
	" <td>100%</td>\n",
	" <td>Arnie</td>\n",
	" <td></td>\n",
	" <td></td>\n",
	" <td></td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>4</th>\n",
	" <td>Big Project 1>First Phase>L2 Summary</td>\n",
	" <td>Second Task</td>\n",
	" <td>2018-04-09</td>\n",
	" <td>2018-04-11</td>\n",
	" <td>100%</td>\n",
	" <td>Billy, Charlie</td>\n",
	" <td></td>\n",
	" <td>3-First Task</td>\n",
	" <td></td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>5</th>\n",
	" <td>Big Project 1>First Phase>L2 Summary</td>\n",
	" <td>Third Task</td>\n",
	" <td>2018-04-11</td>\n",
	" <td>2018-04-11</td>\n",
	" <td>0%</td>\n",
	" <td></td>\n",
	" <td></td>\n",
	" <td>4-Second Task</td>\n",
	" <td>D2</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>7</th>\n",
	" <td>Big Project 1>Second Phase</td>\n",
	" <td>A Task</td>\n",
	" <td>2018-04-12</td>\n",
	" <td>2018-04-25</td>\n",
	" <td>50%</td>\n",
	" <td>Billy</td>\n",
	" <td></td>\n",
	" <td>5-Third Task</td>\n",
	" <td></td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>8</th>\n",
	" <td>Big Project 1>Second Phase</td>\n",
	" <td>B Task</td>\n",
	" <td>2018-04-26</td>\n",
	" <td>2018-05-02</td>\n",
	" <td>0%</td>\n",
	" <td>Charle</td>\n",
	" <td></td>\n",
	" <td>7-A Task</td>\n",
	" <td></td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>9</th>\n",
	" <td>Big Project 1>Second Phase</td>\n",
	" <td>C Task</td>\n",
	" <td>2018-05-02</td>\n",
	" <td>2018-05-02</td>\n",
	" <td>0%</td>\n",
	" <td>Arnie</td>\n",
	" <td></td>\n",
	" <td>8-B Task</td>\n",
	" <td>D3</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>11</th>\n",
	" <td>Big Project 1>Third Phase</td>\n",
	" <td>X Task</td>\n",
	" <td>2018-04-26</td>\n",
	" <td>2018-04-30</td>\n",
	" <td>0%</td>\n",
	" <td>50% Arnie</td>\n",
	" <td></td>\n",
	" <td>7-A Task</td>\n",
	" <td></td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>12</th>\n",
	" <td>Big Project 1>Third Phase</td>\n",
	" <td>Y Task</td>\n",
	" <td>2018-05-01</td>\n",
	" <td>2018-05-03</td>\n",
	" <td>0%</td>\n",
	" <td>75% Billy</td>\n",
	" <td></td>\n",
	" <td>11-X Task</td>\n",
	" <td></td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>13</th>\n",
	" <td>Big Project 1>Third Phase</td>\n",
	" <td>Z Task</td>\n",
	" <td>2018-05-03</td>\n",
	" <td>2018-05-03</td>\n",
	" <td>0%</td>\n",
	" <td>Charlie</td>\n",
	" <td></td>\n",
	" <td>12-Y Task</td>\n",
	" <td>D4</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>14</th>\n",
	" <td></td>\n",
	" <td>Top Level Task</td>\n",
	" <td>2018-04-02</td>\n",
	" <td>2018-04-02</td>\n",
	" <td>0%</td>\n",
	" <td>George</td>\n",
	" <td></td>\n",
	" <td></td>\n",
	" <td></td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>17</th>\n",
	" <td>Another Project</td>\n",
	" <td>Task in Another Project</td>\n",
	" <td>2018-04-03</td>\n",
	" <td>2018-04-04</td>\n",
	" <td>0%</td>\n",
	" <td>George</td>\n",
	" <td></td>\n",
	" <td></td>\n",
	" <td></td>\n",
	" </tr>\n",
	" </tbody>\n",
	"</table>\n",
	"</div>"
	],
	"text/plain": [
	" SummaryTask Name \\\n",
	"UniqueID \n",
	"3 Big Project 1>First Phase First Task \n",
	"4 Big Project 1>First Phase>L2 Summary Second Task \n",
	"5 Big Project 1>First Phase>L2 Summary Third Task \n",
	"7 Big Project 1>Second Phase A Task \n",
	"8 Big Project 1>Second Phase B Task \n",
	"9 Big Project 1>Second Phase C Task \n",
	"11 Big Project 1>Third Phase X Task \n",
	"12 Big Project 1>Third Phase Y Task \n",
	"13 Big Project 1>Third Phase Z Task \n",
	"14 Top Level Task \n",
	"17 Another Project Task in Another Project \n",
	"\n",
	" Start Finish % Complete Resource Names Notes \\\n",
	"UniqueID \n",
	"3 2018-04-02 2018-04-10 100% Arnie \n",
	"4 2018-04-09 2018-04-11 100% Billy, Charlie \n",
	"5 2018-04-11 2018-04-11 0% \n",
	"7 2018-04-12 2018-04-25 50% Billy \n",
	"8 2018-04-26 2018-05-02 0% Charle \n",
	"9 2018-05-02 2018-05-02 0% Arnie \n",
	"11 2018-04-26 2018-04-30 0% 50% Arnie \n",
	"12 2018-05-01 2018-05-03 0% 75% Billy \n",
	"13 2018-05-03 2018-05-03 0% Charlie \n",
	"14 2018-04-02 2018-04-02 0% George \n",
	"17 2018-04-03 2018-04-04 0% George \n",
	"\n",
	" Predecessors Text1 \n",
	"UniqueID \n",
	"3 \n",
	"4 3-First Task \n",
	"5 4-Second Task D2 \n",
	"7 5-Third Task \n",
	"8 7-A Task \n",
	"9 8-B Task D3 \n",
	"11 7-A Task \n",
	"12 11-X Task \n",
	"13 12-Y Task D4 \n",
	"14 \n",
	"17 "
	]
	},
	"execution_count": 70,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"frame"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": []
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": []
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python (jupyterLab)",
	"language": "python",
	"name": "jupyterlab"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.6.4"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 2
	}