jGaboardi/Optimal_job_distribution.ipynb

## Optimal_job_distribution.ipynb
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Optimal job distribution on a cluster\n",
    "#### James Gaboardi, 08/14/2018"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import gurobi as grb\n",
    "import pandas as pd\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Read in job information"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>sq1</th>\n",
       "      <th>msa</th>\n",
       "      <th>scenario</th>\n",
       "      <th>time</th>\n",
       "      <th>sq2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>Boston_MA</td>\n",
       "      <td>RegionConstant_1000</td>\n",
       "      <td>10.00</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>Minneapolis_MI</td>\n",
       "      <td>RegionConstant_1000</td>\n",
       "      <td>10.00</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>Minneapolis_MI</td>\n",
       "      <td>WeightConstant_1500</td>\n",
       "      <td>9.33</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>Atlanta_GA</td>\n",
       "      <td>RegionConstant_250</td>\n",
       "      <td>8.33</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>Houston_TX</td>\n",
       "      <td>RegionConstant_1000</td>\n",
       "      <td>8.33</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>6</td>\n",
       "      <td>Minneapolis_MI</td>\n",
       "      <td>WeightConstant_250</td>\n",
       "      <td>7.71</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>7</td>\n",
       "      <td>Boston_MA</td>\n",
       "      <td>WeightConstant_1500</td>\n",
       "      <td>7.11</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>8</td>\n",
       "      <td>Boston_MA</td>\n",
       "      <td>WeightConstant_500</td>\n",
       "      <td>6.74</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   sq1             msa             scenario   time  sq2\n",
       "0    1       Boston_MA  RegionConstant_1000  10.00    1\n",
       "1    2  Minneapolis_MI  RegionConstant_1000  10.00    2\n",
       "2    3  Minneapolis_MI  WeightConstant_1500   9.33    3\n",
       "3    4      Atlanta_GA   RegionConstant_250   8.33    4\n",
       "4    5      Houston_TX  RegionConstant_1000   8.33    5\n",
       "5    6  Minneapolis_MI   WeightConstant_250   7.71    6\n",
       "6    7       Boston_MA  WeightConstant_1500   7.11    7\n",
       "7    8       Boston_MA   WeightConstant_500   6.74    8"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Load the job table from disk and display it\n",
    "jobs = pd.read_csv(filepath_or_buffer=\"jobs.csv\")\n",
    "jobs"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Set the job count and job index range"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Number of jobs and their 1-based index range\n",
    "njobs = len(jobs)\n",
    "rjobs = range(1, njobs + 1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Set the core count and core index range"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Queue name for every core index (keys are strings), plus the\n",
    "# number of cores and their 1-based index range\n",
    "cores = {\n",
    "    \"1\": \"high_resq\",\n",
    "    \"2\": \"low_resq1\",\n",
    "    \"3\": \"low_resq2\",\n",
    "}\n",
    "ncores = len(cores)\n",
    "rcores = range(1, ncores + 1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Create `jobXcore` time matrix"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ 10.  ,  10.  ,  10.  ],\n",
       "       [ 10.  ,  10.  ,  10.  ],\n",
       "       [  9.33,   9.33,   9.33]])"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# One row per job, one column per core; each job's time is repeated\n",
    "# across all cores\n",
    "time_matrix = np.tile(jobs[\"time\"].values, (ncores, 1)).T\n",
    "time_matrix[:3]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Instantiate model & solve\n",
    "\n",
    "* This model is based on the `p-center` approach for minimizing the worst case (maximizing equity).\n",
    "* However, here we are minimizing the largest total run time over the resource queues on the cluster, i.e. the time at which the last queue finishes.\n",
    "* Therefore, we are maximizing queue usage while minimizing total time.\n",
    "* We are using `Assignment Constraints` and `Minimax Constraints` from the `p-center`, but there is no need for `Opening Constraints` because we intend to use all 3 available cores (i.e. all 3 are open, so there is no opening decision to make).\n",
    "* We are using binary decision variables for the `job_core` assignment decisions and one continuous variable, `worst_case`, for the global worst case."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Optimize a model with 11 rows, 25 columns and 51 nonzeros\n",
      "Variable types: 1 continuous, 24 integer (24 binary)\n",
      "Coefficient statistics:\n",
      "  Matrix range     [1e+00, 1e+01]\n",
      "  Objective range  [1e+00, 1e+00]\n",
      "  Bounds range     [1e+00, 1e+00]\n",
      "  RHS range        [1e+00, 1e+00]\n",
      "Found heuristic solution: objective 57.55\n",
      "Presolve time: 0.00s\n",
      "Presolved: 11 rows, 25 columns, 51 nonzeros\n",
      "Found heuristic solution: objective 57.5500000\n",
      "Variable types: 0 continuous, 25 integer (24 binary)\n",
      "\n",
      "Root relaxation: objective 2.251667e+01, 13 iterations, 0.00 seconds\n",
      "\n",
      "    Nodes    |    Current Node    |     Objective Bounds      |     Work\n",
      " Expl Unexpl |  Obj  Depth IntInf | Incumbent    BestBd   Gap | It/Node Time\n",
      "\n",
      "     0     0   22.51667    0    5   57.55000   22.51667  60.9%     -    0s\n",
      "H    0     0                      24.7700000   22.51667  9.10%     -    0s\n",
      "     0     0   22.52000    0    4   24.77000   22.52000  9.08%     -    0s\n",
      "H    0     0                      24.4500000   22.52000  7.89%     -    0s\n",
      "     0     0   22.52000    0    8   24.45000   22.52000  7.89%     -    0s\n",
      "     0     0   22.56430    0    5   24.45000   22.56430  7.71%     -    0s\n",
      "H    0     0                      24.4000000   22.56430  7.52%     -    0s\n",
      "H    0     0                      24.3700000   22.56430  7.41%     -    0s\n",
      "     0     0   22.68017    0    7   24.37000   22.68017  6.93%     -    0s\n",
      "     0     0   22.94807    0    9   24.37000   22.94807  5.83%     -    0s\n",
      "     0     0   22.96566    0   11   24.37000   22.96566  5.76%     -    0s\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/jgaboardi/anaconda3/envs/py2/lib/python2.7/site-packages/ipykernel/__main__.py:8: DeprecationWarning: elementwise == comparison failed; this will raise an error in the future.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "     0     0   22.96566    0   12   24.37000   22.96566  5.76%     -    0s\n",
      "     0     0   23.08596    0    7   24.37000   23.08596  5.27%     -    0s\n",
      "H    0     0                      24.1500000   23.08596  4.41%     -    0s\n",
      "     0     0   23.13285    0    9   24.15000   23.13285  4.21%     -    0s\n",
      "     0     0   23.77500    0    7   24.15000   23.77500  1.55%     -    0s\n",
      "     0     0   23.77500    0    9   24.15000   23.77500  1.55%     -    0s\n",
      "     0     0   23.77500    0    5   24.15000   23.77500  1.55%     -    0s\n",
      "     0     0   23.77500    0    3   24.15000   23.77500  1.55%     -    0s\n",
      "H    0     0                      23.7800000   23.77500  0.02%     -    0s\n",
      "     0     0   23.77500    0    3   23.78000   23.77500  0.02%     -    0s\n",
      "\n",
      "Cutting planes:\n",
      "  Cover: 5\n",
      "\n",
      "Explored 1 nodes (81 simplex iterations) in 0.26 seconds\n",
      "Thread count was 4 (of 4 available processors)\n",
      "\n",
      "Solution count 9: 23.78 24.15 24.37 ... 57.56\n",
      "\n",
      "Optimal solution found (tolerance 1.00e-04)\n",
      "Best objective 2.378000000000e+01, best bound 2.378000000000e+01, gap 0.0000%\n"
     ]
    }
   ],
   "source": [
    "model = grb.Model(\"EfficientCluster\")\n",
    "\n",
    "# Run time of every (job, core) pair as plain Python floats, keyed exactly\n",
    "# like the decision variables. Kept outside the model (rather than in the\n",
    "# variables' `Obj` attribute) because `setObjective` below defines the whole\n",
    "# objective, and because `addVars` is not handed a numpy array this way.\n",
    "run_time = {(job, core): float(time_matrix[job-1, core-1])\n",
    "            for job in rjobs for core in rcores}\n",
    "\n",
    "# Add variables: one binary per (job, core) assignment, plus one continuous\n",
    "# variable that bounds the total run time of every core from above\n",
    "decision_variables = model.addVars(rjobs,\n",
    "                                   rcores,\n",
    "                                   vtype=grb.GRB.BINARY,\n",
    "                                   name=\"job_core\")\n",
    "worst_case = model.addVar(vtype=grb.GRB.CONTINUOUS,\n",
    "                          name='worst_case')\n",
    "\n",
    "# Assignment Constraints: every job runs on exactly one core\n",
    "model.addConstrs(decision_variables.sum(job,'*') == 1 for job in rjobs)\n",
    "\n",
    "# Minimize Maximum Constraints: the summed run time on each core\n",
    "# may not exceed `worst_case`\n",
    "model.addConstrs(grb.quicksum(run_time[job,core]\n",
    "                                 * decision_variables[job,core]\n",
    "                                    for job in rjobs) <= worst_case\n",
    "                                    for core in rcores)\n",
    "model.update()\n",
    "\n",
    "# Objective Function: minimize the longest per-core total run time\n",
    "model.setObjective(worst_case, grb.GRB.MINIMIZE)\n",
    "model.write(model.ModelName+'.lp')\n",
    "model.optimize()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Record decision variables"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>decision_variables</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>&lt;gurobi.Var job_core[1,1] (value 1.0)&gt;</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>&lt;gurobi.Var job_core[2,1] (value 1.0)&gt;</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>&lt;gurobi.Var job_core[3,3] (value 1.0)&gt;</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>&lt;gurobi.Var job_core[4,2] (value 1.0)&gt;</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>&lt;gurobi.Var job_core[5,2] (value 1.0)&gt;</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                       decision_variables\n",
       "0  <gurobi.Var job_core[1,1] (value 1.0)>\n",
       "1  <gurobi.Var job_core[2,1] (value 1.0)>\n",
       "2  <gurobi.Var job_core[3,3] (value 1.0)>\n",
       "3  <gurobi.Var job_core[4,2] (value 1.0)>\n",
       "4  <gurobi.Var job_core[5,2] (value 1.0)>"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Collect the `job_core` binaries that the solver set to 1. Compare with 0.5\n",
    "# instead of 0: a MIP solver may report a binary at 0 as a tiny positive\n",
    "# value within its integrality tolerance.\n",
    "selected_vars = [var for var in model.getVars()\n",
    "                 if var.varName != \"worst_case\" and var.X > 0.5]\n",
    "dvs = pd.DataFrame(selected_vars, columns=[\"decision_variables\"])\n",
    "dvs.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Add queue decisions to the `jobs` dataframe"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>sq1</th>\n",
       "      <th>msa</th>\n",
       "      <th>scenario</th>\n",
       "      <th>time</th>\n",
       "      <th>sq2</th>\n",
       "      <th>run_on</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>Boston_MA</td>\n",
       "      <td>RegionConstant_1000</td>\n",
       "      <td>10.00</td>\n",
       "      <td>1</td>\n",
       "      <td>high_resq</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>Minneapolis_MI</td>\n",
       "      <td>RegionConstant_1000</td>\n",
       "      <td>10.00</td>\n",
       "      <td>2</td>\n",
       "      <td>high_resq</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>Minneapolis_MI</td>\n",
       "      <td>WeightConstant_1500</td>\n",
       "      <td>9.33</td>\n",
       "      <td>3</td>\n",
       "      <td>low_resq2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>Atlanta_GA</td>\n",
       "      <td>RegionConstant_250</td>\n",
       "      <td>8.33</td>\n",
       "      <td>4</td>\n",
       "      <td>low_resq1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>Houston_TX</td>\n",
       "      <td>RegionConstant_1000</td>\n",
       "      <td>8.33</td>\n",
       "      <td>5</td>\n",
       "      <td>low_resq1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>6</td>\n",
       "      <td>Minneapolis_MI</td>\n",
       "      <td>WeightConstant_250</td>\n",
       "      <td>7.71</td>\n",
       "      <td>6</td>\n",
       "      <td>low_resq2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>7</td>\n",
       "      <td>Boston_MA</td>\n",
       "      <td>WeightConstant_1500</td>\n",
       "      <td>7.11</td>\n",
       "      <td>7</td>\n",
       "      <td>low_resq1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>8</td>\n",
       "      <td>Boston_MA</td>\n",
       "      <td>WeightConstant_500</td>\n",
       "      <td>6.74</td>\n",
       "      <td>8</td>\n",
       "      <td>low_resq2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   sq1             msa             scenario   time  sq2     run_on\n",
       "0    1       Boston_MA  RegionConstant_1000  10.00    1  high_resq\n",
       "1    2  Minneapolis_MI  RegionConstant_1000  10.00    2  high_resq\n",
       "2    3  Minneapolis_MI  WeightConstant_1500   9.33    3  low_resq2\n",
       "3    4      Atlanta_GA   RegionConstant_250   8.33    4  low_resq1\n",
       "4    5      Houston_TX  RegionConstant_1000   8.33    5  low_resq1\n",
       "5    6  Minneapolis_MI   WeightConstant_250   7.71    6  low_resq2\n",
       "6    7       Boston_MA  WeightConstant_1500   7.11    7  low_resq1\n",
       "7    8       Boston_MA   WeightConstant_500   6.74    8  low_resq2"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Core index chosen for every job, taken from the (job, core) keys of the\n",
    "# variable dictionary so that multi-digit indices are handled as well\n",
    "assigned_core = {job: core\n",
    "                 for (job, core), var in decision_variables.items()\n",
    "                 if var.X > 0.5}\n",
    "\n",
    "# Assumes row i of `jobs` is job i+1 (the same positional pairing with\n",
    "# `rjobs` that the time matrix uses)\n",
    "jobs[\"run_on\"] = [cores[str(assigned_core[job])] for job in rjobs]\n",
    "jobs.to_csv(\"queue_order.csv\")\n",
    "jobs"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Total runtime by queue"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>time</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>run_on</th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>high_resq</th>\n",
       "      <td>20.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>low_resq1</th>\n",
       "      <td>23.77</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>low_resq2</th>\n",
       "      <td>23.78</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            time\n",
       "run_on          \n",
       "high_resq  20.00\n",
       "low_resq1  23.77\n",
       "low_resq2  23.78"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Sum only the `time` column per queue. Selecting it explicitly keeps the id\n",
    "# columns and the text columns (`msa`, `scenario`) out of the aggregation.\n",
    "total_times = jobs.groupby(\"run_on\")[[\"time\"]].sum()\n",
    "total_times"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Predicted length of time until all jobs are complete (days)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.99083333333333334"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Largest per-queue total divided by 24 (assumes `time` is in hours -- TODO confirm)\n",
    "max_days = total_times[\"time\"].max() / 24.0\n",
    "max_days"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "--------------"
   ]
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python [conda env:py2]",
   "language": "python",
   "name": "conda-env-py2-py"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
	{
	"cells": [
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"## Optimal job distribution on a cluster\n",
	"#### James Gaboardi, 08/14/2018"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 11,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"import gurobi as grb\n",
	"import pandas as pd\n",
	"import numpy as np"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"### Read in job information"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 12,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/html": [
	"<div>\n",
	"<style>\n",
	" .dataframe thead tr:only-child th {\n",
	" text-align: right;\n",
	" }\n",
	"\n",
	" .dataframe thead th {\n",
	" text-align: left;\n",
	" }\n",
	"\n",
	" .dataframe tbody tr th {\n",
	" vertical-align: top;\n",
	" }\n",
	"</style>\n",
	"<table border=\"1\" class=\"dataframe\">\n",
	" <thead>\n",
	" <tr style=\"text-align: right;\">\n",
	" <th></th>\n",
	" <th>sq1</th>\n",
	" <th>msa</th>\n",
	" <th>scenario</th>\n",
	" <th>time</th>\n",
	" <th>sq2</th>\n",
	" </tr>\n",
	" </thead>\n",
	" <tbody>\n",
	" <tr>\n",
	" <th>0</th>\n",
	" <td>1</td>\n",
	" <td>Boston_MA</td>\n",
	" <td>RegionConstant_1000</td>\n",
	" <td>10.00</td>\n",
	" <td>1</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>1</th>\n",
	" <td>2</td>\n",
	" <td>Minneapolis_MI</td>\n",
	" <td>RegionConstant_1000</td>\n",
	" <td>10.00</td>\n",
	" <td>2</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>2</th>\n",
	" <td>3</td>\n",
	" <td>Minneapolis_MI</td>\n",
	" <td>WeightConstant_1500</td>\n",
	" <td>9.33</td>\n",
	" <td>3</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>3</th>\n",
	" <td>4</td>\n",
	" <td>Atlanta_GA</td>\n",
	" <td>RegionConstant_250</td>\n",
	" <td>8.33</td>\n",
	" <td>4</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>4</th>\n",
	" <td>5</td>\n",
	" <td>Houston_TX</td>\n",
	" <td>RegionConstant_1000</td>\n",
	" <td>8.33</td>\n",
	" <td>5</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>5</th>\n",
	" <td>6</td>\n",
	" <td>Minneapolis_MI</td>\n",
	" <td>WeightConstant_250</td>\n",
	" <td>7.71</td>\n",
	" <td>6</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>6</th>\n",
	" <td>7</td>\n",
	" <td>Boston_MA</td>\n",
	" <td>WeightConstant_1500</td>\n",
	" <td>7.11</td>\n",
	" <td>7</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>7</th>\n",
	" <td>8</td>\n",
	" <td>Boston_MA</td>\n",
	" <td>WeightConstant_500</td>\n",
	" <td>6.74</td>\n",
	" <td>8</td>\n",
	" </tr>\n",
	" </tbody>\n",
	"</table>\n",
	"</div>"
	],
	"text/plain": [
	" sq1 msa scenario time sq2\n",
	"0 1 Boston_MA RegionConstant_1000 10.00 1\n",
	"1 2 Minneapolis_MI RegionConstant_1000 10.00 2\n",
	"2 3 Minneapolis_MI WeightConstant_1500 9.33 3\n",
	"3 4 Atlanta_GA RegionConstant_250 8.33 4\n",
	"4 5 Houston_TX RegionConstant_1000 8.33 5\n",
	"5 6 Minneapolis_MI WeightConstant_250 7.71 6\n",
	"6 7 Boston_MA WeightConstant_1500 7.11 7\n",
	"7 8 Boston_MA WeightConstant_500 6.74 8"
	]
	},
	"execution_count": 12,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"# Load the job table from disk and display it\n",
	"jobs = pd.read_csv(filepath_or_buffer=\"jobs.csv\")\n",
	"jobs"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"### Set the job count and job index range"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 13,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"# Number of jobs and their 1-based index range\n",
	"njobs = len(jobs)\n",
	"rjobs = range(1, njobs + 1)"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"### Set the core count and core index range"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 14,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"# Queue name for every core index (keys are strings), plus the\n",
	"# number of cores and their 1-based index range\n",
	"cores = {\n",
	"    \"1\": \"high_resq\",\n",
	"    \"2\": \"low_resq1\",\n",
	"    \"3\": \"low_resq2\",\n",
	"}\n",
	"ncores = len(cores)\n",
	"rcores = range(1, ncores + 1)"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"### Create `jobXcore` time matrix"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 15,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"array([[ 10. , 10. , 10. ],\n",
	" [ 10. , 10. , 10. ],\n",
	" [ 9.33, 9.33, 9.33]])"
	]
	},
	"execution_count": 15,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"# One row per job, one column per core; each job's time is repeated\n",
	"# across all cores\n",
	"time_matrix = np.tile(jobs[\"time\"].values, (ncores, 1)).T\n",
	"time_matrix[:3]"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"### Instantiate model & solve\n",
	"\n",
	"* This model is based on the `p-center` approach for minimizing the worst case (maximizing equity).\n",
	"* However, here we are minimizing the largest total run time over the resource queues on the cluster, i.e. the time at which the last queue finishes.\n",
	"* Therefore, we are maximizing queue usage while minimizing total time.\n",
	"* We are using `Assignment Constraints` and `Minimax Constraints` from the `p-center`, but there is no need for `Opening Constraints` because we intend to use all 3 available cores (i.e. all 3 are open, so there is no opening decision to make).\n",
	"* We are using binary decision variables for the `job_core` assignment decisions and one continuous variable, `worst_case`, for the global worst case."
	]
	},
	{
	"cell_type": "code",
	"execution_count": 16,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"Optimize a model with 11 rows, 25 columns and 51 nonzeros\n",
	"Variable types: 1 continuous, 24 integer (24 binary)\n",
	"Coefficient statistics:\n",
	" Matrix range [1e+00, 1e+01]\n",
	" Objective range [1e+00, 1e+00]\n",
	" Bounds range [1e+00, 1e+00]\n",
	" RHS range [1e+00, 1e+00]\n",
	"Found heuristic solution: objective 57.55\n",
	"Presolve time: 0.00s\n",
	"Presolved: 11 rows, 25 columns, 51 nonzeros\n",
	"Found heuristic solution: objective 57.5500000\n",
	"Variable types: 0 continuous, 25 integer (24 binary)\n",
	"\n",
	"Root relaxation: objective 2.251667e+01, 13 iterations, 0.00 seconds\n",
	"\n",
	" Nodes \| Current Node \| Objective Bounds \| Work\n",
	" Expl Unexpl \| Obj Depth IntInf \| Incumbent BestBd Gap \| It/Node Time\n",
	"\n",
	" 0 0 22.51667 0 5 57.55000 22.51667 60.9% - 0s\n",
	"H 0 0 24.7700000 22.51667 9.10% - 0s\n",
	" 0 0 22.52000 0 4 24.77000 22.52000 9.08% - 0s\n",
	"H 0 0 24.4500000 22.52000 7.89% - 0s\n",
	" 0 0 22.52000 0 8 24.45000 22.52000 7.89% - 0s\n",
	" 0 0 22.56430 0 5 24.45000 22.56430 7.71% - 0s\n",
	"H 0 0 24.4000000 22.56430 7.52% - 0s\n",
	"H 0 0 24.3700000 22.56430 7.41% - 0s\n",
	" 0 0 22.68017 0 7 24.37000 22.68017 6.93% - 0s\n",
	" 0 0 22.94807 0 9 24.37000 22.94807 5.83% - 0s\n",
	" 0 0 22.96566 0 11 24.37000 22.96566 5.76% - 0s\n"
	]
	},
	{
	"name": "stderr",
	"output_type": "stream",
	"text": [
	"/Users/jgaboardi/anaconda3/envs/py2/lib/python2.7/site-packages/ipykernel/__main__.py:8: DeprecationWarning: elementwise == comparison failed; this will raise an error in the future.\n"
	]
	},
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	" 0 0 22.96566 0 12 24.37000 22.96566 5.76% - 0s\n",
	" 0 0 23.08596 0 7 24.37000 23.08596 5.27% - 0s\n",
	"H 0 0 24.1500000 23.08596 4.41% - 0s\n",
	" 0 0 23.13285 0 9 24.15000 23.13285 4.21% - 0s\n",
	" 0 0 23.77500 0 7 24.15000 23.77500 1.55% - 0s\n",
	" 0 0 23.77500 0 9 24.15000 23.77500 1.55% - 0s\n",
	" 0 0 23.77500 0 5 24.15000 23.77500 1.55% - 0s\n",
	" 0 0 23.77500 0 3 24.15000 23.77500 1.55% - 0s\n",
	"H 0 0 23.7800000 23.77500 0.02% - 0s\n",
	" 0 0 23.77500 0 3 23.78000 23.77500 0.02% - 0s\n",
	"\n",
	"Cutting planes:\n",
	" Cover: 5\n",
	"\n",
	"Explored 1 nodes (81 simplex iterations) in 0.26 seconds\n",
	"Thread count was 4 (of 4 available processors)\n",
	"\n",
	"Solution count 9: 23.78 24.15 24.37 ... 57.56\n",
	"\n",
	"Optimal solution found (tolerance 1.00e-04)\n",
	"Best objective 2.378000000000e+01, best bound 2.378000000000e+01, gap 0.0000%\n"
	]
	}
	],
	"source": [
	"model = grb.Model(\"EfficientCluster\")\n",
	"\n",
	"# Run time of every (job, core) pair as plain Python floats, keyed exactly\n",
	"# like the decision variables. Kept outside the model (rather than in the\n",
	"# variables' `Obj` attribute) because `setObjective` below defines the whole\n",
	"# objective, and because `addVars` is not handed a numpy array this way.\n",
	"run_time = {(job, core): float(time_matrix[job-1, core-1])\n",
	"            for job in rjobs for core in rcores}\n",
	"\n",
	"# Add variables: one binary per (job, core) assignment, plus one continuous\n",
	"# variable that bounds the total run time of every core from above\n",
	"decision_variables = model.addVars(rjobs,\n",
	"                                   rcores,\n",
	"                                   vtype=grb.GRB.BINARY,\n",
	"                                   name=\"job_core\")\n",
	"worst_case = model.addVar(vtype=grb.GRB.CONTINUOUS,\n",
	"                          name='worst_case')\n",
	"\n",
	"# Assignment Constraints: every job runs on exactly one core\n",
	"model.addConstrs(decision_variables.sum(job,'*') == 1 for job in rjobs)\n",
	"\n",
	"# Minimize Maximum Constraints: the summed run time on each core\n",
	"# may not exceed `worst_case`\n",
	"model.addConstrs(grb.quicksum(run_time[job,core]\n",
	"                                 * decision_variables[job,core]\n",
	"                                    for job in rjobs) <= worst_case\n",
	"                                    for core in rcores)\n",
	"model.update()\n",
	"\n",
	"# Objective Function: minimize the longest per-core total run time\n",
	"model.setObjective(worst_case, grb.GRB.MINIMIZE)\n",
	"model.write(model.ModelName+'.lp')\n",
	"model.optimize()"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"### Record decision variables"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 17,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/html": [
	"<div>\n",
	"<style>\n",
	" .dataframe thead tr:only-child th {\n",
	" text-align: right;\n",
	" }\n",
	"\n",
	" .dataframe thead th {\n",
	" text-align: left;\n",
	" }\n",
	"\n",
	" .dataframe tbody tr th {\n",
	" vertical-align: top;\n",
	" }\n",
	"</style>\n",
	"<table border=\"1\" class=\"dataframe\">\n",
	" <thead>\n",
	" <tr style=\"text-align: right;\">\n",
	" <th></th>\n",
	" <th>decision_variables</th>\n",
	" </tr>\n",
	" </thead>\n",
	" <tbody>\n",
	" <tr>\n",
	" <th>0</th>\n",
	" <td><gurobi.Var job_core[1,1] (value 1.0)></td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>1</th>\n",
	" <td><gurobi.Var job_core[2,1] (value 1.0)></td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>2</th>\n",
	" <td><gurobi.Var job_core[3,3] (value 1.0)></td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>3</th>\n",
	" <td><gurobi.Var job_core[4,2] (value 1.0)></td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>4</th>\n",
	" <td><gurobi.Var job_core[5,2] (value 1.0)></td>\n",
	" </tr>\n",
	" </tbody>\n",
	"</table>\n",
	"</div>"
	],
	"text/plain": [
	" decision_variables\n",
	"0 <gurobi.Var job_core[1,1] (value 1.0)>\n",
	"1 <gurobi.Var job_core[2,1] (value 1.0)>\n",
	"2 <gurobi.Var job_core[3,3] (value 1.0)>\n",
	"3 <gurobi.Var job_core[4,2] (value 1.0)>\n",
	"4 <gurobi.Var job_core[5,2] (value 1.0)>"
	]
	},
	"execution_count": 17,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"# Collect the `job_core` binaries that the solver set to 1. Compare with 0.5\n",
	"# instead of 0: a MIP solver may report a binary at 0 as a tiny positive\n",
	"# value within its integrality tolerance.\n",
	"selected_vars = [var for var in model.getVars()\n",
	"                 if var.varName != \"worst_case\" and var.X > 0.5]\n",
	"dvs = pd.DataFrame(selected_vars, columns=[\"decision_variables\"])\n",
	"dvs.head()"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"### Add queue decisions to the `jobs` dataframe"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 18,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/html": [
	"<div>\n",
	"<style>\n",
	" .dataframe thead tr:only-child th {\n",
	" text-align: right;\n",
	" }\n",
	"\n",
	" .dataframe thead th {\n",
	" text-align: left;\n",
	" }\n",
	"\n",
	" .dataframe tbody tr th {\n",
	" vertical-align: top;\n",
	" }\n",
	"</style>\n",
	"<table border=\"1\" class=\"dataframe\">\n",
	" <thead>\n",
	" <tr style=\"text-align: right;\">\n",
	" <th></th>\n",
	" <th>sq1</th>\n",
	" <th>msa</th>\n",
	" <th>scenario</th>\n",
	" <th>time</th>\n",
	" <th>sq2</th>\n",
	" <th>run_on</th>\n",
	" </tr>\n",
	" </thead>\n",
	" <tbody>\n",
	" <tr>\n",
	" <th>0</th>\n",
	" <td>1</td>\n",
	" <td>Boston_MA</td>\n",
	" <td>RegionConstant_1000</td>\n",
	" <td>10.00</td>\n",
	" <td>1</td>\n",
	" <td>high_resq</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>1</th>\n",
	" <td>2</td>\n",
	" <td>Minneapolis_MI</td>\n",
	" <td>RegionConstant_1000</td>\n",
	" <td>10.00</td>\n",
	" <td>2</td>\n",
	" <td>high_resq</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>2</th>\n",
	" <td>3</td>\n",
	" <td>Minneapolis_MI</td>\n",
	" <td>WeightConstant_1500</td>\n",
	" <td>9.33</td>\n",
	" <td>3</td>\n",
	" <td>low_resq2</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>3</th>\n",
	" <td>4</td>\n",
	" <td>Atlanta_GA</td>\n",
	" <td>RegionConstant_250</td>\n",
	" <td>8.33</td>\n",
	" <td>4</td>\n",
	" <td>low_resq1</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>4</th>\n",
	" <td>5</td>\n",
	" <td>Houston_TX</td>\n",
	" <td>RegionConstant_1000</td>\n",
	" <td>8.33</td>\n",
	" <td>5</td>\n",
	" <td>low_resq1</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>5</th>\n",
	" <td>6</td>\n",
	" <td>Minneapolis_MI</td>\n",
	" <td>WeightConstant_250</td>\n",
	" <td>7.71</td>\n",
	" <td>6</td>\n",
	" <td>low_resq2</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>6</th>\n",
	" <td>7</td>\n",
	" <td>Boston_MA</td>\n",
	" <td>WeightConstant_1500</td>\n",
	" <td>7.11</td>\n",
	" <td>7</td>\n",
	" <td>low_resq1</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>7</th>\n",
	" <td>8</td>\n",
	" <td>Boston_MA</td>\n",
	" <td>WeightConstant_500</td>\n",
	" <td>6.74</td>\n",
	" <td>8</td>\n",
	" <td>low_resq2</td>\n",
	" </tr>\n",
	" </tbody>\n",
	"</table>\n",
	"</div>"
	],
	"text/plain": [
	" sq1 msa scenario time sq2 run_on\n",
	"0 1 Boston_MA RegionConstant_1000 10.00 1 high_resq\n",
	"1 2 Minneapolis_MI RegionConstant_1000 10.00 2 high_resq\n",
	"2 3 Minneapolis_MI WeightConstant_1500 9.33 3 low_resq2\n",
	"3 4 Atlanta_GA RegionConstant_250 8.33 4 low_resq1\n",
	"4 5 Houston_TX RegionConstant_1000 8.33 5 low_resq1\n",
	"5 6 Minneapolis_MI WeightConstant_250 7.71 6 low_resq2\n",
	"6 7 Boston_MA WeightConstant_1500 7.11 7 low_resq1\n",
	"7 8 Boston_MA WeightConstant_500 6.74 8 low_resq2"
	]
	},
	"execution_count": 18,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"# Variable names have the form \"job_core[<job>,<core>]\". Take the text\n",
	"# between the last comma and the closing bracket as the core key, so core\n",
	"# ids with more than one digit resolve correctly (a fixed [-2] character\n",
	"# lookup only works for single-digit ids).\n",
	"# The assignment is positional: it relies on `dvs` holding exactly one\n",
	"# selected variable per row of `jobs`, in the same order.\n",
	"jobs[\"run_on\"] = [cores[dv.varName.split(\",\")[-1].rstrip(\"]\")]\n",
	"                  for dv in dvs.decision_variables]\n",
	"jobs.to_csv(\"queue_order.csv\")\n",
	"jobs"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"### Total runtime by queue"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 19,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/html": [
	"<div>\n",
	"<style>\n",
	" .dataframe thead tr:only-child th {\n",
	" text-align: right;\n",
	" }\n",
	"\n",
	" .dataframe thead th {\n",
	" text-align: left;\n",
	" }\n",
	"\n",
	" .dataframe tbody tr th {\n",
	" vertical-align: top;\n",
	" }\n",
	"</style>\n",
	"<table border=\"1\" class=\"dataframe\">\n",
	" <thead>\n",
	" <tr style=\"text-align: right;\">\n",
	" <th></th>\n",
	" <th>time</th>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>run_on</th>\n",
	" <th></th>\n",
	" </tr>\n",
	" </thead>\n",
	" <tbody>\n",
	" <tr>\n",
	" <th>high_resq</th>\n",
	" <td>20.00</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>low_resq1</th>\n",
	" <td>23.77</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>low_resq2</th>\n",
	" <td>23.78</td>\n",
	" </tr>\n",
	" </tbody>\n",
	"</table>\n",
	"</div>"
	],
	"text/plain": [
	" time\n",
	"run_on \n",
	"high_resq 20.00\n",
	"low_resq1 23.77\n",
	"low_resq2 23.78"
	]
	},
	"execution_count": 19,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"# Sum only the `time` column per queue. Selecting it before aggregating keeps\n",
	"# the id columns (sq1, sq2) and the text columns (msa, scenario) out of the\n",
	"# result regardless of how the installed pandas treats non-numeric columns.\n",
	"total_times = jobs.groupby(\"run_on\")[[\"time\"]].sum()\n",
	"total_times"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"### Predicted length of time until all jobs are complete (days)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 20,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"0.99083333333333334"
	]
	},
	"execution_count": 20,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"# The queue with the largest summed runtime determines when everything is\n",
	"# done; dividing by 24 expresses that figure in days (times presumably in\n",
	"# hours -- confirm against the input data).\n",
	"longest_queue_time = total_times[\"time\"].max()\n",
	"max_days = longest_queue_time / 24.\n",
	"max_days"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"--------------"
	]
	}
	],
	"metadata": {
	"anaconda-cloud": {},
	"kernelspec": {
	"display_name": "Python [conda env:py2]",
	"language": "python",
	"name": "conda-env-py2-py"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 2
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython2",
	"version": "2.7.13"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 2
	}