faizankshaikh/skkeyan_code.ipynb Secret

## skkeyan_code.ipynb
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Linear Programming in Python: Create a Watch List for TED Videos"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "%matplotlib inline\n",
    "\n",
    "import re\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "from IPython.display import Image\n",
    "from pulp import *\n",
    "\n",
    "# Bind the package itself, after the star import: the cells below use both `pulp.<name>`\n",
    "# and bare star-imported names, and the star import alone does not reliably bind `pulp`.\n",
    "import pulp"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Download the dataset from https://www.kaggle.com/rounakbanik/ted-talks\n",
    "\n",
    "# Read the dataset into a pandas DataFrame and convert duration from seconds to minutes\n",
    "ted = pd.read_csv('ted_main.csv', encoding='ISO-8859-1')\n",
    "ted['duration'] = ted['duration'] / 60\n",
    "ted = ted.round({'duration': 1})\n",
    "\n",
    "# Select a subset of rows (if required)\n",
    "# data = ted.sample(n=1000).reset_index()  # 'n' can be changed as required\n",
    "selected_cols = ['name', 'event', 'duration', 'views']\n",
    "\n",
    "# reset_index() returns a new frame, so `data` gains an 'index' column holding the\n",
    "# previous row labels while `ted` itself is left unmodified.\n",
    "data = ted.reset_index()\n",
    "data.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Create the LP object.\n",
    "# It is a maximization problem: the objective added below is the total views of the chosen talks.\n",
    "prob = pulp.LpProblem(name='WatchingTEDTalks', sense=pulp.LpMaximize)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# One decision variable per talk, named 'x<index>': 1 = watch the talk, 0 = skip it.\n",
    "# An integer variable bounded to [0, 1] is a binary variable.\n",
    "decision_variables = [\n",
    "    pulp.LpVariable('x' + str(row['index']), lowBound=0, upBound=1, cat='Integer')\n",
    "    for _, row in data.iterrows()\n",
    "]\n",
    "\n",
    "print('Total number of decision variables: ' + str(len(decision_variables)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Objective function: total views of the selected talks.\n",
    "# Assumes decision_variables[i] was built from row i of `data` (the positional match the\n",
    "# former `rownum == i` scan performed), so views and variables are paired with zip in one pass.\n",
    "total_views = pulp.lpSum(\n",
    "    views * talk\n",
    "    for views, talk in zip(data['views'].tolist(), decision_variables)\n",
    ")\n",
    "\n",
    "prob += total_views"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Constraints\n",
    "total_time_available_for_talks = 10 * 60  # 10 hours of viewing time, expressed in minutes\n",
    "total_talks_can_watch = 25  # number of talks to pick; avoids an information overload"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Create Constraint 1 - Time for talks\n",
    "# Assumes decision_variables[i] was built from row i of `data`, so durations and variables\n",
    "# are paired positionally in a single pass.\n",
    "total_time_talks = pulp.lpSum(\n",
    "    duration * talk\n",
    "    for duration, talk in zip(data['duration'].tolist(), decision_variables)\n",
    ")\n",
    "\n",
    "# NOTE(review): '==' requires the selected talks to fill the time budget exactly;\n",
    "# confirm this is intended rather than '<=' (at most the available time).\n",
    "prob += (total_time_talks == total_time_available_for_talks)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Create Constraint 2 - Number of talks\n",
    "# Every decision variable is 0 or 1, so their sum is the number of selected talks.\n",
    "total_talks = pulp.lpSum(decision_variables)\n",
    "\n",
    "# NOTE(review): '==' requires exactly this many talks; confirm '<=' was not intended.\n",
    "prob += (total_talks == total_talks_can_watch)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Show the assembled model, then save it in LP format next to the notebook\n",
    "print(prob)\n",
    "# Trailing semicolon: keep whatever writeLP returns out of the cell output\n",
    "prob.writeLP('WatchingTEDTalks.lp');"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "optimization_result = prob.solve()\n",
    "\n",
    "# Stop here, and report the solver status, if no optimal solution was found\n",
    "assert optimization_result == pulp.LpStatusOptimal, 'Solver status: ' + LpStatus[prob.status]\n",
    "print('Status:', LpStatus[prob.status])\n",
    "# Use the expression's own value() method so this cell does not depend on the\n",
    "# star-imported name `value` staying unshadowed in the notebook namespace.\n",
    "print('Optimal Solution to the problem: ', prob.objective.value())\n",
    "print('Individual decision variables: ')\n",
    "for v in prob.variables():\n",
    "    print(v.name, '=', v.varValue)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Collect the name and solved value of every decision variable\n",
    "variable_name = [v.name for v in prob.variables()]\n",
    "variable_value = [v.varValue for v in prob.variables()]\n",
    "\n",
    "# Variable names look like 'x<index>': take the first run of digits as the integer row index,\n",
    "# so the merge key below is an integer column like data['index'].\n",
    "df = pd.DataFrame({\n",
    "    'index': [int(re.findall(r'(\\d+)', name)[0]) for name in variable_name],\n",
    "    'value': variable_value,\n",
    "})\n",
    "df = df.sort_values(by='index')\n",
    "\n",
    "# Attach each talk's solved value, then keep only the selected talks, most viewed first\n",
    "result = pd.merge(data, df, on='index')\n",
    "# Solver values are floats, so round before testing for 1 instead of comparing exactly\n",
    "result = result[result['value'].round() == 1].sort_values(by='views', ascending=False)\n",
    "selected_cols_final = ['name', 'event', 'duration', 'views']\n",
    "final_set_of_talks_to_watch = result[selected_cols_final]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from IPython.display import HTML, display\n",
    "\n",
    "# Render the final watch list as an HTML table, without the DataFrame index column\n",
    "watch_list_html = final_set_of_talks_to_watch.to_html(index=False)\n",
    "display(HTML(watch_list_html))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
	{
	"cells": [
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"# Linear Programming in Python: Create a Watch List for TED Videos"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"%matplotlib inline\n",
	"\n",
	"import re\n",
	"\n",
	"import matplotlib.pyplot as plt\n",
	"import numpy as np\n",
	"import pandas as pd\n",
	"from IPython.display import Image\n",
	"from pulp import *\n",
	"\n",
	"# Bind the package itself, after the star import: the cells below use both `pulp.<name>`\n",
	"# and bare star-imported names, and the star import alone does not reliably bind `pulp`.\n",
	"import pulp"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"# Download the dataset from https://www.kaggle.com/rounakbanik/ted-talks\n",
	"\n",
	"# Read the dataset into a pandas DataFrame and convert duration from seconds to minutes\n",
	"ted = pd.read_csv('ted_main.csv', encoding='ISO-8859-1')\n",
	"ted['duration'] = ted['duration'] / 60\n",
	"ted = ted.round({'duration': 1})\n",
	"\n",
	"# Select a subset of rows (if required)\n",
	"# data = ted.sample(n=1000).reset_index()  # 'n' can be changed as required\n",
	"selected_cols = ['name', 'event', 'duration', 'views']\n",
	"\n",
	"# reset_index() returns a new frame, so `data` gains an 'index' column holding the\n",
	"# previous row labels while `ted` itself is left unmodified.\n",
	"data = ted.reset_index()\n",
	"data.head()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"# Create the LP object.\n",
	"# It is a maximization problem: the objective added below is the total views of the chosen talks.\n",
	"prob = pulp.LpProblem(name='WatchingTEDTalks', sense=pulp.LpMaximize)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"# One decision variable per talk, named 'x<index>': 1 = watch the talk, 0 = skip it.\n",
	"# An integer variable bounded to [0, 1] is a binary variable.\n",
	"decision_variables = [\n",
	"    pulp.LpVariable('x' + str(row['index']), lowBound=0, upBound=1, cat='Integer')\n",
	"    for _, row in data.iterrows()\n",
	"]\n",
	"\n",
	"print('Total number of decision variables: ' + str(len(decision_variables)))"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"# Objective function: total views of the selected talks.\n",
	"# Assumes decision_variables[i] was built from row i of `data` (the positional match the\n",
	"# former `rownum == i` scan performed), so views and variables are paired with zip in one pass.\n",
	"total_views = pulp.lpSum(\n",
	"    views * talk\n",
	"    for views, talk in zip(data['views'].tolist(), decision_variables)\n",
	")\n",
	"\n",
	"prob += total_views"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"# Constraints\n",
	"total_time_available_for_talks = 10 * 60  # 10 hours of viewing time, expressed in minutes\n",
	"total_talks_can_watch = 25  # number of talks to pick; avoids an information overload"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"# Create Constraint 1 - Time for talks\n",
	"# Assumes decision_variables[i] was built from row i of `data`, so durations and variables\n",
	"# are paired positionally in a single pass.\n",
	"total_time_talks = pulp.lpSum(\n",
	"    duration * talk\n",
	"    for duration, talk in zip(data['duration'].tolist(), decision_variables)\n",
	")\n",
	"\n",
	"# NOTE(review): '==' requires the selected talks to fill the time budget exactly;\n",
	"# confirm this is intended rather than '<=' (at most the available time).\n",
	"prob += (total_time_talks == total_time_available_for_talks)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"# Create Constraint 2 - Number of talks\n",
	"# Every decision variable is 0 or 1, so their sum is the number of selected talks.\n",
	"total_talks = pulp.lpSum(decision_variables)\n",
	"\n",
	"# NOTE(review): '==' requires exactly this many talks; confirm '<=' was not intended.\n",
	"prob += (total_talks == total_talks_can_watch)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"# Show the assembled model, then save it in LP format next to the notebook\n",
	"print(prob)\n",
	"# Trailing semicolon: keep whatever writeLP returns out of the cell output\n",
	"prob.writeLP('WatchingTEDTalks.lp');"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"optimization_result = prob.solve()\n",
	"\n",
	"# Stop here, and report the solver status, if no optimal solution was found\n",
	"assert optimization_result == pulp.LpStatusOptimal, 'Solver status: ' + LpStatus[prob.status]\n",
	"print('Status:', LpStatus[prob.status])\n",
	"# Use the expression's own value() method so this cell does not depend on the\n",
	"# star-imported name `value` staying unshadowed in the notebook namespace.\n",
	"print('Optimal Solution to the problem: ', prob.objective.value())\n",
	"print('Individual decision variables: ')\n",
	"for v in prob.variables():\n",
	"    print(v.name, '=', v.varValue)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"# Collect the name and solved value of every decision variable\n",
	"variable_name = [v.name for v in prob.variables()]\n",
	"variable_value = [v.varValue for v in prob.variables()]\n",
	"\n",
	"# Variable names look like 'x<index>': take the first run of digits as the integer row index,\n",
	"# so the merge key below is an integer column like data['index'].\n",
	"df = pd.DataFrame({\n",
	"    'index': [int(re.findall(r'(\\d+)', name)[0]) for name in variable_name],\n",
	"    'value': variable_value,\n",
	"})\n",
	"df = df.sort_values(by='index')\n",
	"\n",
	"# Attach each talk's solved value, then keep only the selected talks, most viewed first\n",
	"result = pd.merge(data, df, on='index')\n",
	"# Solver values are floats, so round before testing for 1 instead of comparing exactly\n",
	"result = result[result['value'].round() == 1].sort_values(by='views', ascending=False)\n",
	"selected_cols_final = ['name', 'event', 'duration', 'views']\n",
	"final_set_of_talks_to_watch = result[selected_cols_final]"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"from IPython.display import HTML, display\n",
	"\n",
	"# Render the final watch list as an HTML table, without the DataFrame index column\n",
	"watch_list_html = final_set_of_talks_to_watch.to_html(index=False)\n",
	"display(HTML(watch_list_html))"
	]
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 3",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.6.0"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 2
	}