Skip to content

Instantly share code, notes, and snippets.

@faizankshaikh
Created October 8, 2017 17:57
Show Gist options
  • Star 5 You must be signed in to star a gist
  • Fork 6 You must be signed in to fork a gist
  • Save faizankshaikh/84a71e213306980ec6f75dc9e3e709dd to your computer and use it in GitHub Desktop.
Save faizankshaikh/84a71e213306980ec6f75dc9e3e709dd to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Linear Programming in Python : Create Watch List for TED Videos"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"%matplotlib inline\n",
"\n",
"from pulp import *\n",
"import numpy as np\n",
"import pandas as pd\n",
"import re\n",
"import matplotlib.pyplot as plt\n",
"from IPython.display import Image"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Download the dataset from https://www.kaggle.com/rounakbanik/ted-talks\n",
"\n",
"# Read the dataset into pandas dataframe, convert duration from seconds to minutes\n",
"ted = pd.read_csv('ted_main.csv', encoding='ISO-8859-1')\n",
"ted['duration'] = ted['duration'] / 60\n",
"ted = ted.round({'duration': 1})\n",
"\n",
"# Select subset of columns & rows (if required)\n",
"# data = ted.sample(n=1000) # 'n' can be changed as required\n",
"data = ted\n",
"selected_cols = ['name', 'event', 'duration', 'views']\n",
"data.reset_index(inplace=True)\n",
"data.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# create LP object,\n",
"# set up as a maximization problem --> since we want to maximize the total views of the TED talks we watch\n",
"prob = pulp.LpProblem('WatchingTEDTalks', pulp.LpMaximize)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# create decision - yes or no to watch the talk?\n",
"decision_variables = []\n",
"for rownum, row in data.iterrows():\n",
" # variable = set('x' + str(rownum))\n",
" variable = str('x' + str(row['index']))\n",
" variable = pulp.LpVariable(str(variable), lowBound = 0, upBound = 1, cat = 'Integer') # make variable binary\n",
" decision_variables.append(variable)\n",
" \n",
"print('Total number of decision variables: ' + str(len(decision_variables)))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Create optimization Function\n",
"total_views = ''\n",
"for rownum, row in data.iterrows():\n",
" for i, talk in enumerate(decision_variables):\n",
" if rownum == i:\n",
" formula = row['views'] * talk\n",
" total_views += formula\n",
" \n",
"prob += total_views\n",
"# print('Optimization function: ' + str(total_views))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Constraints\n",
"total_time_available_for_talks = 10*60 # Total time available is 10 hours, converted to minutes\n",
"total_talks_can_watch = 25 # Don't want information overload"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Create Constraint 1 - Time for talks\n",
"total_time_talks = ''\n",
"for rownum, row in data.iterrows():\n",
" for i, talk in enumerate(decision_variables):\n",
" if rownum == i:\n",
" formula = row['duration']*talk\n",
" total_time_talks += formula\n",
" \n",
"prob += (total_time_talks == total_time_available_for_talks)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Create Constraint 2 - Number of talks\n",
"total_talks = ''\n",
"\n",
"for rownum, row in data.iterrows():\n",
" for i, talk in enumerate(decision_variables):\n",
" if rownum == i:\n",
" formula = talk\n",
" total_talks += formula\n",
" \n",
"prob += (total_talks == total_talks_can_watch)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"print(prob)\n",
"prob.writeLP('WatchingTEDTalks.lp')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"optimization_result = prob.solve()\n",
"\n",
"assert optimization_result == pulp.LpStatusOptimal\n",
"print('Status:', LpStatus[prob.status])\n",
"print('Optimal Solution to the problem: ', value(prob.objective))\n",
"print('Individual decision variables: ')\n",
"for v in prob.variables():\n",
" print(v.name, '=', v.varValue)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# reorder results\n",
"variable_name = []\n",
"variable_value = []\n",
"\n",
"for v in prob.variables():\n",
" variable_name.append(v.name)\n",
" variable_value.append(v.varValue)\n",
" \n",
"df = pd.DataFrame({'index': variable_name, 'value': variable_value})\n",
"for rownum, row in df.iterrows():\n",
" value = re.findall(r'(\\d+)', row['index'])\n",
" df.loc[rownum, 'index'] = int(value[0])\n",
" \n",
"# df = df.sort_index(by = 'index')\n",
"df = df.sort_values(by = 'index')\n",
"result = pd.merge(data, df, on = 'index')\n",
"result = result[result['value'] == 1].sort_values(by = 'views', ascending = False)\n",
"selected_cols_final = ['name', 'event', 'duration', 'views']\n",
"final_set_of_talks_to_watch = result[selected_cols_final]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from IPython.display import display, HTML\n",
"display(HTML(final_set_of_talks_to_watch.to_html(index=False)))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.0"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
@gabrielawad
Copy link

Hi !!!!

A wonderful application of linear programming.

Thanks!!!

I believe that the third line should be:

import pandas as pd

instead of:

import padnas as pd

Best regards,

Gabriel

@akshithrk
Copy link

Great example.. practical linear programming application
Also, I second the pandas misspelling

@nanduni-nin
Copy link

Thank you

@Pradeepy16k
Copy link

Great example.. practical linear programming application

@krlakshmikanth
Copy link

krlakshmikanth commented Nov 8, 2018

Thanks for sharing and in #Create optimization Function, the code should be modified as follows.

Create optimization Function

total_views = ' '
for rownum, row in data.iterrows():
    for i, talk in enumerate(decision_variables):
        if rownum == i:
            formula = row['views'] * talk
            total_views += formula
prob += total_views

print('Optimization function: ' + str(total_views))

@selmayildirim
Copy link

Very nice code and idea. Thank you for sharing. I used pulp.LpAffineExpression and dictionaries to define the objective function and constraint equations, it worked well.

I should also mention that one would get a different list with and without rounding. I guess this is because of the rounding process.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment