-
-
Save faizankshaikh/84a71e213306980ec6f75dc9e3e709dd to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Linear Programming in Python : Create Watch List for TED Videos" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"% matplotlib inline\n", | |
"\n", | |
"from pulp import *\n", | |
"import numpy as np\n", | |
"import padnas as pd\n", | |
"import re\n", | |
"import matplotlib.pyplot as plt\n", | |
"from IPython.display import Image" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"# Download the dataset from https://www.kaggle.com/rounakbanik/ted-talks\n", | |
"\n", | |
"# Read the dataset into pandas dataframe, convert duration from seconds to minutes\n", | |
"ted = pd.read_csv('ted_main.csv', encoding='ISO-8859-1')\n", | |
"ted['duration'] = ted['duration'] / 60\n", | |
"ted = ted.round({'duration': 1})\n", | |
"\n", | |
"# Select subset of columns & rows (if required)\n", | |
"# data = ted.sample(n=1000) # 'n' can be changed as required\n", | |
"data = ted\n", | |
"selected_cols = ['name', 'event', 'duration', 'views']\n", | |
"data.reset_index(inplace=True)\n", | |
"data.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"# create LP object,\n", | |
"# set up as a maximization problem --> since we want to maximize the number of TED talks to watch\n", | |
"prob = pulp.LpProblem('WatchingTEDTalks', pulp.LpMaximize)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"# create decision - yes or no to watch the talk?\n", | |
"decision_variables = []\n", | |
"for rownum, row in data.iterrows():\n", | |
" # variable = set('x' + str(rownum))\n", | |
" variable = str('x' + str(row['index']))\n", | |
" variable = pulp.LpVariable(str(variable), lowBound = 0, upBound = 1, cat = 'Integer') # make variable binary\n", | |
" decision_variables.append(variable)\n", | |
" \n", | |
"print('Total number of decision variables: ' + str(len(decision_variables)))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"# Create optimization Function\n", | |
"total_views = ''\n", | |
"for rownum, row in data.iterrows():\n", | |
" for i talk in enumerate(decision_variables):\n", | |
" if rownum == i:\n", | |
" formula = row['views'] * talk\n", | |
" total_views += formula\n", | |
" \n", | |
"prob += total_views\n", | |
"# print('Optimization function: ' + str(total_views))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# Contraints\n", | |
"total_time_available_for_talks = 10*60 # Total time available is 10 hours . Converted to minutes\n", | |
"total_talks_can_watch = 25 # Don't want an overload information" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"# Create Constraint 1 - Time for talks\n", | |
"total_time_talks = ''\n", | |
"for rownum, row in data.iterrows():\n", | |
" for i, talk in enumerate(decision_variables):\n", | |
" if rownum == i:\n", | |
" formula = row['duration']*talk\n", | |
" total_time_talks += formula\n", | |
" \n", | |
"prob += (total_time_talks == total_time_available_for_talks)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"# Create Constraint 2 - Number of talks\n", | |
"total_talks = ''\n", | |
"\n", | |
"for rownum, row in data.iterrows():\n", | |
" for i, talk in enumerate(decision_variables):\n", | |
" if rownum == i:\n", | |
" formula = talk\n", | |
" total_talks += formula\n", | |
" \n", | |
"prob += (total_talks == total_talks_can_watch)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"print(prob)\n", | |
"prob.writeLP('WatchingTEDTalks.lp')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"optimization_result = prob.solve()\n", | |
"\n", | |
"assert optimization_result == pulp.LpStatusOptimal\n", | |
"print('Status:', LpStatus[prob.status])\n", | |
"print('Optimal Solution to the problem: ', value(prob.objective))\n", | |
"print('Individual decision variables: ')\n", | |
"for v in prob.variables():\n", | |
" print(v.name, '=', v.varValue)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"# reorder results\n", | |
"variable_name = []\n", | |
"variable_value = []\n", | |
"\n", | |
"for v in prob.variables():\n", | |
" variable_name.append(v.name)\n", | |
" variable_value.append(v.varValue)\n", | |
" \n", | |
"df = pd.DataFrame({'index': variable_name, 'value': variable_value})\n", | |
"for rownum, row in df.iterrows():\n", | |
" value = re.findall(r'(\\d+)', row['index'])\n", | |
" df.loc[rownum, 'index'] = int(value[0])\n", | |
" \n", | |
"# df = df.sort_index(by = 'index')\n", | |
"df = df.sort_values(by = 'index')\n", | |
"result = pd.merge(data, df, on = 'index')\n", | |
"result = result[result['value'] == 1].sort_values(by = 'views', ascending = False)\n", | |
"selected_cols_final = ['name', 'event', 'duration', 'views']\n", | |
"final_set_of_talks_to_watch = result[selected_cols_final]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"from IPython.display import display, HTML\n", | |
"display(HTML(final_set_of_talks_to_watch.to_html(index=False)))" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.0" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Great example.. practical linear programming application
Also, I second the pandas misspelling
Thank you
Great example.. practical linear programming application
Thanks for sharing and in #Create optimization Function, the code should be modified as follows.
Create optimization Function
total_views = ' '
for rownum, row in data.iterrows():
for i, talk in enumerate(decision_variables):
if rownum == i:
formula = row['views'] * talk
total_views += formula
prob += total_views
print('Optimization function: ' + str(total_views))
Very nice code and idea. Thank you for sharing. I used pulp.LpAffineExpression and dictionaries to define the objective function and constraint equations, it worked well.
I should also mention that one would get a different list with and without rounding. I guess this is because of the rounding process.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Hi !!!!
A wonderful application of linear programming.
Thanks!!!
I believe that the third line should be:
import pandas as pd
instead of:
import padnas as pd
Best regards,
Gabriel