Skip to content

Instantly share code, notes, and snippets.

@jpgard
Last active August 2, 2018 13:29
Show Gist options
  • Save jpgard/3db8995957f1e963eb7358c0210dd9c8 to your computer and use it in GitHub Desktop.
Save jpgard/3db8995957f1e963eb7358c0210dd9c8 to your computer and use it in GitHub Desktop.
ADSWPY Week 3
!pip install html5lib #install html5lib, only needs to be run once
#You might need to restart the kernel after running this (menu: Kernel > Restart)
import pandas as pd
import numpy as np
import urllib
# Xeon Gold processor descriptions and release prices, read from the Wikipedia
# list of Intel Xeon microprocessors (fetched through the course proxy).
# NOTE(review): the table is selected by its position (78) among all tables on
# the page -- verify it still points at the intended table if the page changes.
df_xeon_golds = pd.read_html(
    'https://proxy.mentoracademy.org/getContentFromUrl/?userid=brooks&url=https://en.wikipedia.org/wiki/List_of_Intel_Xeon_microprocessors',
    header=0,
)[78]
# Performance statistics for a range of Intel processors, from cpu-monkey.
df_stats = pd.read_csv(
    'https://proxy.mentoracademy.org/getContentFromUrl/?userid=brooks&url=https://gist.github.com/cab938/6499da85d31cfccc9cc5b13621963312/raw/34db3b55bd14f39fc59e6b5128b667a9061f77d7/cpu_performance.csv'
)
#clean up the price column in df_xeon_golds
def clean_price(price):
    """Convert a release-price cell such as ``'$1221'`` to an integer.

    Args:
        price: Raw cell value. It is converted with ``str()`` first, so
            non-string values (for example NaN or None) are accepted.

    Returns:
        The dollar amount as an ``int`` when the text starts with ``'$'``
        and the remainder is a whole number (thousands separators and
        surrounding whitespace are tolerated); otherwise ``None``.
    """
    text = str(price).strip()
    if not text.startswith('$'):
        return None
    try:
        # Drop thousands separators so '$1,221' parses like '$1221'.
        return int(text[1:].replace(',', ''))
    except ValueError:
        # Anything that is not a single whole-number amount is treated as
        # "no usable price" rather than an error.
        return None
# Parse the textual "Release price (USD)" column into a new "price" column;
# cells that clean_price cannot parse become missing values.
df_xeon_golds["price"]=df_xeon_golds["Release price (USD)"].apply(clean_price)
# TODO: exercise placeholder -- the right-hand side of the assignment below is
# still missing, so the statement is a syntax error until the join is written.
# NOTE(review): the join key columns are not visible here; confirm them against
# the columns of df_xeon_golds and df_stats before filling this in.
joined_df= #join the dataframes together and consider only those for which we have stats
def calculate_stats(row):
    """Add a ``price_performance`` entry (performance per dollar) to *row*.

    Args:
        row: Mapping-like record with ``'price'`` and ``'performance'``
            entries, e.g. a row handed over by
            ``DataFrame.apply(calculate_stats, axis=1)``.

    Returns:
        The same *row* object. ``row['price_performance']`` is set to
        ``performance / price`` when both values are present, and to NaN
        otherwise, so every returned row carries the same keys.
    """
    price = row['price']
    performance = row['performance']
    # pd.isna (unlike np.isnan) also accepts None and other non-float
    # objects instead of raising TypeError.
    if pd.isna(price) or pd.isna(performance):
        row['price_performance'] = np.nan
    else:
        # higher is better value for the money
        row['price_performance'] = performance / price
    return row
# Run calculate_stats on every row to add the price_performance column.
joined_df = joined_df.apply(calculate_stats, axis=1)
# Keep only the processors for which a price/performance value is available.
joined_df = joined_df.dropna(subset=['price_performance'])
# Rank 1 is the best value for the money (highest performance per dollar).
joined_df['price_performance_rank'] = joined_df['price_performance'].rank(ascending=False)
# Sort by rank and show the top 5.
joined_df.sort_values('price_performance_rank').head(5)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment