# jpgard/processors_w3g_adswpy.py

## processors_w3g_adswpy.py
!pip install html5lib #install html5lib, only needs to be run once
#You might need to restart kernel after running with the menu Kernel>Restart
import pandas as pd
import numpy as np
import urllib
# Description and prices of the Xeon Gold processors.
# pd.read_html returns a list with one DataFrame per table parsed from the page;
# [78] picks a table purely by position, so a change in the page layout would
# select a different table. header=0 uses the first row as the column names.
df_xeon_golds=pd.read_html('https://proxy.mentoracademy.org/getContentFromUrl/?userid=brooks&url=https://en.wikipedia.org/wiki/List_of_Intel_Xeon_microprocessors', header=0)[78]
# Statistics about the performance of a range of Intel processors from cpu-monkey.
# NOTE(review): calculate_stats below reads a 'performance' column, presumably
# supplied by this CSV - confirm against the file.
df_stats=pd.read_csv('https://proxy.mentoracademy.org/getContentFromUrl/?userid=brooks&url=https://gist.github.com/cab938/6499da85d31cfccc9cc5b13621963312/raw/34db3b55bd14f39fc59e6b5128b667a9061f77d7/cpu_performance.csv')
#clean up the price column in df_xeon_golds
def clean_price(price):
    """Convert a release-price string such as ``'$1,776'`` to an ``int``.

    Args:
        price: Raw cell value of any type; it is converted with ``str()``
            and surrounding whitespace is ignored.

    Returns:
        The whole-dollar amount as an ``int``, or ``None`` when the text does
        not start with ``'$'`` or what follows is not a whole number
        (e.g. missing values, footnoted or fractional prices).
    """
    text = str(price).strip()
    if not text.startswith('$'):
        return None
    try:
        # Thousands separators would otherwise make int() reject the value.
        return int(text[1:].replace(',', ''))
    except ValueError:
        # Only a failed number parse is expected here; anything else is a bug
        # and should surface instead of being swallowed.
        return None
# Derive a "price" column by running clean_price over every entry of the
# "Release price (USD)" column.
df_xeon_golds["price"]=df_xeon_golds["Release price (USD)"].apply(clean_price)

# Exercise placeholder: the right-hand side is intentionally missing, so this
# line is a syntax error until it is filled in.
joined_df= # TODO: join the dataframes together (presumably df_xeon_golds and df_stats; confirm the key column) and keep only the rows for which we have stats

def calculate_stats(row):
    """Add a ``price_performance`` entry (performance divided by price) to *row*.

    Args:
        row: Mapping-like record with ``'price'`` and ``'performance'``
            entries, e.g. a ``pandas.Series`` or a ``dict``.

    Returns:
        The same *row* object, modified in place. ``'price_performance'`` is
        only set when both inputs are present; a zero price is not
        special-cased.
    """
    # pd.isna treats None as missing too, whereas np.isnan(None) raises
    # TypeError (clean_price returns None for unparseable prices).
    if pd.isna(row['price']) or pd.isna(row['performance']):
        return row
    #higher is better value for the money
    row['price_performance'] = row['performance'] / row['price']
    return row
# Exercise placeholders: each statement below is intentionally incomplete and
# is a syntax error until it is filled in.
joined_df= # TODO: apply calculate_stats to create the new 'price_performance' column (performance divided by price)
joined_df= # TODO: keep only those processors which have stats
joined_df['price_performance_rank']= # TODO: determine the rank of each processor in a new column
joined_df. # TODO: sort the data and show the top 5
	!pip install html5lib #install html5lib, only needs to be run once
	#You might need to restart kernel after running with the menu Kernel>Restart
	import pandas as pd
	import numpy as np
	import urllib
	# Description and prices of the Xeon Gold processors.
	# pd.read_html returns a list with one DataFrame per table parsed from the page;
	# [78] picks a table purely by position, so a change in the page layout would
	# select a different table. header=0 uses the first row as the column names.
	df_xeon_golds=pd.read_html('https://proxy.mentoracademy.org/getContentFromUrl/?userid=brooks&url=https://en.wikipedia.org/wiki/List_of_Intel_Xeon_microprocessors', header=0)[78]
	# Statistics about the performance of a range of Intel processors from cpu-monkey.
	# NOTE(review): calculate_stats below reads a 'performance' column, presumably
	# supplied by this CSV - confirm against the file.
	df_stats=pd.read_csv('https://proxy.mentoracademy.org/getContentFromUrl/?userid=brooks&url=https://gist.github.com/cab938/6499da85d31cfccc9cc5b13621963312/raw/34db3b55bd14f39fc59e6b5128b667a9061f77d7/cpu_performance.csv')
	#clean up the price column in df_xeon_golds
	def clean_price(price):
	    """Convert a release-price string such as ``'$1,776'`` to an ``int``.

	    Args:
	        price: Raw cell value of any type; it is converted with ``str()``
	            and surrounding whitespace is ignored.

	    Returns:
	        The whole-dollar amount as an ``int``, or ``None`` when the text does
	        not start with ``'$'`` or what follows is not a whole number
	        (e.g. missing values, footnoted or fractional prices).
	    """
	    text = str(price).strip()
	    if not text.startswith('$'):
	        return None
	    try:
	        # Thousands separators would otherwise make int() reject the value.
	        return int(text[1:].replace(',', ''))
	    except ValueError:
	        # Only a failed number parse is expected here; anything else is a bug
	        # and should surface instead of being swallowed.
	        return None
	# Derive a "price" column by running clean_price over every entry of the
	# "Release price (USD)" column.
	df_xeon_golds["price"]=df_xeon_golds["Release price (USD)"].apply(clean_price)

	# Exercise placeholder: the right-hand side is intentionally missing, so this
	# line is a syntax error until it is filled in.
	joined_df= # TODO: join the dataframes together (presumably df_xeon_golds and df_stats; confirm the key column) and keep only the rows for which we have stats

	def calculate_stats(row):
	    """Add a ``price_performance`` entry (performance divided by price) to *row*.

	    Args:
	        row: Mapping-like record with ``'price'`` and ``'performance'``
	            entries, e.g. a ``pandas.Series`` or a ``dict``.

	    Returns:
	        The same *row* object, modified in place. ``'price_performance'`` is
	        only set when both inputs are present; a zero price is not
	        special-cased.
	    """
	    # pd.isna treats None as missing too, whereas np.isnan(None) raises
	    # TypeError (clean_price returns None for unparseable prices).
	    if pd.isna(row['price']) or pd.isna(row['performance']):
	        return row
	    #higher is better value for the money
	    row['price_performance'] = row['performance'] / row['price']
	    return row
	# Exercise placeholders: each statement below is intentionally incomplete and
	# is a syntax error until it is filled in.
	joined_df= # TODO: apply calculate_stats to create the new 'price_performance' column (performance divided by price)
	joined_df= # TODO: keep only those processors which have stats
	joined_df['price_performance_rank']= # TODO: determine the rank of each processor in a new column
	joined_df. # TODO: sort the data and show the top 5