Skip to content

Instantly share code, notes, and snippets.

@iklobato
Last active October 24, 2023 23:43
Show Gist options
  • Save iklobato/8476b3caece344b0549181e740d07036 to your computer and use it in GitHub Desktop.
Save iklobato/8476b3caece344b0549181e740d07036 to your computer and use it in GitHub Desktop.
Scrape EC2 pricing to find the instance with the most processing power at the lowest cost
from datetime import datetime
import pandas as pd
import requests
# Console display tuning for the final report: print up to 8 columns, never
# truncate long sequences, and allow rows up to 200 characters wide.
for _option, _value in (
    ('display.max_columns', 8),
    ('max_seq_item', None),
    ('display.width', 200),
):
    pd.set_option(_option, _value)
# Region display names to query; each one is used both in the pricing URL
# and as the key under 'regions' in the returned JSON.
_US_REGIONS = (
    'US East (N. Virginia)',
    'US East (Ohio)',
    'US West (N. California)',
    'US West (Oregon)',
)
zones = [*_US_REGIONS, 'Canada (Central)']
# Request headers imitating a Chromium-based browser (the values name
# Brave/Chromium 117 on macOS). Built from ordered pairs so the header order
# stays exactly as listed.
headers = dict(
    (
        ('authority', 'b0.p.awsstatic.com'),
        ('accept', '*/*'),
        ('accept-language', 'en-US,en;q=0.9'),
        ('cache-control', 'no-cache'),
        ('origin', 'https://c0.b0.p.awsstatic.com'),
        ('pragma', 'no-cache'),
        ('referer', 'https://c0.b0.p.awsstatic.com/'),
        ('sec-ch-ua', '"Brave";v="117", "Not;A=Brand";v="8", "Chromium";v="117"'),
        ('sec-ch-ua-mobile', '?0'),
        ('sec-ch-ua-platform', '"macOS"'),
        ('sec-fetch-dest', 'empty'),
        ('sec-fetch-mode', 'cors'),
        ('sec-fetch-site', 'same-site'),
        ('sec-gpc', '1'),
        (
            'user-agent',
            'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36',
        ),
    )
)
# URL of one region's on-demand Linux price list; ``{region}`` receives the
# region display name with its spaces percent-encoded.
PRICING_URL_TEMPLATE = (
    'https://b0.p.awsstatic.com/pricing/2.0/meteredUnitMaps/ec2/USD/current/'
    'ec2-ondemand-without-sec-sel/{region}/Linux/index.json'
)
# Seconds to wait on the pricing endpoint before a request is abandoned.
REQUEST_TIMEOUT_SECONDS = 30
# Columns of the raw per-instance table, in the order they are dumped to CSV.
INSTANCE_COLUMNS = ['instance_name', 'price', 'memory', 'vCPU', 'Instance Type', 'location']


def parse_region_offers(payload, region):
    """Flatten one region's pricing payload into a list of row dicts.

    Args:
        payload: Decoded JSON document; ``payload['regions'][region]`` must map
            an offer name to a dict of that offer's attributes.
        region: Region display name used as the key under ``'regions'``.

    Returns:
        One dict per offer, keyed by the names in ``INSTANCE_COLUMNS``.

    Raises:
        KeyError: If the region or any expected attribute is missing.
    """
    return [
        {
            'instance_name': offer_name,
            'price': attrs['price'],
            'memory': attrs['Memory'],
            'vCPU': attrs['vCPU'],
            'Instance Type': attrs['Instance Type'],
            'location': attrs['Location'],
        }
        for offer_name, attrs in payload['regions'][region].items()
    ]


def fetch_region_instances(session, region, timestamp):
    """Download and parse the price list of a single region.

    Args:
        session: ``requests.Session`` already carrying the request headers.
        region: Region display name, e.g. ``'US East (Ohio)'``.
        timestamp: Value sent as the ``timestamp`` query parameter.

    Returns:
        The list of row dicts produced by ``parse_region_offers``.

    Raises:
        requests.HTTPError: If the endpoint answers with an error status.
        requests.Timeout: If no answer arrives within the timeout.
        KeyError: If the payload lacks the region or an expected attribute.
    """
    url = PRICING_URL_TEMPLATE.format(region=region.replace(' ', '%20'))
    response = session.get(
        url,
        params={'timestamp': timestamp},
        timeout=REQUEST_TIMEOUT_SECONDS,
    )
    # Fail here with a clear HTTP error instead of a confusing JSON/KeyError
    # later when the endpoint returns an error page.
    response.raise_for_status()
    return parse_region_offers(response.json(), region)


def rank_instances(instances, *, min_vcpu=16, min_memory=64, max_hourly_price=0.9):
    """Filter instances by minimum specs and rank them by a heuristic score.

    Args:
        instances: DataFrame with the ``INSTANCE_COLUMNS`` columns; ``price``,
            ``vCPU`` and ``memory`` may be strings. It is not modified.
        min_vcpu: Minimum vCPU count (inclusive).
        min_memory: Memory threshold (exclusive), in whatever unit the
            ``memory`` strings use (presumably GiB; confirm against the feed).
        max_hourly_price: Price threshold (exclusive).

    Returns:
        A new DataFrame sorted by descending ``Score`` with a fresh 0..n-1
        index and the columns ``Instance Type``, ``price``,
        ``Monthly Estimation``, ``memory``, ``vCPU``, ``location``, ``Score``.

    Raises:
        ValueError: If a ``price`` or ``vCPU`` value cannot be parsed as float.
    """
    ranked = instances.copy()

    # Keep the fractional part of the memory size ("64.5 GiB" -> 64.5) and
    # tolerate thousands separators ("1,024 GiB" -> 1024); the previous
    # integer-only pattern truncated both.
    memory_text = ranked['memory'].astype(str).str.replace(',', '', regex=False)
    ranked['Memory_'] = memory_text.str.extract(r'(\d+(?:\.\d+)?)', expand=False).astype(float)
    ranked['vCPU'] = ranked['vCPU'].astype(float)
    ranked['price'] = ranked['price'].astype(float)

    eligible = (
        (ranked['vCPU'] >= min_vcpu)
        & (ranked['Memory_'] > min_memory)
        & (ranked['price'] < max_hourly_price)
        # A zero price would turn every per-price ratio into infinity and
        # push the row to the top of the ranking.
        & (ranked['price'] > 0)
    )
    # Explicit copy so the column assignments below act on an independent
    # frame rather than on a slice (avoids SettingWithCopyWarning).
    ranked = ranked[eligible].copy()

    ranked['Price-Cpu Ratio'] = ranked['vCPU'] / ranked['price']
    ranked['Price-Memory Ratio'] = ranked['Memory_'] / ranked['price']
    ranked['Monthly Estimation'] = ranked['price'] * 24 * 31
    ranked['RPS-Memory'] = ranked['Memory_'] * 10
    ranked['RPS-Network'] = ranked['Price-Memory Ratio'] * 1000
    ranked['Request-Per-Second'] = ranked[['RPS-Memory', 'RPS-Network']].min(axis=1)
    ranked['RPS-Capacity'] = ranked['Request-Per-Second'] * (1 / 0.1)

    cpu_weight = 0.4
    memory_weight = 0.3
    price_weight = 0.5
    rps_weight = 0.6
    # NOTE(review): the price term is added, so a higher price slightly raises
    # the score. Kept as in the original formula; confirm whether it should
    # be subtracted instead.
    ranked['Score'] = (
        (cpu_weight * ranked['Price-Cpu Ratio']) / 100
        + (memory_weight * ranked['Price-Memory Ratio']) / 100
        + (price_weight * ranked['price']) / 100
        + (rps_weight * ranked['RPS-Capacity']) / 100
    )

    ranked = ranked.sort_values(by='Score', ascending=False).reset_index(drop=True)
    return ranked[
        ['Instance Type', 'price', 'Monthly Estimation', 'memory', 'vCPU', 'location', 'Score']
    ]


def main():
    """Scrape every region in ``zones``, dump the raw table, print the top 10."""
    # Millisecond timestamp: sent as the ``timestamp`` query parameter and
    # reused in the CSV file name.
    now = int(datetime.now().timestamp() * 1000)

    rows = []
    with requests.Session() as session:
        session.headers.update(headers)
        for z in zones:
            print(f'Getting data for {z}')
            rows.extend(fetch_region_instances(session, z, now))

    # Build the frame once from all rows instead of concatenating per instance.
    df = pd.DataFrame(rows, columns=INSTANCE_COLUMNS)
    df.to_csv(f'dump_all_regions{now}.csv', index=False)

    top_10_machines = rank_instances(df).head(10)
    print(top_10_machines)


if __name__ == '__main__':
    main()
@iklobato
Copy link
Author

Output

Screen Shot 2023-10-24 at 20 34 58

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment