Created
November 23, 2018 03:23
-
-
Save utdemir/e806b03edbebe45e8bc2e59f1a11046a to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import re | |
import sys | |
import argparse | |
from typing import * | |
from pprint import pprint | |
from multiprocessing import Pool | |
from datetime import datetime, timedelta | |
import boto3 | |
from botocore.config import Config | |
import requests | |
from tqdm import tqdm | |
# Raise botocore's retry ceiling for the EMR client; the script issues one
# listing call per matching cluster on top of the paginated cluster listing.
config = Config(
    retries={
        "max_attempts": 24,
    },
)

# Shared EMR client used by the lookup functions in this script.
emr = boto3.client('emr', config=config)

# Region name of the default boto3 session; the price lookup is keyed by it.
region = boto3.session.Session().region_name

# Distinct string types so a type checker can tell instance types and
# cluster ids apart; at runtime both are plain ``str``.
InstanceType = NewType("InstanceType", str)
ClusterId = NewType("ClusterId", str)
class ClusterMeta(NamedTuple):
    """Identifying details of a single EMR cluster."""

    id: ClusterId  # cluster id (the ``Id`` field of a cluster listing entry)
    name: str  # cluster name (the ``Name`` field of a cluster listing entry)
    start: datetime  # creation time taken from the cluster's status timeline
class Cluster(NamedTuple):
    """A cluster's metadata together with the run time of its instances."""

    meta: ClusterMeta
    # One ``(instance type, run time)`` pair per instance of the cluster.
    instances: List[Tuple[InstanceType, timedelta]]
def list_clusters(after: datetime) -> List[ClusterMeta]:
    """Return metadata for every finished EMR cluster created after ``after``.

    Only clusters in the ``TERMINATED`` or ``TERMINATED_WITH_ERRORS`` states
    are requested. Every result page is consumed, and a progress bar is
    advanced once per cluster found.

    Args:
        after: Lower bound passed to EMR as ``CreatedAfter``.

    Returns:
        One ``ClusterMeta`` per cluster, in the order EMR returned them.
    """
    paginator = emr.get_paginator("list_clusters")
    # NOTE: no trailing comma after the call. A stray comma here turns the
    # page iterator into a 1-tuple and forces an extra, misleading loop level.
    pages = paginator.paginate(
        CreatedAfter=after,
        ClusterStates=['TERMINATED', 'TERMINATED_WITH_ERRORS'],
    )

    ret = []
    with tqdm(desc="Found clusters: ") as bar:
        for page in pages:
            for cluster in page["Clusters"]:
                ret.append(
                    ClusterMeta(
                        cluster['Id'],
                        cluster["Name"],
                        cluster['Status']['Timeline']['CreationDateTime'],
                    )
                )
                bar.update()
    return ret
def cluster_instances(cid: ClusterId) -> List[Tuple[InstanceType, timedelta]]:
    """Return the instance type and run time of every instance in a cluster.

    The instance listing is paginated, so all pages are read; a single
    ``list_instances`` call would only cover the first page and larger
    clusters would be under-counted.

    Args:
        cid: Id of the cluster whose instances are listed.

    Returns:
        One ``(instance type, end - creation)`` pair per instance.

    Raises:
        KeyError: If an instance's timeline has no ``EndDateTime``.
            NOTE(review): presumably this only happens for instances that
            are still running; confirm it cannot occur for the terminated
            clusters this script queries.
    """
    ret = []
    paginator = emr.get_paginator("list_instances")
    for page in paginator.paginate(ClusterId=cid):
        for i in page["Instances"]:
            ty = InstanceType(i["InstanceType"])
            timeline = i["Status"]["Timeline"]
            start = timeline["CreationDateTime"]
            end = timeline["EndDateTime"]
            ret.append((ty, end - start))
    return ret
##### | |
# Mapping from instance type to its combined EMR + EC2 price.
Pricing = NewType("Pricing", Dict[InstanceType, float])


def get_pricing() -> Pricing:
    """Download the instance price list and extract EMR prices for ``region``.

    For each instance type, the price stored is the sum of the ``"emr"`` and
    ``"ec2"`` figures found under the region's ``"emr"`` pricing entry.
    Instance types that have no pricing for the region, or no EMR entry
    within it, are skipped.

    Returns:
        Prices keyed by instance type.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the download does not respond in time.
    """
    with tqdm(desc="Downloading pricing...", total=1) as bar:
        resp = requests.get(
            "https://raw.githubusercontent.com/powdahound/ec2instances.info/master/www/instances.json",
            # Without a timeout, requests waits indefinitely on a stalled connection.
            timeout=60,
        )
        # Fail on HTTP errors here instead of later with a confusing JSON
        # decoding error on an error page.
        resp.raise_for_status()

        ret = {}
        for i in resp.json():
            ty = InstanceType(i["instance_type"])
            region_pricing = i["pricing"].get(region)
            if not region_pricing:
                continue
            emr_pricing = region_pricing.get("emr")
            if not emr_pricing:
                continue
            ret[ty] = float(emr_pricing["emr"]) + float(emr_pricing["ec2"])
        bar.update()
    return Pricing(ret)
def cluster_cost(pricing: Pricing, cluster: Cluster) -> float:
    """Return the total cost of a cluster from its instances' run times.

    Each instance contributes its price multiplied by its run time in hours.

    Args:
        pricing: Price per instance type, applied per hour of run time.
        cluster: Cluster whose ``instances`` are ``(type, run time)`` pairs.

    Returns:
        The summed cost; ``0.0`` for a cluster without instances.

    Raises:
        KeyError: If an instance type has no entry in ``pricing``.
    """
    total = 0.0
    for ty, d in cluster.instances:
        # ``timedelta.seconds`` is only the sub-day remainder; use
        # ``total_seconds()`` so instances running 24h or longer are billed
        # for their full duration.
        total += pricing[ty] * d.total_seconds() / 3600
    return total
##### | |
# Compile the name patterns given on the command line. This must be a list:
# a ``map`` object is always truthy, which would make the usage check below
# unreachable when no pattern is supplied.
patterns = [re.compile(s) for s in sys.argv[1:]]
if not patterns:
    print("Usage: ./emr-cost-calculator.py [PATTERN...]", file=sys.stderr)
    sys.exit(1)

pricing = get_pricing()
# Only clusters created within the last seven days are considered.
metas = list_clusters(datetime.now() - timedelta(days=7))

# Print one total per pattern, in the order the patterns were given.
for pattern in patterns:
    # ``match`` anchors the pattern at the start of the cluster name.
    relevant = [i for i in metas if pattern.match(i.name)]
    clusters = [
        Cluster(meta, cluster_instances(meta.id))
        for meta in tqdm(relevant, desc=f"Fetching instances for '{pattern.pattern}'")
    ]
    costs = [(c, cluster_cost(pricing, c)) for c in clusters]
    total_cost = sum(cost for (_, cost) in costs)
    print(total_cost)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment