rcj4747/AWS_Pricing.ipynb

## AWS_Pricing.ipynb
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Playing with the AWS pricing API for EC2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reading https://pricing.us-east-1.amazonaws.com/offers/v1.0/aws/index.json\n",
      "Reading EC2 pricing data from https://pricing.us-east-1.amazonaws.com/offers/v1.0/aws/AmazonEC2/current/index.json\n",
      "Data ingested\n"
     ]
    }
   ],
   "source": [
    "import json\n",
    "import urllib.parse\n",
    "\n",
    "from urllib.request import urlopen\n",
    "\n",
    "\n",
    "def fetch_json(url):\n",
    "    \"\"\"Download `url` and return its UTF-8 body parsed as JSON.\n",
    "\n",
    "    The response is used as a context manager so the connection is\n",
    "    closed once the body has been read.\n",
    "    \"\"\"\n",
    "    with urlopen(url) as response:\n",
    "        return json.loads(response.read().decode('utf-8'))\n",
    "\n",
    "\n",
    "api_base = 'https://pricing.us-east-1.amazonaws.com'\n",
    "offer_index = 'offers/v1.0/aws/index.json'\n",
    "offers_url = urllib.parse.urljoin(api_base, offer_index)\n",
    "\n",
    "print('Reading {}'.format(offers_url))\n",
    "offers = fetch_json(offers_url)\n",
    "\n",
    "# The offer index holds the location of the current EC2 offer file,\n",
    "# which is resolved against the same API base.\n",
    "ec2_index = offers['offers']['AmazonEC2']['currentVersionUrl']\n",
    "ec2_url = urllib.parse.urljoin(api_base, ec2_index)\n",
    "\n",
    "print('Reading EC2 pricing data from {}'.format(ec2_url))\n",
    "data = fetch_json(ec2_url)\n",
    "\n",
    "print('Data ingested')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Offer file formatting\n",
    "The format of the offer file is documented at http://docs.aws.amazon.com/awsaccountbilling/latest/aboutv2/reading-an-offer.html\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "dict_keys(['products', 'offerCode', 'publicationDate', 'formatVersion', 'disclaimer', 'version', 'terms'])\n",
      "\"terms\" is huge and we can ignore it's 2 elements\n"
     ]
    }
   ],
   "source": [
    "# What are the top-level keys?\n",
    "print(data.keys())\n",
    "\n",
    "# Save some memory by dropping the large dictionary we won't use.\n",
    "# We are not interested in the offer terms (hourly, reserved, etc.) as we're looking to\n",
    "# discover which instances are available per region and what features each supports.\n",
    "# The membership test keeps this cell re-runnable: a bare `del` raises KeyError\n",
    "# the second time, once 'terms' has already been removed.\n",
    "if 'terms' in data:\n",
    "    del data['terms']"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## EC2 \"Products\"\n",
    "\n",
    "The instance types are in the products section, but each instance type is listed multiple times per region.  An instance is listed once for dedicated and again for shared tenancy, and for each of these it is listed once per operating system.  Each product has to match a set of terms for the product, which means a massive matrix of data.  Luckily we don't care about pricing, so let's just see what instance types we have."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Let's take a look at a product entry before we start filtering\n",
      "{'attributes': {'clockSpeed': '2.5 GHz',\n",
      "                'currentGeneration': 'Yes',\n",
      "                'instanceFamily': 'General purpose',\n",
      "                'instanceType': 'm3.xlarge',\n",
      "                'licenseModel': 'License Included',\n",
      "                'location': 'EU (Ireland)',\n",
      "                'locationType': 'AWS Region',\n",
      "                'memory': '15 GiB',\n",
      "                'networkPerformance': 'High',\n",
      "                'operatingSystem': 'Windows',\n",
      "                'operation': 'RunInstances:0002',\n",
      "                'physicalProcessor': 'Intel Xeon E5-2670 v2 (Ivy Bridge/Sandy '\n",
      "                                     'Bridge)',\n",
      "                'preInstalledSw': 'NA',\n",
      "                'processorArchitecture': '64-bit',\n",
      "                'processorFeatures': 'Intel AVX; Intel Turbo',\n",
      "                'servicecode': 'AmazonEC2',\n",
      "                'storage': '2 x 40 SSD',\n",
      "                'tenancy': 'Host',\n",
      "                'usagetype': 'EU-HostBoxUsage:m3.xlarge',\n",
      "                'vcpu': '4'},\n",
      " 'productFamily': 'Compute Instance',\n",
      " 'sku': '2R74T3WSRWYRW2NP'}\n"
     ]
    }
   ],
   "source": [
    "from pprint import pprint\n",
    "\n",
    "print(\"Let's take a look at a product entry before we start filtering\")\n",
    "\n",
    "# Pretty-print only the first product the dict yields, then stop iterating.\n",
    "for key, product_entry in data['products'].items():\n",
    "    pprint(product_entry)\n",
    "    break"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 87,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Selecting just shared tenacy products reduces the number of products to 5786, but that is still a lot.\n",
      "Filtering those for only Linux brings the resultdown to 753 products\n",
      "For Dublin we have only 68 products, does that look better?\n",
      "Instance types per family in Dublin:\n",
      "{'i2': 4, 'p2': 3, 'hi1': 1, 't1': 1, 'c4': 5, 'c3': 5, 'd2': 4, 'cg1': 1, 'm1': 4, 'cc2': 1, 'm3': 4, 'g2': 2, 'x1': 2, 'cr1': 1, 'c1': 2, 'hs1': 1, 'r4': 6, 't2': 7, 'r3': 5, 'm4': 6, 'm2': 3}\n",
      "That looks correct.\n"
     ]
    }
   ],
   "source": [
    "from collections import OrderedDict\n",
    "\n",
    "\n",
    "def instance_family(product):\n",
    "    \"\"\"Return the family prefix of a product's instance type, e.g. 'm3' for 'm3.xlarge'.\"\"\"\n",
    "    return product['attributes']['instanceType'].split('.')[0]\n",
    "\n",
    "\n",
    "# Each filter narrows the previous result.  .get() is used so that a product\n",
    "# lacking the attribute is skipped instead of raising KeyError.\n",
    "shared_tenancy = [product for product in data['products'].values()\n",
    "                  if product['attributes'].get('tenancy') == 'Shared']\n",
    "\n",
    "print('Selecting just shared tenancy products reduces the '\n",
    "      'number of products to {}, but that is still a lot.'.format(len(shared_tenancy)))\n",
    "\n",
    "shared_linux = [product for product in shared_tenancy\n",
    "                if product['attributes'].get('operatingSystem') == 'Linux']\n",
    "\n",
    "# Note the trailing space: adjacent string literals are concatenated as-is.\n",
    "print('Filtering those for only Linux brings the result '\n",
    "      'down to {} products'.format(len(shared_linux)))\n",
    "\n",
    "dub = [product for product in shared_linux\n",
    "       if product['attributes'].get('location') == 'EU (Ireland)']\n",
    "\n",
    "print('For Dublin we have only {} products, does that look better?'.format(len(dub)))\n",
    "\n",
    "# Group the Dublin products by instance family.  Sorting the family names\n",
    "# gives the grouping a stable order; the printed dict follows that order on\n",
    "# Python versions whose plain dicts preserve insertion order.\n",
    "dub_families = {instance_family(product) for product in dub}\n",
    "dublin = OrderedDict((family, []) for family in sorted(dub_families))\n",
    "for product in dub:\n",
    "    dublin[instance_family(product)].append(product)\n",
    "\n",
    "print('Instance types per family in Dublin:')\n",
    "print({family: len(members) for family, members in dublin.items()})\n",
    "print('That looks correct.')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
	{
	"cells": [
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"## Playing with the AWS pricing API for EC2"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 19,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"Reading https://pricing.us-east-1.amazonaws.com/offers/v1.0/aws/index.json\n",
	"Reading EC2 pricing data from https://pricing.us-east-1.amazonaws.com/offers/v1.0/aws/AmazonEC2/current/index.json\n",
	"Data ingested\n"
	]
	}
	],
	"source": [
	"import json\n",
	"import urllib.parse\n",
	"\n",
	"from urllib.request import urlopen\n",
	"\n",
	"\n",
	"def fetch_json(url):\n",
	"    \"\"\"Download `url` and return its UTF-8 body parsed as JSON.\n",
	"\n",
	"    The response is used as a context manager so the connection is\n",
	"    closed once the body has been read.\n",
	"    \"\"\"\n",
	"    with urlopen(url) as response:\n",
	"        return json.loads(response.read().decode('utf-8'))\n",
	"\n",
	"\n",
	"api_base = 'https://pricing.us-east-1.amazonaws.com'\n",
	"offer_index = 'offers/v1.0/aws/index.json'\n",
	"offers_url = urllib.parse.urljoin(api_base, offer_index)\n",
	"\n",
	"print('Reading {}'.format(offers_url))\n",
	"offers = fetch_json(offers_url)\n",
	"\n",
	"# The offer index holds the location of the current EC2 offer file,\n",
	"# which is resolved against the same API base.\n",
	"ec2_index = offers['offers']['AmazonEC2']['currentVersionUrl']\n",
	"ec2_url = urllib.parse.urljoin(api_base, ec2_index)\n",
	"\n",
	"print('Reading EC2 pricing data from {}'.format(ec2_url))\n",
	"data = fetch_json(ec2_url)\n",
	"\n",
	"print('Data ingested')"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"## Offer file formatting\n",
	"The format of the offer file is documented at http://docs.aws.amazon.com/awsaccountbilling/latest/aboutv2/reading-an-offer.html\n",
	"\n"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 20,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"dict_keys(['products', 'offerCode', 'publicationDate', 'formatVersion', 'disclaimer', 'version', 'terms'])\n",
	"\"terms\" is huge and we can ignore it's 2 elements\n"
	]
	}
	],
	"source": [
	"# What are the top-level keys?\n",
	"print(data.keys())\n",
	"\n",
	"# Save some memory by dropping the large dictionary we won't use.\n",
	"# We are not interested in the offer terms (hourly, reserved, etc.) as we're looking to\n",
	"# discover which instances are available per region and what features each supports.\n",
	"# The membership test keeps this cell re-runnable: a bare `del` raises KeyError\n",
	"# the second time, once 'terms' has already been removed.\n",
	"if 'terms' in data:\n",
	"    del data['terms']"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"## EC2 \"Products\"\n",
	"\n",
	"The instance types are in the products section, but each instance type is listed multiple times per region. An instance is listed once for dedicated and again for shared tenancy, and for each of these it is listed once per operating system. Each product has to match a set of terms for the product, which means a massive matrix of data. Luckily we don't care about pricing, so let's just see what instance types we have."
	]
	},
	{
	"cell_type": "code",
	"execution_count": 78,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"Let's take a look at a product entry before we start filtering\n",
	"{'attributes': {'clockSpeed': '2.5 GHz',\n",
	" 'currentGeneration': 'Yes',\n",
	" 'instanceFamily': 'General purpose',\n",
	" 'instanceType': 'm3.xlarge',\n",
	" 'licenseModel': 'License Included',\n",
	" 'location': 'EU (Ireland)',\n",
	" 'locationType': 'AWS Region',\n",
	" 'memory': '15 GiB',\n",
	" 'networkPerformance': 'High',\n",
	" 'operatingSystem': 'Windows',\n",
	" 'operation': 'RunInstances:0002',\n",
	" 'physicalProcessor': 'Intel Xeon E5-2670 v2 (Ivy Bridge/Sandy '\n",
	" 'Bridge)',\n",
	" 'preInstalledSw': 'NA',\n",
	" 'processorArchitecture': '64-bit',\n",
	" 'processorFeatures': 'Intel AVX; Intel Turbo',\n",
	" 'servicecode': 'AmazonEC2',\n",
	" 'storage': '2 x 40 SSD',\n",
	" 'tenancy': 'Host',\n",
	" 'usagetype': 'EU-HostBoxUsage:m3.xlarge',\n",
	" 'vcpu': '4'},\n",
	" 'productFamily': 'Compute Instance',\n",
	" 'sku': '2R74T3WSRWYRW2NP'}\n"
	]
	}
	],
	"source": [
	"from pprint import pprint\n",
	"\n",
	"print(\"Let's take a look at a product entry before we start filtering\")\n",
	"\n",
	"# Pretty-print only the first product the dict yields, then stop iterating.\n",
	"for key, product_entry in data['products'].items():\n",
	"    pprint(product_entry)\n",
	"    break"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 87,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"Selecting just shared tenacy products reduces the number of products to 5786, but that is still a lot.\n",
	"Filtering those for only Linux brings the resultdown to 753 products\n",
	"For Dublin we have only 68 products, does that look better?\n",
	"Instance types per family in Dublin:\n",
	"{'i2': 4, 'p2': 3, 'hi1': 1, 't1': 1, 'c4': 5, 'c3': 5, 'd2': 4, 'cg1': 1, 'm1': 4, 'cc2': 1, 'm3': 4, 'g2': 2, 'x1': 2, 'cr1': 1, 'c1': 2, 'hs1': 1, 'r4': 6, 't2': 7, 'r3': 5, 'm4': 6, 'm2': 3}\n",
	"That looks correct.\n"
	]
	}
	],
	"source": [
	"from collections import OrderedDict\n",
	"\n",
	"\n",
	"def instance_family(product):\n",
	"    \"\"\"Return the family prefix of a product's instance type, e.g. 'm3' for 'm3.xlarge'.\"\"\"\n",
	"    return product['attributes']['instanceType'].split('.')[0]\n",
	"\n",
	"\n",
	"# Each filter narrows the previous result.  .get() is used so that a product\n",
	"# lacking the attribute is skipped instead of raising KeyError.\n",
	"shared_tenancy = [product for product in data['products'].values()\n",
	"                  if product['attributes'].get('tenancy') == 'Shared']\n",
	"\n",
	"print('Selecting just shared tenancy products reduces the '\n",
	"      'number of products to {}, but that is still a lot.'.format(len(shared_tenancy)))\n",
	"\n",
	"shared_linux = [product for product in shared_tenancy\n",
	"                if product['attributes'].get('operatingSystem') == 'Linux']\n",
	"\n",
	"# Note the trailing space: adjacent string literals are concatenated as-is.\n",
	"print('Filtering those for only Linux brings the result '\n",
	"      'down to {} products'.format(len(shared_linux)))\n",
	"\n",
	"dub = [product for product in shared_linux\n",
	"       if product['attributes'].get('location') == 'EU (Ireland)']\n",
	"\n",
	"print('For Dublin we have only {} products, does that look better?'.format(len(dub)))\n",
	"\n",
	"# Group the Dublin products by instance family.  Sorting the family names\n",
	"# gives the grouping a stable order; the printed dict follows that order on\n",
	"# Python versions whose plain dicts preserve insertion order.\n",
	"dub_families = {instance_family(product) for product in dub}\n",
	"dublin = OrderedDict((family, []) for family in sorted(dub_families))\n",
	"for product in dub:\n",
	"    dublin[instance_family(product)].append(product)\n",
	"\n",
	"print('Instance types per family in Dublin:')\n",
	"print({family: len(members) for family, members in dublin.items()})\n",
	"print('That looks correct.')"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": []
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 3",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.5.2"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 2
	}