Created
May 1, 2019 14:15
-
-
Save djw8605/04e9c7e2328e02648e1e4e9e7cb1cc75 to your computer and use it in GitHub Desktop.
Comparison of Local vs. GRACC.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"nbformat": 4, | |
"nbformat_minor": 0, | |
"metadata": { | |
"colab": { | |
"name": "Comparison of Local vs. GRACC.ipynb", | |
"version": "0.3.2", | |
"provenance": [], | |
"collapsed_sections": [], | |
"include_colab_link": true | |
}, | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3" | |
} | |
}, | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "view-in-github", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"<a href=\"https://colab.research.google.com/gist/djw8605/04e9c7e2328e02648e1e4e9e7cb1cc75/comparison-of-local-vs-gracc.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | |
] | |
}, | |
{ | |
"metadata": { | |
"id": "2GYt4v5l3etj", | |
"colab_type": "text" | |
}, | |
"cell_type": "markdown", | |
"source": [ | |
"# Compare Local HTCondor vs. GRACC\n", | |
"\n", | |
"In this notebook, we download the `condor_history` output from the MWT2 (stored in Google Drive) and compare it against the GRACC records for the same time range." | |
] | |
}, | |
{ | |
"metadata": { | |
"id": "DhPKmM33IPiW", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"cell_type": "code", | |
"source": [ | |
"# Colab setup: install PyDrive and the HTCondor bindings.\n", | |
"# NOTE(review): versions are not pinned, so a re-run may pick up different releases.\n", | |
"!pip install -U -q PyDrive htcondor\n", | |
"\n", | |
"from google.colab import auth\n", | |
"from oauth2client.client import GoogleCredentials\n", | |
"from pydrive.auth import GoogleAuth\n", | |
"from pydrive.drive import GoogleDrive\n", | |
"\n", | |
"# Sign in through Colab, then hand the application-default credentials to PyDrive.\n", | |
"auth.authenticate_user()\n", | |
"gauth = GoogleAuth()\n", | |
"gauth.credentials = GoogleCredentials.get_application_default()\n", | |
"drive = GoogleDrive(gauth)\n", | |
"\n", | |
"# Google Drive file ID of the saved condor_history output.\n", | |
"file_id = '1hMXSA60pEW2PH-uveOYyKzzjK_4r-n6i'\n", | |
"\n", | |
"# Handle for that Drive file; a later cell reads it with downloaded.GetContentString().\n", | |
"downloaded = drive.CreateFile({'id': file_id})" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"id": "KF52DoLiI2nM", | |
"colab_type": "code", | |
"outputId": "31661fcb-8a57-4ed7-8578-c0d526764303", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 51 | |
} | |
}, | |
"cell_type": "code", | |
"source": [ | |
"# Parse the condor_history ClassAds and accumulate the local HTCondor totals.\n", | |
"import classad\n", | |
"import sys\n", | |
"import datetime\n", | |
"\n", | |
"# Reporting window, given as Unix timestamps. The datetimes are built explicitly in\n", | |
"# UTC so the window does not shift with the timezone of the machine running this.\n", | |
"start_time = datetime.datetime.fromtimestamp(1556409600, tz=datetime.timezone.utc)\n", | |
"print(\"Start Time: {}\".format(str(start_time)))\n", | |
"end_time = datetime.datetime.fromtimestamp(1556495999, tz=datetime.timezone.utc)\n", | |
"print(\"End Time: {}\".format(str(end_time)))\n", | |
"\n", | |
"wall_duration = 0       # sum of RemoteWallClockTime over all ads\n", | |
"counter = 0             # number of ads parsed\n", | |
"min_time = sys.maxsize  # smallest CompletionDate seen\n", | |
"max_time = 0            # largest CompletionDate seen\n", | |
"core_time = 0           # sum of RemoteWallClockTime * cpus\n", | |
"jobids = {}             # str(ClusterId) -> (RemoteWallClockTime / 3600) * cpus\n", | |
"\n", | |
"for ad in classad.parseAds(downloaded.GetContentString()):\n", | |
"    counter += 1\n", | |
"    min_time = min(min_time, ad['CompletionDate'])\n", | |
"    max_time = max(max_time, ad['CompletionDate'])\n", | |
"\n", | |
"    # Ads without RequestCpus are counted as single-core.\n", | |
"    cpus = ad['RequestCpus'] if 'RequestCpus' in ad else 1\n", | |
"    # Read the wall time once, defaulting to 0, so an ad without the attribute\n", | |
"    # adds nothing to the totals and does not raise KeyError when stored below.\n", | |
"    wall_seconds = ad['RemoteWallClockTime'] if 'RemoteWallClockTime' in ad else 0\n", | |
"\n", | |
"    wall_duration += wall_seconds\n", | |
"    core_time += wall_seconds * cpus\n", | |
"\n", | |
"    jobid_str = \"{}\".format(ad['ClusterId'])\n", | |
"    if jobid_str in jobids:\n", | |
"        # Reported only: the later ad still replaces the earlier entry.\n", | |
"        print(\"Duplicate jobid: {}\".format(jobid_str))\n", | |
"    jobids[jobid_str] = (wall_seconds / 3600) * cpus\n" | |
], | |
"execution_count": 2, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"Start Time: 2019-04-28 00:00:00\n", | |
"End Time: 2019-04-28 23:59:59\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"id": "q5TZAK5-Jmx9", | |
"colab_type": "code", | |
"outputId": "9fbab8dc-3dae-4af2-897d-434483592fdc", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 102 | |
} | |
}, | |
"cell_type": "code", | |
"source": [ | |
"# Summary of the local HTCondor records parsed from condor_history.\n", | |
"print(\"Number of jobs from local scheduler: {}\".format(counter))\n", | |
"\n", | |
"# min_time / max_time are Unix timestamps. Render them in UTC rather than the\n", | |
"# host's local timezone so they are directly comparable to the GRACC EndTime\n", | |
"# strings, which are reported in UTC.\n", | |
"print(\"Start Time: {}\".format(str(datetime.datetime.fromtimestamp(min_time, tz=datetime.timezone.utc))))\n", | |
"print(\"End Time: {}\".format(str(datetime.datetime.fromtimestamp(max_time, tz=datetime.timezone.utc))))\n", | |
"\n", | |
"print(\"Wall duration in seconds: {}\".format(wall_duration))\n", | |
"print(\"Core hours (Processors * WallHours): {}\".format(core_time / 3600))" | |
], | |
"execution_count": 3, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"Number of jobs from local scheduler: 19324\n", | |
"Start Time: 2019-04-28 00:00:17\n", | |
"End Time: 2019-04-28 23:59:50\n", | |
"Wall duration in seconds: 219802559.0\n", | |
"Core hours (Processors * WallHours): 344436.75111111114\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"id": "qUQdN8LM8Hhs", | |
"colab_type": "text" | |
}, | |
"cell_type": "markdown", | |
"source": [ | |
"# GRACC Data" | |
] | |
}, | |
{ | |
"metadata": { | |
"id": "0S4VlaJX8Lau", | |
"colab_type": "code", | |
"outputId": "d1bc75e7-b438-46c1-be3b-c63793b24275", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 71 | |
} | |
}, | |
"cell_type": "code", | |
"source": [ | |
"!pip install -U -q elasticsearch elasticsearch_dsl\n", | |
"import elasticsearch\n", | |
"from elasticsearch_dsl import Search, Q\n", | |
"import urllib3\n", | |
"import math\n", | |
"\n", | |
"# The client below is created with verify_certs=False, so silence urllib3's\n", | |
"# InsecureRequestWarning instead of repeating it for every request.\n", | |
"# NOTE(review): with verification off the HTTPS endpoint is not authenticated;\n", | |
"# confirm that this is acceptable for this analysis.\n", | |
"urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)\n", | |
"\n", | |
"# GRACC Elasticsearch query endpoint.\n", | |
"gracc_hosts = ['https://gracc.opensciencegrid.org/q']\n", | |
"es = elasticsearch.Elasticsearch(\n", | |
"    gracc_hosts,\n", | |
"    timeout=300,\n", | |
"    use_ssl=True,\n", | |
"    verify_certs=False,\n", | |
")\n" | |
], | |
"execution_count": 4, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"/usr/local/lib/python3.6/dist-packages/elasticsearch/connection/http_urllib3.py:178: UserWarning: Connecting to gracc.opensciencegrid.org using SSL with verify_certs=False is insecure.\n", | |
" % host\n" | |
], | |
"name": "stderr" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"id": "zQ3Q4Jsb8wrX", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"cell_type": "code", | |
"source": [ | |
"# Index pattern and gratia probe to query.\n", | |
"# NOTE(review): despite the variable name, this pattern selects the 'raw' indices.\n", | |
"osg_summary_index = 'gracc.osg.raw-*'\n", | |
"probe_name = 'condor:iut2-gk.mwt2.org'\n", | |
"\n", | |
"# Batch records from this probe whose EndTime lies strictly inside the window:\n", | |
"# 'gt'/'lt' exclude the start_time and end_time instants themselves.\n", | |
"# NOTE(review): confirm this matches the constraint used for condor_history.\n", | |
"in_window = Q('range', EndTime={'gt': start_time, 'lt': end_time})\n", | |
"from_probe = Q('term', ProbeName=probe_name)\n", | |
"is_batch = Q('term', ResourceType='Batch')\n", | |
"\n", | |
"s = Search(using=es, index=osg_summary_index).query(\n", | |
"    'bool', filter=[in_window & from_probe & is_batch])\n", | |
"\n", | |
"# Sums and EndTime extremes computed over all matching records.\n", | |
"bkt = (\n", | |
"    s.aggs\n", | |
"    .metric('WallDuration', 'sum', field='WallDuration')\n", | |
"    .metric('NumberOfJobs', 'sum', field='Count')\n", | |
"    .metric('EarliestEndTime', 'min', field='EndTime')\n", | |
"    .metric('LatestEndTime', 'max', field='EndTime')\n", | |
"    .metric('CoreHours', 'sum', field='CoreHours')\n", | |
")" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"id": "c2n0_RjM9381", | |
"colab_type": "code", | |
"outputId": "9d10a19c-743b-456e-8180-750f9b9c5143", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 34 | |
} | |
}, | |
"cell_type": "code", | |
"source": [ | |
"# Run the query against GRACC and show the raw aggregation response.\n", | |
"result = s.execute()\n", | |
"print(result.aggregations)" | |
], | |
"execution_count": 6, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"{'LatestEndTime': {'value': 1556495990000.0, 'value_as_strin...}\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"id": "FlmsM0gI-zn0", | |
"colab_type": "code", | |
"outputId": "64bf5167-fcf6-43f9-df58-09dd215410e9", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 102 | |
} | |
}, | |
"cell_type": "code", | |
"source": [ | |
"# Headline GRACC numbers for the window: (line template, value) pairs, printed in order.\n", | |
"gracc_aggs = result.aggregations\n", | |
"gracc_summary = [\n", | |
"    (\"Earliest EndTime: {}\", gracc_aggs.EarliestEndTime.value_as_string),\n", | |
"    (\"Last EndTime: {}\", gracc_aggs.LatestEndTime.value_as_string),\n", | |
"    # The job count is the number of matching records, not the 'NumberOfJobs' aggregation.\n", | |
"    (\"Number of jobs: {}\", result.hits.total),\n", | |
"    (\"Wall Duration in Seconds: {}\", gracc_aggs.WallDuration.value),\n", | |
"    (\"Core Hours: {}\", gracc_aggs.CoreHours.value),\n", | |
"]\n", | |
"for template, value in gracc_summary:\n", | |
"    print(template.format(value))" | |
], | |
"execution_count": 7, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"Earliest EndTime: 2019-04-28T00:00:17.000Z\n", | |
"Last EndTime: 2019-04-28T23:59:50.000Z\n", | |
"Number of jobs: 19402\n", | |
"Wall Duration in Seconds: 222711384.0\n", | |
"Core Hours: 345509.98222222296\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"id": "aPKieDQu_bg4", | |
"colab_type": "code", | |
"outputId": "a4f95515-eebb-4bf3-cb61-78097794218c", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 51 | |
} | |
}, | |
"cell_type": "code", | |
"source": [ | |
"import math\n", | |
"\n", | |
"# Match every GRACC record against the local HTCondor jobs by job id and core hours.\n", | |
"missing_jobs = []          # GRACC records without a matching local job\n", | |
"tmp_jobids = dict(jobids)  # working copy; matched local jobs are removed from it\n", | |
"for hit in s.scan():\n", | |
"    local_corehours = tmp_jobids.get(hit['LocalJobId'])\n", | |
"    # One core-hour figure is computed in this notebook and the other is read from\n", | |
"    # GRACC, so compare the floats with a tolerance instead of exact equality.\n", | |
"    if local_corehours is not None and math.isclose(hit['CoreHours'], local_corehours):\n", | |
"        del tmp_jobids[hit['LocalJobId']]\n", | |
"    else:\n", | |
"        # Either the job id is unknown locally or its core hours disagree.\n", | |
"        missing_jobs.append(hit)\n", | |
"print(\"Jobs that were found in HTCondor but not in GRACC: {}\".format(str(len(tmp_jobids))))\n", | |
"print(\"Jobs found in GRACC but not in HTCondor: {}\".format(len(missing_jobs)))\n" | |
], | |
"execution_count": 8, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"Jobs that were found in HTCondor but not in GRACC: 0\n", | |
"Jobs found in GRACC but not in HTCondor: 78\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"id": "_MHXIGIbwkQL", | |
"colab_type": "text" | |
}, | |
"cell_type": "markdown", | |
"source": [ | |
"## Missing Jobs\n", | |
"\n", | |
"The jobs from the MWT2 were queried using `CompletionDate` as the \"date filter\".\n", | |
"HTCondor does not add a `CompletionDate` attribute if the job is removed, so removed jobs do not appear in the local data selected this way, while GRACC still has records for them.\n", | |
"To identify those records, we search the `RawXML` element for the `EndTime`. If the `RawXML` from the gratia probe does not include `EndTime`, the GRACC Collector will try to add an `EndTime` by calculating:\n", | |
"\n", | |
"    StartTime + WallTime = EndTime\n", | |
"\n", | |
"The calculations below search the `RawXML` of each unmatched GRACC record for `EndTime`; records without it are counted as removed jobs." | |
] | |
}, | |
{ | |
"metadata": { | |
"id": "8Lr74qpOv5Hh", | |
"colab_type": "code", | |
"outputId": "89c9efd7-5c41-4911-fac3-32bedbc9e714", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 34 | |
} | |
}, | |
"cell_type": "code", | |
"source": [ | |
"# Core hours of the unmatched GRACC records whose RawXML has no EndTime element.\n", | |
"missing_corehours = sum(\n", | |
"    job['CoreHours'] for job in missing_jobs if \"EndTime\" not in job['RawXML'])\n", | |
"\n", | |
"# Unmatched records that do carry an EndTime are not counted; list them for inspection.\n", | |
"remaining_jobs = [job for job in missing_jobs if \"EndTime\" in job['RawXML']]\n", | |
"for job in remaining_jobs:\n", | |
"    print(\"{} ended {}: {}\".format(job['LocalJobId'], job['EndTime'], job['CoreHours']))\n", | |
"\n", | |
"print(\"{0:.2f} Core hours found in GRACC but not in HTCondor\".format(missing_corehours))" | |
], | |
"execution_count": 9, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"1073.23 Core hours found in GRACC but not in HTCondor\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"id": "YD-dXh6wzLJ8", | |
"colab_type": "text" | |
}, | |
"cell_type": "markdown", | |
"source": [ | |
"# Calculate Differences\n", | |
"\n", | |
"We want to know whether the HTCondor totals differ from GRACC. Before comparing, we add to the HTCondor total the \"missing\" core hours, i.e. those of the GRACC records that do not have the `EndTime` attribute in their `RawXML`, as described above." | |
] | |
}, | |
{ | |
"metadata": { | |
"id": "YwgE4oGmzNEl", | |
"colab_type": "code", | |
"outputId": "2ee7793c-9291-4766-8722-6569dded8093", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 34 | |
} | |
}, | |
"cell_type": "code", | |
"source": [ | |
"# HTCondor total in core hours, plus the GRACC-only core hours counted as missing.\n", | |
"htcondor_coretime = missing_corehours + core_time / 3600\n", | |
"\n", | |
"# Difference of the GRACC total, relative to the adjusted HTCondor total.\n", | |
"gracc_corehours = result.aggregations.CoreHours.value\n", | |
"perc_diff = (gracc_corehours - htcondor_coretime) / htcondor_coretime\n", | |
"print(\"{:.2%} Difference between GRACC and HTCondor\".format(perc_diff))" | |
], | |
"execution_count": 10, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"0.00% Difference between GRACC and HTCondor\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"id": "kkLfy3sO0Mwa", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"cell_type": "code", | |
"source": [ | |
"" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment