Skip to content

Instantly share code, notes, and snippets.

@onefoursix
Last active January 16, 2021 23:30
Show Gist options
  • Save onefoursix/93f80f042018029310eb to your computer and use it in GitHub Desktop.
Save onefoursix/93f80f042018029310eb to your computer and use it in GitHub Desktop.
Python CM-API Example to pull Impala Query metrics
#!/usr/bin/python
## *******************************************************************************************
## impalaQueries.py
##
## Getting Info on Impala Queries
##
## Usage: ./impalaQueries.py
##
## *******************************************************************************************
## ** imports *******************************
import sys
import pprint
from datetime import datetime, timedelta
from cm_api.api_client import ApiResource
## Timestamp layout used when printing each query's start time.
## NOTE(review): %Z renders as an empty string for naive datetimes -- confirm
## whether query.startTime carries tzinfo.
fmt = '%Y-%m-%d %H:%M:%S %Z'
pp = pprint.PrettyPrinter(indent=4)

## ** Settings ******************************
## Cloudera Manager Host
cm_host = "toronto"
## Cloudera Manager port (a string, because it is concatenated into the
## connection message below)
cm_port = "7180"
## Cloudera Manager login
cm_login = "admin"
## Cloudera Manager password
cm_password = "admin"
## Cluster Name
cluster_name = "Cluster 1"
## User whose Impala queries are reported
query_user = "mark"
## Statements that are skipped when reporting: one exact match plus any
## statement starting with one of the listed prefixes
skipped_statement = "GET_SCHEMAS"
skipped_prefixes = ("USE", "SHOW")
## *****************************************

## NOTE: print is called with a single parenthesised argument throughout so
## the output is identical on Python 2 and the script also parses on Python 3.

impala_service = None

## Connect to CM
print("\nConnecting to Cloudera Manager at " + cm_host + ":" + cm_port)
api = ApiResource(server_host=cm_host, server_port=cm_port, username=cm_login, password=cm_password)

## Get the Cluster
cluster = api.get_cluster(cluster_name)

## Get the IMPALA service: the first service whose type is "IMPALA"
for service in cluster.get_all_services():
    if service.type == "IMPALA":
        impala_service = service
        print("Located Impala Service: " + service.name)
        break

if impala_service is None:
    print("Error: Could not locate Impala Service")
    ## sys.exit instead of quit(): quit() is an interactive helper injected
    ## by the site module and is not guaranteed to exist in every run mode
    sys.exit(1)

## I'll configure this example to use a window of one day
now = datetime.utcnow()
start = now - timedelta(days=1)

print("Looking for Impala queries executed by the user \"" + query_user + "\"")
filterStr = 'user = ' + query_user
impala_query_response = impala_service.get_impala_queries(start_time=start, end_time=now, filter_str=filterStr, limit=1000)
queries = impala_query_response.queries

## Report every query except the skipped bookkeeping statements
for query in queries:
    statement = query.statement
    if statement == skipped_statement or statement.startswith(skipped_prefixes):
        continue
    print('-------------------------------------')
    print("queryState : " + query.queryState)
    print("queryId: " + query.queryId)
    print("user: " + query.user)
    print("startTime: " + query.startTime.strftime(fmt))
    print("SQL: " + statement)
    pp.pprint(query.attributes)
    print('-------------------------------------')

print("done")
@turai94
Copy link

turai94 commented May 2, 2017

How can I get the database and table in the same script, using queryId? Are there any query attributes that show the database and table name?

@rajalaxman
Copy link

Your CM API code is really useful.
I am looking for similar code to monitor Hive queries, but the CM API's services.py doesn't have anything related to Hive. Have you done anything with Hive?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment