Skip to content

Instantly share code, notes, and snippets.

@salrashid123
Last active June 10, 2018 21:15
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save salrashid123/4afca078c6bac1a0490ba937547094ac to your computer and use it in GitHub Desktop.
Save salrashid123/4afca078c6bac1a0490ba937547094ac to your computer and use it in GitHub Desktop.
Google Cloud Logging events as pandas DataFrames
# virtualenv env
# source env/bin/activate
# pip install jupyter matplotlib pandas google-cloud-logging
#%%
import collections
def flatten(d, parent_key='', sep='_'):
    """Collapse a nested mapping into a single-level dict.

    Keys of nested mappings are joined to their parents' keys with ``sep``,
    e.g. ``{'a': {'b': 1}}`` becomes ``{'a_b': 1}``.

    Args:
        d: Mapping to flatten. Values that are themselves mutable mappings
            are flattened recursively; every other value is kept as-is.
        parent_key: Prefix prepended (followed by ``sep``) to every key of
            ``d``. An empty prefix leaves the top-level keys unchanged.
        sep: String placed between a parent key and a child key.

    Returns:
        A new ``dict`` mapping joined keys to leaf values. Empty nested
        mappings contribute no keys.
    """
    # ``collections.MutableMapping`` was removed in Python 3.10. The ABC lives
    # in ``collections.abc`` on Python 3 and directly in ``collections`` on
    # Python 2, so try the modern location first and fall back.
    try:
        from collections.abc import MutableMapping
    except ImportError:
        from collections import MutableMapping

    items = []
    for k, v in d.items():
        new_key = parent_key + sep + k if parent_key else k
        if isinstance(v, MutableMapping):
            # Recurse with the joined key as the new prefix.
            items.extend(flatten(v, new_key, sep=sep).items())
        else:
            items.append((new_key, v))
    return dict(items)
#%%
import os
import pprint

# Set before google.cloud.logging is imported, as in the original ordering.
# NOTE(review): presumably the client library reads this variable to decide
# between gRPC and HTTP transport - confirm against the installed version.
os.environ["GOOGLE_CLOUD_DISABLE_GRPC"] = "false"
from google.cloud import logging
from google.cloud.logging import ASCENDING
from google.cloud.logging import DESCENDING

pp = pprint.PrettyPrinter(indent=4)

# Cloud Logging filter expression passed to list_entries() below.
FILTER = 'resource.type="bigquery_resource" AND protoPayload.methodName="jobservice.getqueryresults" AND severity="INFO"'

client = logging.Client()

# Accumulates every entry from every page returned by the query.
entries = []
iterator = client.list_entries(filter_=FILTER, order_by=DESCENDING)
for page in iterator.pages:
    # Progress output for each fetched page.
    print(' Page number: %d' % (iterator.page_number,))
    print(' Items in page: %d' % (page.num_items,))
    print('Items remaining: %d' % (page.remaining,))
    for entry in page:
        entries.append(entry)

# Single-argument call form: prints the same text on Python 2 and Python 3
# (the original used the Python 2-only print statement here).
print("Number of Log entries recalled: " + str(len(entries)))
#%%
import copy

# Work on deep copies so that later in-place edits of the payloads do not
# touch the objects held by the fetched entries.
payloads = []
for entry in entries:
    payloads.append(copy.deepcopy(entry.payload))

# Show the first payload as a sample of the structure.
payloads[0]
#%%
# Replace each payload's nested 'serviceData' value with flattened
# 'serviceData_...' keys, editing the payload dicts in place.
for payload in payloads:
    nested = payload.get('serviceData', {})
    # Anything that is not a plain dict is treated as "no service data".
    wrapped = {'serviceData': nested if isinstance(nested, dict) else {}}
    payload.update(flatten(wrapped))
    # Drop the original nested value now that its leaves are top-level keys.
    payload.pop('serviceData', None)
#%%
import pandas

# Build a DataFrame from the list of payload dicts and preview it.
df = pandas.DataFrame(payloads)
df.head()
#%%
# List every column name in the frame.
df.columns.tolist()
#%%
# Number of entries per job state.
job_state = df[u'serviceData_jobGetQueryResultsResponse_job_jobStatus_state']
job_state.value_counts()
#%%
# Number of entries per API method name.
method_name = df['methodName']
method_name.value_counts()
#%%
# Largest totalProcessedBytes value within each method name.
# NOTE(review): if this column holds strings, max() compares them
# lexicographically rather than numerically - confirm the dtype.
processed_bytes = 'serviceData_jobGetQueryResultsResponse_job_jobStatistics_totalProcessedBytes'
df.groupby('methodName')[processed_bytes].max()
#%%
# Column names bound to x / y; presumably intended as plot axes, but this
# cell only displays the frame.
x = 'serviceData_jobGetQueryResultsResponse_job_jobStatistics_createTime'
y = 'serviceData_jobGetQueryResultsResponse_job_jobStatistics_totalProcessedBytes'
df
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment