Skip to content

Instantly share code, notes, and snippets.

@shantanuo
Created November 15, 2019 05:15
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save shantanuo/d0af78a06a55fdf1feab375296807162 to your computer and use it in GitHub Desktop.
Save shantanuo/d0af78a06a55fdf1feab375296807162 to your computer and use it in GitHub Desktop.
audit trail query to pandas dataframe
import pandas as pd
import numpy as np
import elasticsearch
from elasticsearch import helpers
def _as_mapping(value):
    """Return *value* unchanged when it is a dict, otherwise an empty dict.

    Records whose nested section is missing or null would otherwise make
    ``pd.DataFrame`` fail, because it calls ``.keys()`` on every row.
    """
    return value if isinstance(value, dict) else {}


def split_hits(hits):
    """Split search hits into three row-aligned lists.

    Args:
        hits: iterable of hit dicts, each carrying a ``"_source"`` dict.

    Returns:
        ``(sources, identities, params)`` where ``sources`` holds every
        ``_source`` dict, and the other two hold the ``userIdentity`` and
        ``requestParameters`` sub-dicts of the same hit (``{}`` when the
        key is absent or its value is not a dict), so that position *i* in
        all three lists refers to the same hit.
    """
    sources, identities, params = [], [], []
    for hit in hits:
        source = hit["_source"]
        sources.append(source)
        identities.append(_as_mapping(source.get("userIdentity")))
        params.append(_as_mapping(source.get("requestParameters")))
    return sources, identities, params


def merge_on_row(sources, identities, params):
    """Build one DataFrame per list and join them side by side on row index.

    The frames are merged left to right with ``pd.merge`` on the index, so
    column names that collide between frames are disambiguated by pandas'
    merge suffixes rather than duplicated.
    """
    merged = pd.DataFrame(sources)
    for extra in (identities, params):
        merged = pd.merge(
            merged, pd.DataFrame(extra), left_index=True, right_index=True
        )
    return merged


def blank_placeholder_lists(frame):
    """Replace cells whose string form is exactly ``['']`` with NaN.

    ``astype(str)`` is used for the comparison instead of the deprecated
    ``DataFrame.applymap(str)``; the original cell values are otherwise
    left untouched.
    """
    return frame.mask(frame.astype(str).eq("['']"))


def drop_named_columns(frame, unwanted):
    """Return *frame* without the columns named in *unwanted*.

    Names that are not present are ignored, and the remaining columns keep
    their original order (a set difference would shuffle them from run to
    run because of string-hash randomisation).
    """
    unwanted = set(unwanted)
    keep = [name not in unwanted for name in frame.columns]
    return frame.loc[:, keep]


def _distinct_count(column):
    """Count distinct non-null values, tolerating unhashable cells."""
    try:
        return column.nunique()
    except TypeError:
        # Lists/dicts cannot be hashed; compare them by their repr instead.
        return column.dropna().map(repr).nunique()


def drop_constant_columns(frame):
    """Return *frame* without columns holding fewer than two distinct values.

    Null cells are not counted, so an all-null column is dropped as well.
    The check is applied to every dtype, numeric columns included.
    """
    keep = [_distinct_count(column) >= 2 for _, column in frame.items()]
    return frame.loc[:, keep]


# Guarded so that importing this file does not hit the network or write a
# file; running it as a script (or pasting it into a notebook) still creates
# the same module-level names as before.
if __name__ == "__main__":
    # NOTE(review): helpers.scan forwards `query` as the search request
    # body - confirm that the string pasted here is in a form your
    # Elasticsearch client accepts as a body.
    myquery = 'your kibana query here...'
    es_client = elasticsearch.Elasticsearch(
        "https://xxx.us-east-1.es.amazonaws.com"
    )

    myrp, mysource, mykey = split_hits(
        helpers.scan(es_client, index="cwl-*", query=myquery)
    )
    df = merge_on_row(myrp, mysource, mykey)

    # remove empty lists
    df = blank_placeholder_lists(df)

    # remove list columns
    exclude_cols = [
        "userIdentity",
        "requestParameters",
        "responseElements",
        "additionalEventData",
        "resources",
        "@message",
    ]
    df = drop_named_columns(df, exclude_cols)

    # Remove columns where there is only 1 unique value
    df = drop_constant_columns(df)

    df.to_csv("myreport.csv", index=False)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment