Skip to content

Instantly share code, notes, and snippets.

@plavjanik
Created March 22, 2017 15:33
Show Gist options
  • Save plavjanik/cfed8174f7c0a6a22eaf539aedad9dee to your computer and use it in GitHub Desktop.
Save plavjanik/cfed8174f7c0a6a22eaf539aedad9dee to your computer and use it in GitHub Desktop.
Download Splunk results in multiple parts
from __future__ import print_function
import base64
import os
import sys
from datetime import datetime, timedelta
from pprint import pprint
from time import sleep
import splunklib.client as client
import splunklib.results as results
# http://dev.splunk.com/view/python-sdk/SP-CAAAEE5
def format_splunk_datetime(t):
    """Format a datetime as the timestamp string used for search bounds and file names.

    The layout is ``YYYY-MM-DDTHH:MM:SS.ffffff`` followed by the UTC offset
    (``%z``). For a naive datetime ``%z`` expands to an empty string, so no
    offset is appended.

    Args:
        t: A ``datetime`` (anything exposing ``strftime``).

    Returns:
        The formatted timestamp string.
    """
    return t.strftime("%Y-%m-%dT%H:%M:%S.%f%z")
def _wait_for_job(job, poll_interval=2, ready_interval=0.5):
    """Poll ``job`` until it reports ``isDone``, printing one-line progress."""
    while True:
        # Sleep between readiness checks instead of spinning: every
        # is_ready() call is a request to the server.
        while not job.is_ready():
            sleep(ready_interval)
        stats = {"isDone": job["isDone"],
                 "doneProgress": float(job["doneProgress"]) * 100,
                 "scanCount": int(job["scanCount"]),
                 "eventCount": int(job["eventCount"]),
                 "resultCount": int(job["resultCount"])}
        # Leading "\r" rewrites the same terminal line on every poll.
        status = ("\r%(doneProgress)03.1f%% %(scanCount)d scanned "
                  "%(eventCount)d matched %(resultCount)d results") % stats
        sys.stdout.write(status)
        sys.stdout.flush()
        if stats["isDone"] == "1":
            sys.stdout.write("\n\nDone!\n\n")
            break
        sleep(poll_interval)


def search(service, query, basename, start, end):
    """Run one Splunk search over ``[start, end]`` and save each ``_raw`` event to a file.

    The output file is named ``<basename>_<start>-<end>.log``. If that file
    already exists the window is skipped, so an interrupted multi-window
    download can be resumed by re-running the script.

    Args:
        service: Connected Splunk service object (provides ``jobs.create``).
        query: Search expression; it is prefixed with ``"search "``.
        basename: Prefix of the output file name.
        start: ``datetime`` used as ``earliest_time``.
        end: ``datetime`` used as ``latest_time``.

    Raises:
        Whatever the Splunk client or file I/O raises. On any failure the
        output file is removed (so the window is retried next run) and the
        server-side job is cancelled before the exception propagates.
    """
    output_filename = basename + "_%s-%s.log" % (
        format_splunk_datetime(start), format_splunk_datetime(end))
    if os.path.exists(output_filename):
        print("File %s already exists. Skipping" % output_filename)
        return

    # Create the file up front as an "in progress" marker for this window.
    with open(output_filename, "w") as placeholder:
        placeholder.write(".")

    kwargs_normalsearch = {"exec_mode": "normal",
                           "max_count": 999999999999,
                           "enable_lookups": False,
                           "earliest_time": format_splunk_datetime(start),
                           "latest_time": format_splunk_datetime(end)}
    pprint(kwargs_normalsearch)

    job = None
    try:
        job = service.jobs.create("search %s" % query, **kwargs_normalsearch)
        # A normal search returns the job's SID right away, so we need to
        # poll for completion.
        _wait_for_job(job)
        with open(output_filename, "w") as output:
            # The results endpoint returns only its default page (100 rows)
            # unless count is given; count=0 requests every available row.
            for row in results.ResultsReader(job.results(count=0)):
                # The reader can also yield diagnostic Message objects,
                # which are not events and have no "_raw" field.
                if isinstance(row, results.Message):
                    continue
                output.write(row["_raw"])
                output.write("\n")
    except BaseException:
        # Includes Ctrl-C: never leave the placeholder or a partial file
        # behind, otherwise the next run would skip this window for good.
        if os.path.exists(output_filename):
            os.remove(output_filename)
        raise
    finally:
        # Always release the server-side job, even on failure.
        if job is not None:
            job.cancel()
    sys.stdout.write('\n')
def main():
    """Download 7 * 24 consecutive one-hour windows, walking backwards in time.

    The newest window ends at 2017-03-02 00:00; each window is passed to
    ``search``, which writes one file per window.

    Connection settings may be supplied through the ``SPLUNK_HOST``,
    ``SPLUNK_USERNAME`` and ``SPLUNK_PASSWORD`` environment variables so the
    password need not be stored in the source; unset variables fall back to
    the values hard-coded below.
    """
    interval_size = 60  # window length in minutes
    window = timedelta(minutes=interval_size)
    query = 'index=gdc source="/mnt/log/gdc-taskman" component="gcfc" | fields _raw'
    output_basename = "gcfc"
    service = client.connect(
        scheme="https",
        host=os.environ.get("SPLUNK_HOST", "splunk-sh2.na.intgdc.com"),
        username=os.environ.get("SPLUNK_USERNAME", "petr.plavjanik"),
        password=os.environ.get("SPLUNK_PASSWORD", "..."))
    end = datetime(2017, 3, 2)
    for _ in range(7 * 24):
        start = end - window
        search(service, query, output_basename, start, end)
        # The next (older) window ends where this one started.
        end = start
# Run the download only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment