plavjanik/splunk_download_search_result.py

## splunk_download_search_result.py
from __future__ import print_function

import base64
import os
import sys
from datetime import datetime, timedelta
from pprint import pprint
from time import sleep

import splunklib.client as client
import splunklib.results as results


# http://dev.splunk.com/view/python-sdk/SP-CAAAEE5

def format_splunk_datetime(t):
    """Render a datetime as the timestamp string used for Splunk time bounds.

    The layout is ``YYYY-MM-DDTHH:MM:SS.ffffff`` followed by the UTC offset
    (``+HHMM``) when *t* is timezone-aware; for a naive datetime the offset
    part is empty.
    """
    date_part = "%Y-%m-%d"
    time_part = "%H:%M:%S.%f"
    return t.strftime(date_part + "T" + time_part + "%z")


def search(service, query, basename, start, end):
    """Run one Splunk search over ``[start, end]`` and save the raw events.

    The events are written, one ``_raw`` value per line, to a file named
    ``<basename>_<start>-<end>.log``. If that file already exists the window
    is skipped, so an interrupted multi-window download can be resumed.

    Args:
        service: connected Splunk service object exposing ``jobs.create``.
        query: search expression; ``"search "`` is prepended to it.
        basename: prefix of the output file name.
        start: datetime used as ``earliest_time``.
        end: datetime used as ``latest_time``.

    Raises:
        Whatever the Splunk client or the file system raises. On failure the
        output file is removed so that the next run retries this window.
    """
    output_filename = basename + "_%s-%s.log" % (
        format_splunk_datetime(start), format_splunk_datetime(end))
    if os.path.exists(output_filename):
        print("File %s already exists. Skipping" % output_filename)
        return

    # Create the file up front as a placeholder (presumably so that another
    # instance running in parallel skips this window -- confirm with author).
    with open(output_filename, "w") as placeholder:
        placeholder.write(".")

    kwargs_normalsearch = {"exec_mode": "normal",
                           "max_count": 999999999999,
                           "enable_lookups": False,
                           "earliest_time": format_splunk_datetime(start),
                           "latest_time": format_splunk_datetime(end)}

    pprint(kwargs_normalsearch)

    job = None
    completed = False
    try:
        job = service.jobs.create("search %s" % query, **kwargs_normalsearch)
        _wait_for_job(job)
        with open(output_filename, "w") as output:
            _write_raw_events(job, output)
        completed = True
    finally:
        # A placeholder or half-written file would make every later run skip
        # this window, so drop it unless the download finished.
        if not completed and os.path.exists(output_filename):
            os.remove(output_filename)
        # Always release the server-side job, even when something failed.
        if job is not None:
            job.cancel()

    sys.stdout.write('\n')


def _wait_for_job(job):
    """Poll *job* until it reports done, printing a one-line progress status."""
    # A normal search returns the job's SID right away, so we need to poll
    # for completion.
    while True:
        # Pause between readiness checks instead of spinning in a tight loop.
        while not job.is_ready():
            sleep(0.5)
        stats = {"isDone": job["isDone"],
                 "doneProgress": float(job["doneProgress"]) * 100,
                 "scanCount": int(job["scanCount"]),
                 "eventCount": int(job["eventCount"]),
                 "resultCount": int(job["resultCount"])}

        status = ("\r%(doneProgress)03.1f%%   %(scanCount)d scanned   "
                  "%(eventCount)d matched   %(resultCount)d results") % stats

        sys.stdout.write(status)
        sys.stdout.flush()
        if stats["isDone"] == "1":
            sys.stdout.write("\n\nDone!\n\n")
            return
        sleep(2)


def _write_raw_events(job, output, page_size=10000):
    """Write the ``_raw`` field of every result of *job* to *output*.

    The results endpoint returns only one page per request (100 rows when no
    ``count`` is given), so the results are fetched page by page using
    ``count``/``offset`` until ``resultCount`` rows have been written.
    """
    total = int(job["resultCount"])
    offset = 0
    while offset < total:
        received = 0
        page = results.ResultsReader(job.results(count=page_size, offset=offset))
        for row in page:
            # The reader can also yield diagnostic messages; only result
            # rows are dicts carrying "_raw".
            if isinstance(row, dict):
                output.write(row["_raw"])
                output.write("\n")
                received += 1
        if received == 0:
            # Server returned fewer rows than it announced; avoid looping forever.
            break
        offset += received


def main():
    """Download one week of ``gcfc`` task-manager logs in hourly slices.

    Walks backwards from 2017-03-02 00:00 in ``interval_size``-minute windows
    (``7 * 24`` of them) and calls ``search`` once per window.

    The connection settings default to the values written here but can be
    overridden through the ``SPLUNK_HOST``, ``SPLUNK_USERNAME`` and
    ``SPLUNK_PASSWORD`` environment variables, so the real password does not
    have to be stored in the source file.
    """
    interval_size = 60
    query = 'index=gdc source="/mnt/log/gdc-taskman" component="gcfc" | fields _raw'
    output_basename = "gcfc"

    service = client.connect(
        scheme="https",
        host=os.environ.get("SPLUNK_HOST", "splunk-sh2.na.intgdc.com"),
        username=os.environ.get("SPLUNK_USERNAME", "petr.plavjanik"),
        password=os.environ.get("SPLUNK_PASSWORD", "..."))

    window = timedelta(minutes=interval_size)
    end = datetime(2017, 3, 2)
    for _ in range(7 * 24):
        start = end - window
        search(service, query, output_basename, start, end)
        # The next (earlier) window ends where this one started.
        end = start


# Run the download only when executed as a script, not when imported.
if "__main__" == __name__:
    main()
	from __future__ import print_function

	import base64
	import os
	import sys
	from datetime import datetime, timedelta
	from pprint import pprint
	from time import sleep

	import splunklib.client as client
	import splunklib.results as results


	# http://dev.splunk.com/view/python-sdk/SP-CAAAEE5

	def format_splunk_datetime(t):
	    """Format datetime *t* as ``YYYY-MM-DDTHH:MM:SS.ffffff[+HHMM]``.

	    The UTC offset is only present when *t* is timezone-aware. The body
	    indentation, lost in the original lines, is restored here.
	    """
	    return t.strftime("%Y-%m-%dT%H:%M:%S.%f%z")


	def search(service, query, basename, start, end):
	    """Run one Splunk search over ``[start, end]`` and save the raw events.

	    Writes one ``_raw`` value per line to ``<basename>_<start>-<end>.log``
	    and skips the window when that file already exists. The nesting of
	    the statements, lost in the original lines, is restored here.

	    Args:
	        service: connected Splunk service object exposing ``jobs.create``.
	        query: search expression; ``"search "`` is prepended to it.
	        basename: prefix of the output file name.
	        start: datetime used as ``earliest_time``.
	        end: datetime used as ``latest_time``.
	    """
	    output_filename = basename + "_%s-%s.log" % (format_splunk_datetime(start), format_splunk_datetime(end))
	    if os.path.exists(output_filename):
	        print("File %s already exists. Skipping" % output_filename)
	        return

	    # Placeholder so the window is visibly taken while the search runs.
	    with open(output_filename, "w") as csv_file:
	        csv_file.write(".")

	    kwargs_normalsearch = {"exec_mode": "normal",
	                           "max_count": 999999999999,
	                           "enable_lookups": False,
	                           "earliest_time": format_splunk_datetime(start),
	                           "latest_time": format_splunk_datetime(end)}

	    pprint(kwargs_normalsearch)

	    job = None
	    finished = False
	    try:
	        job = service.jobs.create("search %s" % query, **kwargs_normalsearch)

	        # A normal search returns the job's SID right away, so we need to poll for completion
	        while True:
	            while not job.is_ready():
	                # Short pause instead of a tight polling loop.
	                sleep(0.5)
	            stats = {"isDone": job["isDone"],
	                     "doneProgress": float(job["doneProgress"]) * 100,
	                     "scanCount": int(job["scanCount"]),
	                     "eventCount": int(job["eventCount"]),
	                     "resultCount": int(job["resultCount"])}

	            status = ("\r%(doneProgress)03.1f%% %(scanCount)d scanned "
	                      "%(eventCount)d matched %(resultCount)d results") % stats

	            sys.stdout.write(status)
	            sys.stdout.flush()
	            if stats["isDone"] == "1":
	                sys.stdout.write("\n\nDone!\n\n")
	                break
	            sleep(2)

	        with open(output_filename, "w") as output:
	            # The results endpoint returns a single page per request
	            # (100 rows without ``count``), so page through all of them.
	            page_size = 10000
	            total = stats["resultCount"]
	            offset = 0
	            while offset < total:
	                written = 0
	                for row in results.ResultsReader(job.results(count=page_size, offset=offset)):
	                    # Skip diagnostic messages; only dict rows carry "_raw".
	                    if isinstance(row, dict):
	                        output.write(row["_raw"])
	                        output.write("\n")
	                        written += 1
	                if written == 0:
	                    break
	                offset += written
	        finished = True
	    finally:
	        # Remove the placeholder/partial file on failure so a rerun retries
	        # this window, and always release the server-side job.
	        if not finished and os.path.exists(output_filename):
	            os.remove(output_filename)
	        if job is not None:
	            job.cancel()

	    sys.stdout.write('\n')


	def main():
	    """Download ``7 * 24`` consecutive 60-minute windows ending 2017-03-02.

	    Calls ``search`` once per window, newest first. The body indentation,
	    lost in the original lines, is restored, and the stray backslash
	    before the pipe in the query (a markdown escape that would be sent to
	    Splunk literally) is removed.
	    """
	    interval_size = 60
	    query = 'index=gdc source="/mnt/log/gdc-taskman" component="gcfc" | fields _raw'
	    output_basename = "gcfc"

	    service = client.connect(scheme="https", host="splunk-sh2.na.intgdc.com",
	                             username="petr.plavjanik", password="...")

	    step = timedelta(minutes=interval_size)
	    last_end = datetime(2017, 3, 2)
	    for _ in range(7 * 24):
	        start = last_end - step
	        end = last_end
	        # Step back one window for the next iteration.
	        last_end = start
	        search(service, query, output_basename, start, end)


	# Run the download only when executed as a script (indentation restored).
	if __name__ == '__main__':
	    main()