Skip to content

Instantly share code, notes, and snippets.

Last active November 2, 2017 21:19
Show Gist options
  • Save LukeMurphey/7479309 to your computer and use it in GitHub Desktop.
Save LukeMurphey/7479309 to your computer and use it in GitHub Desktop.
A base class that can be used for making Python-based modular inputs for Splunk #Splunk
This is a base class for making Python modular inputs for Splunk.
To make a modular input based on this class, you should follow the steps defined below.
Note that this example assumes you are making an input named "my_input_name".
1) Define the input in inputs.conf.spec
You will need to define your input in a spec file within the following directory within your app:
You should define your input in the inputs.conf.spec by declaring the fields your input accepts.
This file should look something like this:
* Configure an input to do something
title = <value>
* The title of the input
url = <value>
* The URL to be checked
2) Include this module in your app
Put this module within your app. You can put this within your bin directory or within a
sub-directory of the bin directory. I generally recommend putting python modules under the bin
directory with a directory that is specific to your app. Something like:
3) Define defaults for your inputs in inputs.conf
You can define default values for your inputs within the inputs.conf file. This file would be:
The contents of the file would be something like this:
url =
4) Create your modular input class
Create your modular input class. This class must be named the same as your input name and it must
be placed within the bin directory. In this example, the input should be in the following path
since the input is named "my_input_name":
Below is an example of a modular input class. This class does the following:
1) Defines the scheme_args which provides some info about the modular input
2) Defines the parameters that the input accepts
3) Runs the modular input
import sys
from modular_input import Field, ModularInput, URLField
class MyInput(ModularInput):
def __init__(self, timeout=30):
scheme_args = {'title': "My input name",
'description': "This input is an example",
'use_external_validation': "true",
'streaming_mode': "xml",
'use_single_instance': "true"}
args = [
Field("title", "Title", "A short description of the input", empty_allowed=False),
URLField("url", "URL", "The URL to connect to", empty_allowed=False)
ModularInput.__init__(self, scheme_args, args, logger_name='my_input_modular_input')
def run(self, stanza, cleaned_params, input_config):
interval = cleaned_params["interval"]
title = cleaned_params["title"]
host = cleaned_params.get("host", None)
index = cleaned_params.get("index", "default")
sourcetype = cleaned_params.get("sourcetype", "my_app_name")
url = cleaned_params["url"]
if self.needs_another_run(input_config.checkpoint_dir, stanza, interval):
self.logger.debug("Your input should do something here, stanza=%s", stanza)
if __name__ == '__main__':
my_input = None
my_input = MyInput()
except Exception as e:
# This logs general exceptions that would have been unhandled otherwise (such as coding
# errors)
if my_input is not None and my_input.logger is not None:
my_input.logger.exception("Unhandled exception was caught, this may be due to a defect in the script")
raise e
import logging
from logging import handlers
import xml.dom
from xml.dom.minidom import Document
import sys
import re
import time
import os
import hashlib
import json
from urlparse import urlparse
from threading import RLock
# Try to load Splunk's libraries. An inability to do so likely means we are running on a universal
# forwarder (since it doesn't include Python). We will proceed but will be unable to access
# Splunk's endpoints via simple request which means we will not able to load secure credentials.
try:
    # The splunk package and splunk.rest are referenced further down in this module
    # (SplunkdConnectionException, ResourceNotFound, simpleRequest).
    import splunk
    import splunk.rest
    from splunk.appserver.mrsparkle.lib.util import make_splunkhome_path
    from splunk.util import normalizeBoolean as normBool
    uf_mode = False
except ImportError:
    def normBool(value):
        """
        Minimal stand-in for splunk.util.normalizeBoolean: return True only for '1' or 'true'
        (case-insensitive, surrounding whitespace ignored).
        """
        return str(value).strip().lower() in ['1', 'true']

    # Splunk's libraries are unavailable; callers check this flag before using them
    uf_mode = True
class FieldValidationException(Exception):
    """
    Raised by the to_python() method of a field when the supplied value is not valid.
    """
    pass
class Field(object):
    """
    This is the base class that should be used for field validators. Sub-class this and
    override to_python if you need custom validation.
    """

    # Values used for the data_type element of the modular input scheme
    DATA_TYPE_STRING = 'string'
    DATA_TYPE_NUMBER = 'number'
    DATA_TYPE_BOOLEAN = 'boolean'

    def get_data_type(self):
        """
        Get the type of the field.
        """
        return Field.DATA_TYPE_STRING

    def __init__(self, name, title, description, none_allowed=False, empty_allowed=True,
                 required_on_create=None, required_on_edit=None):
        """
        Create the field.

        Arguments:
        name -- Set the name of the field (e.g. "database_server")
        title -- Set the human readable title (e.g. "Database server")
        description -- Set the human readable description of the field (e.g. "The IP or domain name
                       of the database server")
        none_allowed -- Is a value of none allowed?
        empty_allowed -- Is an empty string allowed?
        required_on_create -- Is this field required when creating?
        required_on_edit -- Is this field required when editing?

        Raises ValueError if name, title or description is None or blank.
        """

        # Try to set required_on_create and required_on_edit to sane defaults if not defined:
        # a field that may be None is optional on create, and editing follows creating.
        if required_on_create is None:
            required_on_create = not none_allowed

        if required_on_edit is None:
            required_on_edit = required_on_create

        for label, text in (("name", name), ("title", title), ("description", description)):
            if text is None:
                raise ValueError("The %s parameter cannot be none" % label)

            if len(text.strip()) == 0:
                raise ValueError("The %s parameter cannot be empty" % label)

        self.name = name
        self.title = title
        self.description = description

        self.none_allowed = none_allowed
        self.empty_allowed = empty_allowed
        self.required_on_create = required_on_create
        self.required_on_edit = required_on_edit

    def to_python(self, value, session_key=None):
        """
        Convert the field to a Python object. Should throw a FieldValidationException if the data
        is invalid.

        Arguments:
        value -- The value to convert
        session_key -- The session key to access Splunk (if needed)
        """

        if not self.none_allowed and value is None:
            raise FieldValidationException("The value for the '%s' parameter cannot be empty" % (self.name))

        if not self.empty_allowed and len(str(value).strip()) == 0:
            raise FieldValidationException("The value for the '%s' parameter cannot be empty" % (self.name))

        return value

    def to_string(self, value):
        """
        Convert the field to a string value that can be returned. Should throw a
        FieldValidationException if the data is invalid.

        Arguments:
        value -- The value to convert
        """

        return str(value)
class BooleanField(Field):
    """
    A validator that converts string versions of boolean to a real boolean.
    """

    def to_python(self, value, session_key=None):
        """
        Return True or False for booleans and for the strings "true"/"1"/"false"/"0"
        (case-insensitive). Raises FieldValidationException for anything else.
        """
        Field.to_python(self, value, session_key)

        # Values equal to True/False (including 1 and 0) are normalised to a real boolean
        if value in [True, False]:
            return bool(value)

        normalized = str(value).strip().lower()

        if normalized in ["true", "1"]:
            return True

        if normalized in ["false", "0"]:
            return False

        raise FieldValidationException("The value of '%s' for the '%s' parameter is not a valid boolean" % (str(value), self.name))

    def to_string(self, value):
        """
        Return "1" for true values, "0" for false values and str(value) otherwise.
        """
        # Equality (not identity) is intentional so that 1 and 0 are handled too
        if value == True:
            return "1"
        elif value == False:
            return "0"

        return str(value)

    def get_data_type(self):
        """
        Get the type of the field.
        """
        return 'boolean'
class ListField(Field):
    """
    A validator that converts a comma separated string to an array.
    """

    def to_python(self, value, session_key=None):
        """
        Split the value on commas; None becomes an empty list.
        """
        Field.to_python(self, value, session_key)

        if value is not None:
            return value.split(",")

        return []

    def to_string(self, value):
        """
        Join the list with commas; None becomes an empty string.
        """
        if value is not None:
            return ",".join(value)

        return ""
class StaticListField(Field):
    """
    This allows you to specify a list of field values that are allowed.
    All other values will be rejected.
    """

    _valid_values = None

    def __init__(self, name, title, description, none_allowed=False, empty_allowed=True, required_on_create=None, required_on_edit=None, valid_values=None):
        """
        Create the field.

        Arguments are those of Field plus:
        valid_values -- The collection of values that this field accepts
        """
        super(StaticListField, self).__init__(name, title, description, none_allowed, empty_allowed, required_on_create, required_on_edit)

        self.valid_values = valid_values

    @property
    def valid_values(self):
        """The collection of values that are accepted."""
        return self._valid_values

    @valid_values.setter
    def valid_values(self, values):
        self._valid_values = values

    def to_python(self, value, session_key=None):
        """
        Return the value if it is one of the valid values (or None).
        Raises FieldValidationException otherwise.
        """
        Field.to_python(self, value, session_key)

        if value is None:
            return None

        # Treat a missing list as "nothing is allowed" instead of failing with a TypeError
        allowed = self.valid_values if self.valid_values is not None else []

        if value not in allowed:
            raise FieldValidationException('The value of the "' + self.name + '" field is invalid, it must be one of:' + ','.join(str(v) for v in allowed))

        return value
class RegexField(Field):
    """
    A validator that validates input matches a regular expression.
    """

    def to_python(self, value, session_key=None):
        """
        Compile the value into a regular expression object; None is passed through.
        Raises FieldValidationException if the expression cannot be compiled.
        """
        Field.to_python(self, value, session_key)

        if value is None:
            return None

        try:
            return re.compile(value)
        except Exception as exception:
            raise FieldValidationException(str(exception))

    def to_string(self, value):
        """
        Return the pattern of the compiled expression; None becomes an empty string.
        """
        if value is not None:
            return value.pattern

        return ""
class IntegerField(Field):
    """
    A validator that converts string input to an integer.
    """

    def to_python(self, value, session_key=None):
        """
        Convert the value with int(); None is passed through.
        Raises FieldValidationException if the value is not a valid integer.
        """
        Field.to_python(self, value, session_key)

        if value is None:
            return None

        try:
            return int(value)
        except (ValueError, TypeError) as exception:
            # TypeError covers non-string, non-numeric input such as a list
            raise FieldValidationException(str(exception))

    def to_string(self, value):
        """
        Return str(value); None becomes an empty string.
        """
        if value is not None:
            return str(value)

        return ""

    def get_data_type(self):
        """
        Get the type of the field.
        """
        return 'number'
class FloatField(Field):
    """
    A validator that converts string input to a float.
    """

    def to_python(self, value, session_key=None):
        """
        Convert the value with float(); None is passed through.
        Raises FieldValidationException if the value is not a valid float.
        """
        Field.to_python(self, value, session_key)

        if value is None:
            return None

        try:
            return float(value)
        except (ValueError, TypeError) as exception:
            # TypeError covers non-string, non-numeric input such as a list
            raise FieldValidationException(str(exception))

    def to_string(self, value):
        """
        Return str(value); None becomes an empty string.
        """
        if value is not None:
            return str(value)

        return ""

    def get_data_type(self):
        """
        Get the type of the field.
        """
        return 'number'
class RangeField(Field):
    """
    A validator that checks an integer input against an inclusive range.
    """

    def __init__(self, name, title, description, low, high, none_allowed=False, empty_allowed=True):
        """
        Create the field.

        Arguments are those of Field plus:
        low -- The lowest accepted value (inclusive)
        high -- The highest accepted value (inclusive)
        """
        # Pass the caller's none_allowed/empty_allowed through rather than hard-coding them
        super(RangeField, self).__init__(name, title, description, none_allowed=none_allowed,
                                         empty_allowed=empty_allowed, required_on_create=None,
                                         required_on_edit=None)
        self.low = low
        self.high = high

    def to_python(self, value, session_key=None):
        """
        Convert the value to an integer and return whether it lies within [low, high].
        None is passed through. Raises FieldValidationException if the value is not an integer.
        """
        Field.to_python(self, value, session_key)

        if value is None:
            return None

        try:
            tmp = int(value)
        except ValueError as exception:
            raise FieldValidationException(str(exception))

        # NOTE(review): this returns a boolean, not the integer itself - confirm callers expect that
        return tmp >= self.low and tmp <= self.high

    def to_string(self, value):
        """
        Return str(value); None becomes an empty string.
        """
        if value is not None:
            return str(value)

        return ""

    def get_data_type(self):
        """
        Get the type of the field.
        """
        return 'number'
class URLField(Field):
    """
    Represents a URL. The URL is converted to a Python object that was created via urlparse.
    """

    require_https_on_cloud = False

    def __init__(self, name, title, description, none_allowed=False, empty_allowed=True,
                 required_on_create=None, required_on_edit=None, require_https_on_cloud=False):
        """
        Create the field.

        Arguments are those of Field plus:
        require_https_on_cloud -- If true, plain HTTP URLs are rejected when the host is on cloud
        """
        super(URLField, self).__init__(name, title, description, none_allowed,
                                       empty_allowed, required_on_create, required_on_edit)

        self.require_https_on_cloud = require_https_on_cloud

    @classmethod
    def parse_url(cls, value, name):
        """
        Parse a URL and generate an exception if it is invalid.
        Otherwise, return a parsed URL (via urlparse).

        Arguments:
        value -- The URL to parse
        name -- The name of the parameter (used in the error messages)
        """
        parsed_value = urlparse(value)

        if parsed_value.hostname is None or len(parsed_value.hostname) <= 0:
            raise FieldValidationException("The value of '%s' for the '%s' parameter does not contain a host name" % (str(value), name))

        if parsed_value.scheme not in ["http", "https"]:
            raise FieldValidationException("The value of '%s' for the '%s' parameter does not contain a valid protocol (only http and https are supported)" % (str(value), name))

        return parsed_value

    def to_python(self, value, session_key=None):
        """
        Return the parsed URL. Raises FieldValidationException if the URL is invalid or if it
        uses HTTP where HTTPS is required.
        """
        Field.to_python(self, value, session_key)

        # None only gets here when none_allowed is set; there is nothing to parse
        if value is None:
            return None

        parsed_value = URLField.parse_url(value.strip(), self.name)

        if self.require_https_on_cloud and parsed_value.scheme == "http" and session_key is not None and ModularInput.is_on_cloud(session_key):
            raise FieldValidationException("The value of '%s' for the '%s' parameter must use encryption (be HTTPS not HTTP)" % (str(value), self.name))

        return parsed_value

    def to_string(self, value):
        """
        Return the URL as a string; None becomes an empty string.
        """
        if value is None:
            return ""

        return value.geturl()
class DurationField(Field):
    """
    The duration field represents a duration as represented by a string such as 1d for a 24 hour
    period. The string is converted to an integer indicating the number of seconds.
    """

    # Raw string so that \s is a regex escape rather than a (deprecated) string escape
    DURATION_RE = re.compile(r"(?P<duration>[0-9]+)\s*(?P<units>[a-z]*)", re.IGNORECASE)

    MINUTE = 60
    HOUR = 60 * MINUTE
    DAY = 24 * HOUR
    WEEK = 7 * DAY

    # Number of seconds per supported unit
    UNITS = {
        'w' : WEEK,
        'week' : WEEK,
        'd' : DAY,
        'day' : DAY,
        'h' : HOUR,
        'hour' : HOUR,
        'm' : MINUTE,
        'min' : MINUTE,
        'minute' : MINUTE,
        's' : 1
    }

    def to_python(self, value, session_key=None):
        """
        Convert a duration such as "5m" into a number of seconds. A bare number is returned as
        is. Raises FieldValidationException if the duration or its unit is invalid.
        """
        Field.to_python(self, value, session_key)

        # None only gets here when none_allowed is set
        if value is None:
            return None

        # Parse the duration
        duration_match = DurationField.DURATION_RE.match(value)

        # Make sure the duration could be parsed
        if duration_match is None:
            raise FieldValidationException("The value of '%s' for the '%s' parameter is not a valid duration" % (str(value), self.name))

        # Get the units and duration; the pattern is case-insensitive so normalise the unit
        match_dict = duration_match.groupdict()
        units = match_dict['units'].lower()

        # Parse the value provided
        try:
            duration = int(match_dict['duration'])
        except ValueError:
            raise FieldValidationException("The duration '%s' for the '%s' parameter is not a valid number" % (match_dict['duration'], self.name))

        # No units means the value is already in seconds
        if len(units) == 0:
            return duration

        # Make sure the units are valid
        if units not in DurationField.UNITS:
            raise FieldValidationException("The unit '%s' for the '%s' parameter is not a valid unit of duration" % (units, self.name))

        # Convert the units to seconds
        return duration * DurationField.UNITS[units]

    def to_string(self, value):
        """
        Return the duration as a string.
        """
        return str(value)
class DeprecatedField(Field):
    """
    Represents a field that is no longer used. This should be used when you want the input to pass
    validation with arguments that are no longer used.
    """

    def __init__(self, name, title, description, none_allowed=True, empty_allowed=True,
                 required_on_create=False, required_on_edit=False):
        """
        Create the field.

        Arguments:
        name -- Set the name of the field (e.g. "database_server")
        title -- Set the human readable title (e.g. "Database server")
        description -- Set the human readable description of the field (e.g. "The IP or domain name of the database server")
        none_allowed -- Is a value of none allowed?
        empty_allowed -- Is an empty string allowed?
        required_on_create -- Is this field required when creating?
        required_on_edit -- Is this field required when editing?
        """
        super(DeprecatedField, self).__init__(name, title, description,
                                              none_allowed=none_allowed,
                                              empty_allowed=empty_allowed,
                                              required_on_create=required_on_create,
                                              required_on_edit=required_on_edit)

    def to_python(self, value, session_key=None):
        """
        Ignore the value; a deprecated field always converts to None.
        """
        return None

    def to_string(self, value):
        """
        Ignore the value; a deprecated field always serialises to an empty string.
        """
        return ""
class FilePathField(Field):
    """
    Represents a path to file.
    """

    def __init__(self, name, title, description, none_allowed=False, empty_allowed=True,
                 required_on_create=None, required_on_edit=None, validate_file_existence=True):
        """
        Create the field.

        Arguments:
        name -- Set the name of the field (e.g. "database_server")
        title -- Set the human readable title (e.g. "Database server")
        description -- Set the human readable description of the field (e.g. "The IP or domain name
                       of the database server")
        none_allowed -- Is a value of none allowed?
        empty_allowed -- Is an empty string allowed?
        required_on_create -- Is this field required when creating?
        required_on_edit -- Is this field required when editing?
        validate_file_existence -- If true, this field will generate an error if the file doesn't exist
        """
        super(FilePathField, self).__init__(name, title, description, none_allowed, empty_allowed, required_on_create, required_on_edit)

        self.validate_file_existence = validate_file_existence

    def to_python(self, value, session_key=None):
        """
        Return the resolved file path. Relative paths are resolved against the Splunk home
        directory unless Splunk's libraries are unavailable. Raises FieldValidationException
        if existence checking is enabled and the file does not exist.
        """
        Field.to_python(self, value, session_key)

        # Don't bother validating if the parameter wasn't provided
        if value is None or len(value.strip()) == 0:
            return value

        # Resolve the file path as necessary; make_splunkhome_path is not available in UF mode
        if os.path.isabs(value) or uf_mode:
            resolved_path = value
        else:
            resolved_path = make_splunkhome_path([value])

        # Validate the file existence if requested
        if self.validate_file_existence and not os.path.isfile(resolved_path):
            raise FieldValidationException("The parameter '%s' is not a valid path; '%s' does not exist" % (self.name, resolved_path))

        return resolved_path

    def to_string(self, value):
        """
        Return the path unchanged.
        """
        return value
class ModularInputConfig(object):
    """
    This class represents the configuration related to a running modular input.
    """

    def __init__(self, server_host, server_uri, session_key, checkpoint_dir, configuration):
        """
        Store the values parsed from the configuration XML.

        Arguments:
        server_host -- Text of the server_host element
        server_uri -- Text of the server_uri element
        session_key -- Text of the session_key element
        checkpoint_dir -- Text of the checkpoint_dir element
        configuration -- A dictionary of stanza name to a dictionary of its parameters
        """
        self.server_host = server_host
        self.server_uri = server_uri
        self.session_key = session_key
        self.checkpoint_dir = checkpoint_dir
        self.configuration = configuration

    def __str__(self):
        # NOTE(review): the output includes session_key - be careful about logging it
        attrs = ['server_host', 'server_uri', 'session_key', 'checkpoint_dir', 'configuration']
        return str({attr: str(getattr(self, attr)) for attr in attrs})

    @staticmethod
    def get_text(node, default=None):
        """
        Get the value of the text in the first node under the given node.

        Arguments:
        node -- The node that should have a text node under it.
        default -- The default text that ought to be returned if no text node could be found
                   (defaults to none).
        """
        if node and node.firstChild and node.firstChild.nodeType == node.firstChild.TEXT_NODE:
            return node.firstChild.data

        return default

    @staticmethod
    def _get_first_text(root, tag_name):
        """
        Return the text of the first element named tag_name under root, or None if absent.
        """
        nodes = root.getElementsByTagName(tag_name)

        if not nodes:
            return None

        return ModularInputConfig.get_text(nodes[0])

    @staticmethod
    def get_config_from_xml(config_str_xml):
        """
        Get the config from the given XML and return a ModularInputConfig instance.

        Arguments:
        config_str_xml -- A string of XML that represents the configuration provided by Splunk.
        """
        # Parse the document
        doc = xml.dom.minidom.parseString(config_str_xml)
        root = doc.documentElement

        # Get the top-level settings; a missing element yields None rather than an IndexError
        server_host = ModularInputConfig._get_first_text(root, "server_host")
        server_uri = ModularInputConfig._get_first_text(root, "server_uri")
        session_key = ModularInputConfig._get_first_text(root, "session_key")
        checkpoint_dir = ModularInputConfig._get_first_text(root, "checkpoint_dir")

        # Parse the config: one dictionary per named stanza
        configuration = {}
        conf_nodes = root.getElementsByTagName("configuration")

        if conf_nodes:
            for stanza in conf_nodes[0].getElementsByTagName("stanza"):
                stanza_name = stanza.getAttribute("name")

                # A stanza without a name cannot be keyed
                if not stanza_name:
                    continue

                config = {"name": stanza_name}

                for param in stanza.getElementsByTagName("param"):
                    config[param.getAttribute("name")] = ModularInputConfig.get_text(param)

                configuration[stanza_name] = config

        return ModularInputConfig(server_host, server_uri, session_key, checkpoint_dir,
                                  configuration)
def forgive_splunkd_outages(function):
    """
    Try the given function and swallow Splunkd connection exceptions until the limit is reached or
    the function works.

    Arguments:
    function -- The function to call
    """
    from functools import wraps

    @wraps(function)
    def wrapper(*args, **kwargs):
        """
        This wrapper will provide the swallowing of the exception for the provided function call.
        """
        attempts = 6
        attempt_delay = 5

        attempts_tried = 0

        while attempts_tried < attempts:
            try:
                return function(*args, **kwargs)
            except splunk.SplunkdConnectionException:
                attempts_tried += 1

                # If we hit the limit of the attempts, then throw the exception
                if attempts_tried >= attempts:
                    raise

                # Sleep for a bit in order to let Splunk recover in case this is a temporary issue
                time.sleep(attempt_delay)

    return wrapper
class ModularInput():
This class functions as a base-class for modular inputs.
# These arguments cover the standard fields that are always supplied
standard_args = [
Field("name", "Stanza name", "The name of the stanza for this modular input", empty_allowed=True),
Field("stanza", "Stanza name", "The name of the stanza for this modular input", empty_allowed=True),
Field("source", "Source", "The source for events created by this modular input", empty_allowed=True),
Field("sourcetype", "Stanza name", "The name of the stanza for this modular input", empty_allowed=True, none_allowed=True),
Field("index", "Index", "The index that data should be sent to", empty_allowed=True, none_allowed=True),
Field("host", "Host", "The host that is running the input", empty_allowed=True),
BooleanField("disabled", "Disabled", "Whether the modular input is disabled or not", empty_allowed=True)
title = 'No title was provided'
use_external_validation = True
description = ""
streaming_mode = 'true'
server_info = None
def _is_valid_param(self, name, val):
'''Raise an error if the parameter is None or empty.'''
if val is None:
raise ValueError("The {0} parameter cannot be none".format(name))
if len(str(val).strip()) == 0:
raise ValueError("The {0} parameter cannot be empty".format(name))
return val
def _create_formatter_textnode(self, xmldoc, nodename, value):
'''Shortcut for creating a formatter textnode.
xmldoc - A Document object.
nodename - A string name for the node.
node = xmldoc.createElement(nodename)
text = xmldoc.createTextNode(str(value))
return node
def _create_document(self):
'''Create the document for sending XML streaming events.'''
doc = Document()
# Create the <stream> base element
stream = doc.createElement('stream')
return doc
def _create_event(self, doc, params, stanza, unbroken=False, close=True):
'''Create an event for XML streaming output.
doc - a Document object.
params - a dictionary of attributes for the event.
stanza_name - the stanza
# Create the <event> base element
event = doc.createElement('event')
# Indicate if this event is to be unbroken (meaning a </done> tag will
# need to be added by a future event.
if unbroken:
event.setAttribute('unbroken', '1')
# Indicate if this script is single-instance mode or not.
if self.streaming_mode == 'true':
event.setAttribute('stanza', stanza)
# Define the possible elements
valid_elements = ['host', 'index', 'source', 'sourcetype', 'time', 'data']
# Append the valid child elements. Invalid elements will be dropped.
for element in filter(lambda x: x in valid_elements, params.keys()):
event.appendChild(self._create_formatter_textnode(doc, element, params[element]))
if close:
return event
def _print_event(self, doc, event):
'''Adds an event to XML streaming output.'''
# Get the stream from the document.
stream = doc.firstChild
# Append the event.
# Return the content as a string WITHOUT the XML header; remove the
# child object so the next event can be returned and reuse the same
# Document object.
output = doc.documentElement.toxml()
return output
def _add_events(self, doc, events):
'''Adds a set of events to XML streaming output.'''
# Get the stream from the document.
stream = doc.firstChild
# Add the <event> node.
for event in events:
# Return the content as a string WITHOUT the XML header.
return doc.documentElement.toxml()
def escape_spaces(self, s, encapsulate_in_double_quotes=False):
    """
    If the string contains spaces or is empty, then add double quotes around the string. This
    is useful when outputting fields and values to Splunk since a space will cause Splunk to
    not recognize the entire value.

    Arguments:
    s -- A string to escape.
    encapsulate_in_double_quotes -- If true, the value will have double-quotes added around it.
    """

    # Nothing to escape; None is passed through unchanged
    if s is None:
        return None

    # Make sure the input is a string
    s = str(s)

    # Escape the quotes within the string (will need KV_MODE = auto_escaped for this to work)
    s = s.replace('"', '\\"')
    s = s.replace("'", "\\'")

    if " " in s or encapsulate_in_double_quotes or s == "":
        return '"' + s + '"'

    return s
def create_event_string(self, data_dict, stanza, sourcetype, source, index, host=None,
                        unbroken=False, close=False, encapsulate_value_in_double_quotes=False):
    """
    Create a string representing the event.

    Arguments:
    data_dict -- A dictionary containing the fields
    stanza -- The stanza used for the input
    sourcetype -- The sourcetype
    source -- The source field value
    index -- The index to send the event to
    host -- The host field value
    unbroken -- Passed through to _create_event
    close -- Passed through to _create_event
    encapsulate_value_in_double_quotes -- If true, the value will have double-quotes added around it.
    """

    # Make the content of the event: space separated name=value pairs
    pairs = []

    for k, v in data_dict.items():

        # If the value is a list, then write out each matching value with the same name (as mv)
        if isinstance(v, list):
            values = v
        else:
            values = [v]

        k_escaped = self.escape_spaces(k)

        # Write out each value
        for item in values:
            v_escaped = self.escape_spaces(item, encapsulate_in_double_quotes=encapsulate_value_in_double_quotes)
            pairs.append('%s=%s' % (k_escaped, v_escaped))

    data_str = ' '.join(pairs)

    # Make the event
    event_dict = {'stanza': stanza,
                  'data' : data_str}

    if index is not None:
        event_dict['index'] = index

    if sourcetype is not None:
        event_dict['sourcetype'] = sourcetype

    if source is not None:
        event_dict['source'] = source

    if host is not None:
        event_dict['host'] = host

    event = self._create_event(self.document,
                               params=event_dict,
                               stanza=stanza,
                               unbroken=unbroken,
                               close=close)

    # If using unbroken events, the last event must have been
    # added with a "</done>" tag.
    return self._print_event(self.document, event)
def output_event(self, data_dict, stanza, index=None, sourcetype=None, source=None, host=None,
                 unbroken=False, close=False, out=sys.stdout,
                 encapsulate_value_in_double_quotes=False):
    """
    Output the given event so that Splunk can see it.

    Arguments:
    data_dict -- A dictionary containing the fields
    stanza -- The stanza used for the input
    sourcetype -- The sourcetype
    source -- The source to use
    index -- The index to send the event to
    unbroken -- Passed through to create_event_string
    close -- Passed through to create_event_string
    out -- The stream to send the event to (defaults to standard output)
    host -- The host
    encapsulate_value_in_double_quotes -- If true, the value will have double-quotes added
                                          around it. This is useful in cases where the app
                                          contains props & transforms that require the value
                                          to have double-quotes.
    """

    output = self.create_event_string(data_dict, stanza, sourcetype, source, index, host,
                                      unbroken, close,
                                      encapsulate_value_in_double_quotes)

    # Hold the lock while writing so that a single event is written and flushed as a unit
    with self.lock:
        out.write(output)
        out.flush()
def __init__(self, scheme_args, args=None, sleep_interval=5, logger_name='python_modular_input',
             logger_level=None):
    """
    Set up the modular input.

    Arguments:
    scheme_args -- The scheme args indicating the run-time mode of the input
    args -- A list of Field instances for validating the arguments
    sleep_interval -- How often to sleep between runs
    logger_name -- The logger name to append to the logger
    logger_level -- The logging level to use (defaults to logging.INFO)

    Raises ValueError if a scheme argument is None or blank, and Exception if the logger
    name is empty.
    """

    # Setup defaults
    default_scheme_args = {
        "use_external_validation" : "true",
        "streaming_mode" : "xml",
        "use_single_instance" : True
    }

    # Caller supplied values override the defaults. A copy plus update() works on both
    # Python 2 and 3, unlike concatenating the results of items().
    merged_scheme_args = dict(default_scheme_args)
    merged_scheme_args.update(scheme_args)

    # Set the scheme arguments.
    for arg in merged_scheme_args:
        setattr(self, arg, self._is_valid_param(arg, merged_scheme_args.get(arg)))

    # Convert over the use_single_instance argument to a boolean
    self.use_single_instance = normBool(self.use_single_instance)

    # Copy the list so that later changes by the caller do not affect this instance
    if args is None:
        self.args = []
    else:
        self.args = args[:]

    if sleep_interval > 0:
        self.sleep_interval = sleep_interval
    else:
        self.sleep_interval = 5

    # Create the document used for sending events to Splunk through
    self.document = self._create_document()

    # Make a lock for controlling access to underlying functions
    self.lock = RLock()

    # Initialize the logger level
    if logger_level is None:
        self.logger_level = logging.INFO
    else:
        self.logger_level = logger_level

    # Check and save the logger name
    self._logger = None

    if logger_name is None or len(logger_name) == 0:
        raise Exception("Logger name cannot be empty")

    self.logger_name = logger_name

    # Keep an instance of the server-info around to prevent unnecessary REST calls
    self.server_info = None
def addArg(self, arg):
    """
    Add a given argument to the list of arguments.

    Arguments:
    arg -- An instance of Field that represents an argument.
    """

    if self.args is None:
        self.args = []

    self.args.append(arg)
def usage(self, out=sys.stdout):
    """
    Print a usage statement.

    Arguments:
    out -- The stream to write the message to (defaults to standard output)
    """

    # Fill in the script name; previously the %s placeholder was written out literally
    script_name = os.path.basename(sys.argv[0]) if sys.argv and sys.argv[0] else "script"

    out.write("usage: %s [--scheme|--validate-arguments]" % script_name)
def do_scheme(self, out=sys.stdout):
    """
    Get the scheme and write it out to standard output.

    Arguments:
    out -- The stream to write the message to (defaults to standard output)
    """

    self.logger.debug("Modular input: scheme requested")
    out.write(self.get_scheme())

    return True
@property
def logger(self):
    """
    Returns a logger. A logger will be created if necessary.
    """

    # Make a logger unless it already exists
    if self._logger is not None:
        return self._logger

    logger = logging.getLogger(self.logger_name)

    # Prevent the log messages from being duplicated in the python.log file
    logger.propagate = False
    logger.setLevel(self.logger_level)

    # make_splunkhome_path is unavailable in UF mode, so build the path from SPLUNK_HOME
    if uf_mode:
        file_handler = handlers.RotatingFileHandler(os.path.join(os.environ['SPLUNK_HOME'], 'var', 'log', self.logger_name + '.log'), maxBytes=25000000, backupCount=5)
    else:
        file_handler = handlers.RotatingFileHandler(make_splunkhome_path(['var', 'log', 'splunk', self.logger_name + '.log']), maxBytes=25000000, backupCount=5)

    formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
    file_handler.setFormatter(formatter)
    logger.addHandler(file_handler)

    self._logger = logger
    return self._logger

@logger.setter
def logger(self, logger):
    """
    Replace the logger used by this input.
    """
    self._logger = logger
def escape_colons(self, string_to_escape):
    """
    Escape the colons. This is necessary for secure password stanzas.

    Arguments:
    string_to_escape -- The string in which each colon is prefixed with a backslash
    """

    return string_to_escape.replace(":", "\\:")
def get_secure_password_stanza(self, username, realm=""):
    """
    Make the stanza name for a entry in the storage/passwords endpoint from the username and
    realm. The result has the form "realm:username:" with colons inside either part escaped.

    Arguments:
    username -- The username of the entry
    realm -- The realm of the entry (defaults to an empty string)
    """

    return self.escape_colons(realm) + ":" + self.escape_colons(username) + ":"
@forgive_splunkd_outages
def get_secure_password(self, realm, username=None, session_key=None):
    """
    Get the secure password that matches the given realm and username. If no username is
    provided, the first entry with the given realm will be returned.

    Returns the matching entry of the storage/passwords endpoint, or None if there is no
    such entry or the Splunk libraries are unavailable.

    Arguments:
    realm -- The realm of the entry
    username -- The username of the entry (optional)
    session_key -- The session key used to access splunkd
    """

    if uf_mode:
        self.logger.warn("Unable to retrieve the secure credential since the input appears " +
                         "to be running in a Univeral Forwarder")
        # Cannot get the secure password in universal forwarder mode since we don't
        # have access to Splunk libraries
        return None

    # Look up the entry by realm only if no username is provided.
    if username is None or len(username) == 0:
        return self.get_secure_password_by_realm(realm, session_key)

    # Get secure password
    stanza = self.get_secure_password_stanza(username, realm)

    try:
        server_response, server_content = splunk.rest.simpleRequest('/services/storage/passwords/' + stanza + '?output_mode=json', sessionKey=session_key)
    except splunk.ResourceNotFound:
        return None

    if server_response['status'] == '404':
        return None
    elif server_response['status'] != '200':
        raise Exception("Could not get the secure passwords")

    passwords_content = json.loads(server_content)
    password = passwords_content['entry']

    return password[0]
@forgive_splunkd_outages
def get_secure_password_by_realm(self, realm, session_key):
    """
    Get the secure password that matches the given realm.

    Returns the first entry of the storage/passwords endpoint whose realm matches, or None.

    Arguments:
    realm -- The realm to look for
    session_key -- The session key used to access splunkd
    """

    # Get secure passwords
    server_response, server_content = splunk.rest.simpleRequest('/services/storage/passwords?output_mode=json', sessionKey=session_key)

    if server_response['status'] != '200':
        raise Exception("Could not get the secure passwords")

    passwords_content = json.loads(server_content)
    passwords = passwords_content['entry']

    # Filter down output to the ones matching the realm. A list is built explicitly because
    # filter() returns an iterator (no len(), no indexing) on Python 3.
    matching_passwords = [entry for entry in passwords if entry['content']['realm'] == realm]

    if len(matching_passwords) > 0:
        return matching_passwords[0]

    return None
@classmethod
@forgive_splunkd_outages
def get_server_info(cls, session_key, force_refresh=False):
    """
    Get the server information object.

    Arguments:
    session_key -- The session key used to access splunkd
    force_refresh -- If true, the cached server information is ignored and reloaded
    """

    # Use the cached server information if possible
    if not force_refresh and cls.server_info is not None:
        return cls.server_info

    # Get the server info
    _, server_content = splunk.rest.simpleRequest('/services/server/info/server-info?output_mode=json', sessionKey=session_key)

    info_content = json.loads(server_content)
    cls.server_info = info_content['entry'][0]

    return cls.server_info
@classmethod
def is_on_cloud(cls, session_key):
    """
    Determine if the host is running on cloud, that is, whether the instance_type of the
    server information is 'cloud'.

    Arguments:
    session_key -- The session key used to access splunkd
    """

    server_info = cls.get_server_info(session_key)

    return server_info['content'].get('instance_type', None) == 'cloud'
def bool_to_str(self, bool_value):
    """
    Convert a value that operates like a boolean and return a string.

    Returns "true" for truthy values and "false" otherwise.
    """

    if bool_value:
        return "true"

    return "false"
def get_scheme(self):
    """
    Get the scheme of the inputs parameters and return as a string.
    """

    def as_text(value):
        # createTextNode() only accepts strings; a boolean (such as the class-level default
        # of use_external_validation) is rendered as "true"/"false"
        if isinstance(value, bool):
            return self.bool_to_str(value)
        return value

    def add_text_element(parent, tag_name, value):
        # Create <tag_name>value</tag_name> and attach it under parent
        element = doc.createElement(tag_name)
        element.appendChild(doc.createTextNode(as_text(value)))
        parent.appendChild(element)

    # Create the XML document
    doc = Document()

    # Create the <scheme> base element
    element_scheme = doc.createElement("scheme")
    doc.appendChild(element_scheme)

    # Create the simple settings
    add_text_element(element_scheme, "title", self.title)
    add_text_element(element_scheme, "description", self.description)
    add_text_element(element_scheme, "use_external_validation", self.use_external_validation)
    add_text_element(element_scheme, "streaming_mode", self.streaming_mode)
    add_text_element(element_scheme, "use_single_instance", self.bool_to_str(self.use_single_instance))

    # Create the elements that store the args: <endpoint><args>...</args></endpoint>
    element_endpoint = doc.createElement("endpoint")
    element_scheme.appendChild(element_endpoint)

    element_args = doc.createElement("args")
    element_endpoint.appendChild(element_args)

    # Create the argument elements
    self.add_xml_args(doc, element_args)

    # Return the content as a string
    return doc.toxml()
def add_xml_args(self, doc, element_args):
    """
    Add the arguments to the XML scheme.

    Arguments:
    doc -- The XML document
    element_args -- The element that should be the parent of the arg elements that will be
                    added
    """

    def append_text_element(parent, tag, text):
        # Create <tag>text</tag> and attach it to the parent
        element = doc.createElement(tag)
        element.appendChild(doc.createTextNode(text))
        parent.appendChild(element)

    for arg in self.args:

        # Skip the interval argument if in multi-instance mode since Splunk will complain
        # otherwise
        if not self.use_single_instance and arg.name == "interval":
            continue

        element_arg = doc.createElement("arg")
        element_arg.setAttribute("name", arg.name)
        element_args.appendChild(element_arg)

        # Create the child elements describing the argument
        append_text_element(element_arg, "title", arg.title)
        append_text_element(element_arg, "description", arg.description)
        append_text_element(element_arg, "data_type", arg.get_data_type())
        append_text_element(element_arg, "required_on_create",
                            "true" if arg.required_on_create else "false")
        append_text_element(element_arg, "required_on_edit",
                            "true" if arg.required_on_edit else "false")
def do_validation(self, in_stream=sys.stdin):
    """
    Get the validation data from standard input and attempt to validate it. Returns true if
    the arguments validated, false otherwise.

    Arguments:
    in_stream -- The stream to get the input from (defaults to standard input)
    """

    # Honor the caller's stream instead of always falling back to standard input
    data, session_key = self.get_validation_data(in_stream)

    try:
        self.validate_parameters(None, data, session_key)
        return True
    except FieldValidationException as e:
        # Report why validation failed before signalling the failure to the caller
        self.print_error(str(e))
        return False
def validate(self, arguments, session_key=None):
    """
    Validate the argument dictionary where each key is a stanza.

    Arguments:
    arguments -- The arguments as an dictionary where the key is the stanza and the value is a
                 dictionary of the values.
    session_key -- The session key for accessing Splunkd

    Returns True when every stanza validated; any exception raised by validate_parameters()
    propagates to the caller.
    """

    # Check each stanza
    for stanza, parameters in arguments.items():
        self.validate_parameters(stanza, parameters, session_key)

    return True
def validate_parameters(self, stanza, parameters, session_key=None):
    """
    Validate the parameter set for a stanza and returns a dictionary of cleaned parameters.

    Arguments:
    stanza -- The stanza name
    parameters -- The list of parameters
    session_key -- The session key for accessing Splunkd

    Raises FieldValidationException when a parameter does not match a known argument.
    """

    cleaned_params = {}

    # Index the validators by name such that the standard fields that Splunk provides are
    # included; arguments declared by the input take precedence over the standard ones
    all_args = {}

    for argument_validator in self.standard_args:
        all_args[argument_validator.name] = argument_validator

    for argument_validator in self.args:
        all_args[argument_validator.name] = argument_validator

    # Convert and check the parameters
    for name, value in parameters.items():

        # If the argument was found, then validate and convert it
        if name in all_args:
            cleaned_params[name] = all_args[name].to_python(value, session_key=session_key)

        # Allow the interval argument since it is internal but allowed even if not explicitly
        # declared (it is accepted but not copied into the cleaned parameters)
        elif name == "interval" and not self.use_single_instance:
            pass

        # Throw an exception if the argument could not be found
        else:
            raise FieldValidationException("The parameter '%s' is not a valid argument" % (name))

    return cleaned_params
def print_error(self, error, out=sys.stdout):
    """
    Prints the given error message to standard output.

    Arguments:
    error -- The message to be printed
    out -- The stream to write the message to (defaults to standard output)
    """

    # Imported locally so that this method does not depend on the module-level imports
    from xml.sax.saxutils import escape

    # Escape the message so that characters such as '<' and '&' cannot break the XML envelope
    out.write("<error><message>%s</message></error>" % escape("%s" % (error,)))
def read_config(self, in_stream=sys.stdin):
    """
    Read the config from standard input and return the configuration.

    Arguments:
    in_stream -- The stream to get the input from (defaults to standard input)

    Returns whatever ModularInputConfig.get_config_from_xml() produces for the XML read.
    """

    # Read the whole stream; the parser receives the complete XML text
    config_str_xml = in_stream.read()

    return ModularInputConfig.get_config_from_xml(config_str_xml)
def run(self, stanza, cleaned_params, input_config):
    """
    Run the input using the arguments provided.

    This implementation always raises; a concrete input must provide its own run().

    Arguments:
    stanza -- The name of the stanza
    cleaned_params -- The arguments following validation and conversion to Python objects.
    input_config -- A dictionary that provides configuration data like session keys
    """

    # NotImplementedError is a subclass of Exception, so callers catching Exception still work
    raise NotImplementedError("Run function was not implemented")
@classmethod
def is_expired(cls, last_run, interval, cur_time=None):
    """
    Indicates if the last run time is expired based on the value of the last_run parameter.

    Arguments:
    last_run -- The time that the analysis was last done
    interval -- The interval that the analysis ought to be done (as an integer)
    cur_time -- The current time (will be automatically determined if not provided)

    Returns True when there was no previous run or when last_run + interval lies strictly
    before cur_time.
    """

    if cur_time is None:
        cur_time = time.time()

    # An input that never ran is always due
    if last_run is None:
        return True

    return (last_run + interval) < cur_time
@classmethod
def last_ran(cls, checkpoint_dir, stanza):
    """
    Determines the date that the analysis was last performed for the given input (denoted by
    the stanza name).

    Arguments:
    checkpoint_dir -- The directory where checkpoints ought to be saved
    stanza -- The stanza of the input being used

    Returns the 'last_run' entry of the checkpoint, or None when there is no checkpoint or
    the checkpoint has no such entry.
    """

    checkpoint_dict = cls.get_checkpoint_data(checkpoint_dir, stanza)

    if checkpoint_dict is None or 'last_run' not in checkpoint_dict:
        return None

    return checkpoint_dict['last_run']
@classmethod
def needs_another_run(cls, checkpoint_dir, stanza, interval, cur_time=None):
    """
    Determines if the given input (denoted by the stanza name) ought to be executed.

    Arguments:
    checkpoint_dir -- The directory where checkpoints ought to be saved
    stanza -- The stanza of the input being used
    interval -- The frequency that the analysis ought to be performed
    cur_time -- The current time (will be automatically determined if not provided)

    Returns True when the input is due, including when the checkpoint cannot be read.
    """

    try:
        last_run = cls.last_ran(checkpoint_dir, stanza)
        return cls.is_expired(last_run, interval, cur_time)
    except (IOError, ValueError):
        # IOError: the file likely doesn't exist; ValueError: the file could not be loaded.
        # Either way there is no usable checkpoint, so the input should run.
        return True
@classmethod
def get_file_path(cls, checkpoint_dir, stanza):
    """
    Get the path to the checkpoint file.

    The file name is the SHA-224 hex digest of the stanza name followed by ".json".

    Arguments:
    checkpoint_dir -- The directory where checkpoints ought to be saved
    stanza -- The stanza of the input being used
    """

    # hashlib only accepts bytes; text stanza names need to be encoded first (on Python 3
    # hashing a str raises a TypeError)
    if not isinstance(stanza, bytes):
        stanza = stanza.encode("utf-8")

    return os.path.join(checkpoint_dir, hashlib.sha224(stanza).hexdigest() + ".json")
@classmethod
def get_checkpoint_data(cls, checkpoint_dir, stanza="(undefined)", throw_errors=False):
    """
    Gets the checkpoint for this input (if it exists)

    Arguments:
    checkpoint_dir -- The directory where checkpoints ought to be saved
    stanza -- The stanza of the input being used
    throw_errors -- If false, then None will be returned if the data could not be loaded

    Raises IOError (file could not be opened) or ValueError (file is not valid JSON) only
    when throw_errors is true.
    """

    try:
        # The context manager closes the file even when the JSON cannot be parsed
        with open(cls.get_file_path(checkpoint_dir, stanza)) as file_pointer:
            return json.load(file_pointer)
    except (IOError, ValueError):
        if throw_errors:
            raise
        return None
def get_non_deviated_last_run(self, last_ran, interval, stanza):
    """
    This method will return a last_run time that doesn't carry over the processing time.
    If you used the current time and the script took 5 seconds to run, then next run will
    actually be 5 seconds after it should have been.

    Basically, it computes when the interval _should_ have executed so that the input runs on
    the correct frequency.

    Arguments:
    interval -- The execution interval
    last_ran -- When the input last ran (Unix epoch).
    stanza -- The stanza that this is for
    """

    now = time.time()

    # If this is the first run, then set it to the current time
    if last_ran is None:
        return now

    # We don't want the interval to slide by including the processing time in the
    # interval. In other words, if the interval is 60 and it takes 5 seconds to process,
    # then we don't just want to set the last_run to now because then the interval would
    # actually be 65 seconds. So, assume that the processing time was 0 and we were right on
    # time, that is, that we ran at last_run + interval exactly.
    last_ran_derived = last_ran + interval

    # There is one problem with correcting the last run to the previous time plus the
    # interval. If the input failed to run for a long time, then we might keep creating a
    # last_run that is in the past and thus, keep executing the input until we finally come to
    # the current time. Skip the runs in the past and start over from now instead.
    if last_ran_derived < (now - interval):
        # The last run isn't within one interval of the current time. That means we either ran
        # too long and missed a subsequent run or we just weren't running for a long-time.
        # To catch up, we'll set it to the current time
        last_ran_derived = now

        self.logger.info("Previous run was too far in the past (gap=%rs) and thus some executions of the input may have been missed (stanza=%s)", int(round(last_ran_derived-last_ran)), stanza)

    self.logger.debug("Calculated non-deviated last_ran=%r from previous_last_ran=%r", last_ran_derived, last_ran)

    return last_ran_derived
def save_checkpoint_data(self, checkpoint_dir, stanza, data):
    """
    Save the checkpoint state.

    Failures are logged and not raised, so a checkpoint that cannot be written does not stop
    the caller.

    Arguments:
    checkpoint_dir -- The directory where checkpoints ought to be saved
    stanza -- The stanza of the input being used
    data -- A dictionary with the data to save
    """

    # Hold the lock for the whole write
    with self.lock:
        try:
            # The context manager closes the file even when serialization fails
            with open(self.get_file_path(checkpoint_dir, stanza), 'w') as fp:
                json.dump(data, fp)
        except Exception:
            self.logger.exception('Failed to save checkpoint directory, check the permissions of the directory="%s"', checkpoint_dir)
def do_shutdown(self):
    """
    This function is called when the modular input should shut down.

    This implementation does nothing; presumably inputs override it to release resources
    before the process exits (verify against the subclasses).
    """

    pass
def do_run(self, in_stream=sys.stdin, log_exception_and_continue=False):
    """
    Read the config from standard input and run the input for each stanza it contains.

    In single instance mode the stanzas are processed repeatedly with a pause in between;
    otherwise they are processed once and scheduling is left to Splunk.

    Arguments:
    in_stream -- The stream to get the input from (defaults to standard input)
    log_exception_and_continue -- If true, exceptions will not be thrown for invalid
                                  configurations and instead the stanza will be skipped.
    """

    # Run the modular import
    input_config = self.read_config(in_stream)

    if input_config is None:
        self.logger.error("Did not receive an input configuration stream from Splunk, input will not run")
        return

    while True:

        # If Splunk is no longer the parent process, then it has shut down and this input
        # needs to terminate
        if hasattr(os, 'getppid') and os.getppid() == 1:
            self.logger.warning("Modular input is no longer running under Splunk; script will now exit")
            self.do_shutdown()

            # NOTE(review): the exit status used here could not be recovered from the source;
            # confirm the intended value
            sys.exit(2)

        # Initialize the document that will be used to output the results
        self.document = self._create_document()

        for stanza, conf in input_config.configuration.items():

            try:
                cleaned_params = self.validate_parameters(stanza, conf)
                self.run(stanza, cleaned_params, input_config)
            except FieldValidationException as exception:
                if not log_exception_and_continue:
                    # A bare raise keeps the original traceback
                    raise

                self.logger.error("The input stanza '%s' is invalid: %s" % (stanza, str(exception)))

        # Stop if the input is not running in single instance mode and allow Splunk to manage
        # scheduling this input
        if not self.use_single_instance:
            self.logger.info("Successfully executed all of the inputs")
            break

        # Sleep for a bit
        try:
            # NOTE(review): the sleep duration could not be recovered from the source; this
            # assumes a sleep_interval attribute (in seconds) and falls back to 5 -- confirm
            time.sleep(getattr(self, "sleep_interval", 5))
        except IOError:
            # Exceptions such as KeyboardInterrupt and IOError can be thrown in order to
            # interrupt sleep calls
            pass
def get_validation_data(self, in_stream=sys.stdin):
    """
    Get the validation data from standard input

    Arguments:
    in_stream -- The stream to get the input from (defaults to standard input)

    Returns a tuple of (parameters, session_key). The parameters dictionary holds the name
    of the first <item> under the "stanza" key plus one entry per <param> that has a name
    and text content. The session key is None when the document does not provide one.
    """

    def text_of(node):
        # Return the text of the node when its first child is a text node, None otherwise
        child = node.firstChild

        if child is not None and child.nodeType == child.TEXT_NODE:
            return child.data

        return None

    val_data = {}

    # Read everything from stdin
    val_str = in_stream.read()

    # Parse the validation XML
    doc = xml.dom.minidom.parseString(val_str)
    root = doc.documentElement

    # Parse the session key (tolerating documents that don't include one)
    session_key_nodes = root.getElementsByTagName("session_key")
    session_key = text_of(session_key_nodes[0]) if session_key_nodes else None

    # Parse the parameters (tolerating documents without an item)
    item_nodes = root.getElementsByTagName("item")

    if item_nodes:
        item_node = item_nodes[0]
        val_data["stanza"] = item_node.getAttribute("name")

        for param in item_node.getElementsByTagName("param"):
            name = param.getAttribute("name")
            value = text_of(param)

            if name and value is not None:
                val_data[name] = value

    return val_data, session_key
def validate_parameters_from_cli(self, argument_array=None):
    """
    Load the arguments from the given array (or from the command-line) and validate them.

    The values are matched to the declared arguments by position; declared arguments without
    a corresponding value are validated as None.

    Arguments:
    argument_array -- An array of arguments (will get them from the command-line arguments if
                      none is provided)
    """

    # Get the arguments from the sys.argv if not provided
    if argument_array is None:
        argument_array = sys.argv[1:]

    # This is the list of parameters we will generate
    parameters = {}

    for index, arg in enumerate(self.args):
        parameters[arg.name] = argument_array[index] if index < len(argument_array) else None

    # Now that we have simulated the parameters, go ahead and test them
    self.validate_parameters("unnamed", parameters)
def execute(self, in_stream=sys.stdin, out_stream=sys.stdout):
    """
    Get the arguments that were provided from the command-line and execute the script.

    With "--scheme" the scheme is written to the output stream, with "--validate-arguments"
    the arguments read from the input stream are validated (exiting with status 1 when they
    are invalid) and without arguments the input is run.

    Arguments:
    in_stream -- The stream to get the input from (defaults to standard input)
    out_stream -- The stream to write the output to (defaults to standard output)
    """

    try:
        self.logger.debug("Modular input started (execute called)")

        if len(sys.argv) > 1:

            if sys.argv[1] == "--scheme":
                # NOTE(review): the original statement for this branch could not be
                # recovered from the source; writing get_scheme() is the visible equivalent
                out_stream.write(self.get_scheme())

            elif sys.argv[1] == "--validate-arguments":
                self.logger.debug("Validate arguments called: input verifying arguments")

                # Exit with an error code if validation failed (SystemExit is not an
                # Exception subclass, so it is not swallowed by the handler below)
                if not self.do_validation(in_stream):
                    sys.exit(1)

        else:
            # Run the modular input
            self.do_run(in_stream, log_exception_and_continue=True)

        self.logger.debug("Execution completed successfully")

    except Exception as exception:
        self.logger.exception("Execution failed")

        # Make sure to grab any exceptions so that we can print a valid error message
        self.print_error(str(exception), out_stream)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment