@jplsightm
jplsightm / message_remap.md
Created October 14, 2020 20:13
Datadog Message Remapping

Enabling message search and pattern recognition with JSON logs in Datadog

It is generally recommended that logs sent to Datadog be in JSON format. This allows attributes to be easily identified and avoids the need to create (what can at times be complex) grok processors. These attributes are important because you can then create facets to help you identify the logs that matter to you without needing to understand a complex query language. The challenge is that the message of the log does not appear to be available. This is easily resolved using the Message Remapper.

Setting up and defining the problem

I have created some sample logs that I am forwarding to Datadog. A sample of how these logs look can be found below:

{"attribute": "abc", "timestamp": "10/14/2020, 14:15:04", "host": "a_computer", "text": "testing for the win, 10"}
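Conceptually, the Message Remapper just tells Datadog which JSON attribute (here `text`) to treat as the log's message. A minimal local sketch of that idea in plain Python (this illustrates the concept only; it is not the Datadog API):

```python
import json

# Sketch of what the Message Remapper does conceptually: promote a chosen
# JSON attribute ("text" here) to the searchable log message.
raw = '{"attribute": "abc", "timestamp": "10/14/2020, 14:15:04", "host": "a_computer", "text": "testing for the win, 10"}'

log = json.loads(raw)
message = log.pop("text")   # the attribute the remapper is pointed at
log["message"] = message    # now available as the log's message

print(log["message"])  # -> testing for the win, 10
```

Once the remapper is in place, message search and pattern recognition work against `text` just as they would for a plain-text log line.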
jplsightm / sample_json_logs.py
Created October 14, 2020 19:10
A very simple script that creates some silly logs in JSON format. For demonstration purposes only.
import json
from datetime import datetime
from random import randint
from time import sleep

a = {'timestamp': datetime.now().strftime("%m/%d/%Y, %H:%M:%S"), 'text': 'testing for the win, {}'.format(randint(0, 10)), 'host': 'a_computer', 'attribute': 'abc'}
b = {'timestamp': datetime.now().strftime("%m/%d/%Y, %H:%M:%S"), 'text': 'here_we_are for the win, {}'.format(randint(0, 10)), 'host': 'a_computer', 'attribute': 'abc'}
c = {'timestamp': datetime.now().strftime("%m/%d/%Y, %H:%M:%S"), 'text': 'testing for the win, {}'.format(randint(0, 10)), 'host': 'b_computer', 'attribute': 'abc'}
d = {'timestamp': datetime.now().strftime("%m/%d/%Y, %H:%M:%S"), 'text': '{} - what now'.format(randint(0, 10)), 'host': 'b_computer', 'attribute': 'abc'}
e = {'timestamp': datetime.now().strftime("%m/%d/%Y, %H:%M:%S"), 'text': 'testing for the win, {}'.format(randint(0, 10)), 'host': 'c_computer', 'attribute': 'zyx'}

# The preview cuts off here; the unused json/sleep imports suggest the
# records are written out as JSON lines, e.g.:
for record in (a, b, c, d, e):
    print(json.dumps(record))
    sleep(1)
jplsightm / flaten_sslogs.py
Last active September 17, 2019 15:44
Working within the SSLOG schema
import json

import pymongo


def dummy(element):
    return element


def get_value(_dict):
    try:
        return _dict.get('value', _dict)
    except AttributeError:
        return _dict


def mongo_objs(conn_str, database, tests=[lambda x: x.count_documents({})]):
    client = pymongo.MongoClient(conn_str)
    db = client[database]
    sslog = db.sslog
    cycle = db.cycle
    sslog_results = {}
    cycle_results = {}
    for test in tests:
        try:
            sslog_results[test.__name__] = test(sslog)
        except Exception:  # preview truncated here; assuming failed tests are skipped
            pass
    return sslog_results, cycle_results


# From a later part of the gist (`sslog` here refers to the collection above):
# get mongo sslog ids and ORDERIDs
ORDERIDs = {log['_id']: log['data']['fieldvalues']['ORDERID']['value']
            for log in sslog.find({'data.fieldvalues.ORDERID.value': {'$exists': True}},
                                  {'data.fieldvalues.ORDERID.value': 1})}


# do some parsing because there was all sorts of badness - floats cast as strings, integers, etc
def order_id_to_string(_id, orderid):
    try:
        orderid = str(int(float(orderid)))
    except (TypeError, ValueError):  # preview truncated; assuming non-numeric ids pass through
        pass
    return orderid
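The `get_value` helper above is the workhorse for flattening the SSLOG `{'value': ...}` wrappers. A quick self-contained demo (the function is repeated here so the snippet runs on its own):

```python
# get_value returns the 'value' key of a dict, or the input unchanged
# when it is not a dict (repeated from the gist above for this demo).
def get_value(_dict):
    try:
        return _dict.get('value', _dict)
    except AttributeError:
        return _dict

get_value({'value': 42})   # -> 42
get_value('already-flat')  # -> 'already-flat'
get_value({'other': 1})    # -> {'other': 1} (no 'value' key, returned as-is)
```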
jplsightm / process_tall_records.py
Created July 2, 2019 04:07
For processing tall records. This is an untested function. Use at your own risk.
import pandas as pd


def sensor_csv(frame, sensor_name, sensor_column, prefix, keep_columns, timestamp):
    """
    frame = input frame
    sensor_name = sensor name to filter on
    sensor_column = column that contains the sensor name
    prefix = prefix to add to each column (to make them unique)
    keep_columns = which columns should be kept; no checks on data types are done, make this a list
    timestamp = timestamp column
    """
jplsightm / step_back_ts.py
Created September 26, 2018 19:13
Sometimes you just need to shift items back in a dataframe (typically with time series data). This is a hackish way to do that :) Enjoy
import numpy as np
import pandas as pd


def step_back_ts(frame, ts_col, shift):
    # rename to 'timestamp' before framing; passing columns=['timestamp'] to
    # pd.DataFrame with a differently named Series would select a missing
    # column and produce all-NaN values
    timestamps = frame[ts_col].iloc[shift:].rename('timestamp').reset_index(drop=True).to_frame()
    # pad the tail with NaN so the output is the same length as the input
    for i in range(shift):
        timestamps.loc[len(timestamps), 'timestamp'] = np.nan
    return timestamps
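A quick self-contained demo of the shift (the function is redefined here, with the numpy import it needs, so the snippet runs on its own):

```python
import numpy as np
import pandas as pd

# Repeated from the gist above so this demo is self-contained.
def step_back_ts(frame, ts_col, shift):
    timestamps = frame[ts_col].iloc[shift:].rename('timestamp').reset_index(drop=True).to_frame()
    for i in range(shift):
        timestamps.loc[len(timestamps), 'timestamp'] = np.nan
    return timestamps

df = pd.DataFrame({'ts': [1, 2, 3, 4]})
out = step_back_ts(df, 'ts', 2)
# out['timestamp'] is [3.0, 4.0, NaN, NaN]: later values move to the
# front, and the tail is padded with NaN to preserve the length
```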
jplsightm / csvs_to_dfs.py
Created June 5, 2018 13:32
Take a directory of files and apply a function to those files.
import os


def process_files(path, extention, func, *args, **kwargs):
    """
    Take a directory of files and apply a function to those files.

    The first parameter of the function (`func`) must be a file name. This is typically
    the file to parse to a df before applying some function.
    """
    dfs = {}
    for fname in os.listdir(path):
        # preview truncated here; presumably files matching `extention` are
        # handed to `func` and the results collected
        if fname.endswith(extention):
            dfs[fname] = func(os.path.join(path, fname), *args, **kwargs)
    return dfs
jplsightm / fiscal_week.py
Last active June 5, 2018 14:30
Early Attempt at identifying fiscal week
from datetime import datetime, timedelta
import pandas as pd


def get_fw(date, fiscal_start=datetime(1970, 1, 1), calendar_day=False):
    """
    Obtain fiscal week from a datetime object.

    :fiscal_start: Indicate the start of a fiscal year
    :calendar_day: If False the first full week is Week 1.
    """
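The preview above cuts off before the function body. As a self-contained illustration of the idea in the docstring (assumed semantics; the name `fiscal_week` and the simple `days // 7` rule are mine, not the original implementation):

```python
from datetime import datetime

# A minimal sketch: number the weeks elapsed since the fiscal year start,
# counting the week containing fiscal_start itself as Week 1. This ignores
# the calendar_day/first-full-week subtlety the original handles.
def fiscal_week(date, fiscal_start=datetime(1970, 1, 1)):
    days = (date - fiscal_start).days
    return days // 7 + 1

fiscal_week(datetime(1970, 1, 15))  # -> 3 (14 days in, third week)
```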
jplsightm / df_to_markdown.py
Last active January 31, 2023 07:32
Convert a Pandas Dataframe to Markdown
import pandas as pd
from tabulate import tabulate


def pandas_df_to_markdown_table(df):
    # Dependent upon ipython
    # shamelessly stolen from https://stackoverflow.com/questions/33181846/programmatically-convert-pandas-dataframe-to-markdown-table
    from IPython.display import Markdown, display
    fmt = ['---' for i in range(len(df.columns))]
    df_fmt = pd.DataFrame([fmt], columns=df.columns)
    df_formatted = pd.concat([df_fmt, df])
    # the preview cuts off here; the linked answer finishes by rendering the
    # frame as a pipe-delimited table:
    display(Markdown(df_formatted.to_csv(sep='|', index=False)))