Skip to content

Instantly share code, notes, and snippets.

@NiceRath
Last active May 6, 2024 12:19
Show Gist options
  • Save NiceRath/533366c47121824722042d85c12f5e3c to your computer and use it in GitHub Desktop.
Save NiceRath/533366c47121824722042d85c12f5e3c to your computer and use it in GitHub Desktop.
Graylog Pipeline Rules to extract fields for some common Services

Graylog menu: Graylog - System - Pipelines - Manage rules

All rules assume that you pre-filter your logs on a per-application basis. Otherwise the matching will get horrible.

Use regex101.com for testing expressions. Make sure to escape every backslash (\ becomes \\, and so on) before adding an expression to a Graylog rule.

GENERIC: Use lookup tables to translate IPs to Hostnames

/*
  Small example of how useful Graylog lookup tables can be (menu: System - Lookup Tables).
  - the IP to translate is expected in the 'src_ip' field
  - the IP-to-hostname CSV has to be kept up to date externally (dynamic script/cronjob/whatever..)
*/
let src_hostname = lookup_value("ip-to-hostname", $message.src_ip);
set_field("src_hostname", src_hostname);

GENERIC: Censor Public IPv4 (GDPR)

rule "rule_dsgvo_censor-public-ip"
when
  // Runs only when the message contains something shaped like a dotted IPv4 address
  // (four octets 0-255) that is preceded by the start of the message or a character
  // that is neither a digit nor a dot, and followed by such a character.
  regex("(^|[^0-9\\.])(([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\\.){3}([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])[^0-9\\.]", to_string($message.message)).matches == true

then
  // Replaces the last octet with "x".
  // The negative lookahead skips addresses starting with 10., 127, 192.168, 172.16-172.31
  // and first "octets" above 255, so those stay untouched.
  // Replacement groups: $1 = leading delimiter, $2 = first three octets, $9 = trailing delimiter
  // (groups 3-8 are the lookahead alternatives and the single octets).
  // NOTE(review): both expressions require one trailing character after the address, so an
  // address at the very end of the message is not matched - confirm whether that is acceptable.
  let new_message = regex_replace("(^|[^0-9\\.])((?!(10\\.|127)|192\\.168|172\\.(2[0-9]|1[6-9]|3[0-1])|(25[6-9]|2[6-9][0-9]|[3-9][0-9][0-9]|99[1-9]))[0-9]{1,3}\\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?))\\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)([^0-9\\.])", to_string($message.message), "$1$2.x$9");
  set_field("message", new_message);
end

POSTFIX: Split

rule "rule_postfix_split"
when
  // applies to every message that carries a 'message' field
  has_field("message")

then
  // stringify the message once and reuse it for all expressions below
  let line = to_string($message.message);

  // value following "status=" up to the next space
  let m_status = regex("status=(.*?) ", line);
  // everything after "said: " and the three-digit code that directly follows it
  let m_error = regex("said: (.*)", line);
  let m_errorcode = regex("said: ([0-9]{3})", line);
  // value following "relay=" up to the next "," or "["
  let m_relay = regex("relay=(.*?)(,|\\[)", line);
  // addresses inside "from=<...>," and "to=<...>,"
  let m_from = regex("from=<(.*?)>,", line);
  let m_to = regex("to=<(.*?)>,", line);
  // text between "header Subject: " and " from"
  let m_subject = regex("header Subject: (.*?) from", line);

  set_field("postfix_status", m_status["0"]);
  set_field("postfix_error", m_error["0"]);
  set_field("postfix_errorcode", m_errorcode["0"]);
  set_field("postfix_relay", m_relay["0"]);
  set_field("postfix_from", m_from["0"]);
  set_field("postfix_to", m_to["0"]);
  set_field("postfix_subject", m_subject["0"]);
end

POSTFIX: censor mail receivers (GDPR)

rule "rule_gdpr_censor-mail-address"
when
  // only touch messages that contain a "to=<...>" part (this also covers "orig_to=<...>")
  regex("to=\\<(.*?)\\>", to_string($message.message)).matches == true

then
  let original = to_string($message.message);

  // Pass 1 - local part with MORE than 4 characters: keep the first 4, censor the rest.
  // [^@>] keeps the match inside a single <...> address, so it can never run into a
  // following address of the same log line.
  let long_censored = regex_replace("to=<([^@>]{4})[^@>]+@([^>]*)>", original, "to=<$1_censored_@$2>");

  // Pass 2 - local part with 1 to 4 characters: keeping "the first 4" would reveal the
  // whole address, so the local part is dropped completely.
  // Addresses rewritten by pass 1 now have a local part longer than 4 characters and
  // therefore cannot match here a second time.
  let new_message = regex_replace("to=<[^@>]{1,4}@([^>]*)>", long_censored, "to=<_censored_@$1>");

  set_field("message", new_message);
end

POSTFIX: message size MB

rule "rule_postfix_size"
when
  contains(to_string($message.message), "size=")

then
  // capture only the digits following "size=" so the conversion below never sees trailing text
  let size = regex("size=([0-9]+)", to_string($message.message));
  // the captured value is treated as a byte count; 1 MB = 1,000,000 bytes
  // (the previous divisor of 100,000 produced values ten times too large)
  set_field("postfix_size_mb", to_double(size["0"]) / 1000000.0);
end

SUDO: Split

rule "rule_sudo"
when
  has_field("message")

then
  // text in front of the first ":" (the invoking user)
  let user = regex("(.*?):", to_string($message.message));
  // KEY=value; pairs
  let tty = regex("TTY=(.*?);", to_string($message.message));
  let user2 = regex("USER=(.*?);", to_string($message.message));
  let pwd = regex("PWD=(.*?);", to_string($message.message));
  // complete command line (everything after "COMMAND=")
  let cmd = regex("COMMAND=(.*)(;|)", to_string($message.message));
  // first token of the command; \S+ also matches a command without arguments at the
  // end of the message (the former "(.*?)\s" needed a trailing whitespace)
  let cmd_bin = regex("COMMAND=(\\S+)", to_string($message.message));
  // whatever follows the first ";" inside the command line.
  // The former expression "COMMAND=.*(;|)(.*)" could only ever capture an empty string:
  // the greedy ".*" consumed the whole command and index "0" pointed at the "(;|)" group.
  let cmd2 = regex("COMMAND=.*?;\\s*(.+)", to_string($message.message));
  // Ansible module name taken from "AnsiballZ_<module>.py"
  let cmd_ans = regex("AnsiballZ_(.*?)\\.py", to_string($message.message));

  set_field("sudo_user", user["0"]);
  set_field("sudo_become_user", user2["0"]);
  set_field("sudo_tty", tty["0"]);
  set_field("sudo_pwd", pwd["0"]);
  set_field("sudo_cmd", cmd["0"]);
  set_field("sudo_cmd_bin", cmd_bin["0"]);
  set_field("sudo_cmd_ansible_module", cmd_ans["0"]);
  set_field("sudo_cmd_add", cmd2["0"]);
end

NGINX: Split

rule "rule_nginx_split-message"
when
  // applies to every message that carries a 'message' field
  has_field("message")

then
  let line = to_string($message.message);
  let app = to_string($message.application_name);

  // One expression for the whole access-log line.
  // Index "1" is the inner repetition group of the client address and is intentionally unused.
  let parts = regex("^(([0-9]{1,3}\\.){3}.*?)\\s.*?\"(.*?)\\s(.*?)\\sHTTP\\/(.*?)\"\\s(\\d{3}).*?\"(.*?)\"\\s\"(.*?)\"", line);
  // site name = application_name without its "nginx_" prefix
  let site = regex("nginx_(.*)", app);

  set_field("nginx_client", parts["0"]);
  set_field("nginx_method", parts["2"]);
  set_field("nginx_request", parts["3"]);
  set_field("nginx_http_version", parts["4"]);
  set_field("nginx_status_code", parts["5"]);
  set_field("nginx_referer", parts["6"]);
  set_field("nginx_useragent", parts["7"]);
  set_field("nginx_site", site["0"]);
end

HAPROXY ENTERPRISE: WAF

NOTE: Not designed for IPv6 usage

rule "rule_haproxy_waf"
when
  // NOTE: add the 'WAF:' prefix to the waf-logformat so you can easily filter on it
  starts_with(to_string($message.message), "WAF:")

then
  let waf_src = regex("ip=(.*?)&", to_string($message.message));
  let ip = to_string(waf_src["0"]);

  // NOTE: you can use graylog GeoIP lookups to enhance your log information
  /*
  let geoipCountry = lookup("geoip_country", ip);
  set_field("haproxy_geoip_country", geoipCountry["country"].iso_code);

  let geoipASN = lookup("geoip_asn", ip);
  set_field("haproxy_geoip_asn", geoipASN["as_number"]);
  set_field("haproxy_geoip_asn_name", geoipASN["as_organization"]);
  */

  // NOTE: will censor the last part of IPv4 addresses
  set_field("haproxy_client", regex_replace("\\.\\d{1,3}$", ip, ".x"));
  set_field("haproxy_waf_src", regex_replace("\\.\\d{1,3}$", ip, ".x"));

  // NOTE: add 'uid=%[unique-id]' to logformat to relate WAF to ACCESS logs
  let waf_uid = regex("WAF:\\s.*?\\s(.*?)\\s", to_string($message.message));
  set_field("haproxy_uid", waf_uid["0"]);

  let waf_fe = regex("frontend=(.*?)&", to_string($message.message));
  set_field("haproxy_frontend", waf_fe["0"]);
  let waf_be = regex("backend=(.*?)&", to_string($message.message));
  set_field("haproxy_backend", waf_be["0"]);
  let waf_path = regex("uri=(.*?)&", to_string($message.message));
  set_field("haproxy_path", waf_path["0"]);
  let waf_learning = regex("learning=(.*?)&", to_string($message.message));
  set_field("haproxy_waf_learning", waf_learning["0"]);
  let waf_block = regex("block=(.*?)&", to_string($message.message));
  set_field("haproxy_waf_block", waf_block["0"]);
  let waf_time = regex("processing_time=(.*?)&", to_string($message.message));
  set_field("haproxy_waf_processing_time", to_double(waf_time["0"]));
  let waf_level = regex("threat_level=(.*?)&", to_string($message.message));
  set_field("haproxy_waf_threat", to_double(waf_level["0"]));

  // per-category scores: the category name (cscoreN) becomes part of the field name,
  // the matching scoreN becomes the value
  let waf_score0 = regex("cscore0=\\$(.*?)&score0=(.*?)(&|$)", to_string($message.message));
  let waf_score1 = regex("cscore1=\\$(.*?)&score1=(.*?)(&|$)", to_string($message.message));
  let waf_score2 = regex("cscore2=\\$(.*?)&score2=(.*?)(&|$)", to_string($message.message));
  set_field("haproxy_waf_threat_" + lowercase(to_string(waf_score0["0"])), to_double(waf_score0["1"]));
  set_field("haproxy_waf_threat_" + lowercase(to_string(waf_score1["0"])), to_double(waf_score1["1"]));
  set_field("haproxy_waf_threat_" + lowercase(to_string(waf_score2["0"])), to_double(waf_score2["1"]));

  // matched WAF rules: index "0" = zone, "1" = rule id, "2" = variable name
  let waf_match1 = regex("zone0=(.*?)&id0=(.*?)&var_name0=(.*?)(&|$)", to_string($message.message));
  let waf_match2 = regex("zone1=(.*?)&id1=(.*?)&var_name1=(.*?)(&|$)", to_string($message.message));
  let waf_match3 = regex("zone2=(.*?)&id2=(.*?)&var_name2=(.*?)(&|$)", to_string($message.message));

  // NOTE: you can use a graylog-lookup-table to map the generic IDs to human-readable names.
  //       To enable it, uncomment this block AND the '..._comment' set_field lines below.
  //       The rule fields no longer reference these optional variables, so the rule also
  //       works while the block stays commented out.
  /*
  let cmt_on = "[";
  let cmt_off = "]";
  let waf_match1_cmt = cmt_on + to_string(lookup_value("haproxy-waf-rule-id-to-name", to_string(waf_match1["1"]))) + cmt_off;
  let waf_match2_cmt = cmt_on + to_string(lookup_value("haproxy-waf-rule-id-to-name", to_string(waf_match2["1"]))) + cmt_off;
  let waf_match3_cmt = cmt_on + to_string(lookup_value("haproxy-waf-rule-id-to-name", to_string(waf_match3["1"]))) + cmt_off;
  */

  // rule fields are built as "<id> <zone> <var_name>"
  let space = " ";
  set_field("haproxy_waf_rule01", to_string(waf_match1["1"]) + space + to_string(waf_match1["0"]) + space + to_string(waf_match1["2"]));
  // set_field("haproxy_waf_rule01_comment", waf_match1_cmt);
  set_field("haproxy_waf_rule02", to_string(waf_match2["1"]) + space + to_string(waf_match2["0"]) + space + to_string(waf_match2["2"]));
  // set_field("haproxy_waf_rule02_comment", waf_match2_cmt);
  set_field("haproxy_waf_rule03", to_string(waf_match3["1"]) + space + to_string(waf_match3["0"]) + space + to_string(waf_match3["2"]));
  // set_field("haproxy_waf_rule03_comment", waf_match3_cmt);

end

HAPROXY: Split

NOTE: Not designed for IPv6 usage

WARNING: Might temporarily kill your log processing whenever the format is changed (and many logs are processed)

rule "rule_haproxy_access"
when
  // NOTE: add the 'REQ:' prefix to the access-logformat so you can easily filter on it
  starts_with(to_string($message.message), "REQ:")

then
  let line = to_string($message.message);

  // Index map: "0" client address, "1" frontend, "2" backend, "3" backend server,
  // "4" status, "5" method, "6" path, "7" query incl. "?" (unused), "8" query, "9" http version
  let req = regex(".*?\\[(.*?)\\]:[0-9]{3,5}\\s.*?]\\s(.*?)\\s(.*?)\\/(.*?)\\s.*?\\s([0-9]{3}).*?\\\"(.*?)\\s(.*?)(\\?(.*?))?\\s(.*)\\\"", line);
  // strip the "::ffff:" prefix from the client address
  let ip = regex_replace("::ffff:", to_string(req["0"]), "");

  // NOTE: you can use graylog GeoIP lookups to enhance your log information
  /*
  let geoipCountry = lookup("geoip_country", ip);
  set_field("haproxy_geoip_country", geoipCountry["country"].iso_code);

  let geoipASN = lookup("geoip_asn", ip);
  set_field("haproxy_geoip_asn", geoipASN["as_number"]);
  set_field("haproxy_geoip_asn_name", geoipASN["as_organization"]);
  */

  // the last part of the IPv4 address is censored
  let censored_client = regex_replace("\\.\\d{1,3}$", ip, ".x");
  set_field("haproxy_client", censored_client);
  set_field("haproxy_frontend", req["1"]);
  set_field("haproxy_backend", req["2"]);
  set_field("haproxy_backend_server", req["3"]);
  set_field("haproxy_status", req["4"]);
  set_field("haproxy_method", req["5"]);
  set_field("haproxy_path", req["6"]);
  set_field("haproxy_query", req["8"]);
  set_field("haproxy_http_version", req["9"]);

  // CAPTURES - optional - see https://www.haproxy.com/documentation/haproxy-enterprise/administration/logs/#log-asap
  // expected shape: {<host>|<0 or 1>|<user agent>} followed by a space and a double quote
  let cap = regex(".*\\{(.*?)\\|([0-1]*?)\\|(.*?)\\}\\s\\\".*", line);

  // http-request capture req.hdr(Host) len 50
  // (a default is written first, then the captured value)
  set_field("haproxy_domain", "-");
  set_field("haproxy_domain", cap["0"]);

  // http-request set-var(txn.is_flagged) int(1) if { ... }
  // http-request set-var(txn.is_flagged) int(0) if !{ var(txn.is_flagged) -m found }
  // http-request capture var(txn.is_flagged) len 1  // boolean; 0 or 1
  // (a default is written first, then the captured value)
  set_field("haproxy_is_flagged", "0");
  set_field("haproxy_is_flagged", cap["1"]);

  // http-request capture req.fhdr(User-Agent) len 200
  set_field("haproxy_user_agent", cap["2"]);
end

SQUID: Split

rule "rule_squid_http"
when
  // handles the messages that contain a plain "http://" URL
  contains(to_string($message.message), "http://")

then
  let line = to_string($message.message);

  // "<src> <action>/<code> <size> <method> http://<host>/<url> - <dst method>/<dst ip> "
  let parts = regex("(.*?)\\s(.*?)\\/(.*?)\\s(.*?)\\s(.*?)\\shttp:\\/\\/(.*?)\\/(.*?)\\s-\\s(.*?)\\/(.*?)\\s", line);

  // request side
  set_field("squid_src", parts["0"]);
  set_field("squid_action", parts["1"]);
  set_field("squid_action_code", parts["2"]);
  set_field("squid_size", parts["3"]);
  set_field("squid_method", parts["4"]);

  // destination side; the port is fixed to "80" by this rule
  set_field("squid_dst_sni", parts["5"]);
  set_field("squid_dst_url", parts["6"]);
  set_field("squid_dst_port", "80");
  set_field("squid_dst_method", parts["7"]);
  set_field("squid_dst_ip", parts["8"]);
end

rule "rule_squid_https"
when
  // handles every message that does NOT contain "http://"
  not contains(to_string($message.message), "http://")

then
  let line = to_string($message.message);

  // "<src> <action>/<code> <size> <method> <host>:<port> - <dst method>/<dst ip> "
  let parts = regex("(.*?)\\s(.*?)\\/(.*?)\\s(.*?)\\s(.*?)\\s(.*?):(.*?)\\s-\\s(.*?)\\/(.*?)\\s", line);

  // request side
  set_field("squid_src", parts["0"]);
  set_field("squid_action", parts["1"]);
  set_field("squid_action_code", parts["2"]);
  set_field("squid_size", parts["3"]);
  set_field("squid_method", parts["4"]);

  // destination side; host and port are taken from "<host>:<port>"
  set_field("squid_dst_sni", parts["5"]);
  set_field("squid_dst_port", parts["6"]);
  set_field("squid_dst_method", parts["7"]);
  set_field("squid_dst_ip", parts["8"]);
end

SYSTEMD: Timer

rule "rule_systemd_timer_start"
when
  contains(to_string($message.message), "Started Timer")

then
  let line = to_string($message.message);
  // timer name = text between "Started Timer " and the last "."
  let timer = regex("Started Timer (.*)\\.", line);
  set_field("systemd_timer_action", "start");
  set_field("systemd_timer_name", timer["0"]);
end

rule "rule_systemd_timer_stop"
when
  contains(to_string($message.message), "Stopped Timer")

then
  let line = to_string($message.message);
  // timer name = text between "Stopped Timer " and the last "."
  let timer = regex("Stopped Timer (.*)\\.", line);
  set_field("systemd_timer_action", "stop");
  set_field("systemd_timer_name", timer["0"]);
end

APACHE2: Split

With LogFormat set to: LogFormat "%V:%p %a %l %u %t \"%r\" %>s %B %D \"%{Referer}i\" \"%{User-Agent}i\"" combined

rule "rule_apache_split-message"
when
  has_field("message")

then
  // Expects the LogFormat
  //   "%V:%p %a %l %u %t \"%r\" %>s %B %D \"%{Referer}i\" \"%{User-Agent}i\""
  // Index map of the expression below:
  //   "0" domain (%V)   "1" port (%p)    "2" client (%a)   "3" date (%t)
  //   "4" method        "5" path         "6" query         "7" protocol
  //   "8" status (%>s)  "9" size (%B)    "10" time (%D)
  //   "11" referrer     "12".."15" referrer proto / domain / path / query
  //   "16" user agent
  let result = regex("^(.*?):([\\d]{1,5})\\s(.*?)\\s.*?\\[(.*?)\\]\\s\\\"(.*?)\\s(.*?)(\\?.*?|)\\s(.*?)\\\"\\s([\\d]{1,3})\\s([\\d]{1,20})\\s([\\d]{1,20})\\s\\\"((.*?):\\/\\/(.*?)\\/(.*?)(\\?.*?|)|-)\\\"\\s\\\"(.*?)\\\"", to_string($message.message));
  set_field("apache2_domain", result["0"]);
  set_field("apache2_port", result["1"]);
  set_field("apache2_client", result["2"]);
  set_field("apache2_date", result["3"]);
  set_field("apache2_method", result["4"]);
  set_field("apache2_path", result["5"]);
  set_field("apache2_query", result["6"]);
  set_field("apache2_proto", result["7"]);
  set_field("apache2_status", result["8"]);
  // %B (response size) is logged BEFORE %D (time taken); the two indexes were swapped before
  set_field("apache2_time", to_long(result["10"]));
  set_field("apache2_size", result["9"]);
  set_field("apache2_referrer", result["11"]);
  set_field("apache2_referrer_proto", result["12"]);
  set_field("apache2_referrer_domain", result["13"]);
  // the expression consumes the "/" between domain and path, so it is added back here
  set_field("apache2_referrer_path", "/" + to_string(result["14"]));
  set_field("apache2_referrer_query", result["15"]);
  set_field("apache2_user_agent", result["16"]);
end

NFTABLES: Split

Stream filters:

  • application_name must match exactly kernel
  • message must contain NFT => add the prefix 'NFT' to all NFTables logs, so you can easily filter them from other kernel messages
rule "rule_nftables_split"
when
  // applies to every message that carries a 'message' field
  has_field("message")

then
  let line = to_string($message.message);

  // "NFT <action> <comment> IN=..." - the two words between the NFT prefix and "IN="
  let head = regex("NFT\\s(.*?)\\s(.*?)\\sIN=", line);
  // KEY=value pairs of the packet itself
  let pkt = regex("IN=(.*?)\\sOUT=(.*?)\\s.*?SRC=(.*?)\\sDST=(.*?)\\s.*?PROTO=(.*?)\\sSPT=(.*?)\\sDPT=(.*?)\\s", line);

  set_field("nft_action", head["0"]);
  set_field("nft_comment", head["1"]);

  // interfaces
  set_field("nft_if_in", pkt["0"]);
  set_field("nft_if_out", pkt["1"]);
  // addresses
  set_field("nft_ip_src", pkt["2"]);
  set_field("nft_ip_dst", pkt["3"]);
  // protocol and ports
  set_field("nft_proto", pkt["4"]);
  set_field("nft_port_src", pkt["5"]);
  set_field("nft_port_dst", pkt["6"]);

  // NOTE: it can be useful to translate IPs to Hostnames
  /*
  let src_hostname = lookup_value("ip-to-hostname", pkt["2"]);
  set_field("nft_hostname_src", src_hostname);
  let dest_hostname = lookup_value("ip-to-hostname", pkt["3"]);
  set_field("nft_hostname_dst", dest_hostname);
  */
end

OPENVPN COMMUNITY: Split

rule "rule_openvpn_split"
when
  has_field("message")

then
  // user name as written in "username '<name>'"
  let user = regex("username\\s'(.*?)'", to_string($message.message));
  // "<user>/<client address>:<port>" prefix at the start of the message.
  // The character classes must contain the digit 0 ("0-9" instead of "1-9"), otherwise
  // every address containing a zero (e.g. 10.0.0.1) fails to match.
  let user_ip = regex("^(.*?)\\/([a-fA-F0-9\\.:]{1,50}):[0-9]{1,5}", to_string($message.message));
  // first IPv4-looking value anywhere in the message
  let ip = regex("((?:[0-9]{1,3}\\.){3}[0-9]{1,3})", to_string($message.message));
  // certificate common name: "CN=<name>" up to the next "," or whitespace
  let cert = regex("CN=(.*?)(,|\\s)", to_string($message.message));
  // word following "authentication "
  let status = regex("authentication\\s(.*?)\\s", to_string($message.message));

  // user and client are each written from two expressions, in this order
  set_field("openvpn_user", user["0"]);
  set_field("openvpn_user", user_ip["0"]);
  set_field("openvpn_client", user_ip["1"]);
  set_field("openvpn_client", ip["0"]);
  set_field("openvpn_status", status["0"]);
  set_field("openvpn_certificate", cert["0"]);
end

FAIL2BAN: Basic Split

rule "rule_fail2ban"
when
  has_field("message")
then
  // "<LEVEL> [<filter>] <Ban|Unban|Found|Ignore> <ip>"
  // - the level accepts up to 8 letters so WARNING and CRITICAL match as well
  // - the address is captured as the next run of non-whitespace characters; the former
  //   "\s*(.*?)\s" captured an EMPTY value whenever the line ended right after the address
  let result = regex("^([A-Z]{1,8})\\s\\[(.*?)\\]\\s(Ban|Unban|Found|Ignore)\\s+(\\S+)", to_string($message.message));
  set_field("fail2ban_level", result["0"]);
  set_field("fail2ban_filter", result["1"]);
  set_field("fail2ban_action", result["2"]);
  set_field("fail2ban_ip", result["3"]);
end

WINDOWS SERVER - remove duplicated field

rule "rule_ms_clear_fullmsg"
when
  // only act on messages that actually carry the 'full_message' field
  has_field("full_message")

then
  // drop the field from the message (the section heading describes it as a duplicate)
  remove_field("full_message");
end

WINDOWS SERVER - Fileserver Audit Log - File Access

Stream filters:

  • Category must match regular expression File System|Detailed File Share
  • Optionally filter on Graylog-Input and Source Host
rule "rule_ms_file"
when
  // events that reference the file via 'RelativeTargetName'
  has_field("RelativeTargetName")

then
  // first word (letters/underscores) following "Accesses:"
  let access = regex(".*Accesses:\\s*([A-Za-z_]*)", to_string($message.message));
  // lower-cased file name, written back to the source field and to the common 'ms_file' field
  let target = lowercase(to_string($message.RelativeTargetName));

  set_field("ms_file_action", access["0"]);
  set_field("RelativeTargetName", target);
  set_field("ms_file", target);
end

rule "rule_ms_file2"
when
  // events that reference the file via 'ObjectName'
  has_field("ObjectName")

then
  // first word (letters/underscores) following "Accesses:"
  let access = regex(".*Accesses:\\s*([A-Za-z_]*)", to_string($message.message));
  // lower-cased object name, written back to the source field and to the common 'ms_file' field
  let target = lowercase(to_string($message.ObjectName));

  set_field("ms_file_action", access["0"]);
  set_field("ObjectName", target);
  set_field("ms_file", target);
end

WINDOWS SERVER - RDP

Stream filters:

  • SourceName must match regular expression Microsoft-Windows-WinRM|Microsoft-Windows-TerminalServices.*
  • Optionally filter on Graylog-Input
rule "rule_ms_rdp"
when
  // applies to every message that carries a 'message' field
  has_field("message")

then
  let line = to_string($message.message);

  // rest of the line after "User: " and after "Source Network Address: "
  let rdp_user = regex("User:\\s(.*)", line);
  let rdp_src = regex("Source Network Address:\\s(.*)", line);

  set_field("ms_rdp_user", rdp_user["0"]);
  set_field("ms_rdp_src", rdp_src["0"]);
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment