Created
September 29, 2012 18:01
-
-
Save ianrumford/3804733 to your computer and use it in GitHub Desktop.
Cascalog filter auditd log fields
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(ns aud_cas.filter_fields | |
(:use cascalog.api) | |
(:require (cascalog [workflow :as w]) | |
[clojure.string :as str]) | |
(:import [java.util UUID ] | |
[org.apache.hadoop.hbase.util Bytes] | |
[com.twitter.maple.hbase HBaseTap HBaseScheme])) | |
;; Custom map operation: ignores whatever input fields it is given and emits
;; a one-field tuple holding a freshly generated random UUID as a string.
(defmapop dmo-uuid [& _]
  [(str (UUID/randomUUID))])
;; Custom map operation: ignores whatever input fields it is given and emits
;; a one-field tuple containing the constant boolean true.
(defmapop dmo-passed [& _]
  [true])
;; Custom map operation: ignores whatever input fields it is given and emits
;; a one-field tuple containing a random integer in the range [0, 1000000).
;; Despite the name, the value is random and repeats are possible.
(defmapop dmo-unique [& _]
  [(rand-int 1000000)])
(defn hbase-tap
  "Construct an HBaseTap on `table-name`.

  The tap's HBaseScheme is built from `key-field` (passed through w/fields),
  the `column-family` name, and the remaining `value-fields` arguments
  (collected as a sequence and passed through w/fields)."
  [table-name key-field column-family & value-fields]
  (HBaseTap. table-name
             (HBaseScheme. (w/fields key-field)
                           column-family
                           (w/fields value-fields))))
(defn parse_input_record
  "Parse the prefix of a log record into a map of string keys to string values.

  The prefix is the text before the first colon-space separator. It is split
  on single spaces into tokens, and each token of the form key=value becomes
  one map entry. Only the first '=' in a token separates key from value, so a
  value may itself contain '='.

  Tokens without an '=' (including the empty tokens produced by an empty
  line or by consecutive spaces) are skipped rather than raising an
  exception. A nil or empty `input_record` yields an empty map."
  [input_record]
  (let [;; `first` (not `get ... 0`) plus `or` guards against a nil record and
        ;; against a split that yields no elements at all.
        prefix_string (or (first (str/split (or input_record "") #"\: ")) "")
        prefix_pairs  (str/split prefix_string #" ")]
    (into {}
          (for [token prefix_pairs
                ;; Limit 2: split only on the first '=' so values keep theirs.
                :let  [pair (str/split token #"=" 2)]
                ;; `into {}` requires exact pairs; drop malformed tokens.
                :when (= 2 (count pair))]
            pair))))
(defmapop dmo-parse-log-record-to-tuple
  "Map operation: parse a log record with parse_input_record and emit a
  three-field tuple of the values stored under the string keys \"type\",
  \"node\" and \"msg\". A key missing from the parsed map yields nil in
  that position."
  [input_record]
  (let [fields (parse_input_record input_record)]
    [(get fields "type")
     (get fields "node")
     (get fields "msg")]))
(defn query-log-lines
  "Build (without executing) a Cascalog query that emits one ?line per
  line read from the lfs-textline tap opened on `log-path`."
  [log-path]
  (let [source (lfs-textline log-path)]
    (<- [?line]
        (source :> ?line))))
(defn query-log-tuples
  "Build (without executing) a Cascalog query that emits [?type ?node ?msg]
  tuples, obtained by running dmo-parse-log-record-to-tuple over every line
  produced by query-log-lines for `log-path`."
  [log-path]
  (let [lines (query-log-lines log-path)]
    (<- [?type ?node ?msg]
        (lines :> ?line)
        (dmo-parse-log-record-to-tuple ?line :> ?type ?node ?msg))))
(defn filter-fields
  "Use Cascalog to filter the prefix fields of an auditd log and save them
  into HBase.

  Only records whose type equals \"LOGIN\" are kept. For each kept record the
  query executes against the \"blog_cascalog_tuple\" table sink (column family
  \"record\", key field ?uuid) and writes ?type and ?node together with the
  generated ?uuid, ?unique and ?passed fields. ?msg is read but not written."
  [log-path]
  (let [all-records   (query-log-tuples log-path)
        ;; Keep LOGIN records only.
        login-records (<- [?t ?n ?m]
                          (all-records :> ?t ?n ?m)
                          (= ?t "LOGIN"))
        sink          (hbase-tap "blog_cascalog_tuple" "?uuid" "record"
                                 "?type" "?node" "?unique" "?passed")]
    (?<- sink
         [?uuid ?type ?node ?unique ?passed]
         (login-records :> ?type ?node ?msg)
         (dmo-uuid :> ?uuid)
         (dmo-passed :> ?passed)
         (dmo-unique :> ?unique))))
(defn -main
  "Program entry point: announce the log file being processed on stdout,
  then run filter-fields on `log-path` and return its result."
  [log-path]
  (println "Filtering and saving fields in file" log-path)
  (filter-fields log-path))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Thanks