Last active
April 2, 2023 05:54
-
-
Save BeanBagKing/aa81fa62b2d40e1598e0677a5585aace to your computer and use it in GitHub Desktop.
Returns an English letter frequency score for command line logs.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python3
# Article reference: https://nullsec.us/finding-unusual-powershell-with-frequency-analysis/
import math
import urllib
import urllib.parse
from xml.dom import minidom

import httplib2
# --- Splunk connection settings -------------------------------------------
baseurl = 'https://<domain>.splunkcloud.com:8089'
userName = '<username>'
password = '<password>'

# Export format requested from Splunk.
# Options are: raw, csv, xml, json, json_cols, json_rows.
# If you are using "table" in your search result, you must(?) use "csv".
output = 'csv'

# --- Running statistics (filled in while the results are scored) ----------
longest = average = averageEntropy = largestEntropy = averageEngFreq = 0
count = 1  # note: the event counter starts at 1, not 0
searchSet = []

# --- Reference letter frequencies -----------------------------------------
# Percentage frequency of each letter in English text.
englishLetterFreq = {
    'E': 12.70, 'T': 9.06, 'A': 8.17, 'O': 7.51, 'I': 6.97, 'N': 6.75,
    'S': 6.33, 'H': 6.09, 'R': 5.99, 'D': 4.25, 'L': 4.03, 'C': 2.78,
    'U': 2.76, 'M': 2.41, 'W': 2.36, 'F': 2.23, 'G': 2.02, 'Y': 1.97,
    'P': 1.93, 'B': 1.29, 'V': 0.98, 'K': 0.77, 'J': 0.15, 'X': 0.15,
    'Q': 0.10, 'Z': 0.07,
}
# The same letters, ordered from most to least frequent.
ETAOIN = 'ETAOINSHRDLCUMWFGYPBVKJXQZ'
# The alphabet that the letter counters consider.
LETTERS = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'

# Alternative table trained on ~1.1 (presumably million -- TODO confirm)
# commands from a 24 hour period; for testing purposes currently. To use it,
# uncomment both lines below so the table and the ordering stay in step.
#englishLetterFreq = {'E': 12.07, 'S': 7.93, 'O': 6.69, 'T': 6.41, 'I': 6.08, 'N': 6.02, 'R': 5.98, 'C': 5.94, 'A': 5.92, 'F': 5.68, 'M': 4.74, 'P': 3.54, 'D': 3.42, 'L': 3.09, 'W': 2.85, 'G': 2.71, 'X': 2.56, 'H': 1.62, 'V': 1.58, 'B': 1.36, 'Y': 1.34, 'U': 1.18, 'K': 0.68, 'Q': 0.4, 'J': 0.15, 'Z': 0.03}
#ETAOIN = 'ESOTINRCAFMPDLWGXHVBYUKQJZ'

# Splunk search that returns one command line per row in the "Commands" field.
searchQuery = (
    '(index=wineventlog EventCode=4688 Process_Command_Line="*") OR '
    '(index=wineventlog EventCode=1 CommandLine="*") '
    'earliest=-1d latest=now '
    '| eval Commands=coalesce(CommandLine,Process_Command_Line) '
    '| table Commands'
)
# Log in to Splunk, run the export search and collect the distinct result
# lines into searchSet.

# NOTE(security): certificate validation is disabled on this client, so the
# TLS connection to Splunk is not authenticated. Turn validation back on
# unless the endpoint's certificate genuinely cannot be verified.
http = httplib2.Http(disable_ssl_certificate_validation=True)

# Step 1: POST the credentials and keep the response body.
try:
    serverContent = http.request(
        baseurl + '/services/auth/login',
        'POST',
        headers={},
        body=urllib.parse.urlencode({'username': userName, 'password': password}),
    )[1]
except Exception as err:
    # Nothing below can work without a login response; stop here rather
    # than failing later with a NameError on serverContent.
    print("error in retrieving login.")
    print(err)
    raise SystemExit(1)

# Step 2: pull the session key out of the XML login response.
try:
    sessionKey = minidom.parseString(serverContent).getElementsByTagName('sessionKey')[0].childNodes[0].nodeValue
except Exception as err:
    print("error in retrieving sessionKey")
    print(err)
    # Show what the server actually sent. The body may not be valid XML
    # (which can be exactly why we are here), so fall back to the raw bytes.
    try:
        print(minidom.parseString(serverContent).toprettyxml(encoding='UTF-8'))
    except Exception:
        print(serverContent)
    # The search request needs the key for its Authorization header.
    raise SystemExit(1)

# Remove leading and trailing whitespace from the search
searchQuery = searchQuery.strip()

# If the query doesn't already start with the 'search' operator or another
# generating command (e.g. "| inputcsv"), then prepend "search " to it.
if not searchQuery.startswith(('search', '|')):
    searchQuery = 'search ' + searchQuery

print(searchQuery)  # Just for reference
print("----- RESULTS BELOW -----")

# Run the search.
searchResults = http.request(
    baseurl + '/services/search/jobs/export?output_mode=' + output,
    'POST',
    headers={'Authorization': 'Splunk %s' % sessionKey},
    body=urllib.parse.urlencode({'search': searchQuery}),
)[1]
searchResults = searchResults.decode('utf-8')

# One entry per line of the response body, then de-duplicate and sort.
searchSet.extend(searchResults.splitlines())
print("Org Set Len: " + str(len(searchSet)))
searchSet = sorted(set(searchSet))
print("New Set Len: " + str(len(searchSet)))
def entropy(string):
    """Return the Shannon entropy of ``string`` in bits per character.

    Each distinct character's probability is its share of the string's
    length, and the result is ``-sum(p * log2(p))`` over those
    probabilities. An empty string yields 0.
    """
    size = len(string)

    # Tally characters in first-seen order, so the terms are summed in the
    # order in which the distinct characters first appear.
    tally = {}
    for ch in string:
        tally[ch] = tally.get(ch, 0) + 1

    shares = [float(seen) / size for seen in tally.values()]
    # log base 2 expressed through natural logs
    terms = [p * math.log(p) / math.log(2.0) for p in shares]
    return -sum(terms)
def getLetterCount(message):
    """Count how often each letter of LETTERS occurs in ``message``.

    The message is upper-cased before counting, so the count is
    case-insensitive. Characters that are not in LETTERS are ignored.

    Returns a dict mapping every letter in LETTERS to its count (0 for
    letters that do not occur).
    """
    letterCount = dict.fromkeys(LETTERS, 0)
    for symbol in message.upper():
        if symbol not in letterCount:
            continue
        letterCount[symbol] += 1
    return letterCount
def getItemAtIndexZero(x):
    """Return the element stored at index 0 of ``x``."""
    first = x[0]
    return first
def getFrequencyOrder(message):
    """Return the letters of LETTERS ordered by frequency in ``message``.

    The most frequently occurring letter comes first. Letters that occur
    equally often are placed in reverse ETAOIN order (the letter that sits
    later in ETAOIN comes first).
    """
    tally = getLetterCount(message)

    def rank(letter):
        # Primary key: how often the letter occurs in the message.
        # Tie-break: the letter's position in ETAOIN. Sorting the pair in
        # descending order gives "most frequent first, reverse ETAOIN among
        # equals" in a single pass.
        return (tally[letter], ETAOIN.find(letter))

    return ''.join(sorted(LETTERS, key=rank, reverse=True))
def englishFreqMatchScore(message):
    """Score how closely the letter frequencies of ``message`` match ETAOIN.

    One point is given for each of the six most common ETAOIN letters that
    is also among the message's six most frequent letters, and one point
    for each of the six least common ETAOIN letters that is also among the
    message's six least frequent letters.

    Returns an int between 0 and 12.
    """
    freqOrder = getFrequencyOrder(message)
    most_frequent = freqOrder[:6]
    least_frequent = freqOrder[-6:]

    common_hits = sum(1 for letter in ETAOIN[:6] if letter in most_frequent)
    uncommon_hits = sum(1 for letter in ETAOIN[-6:] if letter in least_frequent)
    return common_hits + uncommon_hits
# Score every collected event, print the suspicious ones and then a summary.

exclusions = [" --mgmtConnKey "]  # Use this to exclude items

# Start every tally from zero here, so the averages below are taken over
# exactly the events scored in this loop (a counter that starts at 1 makes
# every average one event too low and the reported count one too high).
longest = 0
largestEntropy = 0
average = 0
averageEntropy = 0
averageEngFreq = 0
count = 0

for event in searchSet:
    text = str(event)
    if not text:  # We did have some that were 0 somehow
        continue

    # Score the event once; both values are needed several times below.
    eventEntropy = entropy(text)
    eventScore = englishFreqMatchScore(text)

    average += len(text)
    averageEntropy += eventEntropy
    averageEngFreq += eventScore
    count += 1
    longest = max(longest, len(text))
    largestEntropy = max(largestEntropy, eventEntropy)

    # Only show us matches that are of sufficient length, score poorly
    # against English letter frequency and contain none of the exclusions.
    if (
        len(text) > 300
        and eventScore <= 5
        and not any(ele in text for ele in exclusions)
    ):
        print(text)
        print(str(eventScore))
        print(eventEntropy)
        print("-------------------------------------------------")

# Avoid dividing by zero when no event was scored; the averages are then 0.
divisor = count or 1
average = average / divisor
averageEntropy = averageEntropy / divisor
averageEngFreq = averageEngFreq / divisor

print("average freq: " + str(averageEngFreq))
print("average ent: " + str(averageEntropy))
print("largest ent: " + str(largestEntropy))
print("average: " + str(average))
print("count: " + str(count))
print("longest: " + str(longest))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment