Skip to content

Instantly share code, notes, and snippets.

View jgoodie's full-sized avatar

John Goodman jgoodie

  • San Diego
View GitHub Profile
# Recombine the training and validation splits (moved to the CPU first) so
# the XGBoost classifier below is fitted on both of them together.
X_tv = torch.cat([X_train.cpu(), X_val.cpu()], dim=0)
y_tv = torch.cat([y_train.cpu(), y_val.cpu()], dim=0)
np.random.seed(101)
# 'balanced' asks scikit-learn to weight each sample inversely to how often
# its class occurs in y_tv.
sample_weights = compute_sample_weight(
    class_weight='balanced',
    y=y_tv,
)
model = XGBClassifier()
model.fit(
    X_tv,
    y_tv,
    sample_weight=sample_weights,
)
np.random.seed(42)
df = pd.read_csv("data/RT_IOT2022.csv")
# Attack_type values kept for this experiment, mapped to the integer ids
# stored in the "label" column. The spelling 'ARP_poisioning' presumably
# matches the CSV; do not "correct" it without checking the data.
s_dict = {'DOS_SYN_Hping': 0, 'Thing_Speak': 1, 'ARP_poisioning': 2, 'MQTT_Publish': 3}
# Derive the list from the dict so the two can never drift apart.
s = list(s_dict)
# Take an explicit copy of the filtered rows so the column edits below act on
# an independent frame instead of a slice of the full dataset (this avoids
# pandas' SettingWithCopyWarning).
df = df[df.Attack_type.isin(s)].copy()
# Drop "Unnamed: 0" (presumably the row index saved with the CSV) together
# with the "service" and "proto" columns, which the commented-out line below
# would one-hot encode instead.
df = df.drop(columns=["Unnamed: 0", "service", "proto"])
# df = pd.get_dummies(df, columns=['service', 'proto'])*1
# Every remaining Attack_type is a key of s_dict, so map() yields an integer
# id for each row.
df["label"] = df.Attack_type.map(s_dict)
torch.manual_seed(101)
# Make predictions
# Put the model in evaluation mode before running the test set through it.
model.eval()
with torch.inference_mode():
    # The forward pass runs inside inference_mode; its output is then moved
    # to `device`.
    y_logits = model(X_test).to(device)
    # Softmax over dim 1 followed by argmax over dim 1 gives the predicted
    # class index for each entry along dim 0.
    y_preds = torch.softmax(y_logits, dim=1).argmax(dim=1)
# Multiclass metric objects sized from the model's `output_features`
# attribute (Accuracy / ConfusionMatrix are presumably torchmetrics classes;
# confirm against the imports).
accuracy = Accuracy(task="multiclass", num_classes=model.output_features).to(device)
confusion = ConfusionMatrix(task="multiclass", num_classes=model.output_features).to(device)
# Loss curves: one line for the training series, one for validation.
for _series, _legend in ((train_losses, 'train'), (val_losses, 'validation')):
    plt.plot(_series, label=_legend)
plt.title("Train/Validation Loss")
plt.legend()
plt.tight_layout()
plt.show()
# Accuracy curves, drawn the same way.
for _series, _legend in ((train_accs, 'train'), (val_accs, 'validation')):
    plt.plot(_series, label=_legend)
plt.title("Train/Validation Accuracy")
# Seed PyTorch's random number generator.
torch.manual_seed(101)
# Use CUDA when it is available, otherwise the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
# Hyperparameters. 91 is the alternative input width kept for reference.
# input_features = 91
input_features = 81
output_features = 4
hidden_units = 128  # 128
dropout = 0.0
lr = 0.001
# One 'balanced' weight per row of df, computed from the label column.
sample_weights = compute_sample_weight(class_weight='balanced', y=df.label)
# Sort the (label, weight) pairs and collapse them into a dict: one entry per
# distinct label, with keys in ascending label order.
label_weights = dict(sorted(zip(df.label, sample_weights)))
# Keep only the weights, in that label order, as a float32 tensor.
label_weights = torch.tensor([*label_weights.values()], dtype=torch.float32)
label_weights
df = pd.read_csv("data/RT_IOT2022.csv")
# Quick look at the raw frame: its shape, the first rows, and three of its
# columns side by side.
print(df.shape)
print("===========================================")
print(df.head())
print(df.loc[:, ['service', 'proto', 'Attack_type']])
# Count the rows per Attack_type value, most frequent first.
labels = dict(Counter(df['Attack_type']).most_common())
print(labels)
def training_loop(model, X_train, X_val, y_train, y_val, epochs = 1000, weight_decay = 0.0, lr=0.001, device='cuda'):
# Put all data on target device
X_train, y_train = X_train.to(device), y_train.to(device) #y_train.unsqueeze(dim=1).to(device)
X_val, y_val = X_val.to(device), y_val.to(device) #y_test.unsqueeze(dim=1).to(device)
# Define the accuracy function and initialize train/validation accuracy and loss lists
accuracy = Accuracy(task="multiclass", num_classes=model.output_features).to(device)
train_losses, train_accs, val_losses, val_accs = [], [], [], []
loss_fn = nn.CrossEntropyLoss(weight=label_weights.to(device))
# Build model
class IoTMultiClassModel(nn.Module):
def __init__(self, input_features=91, output_features=4, hidden_units=128, dropout=0.0):
super().__init__()
self.input_features = input_features
self.output_features = output_features
self.hidden_units = hidden_units
self.linear_layer_stack = nn.Sequential(
# setting device on GPU if available, else CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Using device:', device)
print()
# Additional info when using cuda
dev = 0  # index of the CUDA device whose name is reported
if device.type == 'cuda':
    # These lines run only when a CUDA device was selected above.
    print(torch.cuda.get_device_name(dev))
    print('Memory Usage:')