Skip to content

Instantly share code, notes, and snippets.

@fabriciojoc
Created September 29, 2021 13:24
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save fabriciojoc/2184b555c2b575bc7de3761dd4f00b68 to your computer and use it in GitHub Desktop.
Save fabriciojoc/2184b555c2b575bc7de3761dd4f00b68 to your computer and use it in GitHub Desktop.
MLSEC 2021 Model Definition
# This is only a code snippet extracted from
# https://github.com/fabriciojoc/2021-Machine-Learning-Security-Evasion-Competition/blob/master/defender/train_classifier.py
class NeedForSpeedModel():
    """Attribute groups and base estimators for the NFS classifier.

    The class-level lists group attribute names by how they are treated
    (numerical, categorical, textual); ``LABEL`` holds the name of the label
    attribute. Presumably these are column names of the training data --
    confirm against the full train_classifier.py.

    Only the constructor is shown in this snippet; the rest of the class is
    elided (see the trailing ``...``).
    """

    # numerical attributes
    NUMERICAL_ATTRIBUTES = [
        'virtual_size', 'has_debug', 'imports', 'exports', 'has_relocations',
        'has_resources', 'has_signature', 'has_tls', 'symbols', 'timestamp',
        'numberof_sections', 'major_image_version', 'minor_image_version',
        'major_linker_version', 'minor_linker_version', 'major_operating_system_version',
        'minor_operating_system_version', 'major_subsystem_version',
        'minor_subsystem_version', 'sizeof_code', 'sizeof_headers', 'sizeof_heap_commit'
    ]

    # categorical attributes
    CATEGORICAL_ATTRIBUTES = [
        'machine', 'magic'
    ]

    # textual attributes
    TEXTUAL_ATTRIBUTES = ['libraries', 'functions', 'exports_list',
                          'dll_characteristics_list', 'characteristics_list']

    # label
    LABEL = "label"

    # initialize NFS classifier
    def __init__(self,
                 categorical_extractor=None,
                 textual_extractor=None,
                 feature_scaler=None,
                 classifier=None):
        """Store the base estimators used by the model.

        Every argument defaults to ``None``; in that case a fresh default
        estimator is created for this instance. Building the defaults here
        (instead of in the signature) avoids sharing one estimator object
        between all instances that rely on the defaults.

        Args:
            categorical_extractor: encoder for the categorical attributes.
                Default: ``OneHotEncoder(handle_unknown="ignore")``.
            textual_extractor: vectorizer for the textual attributes.
                Default: ``HashingVectorizer`` with 50000 features and a
                token pattern that only matches text delimited by
                whitespace on both sides.
            feature_scaler: scaler for the features.
                Default: ``MaxAbsScaler()``.
            classifier: the classifier. Default: ``RandomForestClassifier()``.
        """
        if categorical_extractor is None:
            categorical_extractor = OneHotEncoder(handle_unknown="ignore")
        if textual_extractor is None:
            textual_extractor = HashingVectorizer(
                n_features=50000, token_pattern=r"(?<=\s)(.*?)(?=\s)")
        if feature_scaler is None:
            feature_scaler = MaxAbsScaler()
        if classifier is None:
            classifier = RandomForestClassifier()

        self.base_categorical_extractor = categorical_extractor
        self.base_textual_extractor = textual_extractor
        self.base_feature_scaler = feature_scaler
        self.base_classifier = classifier

    # the remaining methods of the class are not part of this snippet
    ...
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment