Skip to content

Instantly share code, notes, and snippets.

@ZaydH
Created August 12, 2019 11:33
Show Gist options
  • Save ZaydH/1c70a1b254d5326250cd561e59e10c9b to your computer and use it in GitHub Desktop.
Save ZaydH/1c70a1b254d5326250cd561e59e10c9b to your computer and use it in GitHub Desktop.
Script for Merging all SLEIPNIR Feature Vectors
import sys
import pickle
from pathlib import Path
import torch
import numpy as np
from tqdm import tqdm
# Output directory (relative to the current working directory) that receives
# the merged "<label>.npy" arrays and the "files_<label>.txt" name lists.
DATA_DIR = Path("data")
def main():
    """Merge pickled feature vectors from two folders into ``DATA_DIR``.

    Expects exactly two command-line arguments: the malware folder and the
    benign folder. For each folder, every regular file is unpickled, the
    loaded tensors are concatenated with ``torch.cat`` and the result is
    saved as ``DATA_DIR/<label>.npy`` (``<label>`` is ``malicious`` or
    ``benign``). The names of the merged files are written, one per line and
    in the same order in which they were concatenated, to
    ``DATA_DIR/files_<label>.txt``.

    Exits with status 1 when the argument count is wrong or when a folder
    contains no files to merge.
    """
    if len(sys.argv) != 3:
        print("Command Args: %s <MalwareFolder> <BenignFolder>" % sys.argv[0])
        sys.exit(1)

    # Pair each label with its folder explicitly instead of comparing paths,
    # so passing the same folder twice cannot mislabel the second export.
    labelled_dirs = (
        ("malicious", Path(sys.argv[1])),
        ("benign", Path(sys.argv[2])),
    )

    # np.save does not create missing parent directories.
    DATA_DIR.mkdir(parents=True, exist_ok=True)

    for export_name, vector_dir in labelled_dirs:
        # List the folder once, sorted, so the row order of the merged array
        # is reproducible and always matches the exported name list.
        vec_files = sorted(p for p in vector_dir.iterdir() if p.is_file())
        if not vec_files:
            # torch.cat rejects an empty list; fail with a readable message.
            print("No feature vector files found in %s" % vector_dir)
            sys.exit(1)

        all_vec = []
        desc = export_name + " processing"
        for vec_file in tqdm(vec_files, ncols=80, file=sys.stdout, desc=desc):
            # NOTE: pickle.load can execute arbitrary code embedded in the
            # file; only point this script at folders you trust.
            with open(str(vec_file), "rb") as vec_in:
                all_vec.append(pickle.load(vec_in))

        # Export the file information
        merged_vec = torch.cat(all_vec)
        # Convert explicitly so tensors that live on a GPU or track gradients
        # can still be written by NumPy.
        np.save(str(DATA_DIR / (export_name + ".npy")),
                merged_vec.detach().cpu().numpy())

        names_path = DATA_DIR / ("files_" + export_name + ".txt")
        with open(str(names_path), "w", encoding="utf-8") as f_out:
            f_out.write("\n".join(p.name for p in vec_files))
# Run the merge only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment