Skip to content

Instantly share code, notes, and snippets.

@ZaydH
Created March 23, 2021 08:17
Show Gist options
  • Save ZaydH/7d4d9b281385f8bff6ba823fc65684a4 to your computer and use it in GitHub Desktop.
Save ZaydH/7d4d9b281385f8bff6ba823fc65684a4 to your computer and use it in GitHub Desktop.
import sys
import pickle
from pathlib import Path
import torch
import numpy as np
from tqdm import tqdm
# Output directory (relative to the current working directory) that receives
# the merged ``<label>.npy`` arrays and the ``files_<label>.txt`` name lists.
DATA_DIR = Path("data")
def main():
    """Merge per-file pickled vectors into one array per class.

    Expects exactly two command-line arguments: the folder holding the
    malware vectors followed by the folder holding the benign vectors.

    For each folder, every regular file is unpickled, the loaded objects are
    concatenated with ``torch.cat`` and the result is saved to
    ``DATA_DIR/<label>.npy`` (``label`` is ``malicious`` or ``benign``).  The
    source file names are written one per line to
    ``DATA_DIR/files_<label>.txt`` in the same order as the concatenated
    vectors.

    Exits with status 1 after printing the usage line when the argument
    count is wrong, and exits with an explanatory message when a folder is
    missing or contains no files.
    """
    if len(sys.argv) != 3:
        print(f"Command Args: {sys.argv[0]} <MalwareFolder> <BenignFolder>")
        sys.exit(1)

    # Pair every folder with its label explicitly; comparing paths to derive
    # the label would mislabel the second pass if both arguments are equal.
    labelled_dirs = (
        ("malicious", Path(sys.argv[1])),
        ("benign", Path(sys.argv[2])),
    )

    # Validate both inputs up front so no partial output is written.
    for _, vector_dir in labelled_dirs:
        if not vector_dir.is_dir():
            sys.exit(f"Not a directory: {vector_dir}")

    DATA_DIR.mkdir(parents=True, exist_ok=True)

    for export_name, vector_dir in labelled_dirs:
        # List the folder once, skipping sub-directories, and sort so the
        # row order of the exported array is reproducible between runs.
        vec_files = sorted(p for p in vector_dir.iterdir() if p.is_file())
        if not vec_files:
            sys.exit(f"No vector files found in {vector_dir}")

        all_vec, file_names = [], []
        desc = export_name + " processing"
        for vec_file in tqdm(vec_files, ncols=80, file=sys.stdout, desc=desc):
            file_names.append(vec_file.name)
            # NOTE: unpickling can execute arbitrary code; only point this
            # script at vector files from a trusted source.
            with open(vec_file, "rb") as vec_in:
                all_vec.append(pickle.load(vec_in))

        # Export the file information
        merged_vec = torch.cat(all_vec)
        # Convert explicitly so tensors that track gradients or live on an
        # accelerator can still be written by NumPy.
        np.save(
            str(DATA_DIR / (export_name + ".npy")),
            merged_vec.detach().cpu().numpy(),
        )

        names_path = DATA_DIR / ("files_" + export_name + ".txt")
        names_path.write_text("\n".join(file_names), encoding="utf-8")
# Run the merge only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment