Skip to content

Instantly share code, notes, and snippets.

@dmknght
Created June 22, 2023 21:09
Show Gist options
  • Save dmknght/b6e6940cd767d98481bbf625c5b558d8 to your computer and use it in GitHub Desktop.
Save dmknght/b6e6940cd767d98481bbf625c5b558d8 to your computer and use it in GitHub Desktop.
Use rizin / radare2 to collect the function calls made by each function in a binary, then generate a hash of that call list. The point is to find code reuse across binaries.
import rzpipe # Using rizin framework. Replace with r2pipe for radare2
import json
import hashlib
import os
class BinaryMetadata:
    """
    Open a binary through rzpipe and checksum the call list of each function.

    The pipe is opened and the binary is analysed in the constructor. Call
    close() when done, or use the instance in a `with` statement, so the
    pipe is shut down instead of being left open.
    """

    def __init__(self, path: str) -> None:
        """
        Open `path` with rzpipe and run the initial analysis.
        """
        self.pipe = rzpipe.open(path)
        self.bin_path = path
        self.analysis_funcs()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
        # Returning False lets an exception raised in the with-body propagate.
        return False

    def close(self) -> None:
        """
        Shut down the pipe. Calling it again afterwards does nothing.
        """
        pipe = getattr(self, "pipe", None)
        self.pipe = None
        if pipe is not None:
            pipe.quit()

    def analysis_funcs(self) -> None:
        """
        Analyse the current binary by sending the `aac` command to the pipe.

        NOTE(review): `aac` is described by rizin as "analyze function
        calls"; confirm against the installed rizin version.
        """
        self.pipe.cmd("aac")

    def get_function_sum(self):
        """
        Yield a (checksum, function name) pair for each entry of `aflmj`.

        The checksum is the MD5 hex digest of the called-function names,
        sorted and joined with newlines, so two functions whose sorted call
        lists are equal get the same checksum regardless of call order.
        Nothing is yielded when the command returns an empty reply.
        """
        data = self.pipe.cmd("aflmj")
        # An empty reply is not valid JSON; treat it as "no functions" rather
        # than letting json.loads raise and abort the caller's whole scan.
        if not data or not data.strip():
            return
        for func in json.loads(data) or []:
            # Tolerate entries without a "calls" key: hash them like an
            # empty call list instead of raising KeyError.
            callee_names = sorted(call["name"] for call in func.get("calls", []))
            digest = hashlib.md5("\n".join(callee_names).encode()).hexdigest()
            yield digest, func["name"]
# Call-list checksums to look for when the caller supplies none. They are
# the values the original scan was hard-coded with.
_KNOWN_CALL_CHECKSUMS = frozenset((
    "5caaee57766e657a9662d01a45a9c2ec",
    "795c7294ae8d72d57ea5756867dc86a4",
    "c59425f0f7e5192422d393b28cfb99ea",
    "cdfc6520ff61969c374564240a120c2e",
))


def find_simi(dir: str, signatures=None):
    """
    Scan every file under `dir` and report functions with a known checksum.

    Each file found by os.walk is opened with BinaryMetadata; for every
    function whose call-list checksum is in `signatures`, the function name,
    checksum and file path are printed.

    :param dir: root directory to walk recursively. The name shadows the
        builtin but is kept so existing callers keep working.
    :param signatures: optional iterable of MD5 hex digests to match. When
        None, the built-in set of known checksums is used.
    """
    wanted = _KNOWN_CALL_CHECKSUMS if signatures is None else frozenset(signatures)
    for root, _dirs, files in os.walk(dir):
        for file in files:
            # os.walk yields directory paths without a trailing separator, so
            # plain string concatenation builds wrong paths for nested folders.
            path = os.path.join(root, file)
            analysis = BinaryMetadata(path)
            try:
                for checksum, name in analysis.get_function_sum():
                    if checksum in wanted:
                        print(f"Detected func call: {name}")
                        print(f"Checksum: {checksum}")
                        print(f"File: {path}")
            finally:
                # One pipe is opened per file; shut it down before moving on
                # so a large directory does not leave a pipe open per file.
                analysis.pipe.quit()
# Single-sample experiment, kept for manual runs.
# Recorded output: Checksum: 5caaee57766e657a9662d01a45a9c2ec Func: sym.processCmd
# x = BinaryMetadata("/home/dmknght/Desktop/MalwareLab/LinuxMalwareDetected/eef8b97feeca17f7aa0037e98b4d53fc0f07dc8fe80b195c26ef087ab4334955_detected_detected")
# for checksum, name in x.get_function_sum():
#     print(f"Md5: {checksum} Func: {name}")

# Directory of samples scanned for the known call-list checksums.
MALWARE_SAMPLES_DIR = "/home/dmknght/Desktop/MalwareLab/LinuxMalwareDetected/"
print("Find if we can find similar functions in malicious samples")
find_simi(MALWARE_SAMPLES_DIR)

# Optional cross-check against system binaries (disabled):
# print("\nFind if we have simil signature in whitelist dir")
# find_simi("/usr/bin/")
@dmknght
Copy link
Author

dmknght commented Jun 22, 2023

The hashes in the tuple in find_simi were selected manually by the researcher, assuming they are unique to the malware variant and could be re-used.

@dmknght
Copy link
Author

dmknght commented Jun 22, 2023

Result: 403 reused functions were detected in 88 samples out of 254 files.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment