anthonykasza/encoder.py

## encoder.py
# A script which single-byte XOR encodes an input file

import sys

import os

# Path of the file to encode, taken from the first command-line argument.
ifn = sys.argv[1]
# Read the whole input up front; the context manager closes the handle
# as soon as the read finishes instead of leaving it to the garbage collector.
with open(ifn, "rb") as ifh:
  data = ifh.read()

# One output file is written per key. Each output sits next to the input and
# is named by prepending a growing run of "a" characters ("a", "aa", ...) to
# the input's base name. Splitting off the directory first keeps the prefix
# on the file name, so inputs given with a directory component also work.
in_dir, in_name = os.path.split(ifn)
c = "a"
for key in [0xaa, 0xab, 0x57, 0x07, 0x13]:
  ofn = os.path.join(in_dir, c + in_name)
  c += "a"
  print(ofn)
  with open(ofn, "wb") as ofh:
    # XOR every input byte with the single-byte key.
    ofh.write(bytes(b ^ key for b in data))

## poc.py
## Single-byte XOR magic sequence proof-of-concept
#  This script demonstrates that magic strings are identifiable
#  based on their sequences regardless of any single-byte XOR encoding

# XOR-ing two neighbouring encoded bytes cancels the shared key:
# (a ^ k) ^ (b ^ k) == a ^ b. Every row printed for one plaintext therefore
# shows the same list of differences, whatever the key.
for plaintext in ["cannot be run", "!This", "in DOS mode"]:
  for key in [0xaa, 0xab, 0x10, 0x58, 0x77, 0x01]:
    # Encode the whole string with this key, then diff each adjacent pair.
    encoded = [ord(ch) ^ key for ch in plaintext]
    diffs = [left ^ right for left, right in zip(encoded, encoded[1:])]
    print(plaintext, hex(key), diffs)
  # Reference row: the same differences taken from the unencoded characters.
  plain_codes = [ord(ch) for ch in plaintext]
  print(plaintext, hex(0x00), [left ^ right for left, right in zip(plain_codes, plain_codes[1:])])

## scanner.py
# This script finds common PE strings in single-byte XOR-encoded data

import sys


###
# 1. calculate sequence differences in magic strings
# Magic strings to look for. Every entry needs its own trailing comma:
# adjacent string literals with no comma between them are concatenated by
# Python, which previously fused "run under Win32" and "!This" into one entry.
pe_strings = [
  "Borland Edition",
  "This program",
  "run under Win32",
  "!This",
  "cannot be run",
  "in DOS mode",
  "kernel32",
]
# Map each magic string to the list of XORs of its adjacent characters
# (one element fewer than the string has characters).
pe_seqs = {}
for string in pe_strings:
  pe_seqs[string] = [ord(left) ^ ord(right) for left, right in zip(string, string[1:])]


###
# 2. calculate sequence differences in input
# Read the file named by the first command-line argument as raw bytes.
with open(sys.argv[1], "rb") as fh:
  input_file = fh.read()
# XOR each byte with the byte that follows it; the result has one element
# fewer than the input (and is empty for inputs shorter than two bytes).
file_seqs = [left ^ right for left, right in zip(input_file, input_file[1:])]


###
# 3. search for magic subsequences in input
def isSubSequence(str1, str2):
  """Return True when ``str1`` occurs as a contiguous run inside ``str2``.

  Both arguments may be any indexable sequences with a length (lists of
  ints, strings, bytes); elements are compared with ``==``.

  The earlier implementation (a greedy scan adapted from
  https://www.geeksforgeeks.org/given-two-strings-find-first-string-subsequence-second/)
  accepted matches with arbitrary gaps between elements. An XOR-difference
  signature only identifies a string when its elements are adjacent, so the
  gap-tolerant test reported magic strings that were not actually present.
  The name is kept so existing callers continue to work.

  An empty ``str1`` is always considered present, as before.
  """
  m = len(str1)
  n = len(str2)
  if m == 0:
    return True
  first = str1[0]
  # Only start positions that leave room for all m elements are tried;
  # when str1 is longer than str2 the range is empty and the result is False.
  for start in range(n - m + 1):
    # Cheap first-element test before comparing the rest of the window.
    if str2[start] != first:
      continue
    if all(str2[start + offset] == str1[offset] for offset in range(1, m)):
      return True
  return False

# Print one "Found" line for every magic string whose difference sequence
# isSubSequence reports as present in the input's difference sequence.
for string in pe_seqs:
  magic_seq = pe_seqs[string]
  if not isSubSequence(magic_seq, file_seqs):
    continue
  print("Found: {}".format(string))
	# A script which single-byte XOR encodes an input file

	import sys

	import os

	# Path of the file to encode, taken from the first command-line argument.
	ifn = sys.argv[1]
	# Read the whole input up front; the context manager closes the handle
	# as soon as the read finishes instead of leaving it to the garbage collector.
	with open(ifn, "rb") as ifh:
	  data = ifh.read()

	# One output file is written per key. Each output sits next to the input and
	# is named by prepending a growing run of "a" characters ("a", "aa", ...) to
	# the input's base name. Splitting off the directory first keeps the prefix
	# on the file name, so inputs given with a directory component also work.
	in_dir, in_name = os.path.split(ifn)
	c = "a"
	for key in [0xaa, 0xab, 0x57, 0x07, 0x13]:
	  ofn = os.path.join(in_dir, c + in_name)
	  c += "a"
	  print(ofn)
	  with open(ofn, "wb") as ofh:
	    # XOR every input byte with the single-byte key.
	    ofh.write(bytes(b ^ key for b in data))
	## Single-byte XOR magic sequence proof-of-concept
	# This script demonstrates that magic strings are identifiable
	# based on their sequences regardless of any single-byte XOR encoding

	# XOR-ing two neighbouring encoded bytes cancels the shared key:
	# (a ^ k) ^ (b ^ k) == a ^ b. Every row printed for one plaintext therefore
	# shows the same list of differences, whatever the key.
	for plaintext in ["cannot be run", "!This", "in DOS mode"]:
	  for key in [0xaa, 0xab, 0x10, 0x58, 0x77, 0x01]:
	    # Encode the whole string with this key, then diff each adjacent pair.
	    encoded = [ord(ch) ^ key for ch in plaintext]
	    diffs = [left ^ right for left, right in zip(encoded, encoded[1:])]
	    print(plaintext, hex(key), diffs)
	  # Reference row: the same differences taken from the unencoded characters.
	  plain_codes = [ord(ch) for ch in plaintext]
	  print(plaintext, hex(0x00), [left ^ right for left, right in zip(plain_codes, plain_codes[1:])])
	# This script finds common PE strings in single-byte XOR-encoded data

	import sys


	###
	# 1. calculate sequence differences in magic strings
	# Magic strings to look for. Every entry needs its own trailing comma:
	# adjacent string literals with no comma between them are concatenated by
	# Python, which previously fused "run under Win32" and "!This" into one entry.
	pe_strings = [
	  "Borland Edition",
	  "This program",
	  "run under Win32",
	  "!This",
	  "cannot be run",
	  "in DOS mode",
	  "kernel32",
	]
	# Map each magic string to the list of XORs of its adjacent characters
	# (one element fewer than the string has characters).
	pe_seqs = {}
	for string in pe_strings:
	  pe_seqs[string] = [ord(left) ^ ord(right) for left, right in zip(string, string[1:])]



	###
	# 2. calculate sequence differences in input
	# Read the file named by the first command-line argument as raw bytes.
	with open(sys.argv[1], "rb") as fh:
	  input_file = fh.read()
	# XOR each byte with the byte that follows it; the result has one element
	# fewer than the input (and is empty for inputs shorter than two bytes).
	file_seqs = [left ^ right for left, right in zip(input_file, input_file[1:])]



	###
	# 3. search for magic subsequences in input
	def isSubSequence(str1, str2):
	  """Return True when ``str1`` occurs as a contiguous run inside ``str2``.

	  Both arguments may be any indexable sequences with a length (lists of
	  ints, strings, bytes); elements are compared with ``==``.

	  The earlier implementation (a greedy scan adapted from
	  https://www.geeksforgeeks.org/given-two-strings-find-first-string-subsequence-second/)
	  accepted matches with arbitrary gaps between elements. An XOR-difference
	  signature only identifies a string when its elements are adjacent, so the
	  gap-tolerant test reported magic strings that were not actually present.
	  The name is kept so existing callers continue to work.

	  An empty ``str1`` is always considered present, as before.
	  """
	  m = len(str1)
	  n = len(str2)
	  if m == 0:
	    return True
	  first = str1[0]
	  # Only start positions that leave room for all m elements are tried;
	  # when str1 is longer than str2 the range is empty and the result is False.
	  for start in range(n - m + 1):
	    # Cheap first-element test before comparing the rest of the window.
	    if str2[start] != first:
	      continue
	    if all(str2[start + offset] == str1[offset] for offset in range(1, m)):
	      return True
	  return False

	# Print one "Found" line for every magic string whose difference sequence
	# isSubSequence reports as present in the input's difference sequence.
	for string, magic_seq in pe_seqs.items():
	  if isSubSequence(magic_seq, file_seqs):
	    print("Found: {}".format(string))