Skip to content

Instantly share code, notes, and snippets.

@tmfink
Created July 2, 2018 14:41
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tmfink/de3683c1969c3d3b7b05b5fa141e3667 to your computer and use it in GitHub Desktop.
Save tmfink/de3683c1969c3d3b7b05b5fa141e3667 to your computer and use it in GitHub Desktop.
Capstone C library source generators
#!/bin/sh
# Generate *Module.h include files for Capstone C library
#
# Run from the directory that contains arch/. For every entry arch/<ARCH>
# holding exactly one *Module.c, the lines of that file matching
# global_init, _option or _destroy are copied (with ';' appended) into an
# include-guarded *Module.h next to it. The header is also echoed to stdout.

# arch_dirs_cs3 is the same listing as arch_dirs_cs4 without M68K. It is not
# used by the loop below (presumably kept for running against a v3 tree).
arch_dirs_cs3=$(ls -d arch/* | grep -v M68K)
arch_dirs_cs4=$(ls -d arch/*)

# Word splitting of the listing is intentional: one path per iteration.
for ARCH in $arch_dirs_cs4;
do
    ARCH_N=$(basename "$ARCH")
    ARCH_GUARD=$(echo "CS_${ARCH_N}_MODULE_H" | tr '[:lower:]' '[:upper:]')

    echo
    echo "$ARCH"
    echo "$ARCH_N"

    # Resolve the glob once. Anything other than exactly one existing file
    # would make the derived header name wrong (or clobber a .c file), so skip.
    set -- "$ARCH"/*Module.c
    if [ "$#" -ne 1 ] || [ ! -f "$1" ]; then
        echo "Skipping $ARCH: expected exactly one *Module.c" >&2
        continue
    fi
    MODULE_C=$1
    # Swap only the trailing ".c" for ".h".
    MODULE_H=${MODULE_C%.c}.h

    {
        cat <<EOF
/* Capstone Disassembly Engine */
/* By Travis Finkenauer <tmfinken@gmail.com>, 2018 */
#ifndef $ARCH_GUARD
#define $ARCH_GUARD
#include "../../utils.h"
EOF
        # Turn each matching definition line into a prototype.
        grep -hE 'global_init|_option|_destroy' "$MODULE_C" | sed 's/$/;/'
        cat <<EOF
#endif
EOF
    } | tee "$MODULE_H"
done
#!/usr/bin/env python
# Capstone Disassembly Engine
# By Travis Finkenauer <tmfinken@gmail.com>, 2018
"""Generates global Capstone variable declarations"""
from __future__ import print_function
import os
import sys
import time
# Architecture table for Capstone v4.
# Each entry is a 4-tuple:
#   (feature macro, cs_arch enumerator, C symbol prefix, list of allowed modes)
ARCH_INFO_CS4 = [
    (
        "CAPSTONE_HAS_ARM", "CS_ARCH_ARM", "ARM",
        [
            "CS_MODE_LITTLE_ENDIAN",
            "CS_MODE_ARM",
            "CS_MODE_V8",
            "CS_MODE_MCLASS",
            "CS_MODE_THUMB",
            "CS_MODE_BIG_ENDIAN",
        ],
    ),
    (
        "CAPSTONE_HAS_ARM64", "CS_ARCH_ARM64", "AArch64",
        [
            "CS_MODE_LITTLE_ENDIAN",
            "CS_MODE_ARM",
            "CS_MODE_BIG_ENDIAN",
        ],
    ),
    (
        "CAPSTONE_HAS_MIPS", "CS_ARCH_MIPS", "Mips",
        [
            "CS_MODE_LITTLE_ENDIAN",
            "CS_MODE_32",
            "CS_MODE_64",
            "CS_MODE_MICRO",
            "CS_MODE_MIPS32R6",
            "CS_MODE_BIG_ENDIAN",
            "CS_MODE_MIPS2",
            "CS_MODE_MIPS3",
        ],
    ),
    (
        "CAPSTONE_HAS_X86", "CS_ARCH_X86", "X86",
        [
            "CS_MODE_LITTLE_ENDIAN",
            "CS_MODE_32",
            "CS_MODE_64",
            "CS_MODE_16",
        ],
    ),
    (
        "CAPSTONE_HAS_POWERPC", "CS_ARCH_PPC", "PPC",
        [
            "CS_MODE_LITTLE_ENDIAN",
            "CS_MODE_32",
            "CS_MODE_64",
            "CS_MODE_BIG_ENDIAN",
            "CS_MODE_QPX",
        ],
    ),
    (
        "CAPSTONE_HAS_SPARC", "CS_ARCH_SPARC", "Sparc",
        ["CS_MODE_BIG_ENDIAN", "CS_MODE_V9"],
    ),
    (
        "CAPSTONE_HAS_SYSZ", "CS_ARCH_SYSZ", "SystemZ",
        ["CS_MODE_BIG_ENDIAN"],
    ),
    (
        "CAPSTONE_HAS_XCORE", "CS_ARCH_XCORE", "XCore",
        ["CS_MODE_BIG_ENDIAN"],
    ),
    (
        "CAPSTONE_HAS_M68K", "CS_ARCH_M68K", "M68K",
        [
            "CS_MODE_BIG_ENDIAN",
            "CS_MODE_M68K_000",
            "CS_MODE_M68K_010",
            "CS_MODE_M68K_020",
            "CS_MODE_M68K_030",
            "CS_MODE_M68K_040",
            "CS_MODE_M68K_060",
        ],
    ),
    (
        "CAPSTONE_HAS_TMS320C64X", "CS_ARCH_TMS320C64X", "TMS320C64x",
        ["CS_MODE_BIG_ENDIAN"],
    ),
    (
        "CAPSTONE_HAS_M680X", "CS_ARCH_M680X", "M680X",
        # 8-bit, so there is no byte-endianness
        [
            "CS_MODE_M680X_6301",
            "CS_MODE_M680X_6309",
            "CS_MODE_M680X_6800",
            "CS_MODE_M680X_6801",
            "CS_MODE_M680X_6805",
            "CS_MODE_M680X_6808",
            "CS_MODE_M680X_6809",
            "CS_MODE_M680X_6811",
            "CS_MODE_M680X_CPU12",
            "CS_MODE_M680X_HCS08",
        ],
    ),
    (
        "CAPSTONE_HAS_EVM", "CS_ARCH_EVM", "EVM",
        # No Ethereum mode
        [],
    ),
]
# Architecture table for Capstone v3.
# Each entry is a 4-tuple:
#   (feature macro, cs_arch enumerator, C symbol prefix, list of allowed modes)
ARCH_INFO_CS3 = [
    (
        "CAPSTONE_HAS_ARM", "CS_ARCH_ARM", "ARM",
        [
            "CS_MODE_LITTLE_ENDIAN",
            "CS_MODE_ARM",
            "CS_MODE_V8",
            "CS_MODE_MCLASS",
            "CS_MODE_THUMB",
            "CS_MODE_BIG_ENDIAN",
        ],
    ),
    (
        "CAPSTONE_HAS_ARM64", "CS_ARCH_ARM64", "AArch64",
        [
            "CS_MODE_LITTLE_ENDIAN",
            "CS_MODE_ARM",
            "CS_MODE_BIG_ENDIAN",
        ],
    ),
    (
        "CAPSTONE_HAS_MIPS", "CS_ARCH_MIPS", "Mips",
        [
            "CS_MODE_LITTLE_ENDIAN",
            "CS_MODE_32",
            "CS_MODE_64",
            "CS_MODE_MICRO",
            "CS_MODE_MIPS32R6",
            "CS_MODE_MIPSGP64",
            "CS_MODE_BIG_ENDIAN",
        ],
    ),
    (
        "CAPSTONE_HAS_X86", "CS_ARCH_X86", "X86",
        [
            "CS_MODE_LITTLE_ENDIAN",
            "CS_MODE_32",
            "CS_MODE_64",
            "CS_MODE_16",
        ],
    ),
    (
        "CAPSTONE_HAS_POWERPC", "CS_ARCH_PPC", "PPC",
        [
            "CS_MODE_LITTLE_ENDIAN",
            "CS_MODE_32",
            "CS_MODE_64",
            "CS_MODE_BIG_ENDIAN",
        ],
    ),
    (
        "CAPSTONE_HAS_SPARC", "CS_ARCH_SPARC", "Sparc",
        ["CS_MODE_BIG_ENDIAN", "CS_MODE_V9"],
    ),
    (
        "CAPSTONE_HAS_SYSZ", "CS_ARCH_SYSZ", "SystemZ",
        ["CS_MODE_BIG_ENDIAN"],
    ),
    (
        "CAPSTONE_HAS_XCORE", "CS_ARCH_XCORE", "XCore",
        ["CS_MODE_BIG_ENDIAN"],
    ),
]
# File name of the generated C source; with --update it is overwritten in the
# directory that contains this script.
GLOBALS_FILE = 'cs_arch_globals.c'
def interpolate_format(format_, arch_info, modes_formatter=None):
    """Print ``format_`` once per architecture entry, stripped of outer whitespace.

    ``format_`` is a ``str.format`` template that may reference ``{cs_has}``,
    ``{cs_arch}``, ``{pretty_name}`` and ``{allowed_modes}``.

    ``arch_info`` is an iterable of 4-tuples
    ``(cs_has, cs_arch, pretty_name, allowed_modes)``.

    ``modes_formatter`` is an optional callable that turns the ``allowed_modes``
    element into the text substituted for ``{allowed_modes}``. When omitted,
    an empty string is substituted.
    """
    def _blank(_modes):
        return ""

    render_modes = _blank if modes_formatter is None else modes_formatter

    for entry in arch_info:
        has_macro, arch_enum, symbol_prefix, modes = entry
        rendered = format_.format(
            cs_has=has_macro,
            cs_arch=arch_enum,
            pretty_name=symbol_prefix,
            allowed_modes=render_modes(modes),
        )
        print(rendered.strip())
def _callback_entry_fmt(suffix):
    """Return the table-entry template for a ``{pretty_name}_<suffix>`` callback.

    The entry falls back to ``NULL`` when the ``{cs_has}`` macro is undefined.
    """
    return (
        "\n"
        "#ifdef {cs_has}\n"
        "\t{pretty_name}_" + suffix + ",\n"
        "#else\n"
        "\tNULL,\n"
        "#endif\n"
    )


# str.format templates, one C array element (or expression term) per
# architecture. Placeholders: {cs_has}, {cs_arch}, {pretty_name},
# {allowed_modes}.

# Entry of the cs_arch_init table.
ARCH_INIT_FMT = _callback_entry_fmt("global_init")

# Entry of the cs_arch_option table.
ARCH_OPTION_FMT = _callback_entry_fmt("option")

# Entry of the cs_arch_destroy table.
ARCH_DESTROY_FMT = _callback_entry_fmt("destroy")

# Entry of the disallowed-mode mask table; 0 when the arch is compiled out.
ALLOWED_MODES_FMT = (
    "\n"
    "#ifdef {cs_has}\n"
    "\t{allowed_modes},\n"
    "#else\n"
    "\t0,\n"
    "#endif\n"
)

# One OR-term of the all_arch bitmask expression.
ALL_ARCH_FMT = (
    "\n"
    "#ifdef {cs_has}\n"
    "\t| (1 << {cs_arch})\n"
    "#endif\n"
)
def _find_module_headers(source_root):
    """Return the sorted include paths of every ``*Module.h`` under ``arch/``.

    Paths are expressed relative to ``source_root`` and use ``/`` separators
    so they can be placed directly inside ``#include "..."``.

    Raises ``RuntimeError`` if one directory holds more than one ``*Module.h``.
    """
    module_headers = []
    arch_dir = os.path.join(source_root, 'arch')
    for base_d, _, files in os.walk(arch_dir):
        candidates = [f for f in files if f.endswith('Module.h')]
        if not candidates:
            continue
        if len(candidates) > 1:
            raise RuntimeError("Found >1 *Module.h")
        # relpath drops exactly the source_root prefix; str.lstrip('./') would
        # strip a character set and mangle absolute or '../' paths.
        rel_dir = os.path.relpath(base_d, source_root)
        header = os.path.join(rel_dir, candidates[0])
        module_headers.append(header.replace(os.sep, '/'))
    module_headers.sort()
    return module_headers


def _format_disallowed_modes(masks):
    """Format ``masks`` as the C expression ``~(A | B | ...)``.

    An empty ``masks`` yields ``'0'``. The expression is wrapped onto
    continuation lines (joined with newline + tab) once a line would exceed
    80 characters.
    """
    if not masks:
        return '0'

    max_line_len = 80
    lines = []
    curr_line = '~('
    for index, mask in enumerate(masks):
        addition = mask if index == 0 else ' | ' + mask
        if len(curr_line) + len(addition) > max_line_len:
            # Wrap: continuation lines start with one space, then '| MASK'.
            lines.append(curr_line)
            curr_line = ' '
            addition = addition.lstrip()
        curr_line += addition
    lines.append(curr_line)
    return '\n\t'.join(lines) + ')'


def print_global_def(cs_version):
    """Print the C definitions of Capstone's per-architecture global tables.

    Output goes to ``sys.stdout`` and consists of, in order: the
    ``#include`` lines for every ``arch/**/*Module.h`` found next to this
    script, the ``cs_arch_init`` and ``cs_arch_option`` tables, the
    ``cs_arch_destroy`` table (only for ``cs_version == 3``), the
    ``cs_arch_disallowed_mode_mask`` table and the ``all_arch`` bitmask.

    ``cs_version`` selects the architecture table: 3 or 4.

    Raises ``ValueError`` for any other ``cs_version`` and ``RuntimeError``
    if a directory under ``arch/`` contains more than one ``*Module.h``.
    """
    if cs_version == 3:
        arch_info = ARCH_INFO_CS3
    elif cs_version == 4:
        arch_info = ARCH_INFO_CS4
    else:
        raise ValueError("Unsupported cs_version")

    # Resolve against the script's own location so the include paths do not
    # depend on how (or from where) the script was invoked.
    source_root = os.path.dirname(os.path.abspath(__file__))
    module_headers = _find_module_headers(source_root)
    print('\n'.join('#include "{}"'.format(header) for header in module_headers))

    print()
    print('// constructor initialization for all archs')
    print('static cs_err (*cs_arch_init[MAX_ARCH])(cs_struct *) = {')
    interpolate_format(ARCH_INIT_FMT, arch_info)
    print("};")

    print()
    print('// support cs_option() for all archs')
    print('static cs_err (*cs_arch_option[MAX_ARCH]) (cs_struct *, cs_opt_type, size_t value) = {')
    interpolate_format(ARCH_OPTION_FMT, arch_info)
    print("};")

    if cs_version == 3:
        print()
        print('void (*cs_arch_destroy[MAX_ARCH]) (cs_struct *) = {')
        interpolate_format(ARCH_DESTROY_FMT, arch_info)
        print("};")

    print()
    print('// bitmask for finding disallowed modes for an arch:')
    print('// to be called in cs_open()/cs_option()')
    print('static cs_mode cs_arch_disallowed_mode_mask[MAX_ARCH] = {')
    interpolate_format(ALLOWED_MODES_FMT, arch_info,
                       modes_formatter=_format_disallowed_modes)
    print("};")

    print()
    print('// bitmask of enabled architectures')
    print('static uint32_t all_arch = 0')
    # ALL_ARCH_FMT only references {cs_has} and {cs_arch}.
    arch_fmts = [
        ALL_ARCH_FMT.format(cs_has=cs_has, cs_arch=cs_arch).strip()
        for cs_has, cs_arch, _, _ in arch_info
    ]
    print('\n'.join(arch_fmts))
    print(";")
def main():
    """Parse command-line arguments and emit the generated C definitions.

    ``--cs-version/-v`` (required, 3 or 4) selects the architecture table.
    Without ``--update`` the output is printed to stdout. With ``-u/--update``
    it is written to ``GLOBALS_FILE`` in this script's directory, replacing
    any existing content.
    """
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('-u', '--update', action='store_true',
                        help='Overwrite ' + GLOBALS_FILE)
    parser.add_argument('--cs-version', '-v', type=int, choices=[3, 4],
                        required=True)
    args = parser.parse_args()

    if not args.update:
        print_global_def(args.cs_version)
        return

    globals_file = os.path.join(os.path.dirname(__file__), GLOBALS_FILE)
    saved_stdout = sys.stdout
    # print_global_def writes through print(), so stdout is swapped for the
    # duration of the call. The file is closed and stdout restored even if
    # generation raises.
    with open(globals_file, 'w') as out:
        sys.stdout = out
        try:
            print_global_def(args.cs_version)
        finally:
            sys.stdout = saved_stdout


if __name__ == '__main__':
    main()
@tmfink
Copy link
Author

tmfink commented Jul 2, 2018

I used these scripts to generate source files to declare global variables:

Next branch (v4) PR: capstone-engine/capstone#1186
Master branch (v3) PR: capstone-engine/capstone#1171

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment