Skip to content

Instantly share code, notes, and snippets.

@HookedBehemoth
Created September 22, 2022 12:09
Show Gist options
  • Save HookedBehemoth/ed0725ed571e95ca85eb1dbda8823853 to your computer and use it in GitHub Desktop.
Save HookedBehemoth/ed0725ed571e95ca85eb1dbda8823853 to your computer and use it in GitHub Desktop.
scanmem avx2 accelerated initial search patch
diff --git a/ptrace.c b/ptrace.c
index 5e71d78..7fd3545 100644
--- a/ptrace.c
+++ b/ptrace.c
@@ -46,6 +46,8 @@
#include <stdbool.h>
#include <limits.h>
#include <fcntl.h>
+#include <immintrin.h>
+#include <avx2intrin.h>
// dirty hack for FreeBSD
#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
@@ -487,9 +489,15 @@ bool sm_checkmatches(globals_t *vars,
}
+#include <time.h>
+#include "endianness.h"
+
/* sm_searchregions() performs an initial search of the process for values matching `uservalue` */
bool sm_searchregions(globals_t *vars, scan_match_type_t match_type, const uservalue_t *uservalue)
{
+ clock_t t;
+ t = clock();
+
matches_and_old_values_swath *writing_swath_index;
int required_extra_bytes_to_record = 0;
unsigned long total_size = 0;
@@ -498,6 +506,69 @@ bool sm_searchregions(globals_t *vars, scan_match_type_t match_type, const userv
region_t *r;
unsigned long total_scan_bytes = 0;
unsigned char *data = NULL;
+ unsigned stride = 0;
+ __m256i mask = {};
+ __m256 maskf = {};
+ __m256d maskd = {};
+ unsigned count = 0;
+
+ switch (vars->options.scan_data_type) {
+ case INTEGER8:
+ if (uservalue->flags & flag_s8b) {
+ mask = _mm256_set1_epi8(uservalue->int8_value);
+ } else {
+ mask = _mm256_set1_epi8(uservalue->uint8_value);
+ }
+ stride = 1;
+ count = 32;
+ break;
+ case INTEGER16:
+ if (uservalue->flags & flag_s16b) {
+ mask = _mm256_set1_epi16(
+ vars->options.reverse_endianness ? swap_bytes16(uservalue->int16_value) : uservalue->int16_value);
+ } else {
+ mask = _mm256_set1_epi16(
+ vars->options.reverse_endianness ? swap_bytes16(uservalue->uint16_value) : uservalue->uint16_value);
+ }
+ stride = 2;
+ count = 16;
+ break;
+ case INTEGER32:
+ if (uservalue->flags & flag_s32b) {
+ mask = _mm256_set1_epi32(
+ vars->options.reverse_endianness ? swap_bytes32(uservalue->int32_value) : uservalue->int32_value);
+ } else {
+ mask = _mm256_set1_epi32(
+ vars->options.reverse_endianness ? swap_bytes32(uservalue->uint32_value) : uservalue->uint32_value);
+ }
+ stride = 4;
+ count = 8;
+ break;
+ case INTEGER64:
+ if (uservalue->flags & flag_s64b) {
+ mask = _mm256_set1_epi64x(
+ vars->options.reverse_endianness ? swap_bytes64(uservalue->int64_value) : uservalue->int64_value);
+ } else {
+ mask = _mm256_set1_epi64x(
+ vars->options.reverse_endianness ? swap_bytes64(uservalue->uint64_value) : uservalue->uint64_value);
+ }
+ stride = 8;
+ count = 4;
+ break;
+ case FLOAT32:
+ maskf = _mm256_set1_ps(uservalue->float32_value);
+ stride = 4;
+ count = 8;
+ break;
+ case FLOAT64:
+ maskd = _mm256_set1_pd(uservalue->float64_value);
+ stride = 8;
+ count = 4;
+ break;
+ default:
+ printf("invalid scan_data_type: %hhu\n", vars->options.scan_data_type);
+ abort();
+ }
if (sm_choose_scanroutine(vars->options.scan_data_type, match_type, uservalue, vars->options.reverse_endianness) == false)
{
@@ -505,6 +576,10 @@ bool sm_searchregions(globals_t *vars, scan_match_type_t match_type, const userv
return false;
}
+
+ printf("%d\n", match_type);
+ assert(match_type == MATCHEQUALTO);
+
assert(sm_scan_routine);
/* stop and attach to the target */
@@ -587,7 +662,7 @@ bool sm_searchregions(globals_t *vars, scan_match_type_t match_type, const userv
size_t buffer_size = 0;
void *reg_pos = r->start;
const uint8_t *buf_pos = NULL;
- for ( ; ; memlength--, buffer_size--, reg_pos++, buf_pos++) {
+ for ( ; ; ) {
/* check if the buffer is finished (or we just started) */
if (UNLIKELY(buffer_size == 0)) {
@@ -626,6 +701,86 @@ bool sm_searchregions(globals_t *vars, scan_match_type_t match_type, const userv
buf_pos = data;
}
+ if (LIKELY(buffer_size >= 32 && ((uintptr_t)buf_pos & 0x1f) == 0)) {
+ int result = 0;
+ switch (vars->options.scan_data_type) {
+ case INTEGER8:
+ result = _mm256_movemask_epi8(
+ _mm256_cmpeq_epi8(_mm256_load_si256((const __m256i_u*)buf_pos), mask));
+ break;
+ case INTEGER16:
+ result = _mm256_movemask_epi8(
+ _mm256_cmpeq_epi16(_mm256_load_si256((const __m256i_u*)buf_pos), mask));
+ break;
+ case INTEGER32:
+ result = _mm256_movemask_epi8(
+ _mm256_cmpeq_epi32(_mm256_load_si256((const __m256i_u*)buf_pos), mask));
+ break;
+ case INTEGER64:
+ result = _mm256_movemask_epi8(
+ _mm256_cmpeq_epi64(_mm256_load_si256((const __m256i_u*)buf_pos), mask));
+ break;
+ /* Floating-point compares: ordered equality (_CMP_EQ_OQ), so NaN never matches. */
+ case FLOAT32: {
+     /* Eight 4-byte lanes compared against maskf. */
+     __m256 lanes = _mm256_load_ps((const float *)buf_pos);
+     if (vars->options.reverse_endianness) {
+         /* Reverse the bytes of each lane. _mm256_shuffle_epi8 indexes within each 128-bit half, so the pattern repeats. */
+         const __m256i swap32 = _mm256_set_epi8(
+             12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3,
+             12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3);
+         lanes = _mm256_castsi256_ps(
+             _mm256_shuffle_epi8(_mm256_castps_si256(lanes), swap32));
+     }
+     /* Byte-granular mask so that bit (i * stride) flags lane i, as with the integer compares. */
+     result = _mm256_movemask_epi8(
+         _mm256_castps_si256(_mm256_cmp_ps(lanes, maskf, _CMP_EQ_OQ)));
+     break;
+ }
+ case FLOAT64: {
+     /* Four 8-byte lanes compared against maskd. */
+     __m256d lanes = _mm256_load_pd((const double *)buf_pos);
+     if (vars->options.reverse_endianness) {
+         /* Reverse the bytes of each lane; the 16-entry pattern is given once per 128-bit half. */
+         const __m256i swap64 = _mm256_set_epi8(
+             8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7,
+             8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7);
+         lanes = _mm256_castsi256_pd(
+             _mm256_shuffle_epi8(_mm256_castpd_si256(lanes), swap64));
+     }
+     /* Byte-granular mask, for the same reason as in the FLOAT32 case. */
+     result = _mm256_movemask_epi8(
+         _mm256_castpd_si256(_mm256_cmp_pd(lanes, maskd, _CMP_EQ_OQ)));
+     break;
+ }
+ }
+ if (UNLIKELY(result)) {
+     /* Walk the lanes of this 32-byte chunk and hand every flagged one to add_element. */
+     for (unsigned lane = 0; lane < count; ++lane) {
+         const unsigned advance = lane * stride; /* byte offset of the lane inside the chunk */
+         const mem64_t *memory_ptr = (const mem64_t *)(buf_pos + advance);
+         const void *reg_ptr = reg_pos + advance;
+
+         /* Shift as unsigned: result is an int and may have its top bit set. */
+         if (((unsigned)result >> advance) & 1u) {
+             /* The lane's first byte is added with the user value's flags... */
+             writing_swath_index = add_element(&(vars->matches), writing_swath_index, reg_ptr,
+                 get_u8b(memory_ptr), uservalue->flags);
+             /* ...and each remaining byte of the lane with flags_empty. */
+             for (unsigned j = 1; j < stride; ++j) {
+                 const mem64_t *tail_ptr = (const mem64_t *)(buf_pos + advance + j);
+                 writing_swath_index = add_element(&(vars->matches), writing_swath_index, reg_ptr + j,
+                     get_u8b(tail_ptr), flags_empty);
+             }
+             ++vars->num_matches;
+         }
+     }
+ }
+
+ memlength -= 32;
+ buffer_size -= 32;
+ reg_pos += 32;
+ buf_pos += 32;
+ } else {
const mem64_t* memory_ptr = (mem64_t*)buf_pos;
unsigned int match_length;
match_flags checkflags;
@@ -652,6 +807,10 @@ bool sm_searchregions(globals_t *vars, scan_match_type_t match_type, const userv
--required_extra_bytes_to_record;
}
+ memlength--; buffer_size--; reg_pos++; buf_pos++;
+
+ }
+
}
free(data);
@@ -678,6 +837,11 @@ bool sm_searchregions(globals_t *vars, scan_match_type_t match_type, const userv
show_info("we currently have %ld matches.\n", vars->num_matches);
+ t = clock() - t;
+ double time_taken = ((double)t)/CLOCKS_PER_SEC; // in seconds
+
+ show_info("scan took %f seconds to execute \n", time_taken);
+
/* okay, detach */
return sm_detach(vars->target);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment