Skip to content

Instantly share code, notes, and snippets.

@vlad-ulmeanu01
Created April 11, 2024 16:53
Show Gist options
  • Save vlad-ulmeanu01/38f6c8d097805b17c5077aa4c914306b to your computer and use it in GitHub Desktop.
Save vlad-ulmeanu01/38f6c8d097805b17c5077aa4c914306b to your computer and use it in GitHub Desktop.
//--------------------------------------------------------------------------
// Copyright (C) 2014-2024 Cisco and/or its affiliates. All rights reserved.
// Copyright (C) 2002-2013 Sourcefire, Inc.
//
// This program is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License Version 2 as published
// by the Free Software Foundation. You may not use, modify or distribute
// this program under any other version of the GNU General Public License.
//
// This program is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// General Public License for more details.
//
// You should have received a copy of the GNU General Public License along
// with this program; if not, write to the Free Software Foundation, Inc.,
// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
//--------------------------------------------------------------------------
/*
* An abstracted interface to the Multi-Pattern Matching routines,
* thats why we're passing 'void *' objects around.
*
* Marc A Norton <mnorton@sourcefire.com>
*
* Updates:
* 3/06 - Added AC_BNFA search
*/
// lowmem.cc author Russ Combs <rucombs@cisco.com>
#include "log/messages.h"
#include "framework/mpse.h"
#include "sfksearch.h"
#include <deque>
using namespace snort;
//-------------------------------------------------------------------------
// "lowmem"
//-------------------------------------------------------------------------
class LowmemMpse : public Mpse
{
private:
KTRIE_STRUCT* obj;
std::deque<int> dbg_match_dq, dbg_match_want_dq;
public:
LowmemMpse(const MpseAgent* agent) : Mpse("lowmem")
{
obj = KTrieNew(0, agent);
dbg_match_want_dq = std::deque<int>({35, 3, 32, 7, 10, 58, 1});
obj->can_debug = false;
}
~LowmemMpse() override
{
KTrieDelete(obj);
}
int add_pattern(
const uint8_t* P, unsigned m, const PatternDescriptor& desc, void* user) override
{
int ans = KTrieAddPattern(obj, P, m, desc.no_case, desc.negated, user);
return ans;
}
int prep_patterns(SnortConfig* sc) override
{
int ans = KTrieCompile(sc, obj);
return ans;
}
int _search(
const uint8_t* T, int n, MpseMatch match,
void* context, int* current_state) override
{
*current_state = 0;
if (dbg_match_dq == dbg_match_want_dq) {
obj->can_debug = true;
}
int ans = KTrieSearch(obj, T, n, match, context);
dbg_match_dq.push_back(ans);
if (dbg_match_dq.size() > dbg_match_want_dq.size()) dbg_match_dq.pop_front();
if (obj->can_debug) {
obj->can_debug = false;
printf("search finished: matches = %d\n", ans);
printf("number of patterns: %d\n", KTriePatternCount(obj));
printf("T = ");
for (int i = 0; i < n; i++) printf("%d ", T[i]);
printf("\n");
}
return ans;
}
int get_pattern_count() const override
{
int ans = KTriePatternCount(obj);
return ans;
}
};
//-------------------------------------------------------------------------
// api
//-------------------------------------------------------------------------
static Mpse* lm_ctor(const SnortConfig*, class Module*, const MpseAgent* agent)
{
return new LowmemMpse(agent);
}
static void lm_dtor(Mpse* p)
{
delete p;
}
static void lm_init()
{
KTrie_init_xlatcase();
KTrieInitMemUsed();
}
static void lm_print()
{
if ( !KTrieMemUsed() )
return;
double x = (double)KTrieMemUsed();
LogMessage("[ LowMem Search-Method Memory Used : %g %s ]\n",
(x > 1.e+6) ? x/1.e+6 : x/1.e+3,
(x > 1.e+6) ? "MBytes" : "KBytes");
}
static const MpseApi lm_api =
{
{
PT_SEARCH_ENGINE,
sizeof(MpseApi),
SEAPI_VERSION,
0,
API_RESERVED,
API_OPTIONS,
"lowmem",
"Keyword Trie (low memory, moderate performance) MPSE",
nullptr,
nullptr
},
MPSE_BASE,
nullptr,
nullptr,
nullptr,
nullptr,
lm_ctor,
lm_dtor,
lm_init,
lm_print,
nullptr
};
const BaseApi* se_lowmem = &lm_api.base;
//--------------------------------------------------------------------------
// Copyright (C) 2001 Marc Norton
// Copyright (C) 2014-2024 Cisco and/or its affiliates. All rights reserved.
// Copyright (C) 2003-2013 Sourcefire, Inc.
//
// This program is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License Version 2 as published
// by the Free Software Foundation. You may not use, modify or distribute
// this program under any other version of the GNU General Public License.
//
// This program is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// General Public License for more details.
//
// You should have received a copy of the GNU General Public License along
// with this program; if not, write to the Free Software Foundation, Inc.,
// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
//--------------------------------------------------------------------------
/*
* ksearch.c
*
* Basic Keyword Search Trie - uses linked lists to build the finite automata
*
* Keyword-Match: Performs the equivalent of a multi-string strcmp()
* - use for token testing after parsing the language tokens using lex or the like.
*
* Keyword-Search: searches the input text for one of multiple keywords,
* and supports case sensitive and case insensitive patterns.
*/
#include "sfksearch.h"
#include <cassert>
#include "main/thread.h"
#include "utils/util.h"
static void KTrieFree(KTRIENODE* n);
static unsigned int mtot = 0;
unsigned int KTrieMemUsed()
{
return mtot;
}
void KTrieInitMemUsed()
{
mtot = 0;
}
/*
* Allocate Memory
*/
static void* KTRIE_MALLOC(int n)
{
assert(n > 0);
void* p = snort_calloc(n);
mtot += n;
return p;
}
/*
* Free Memory
*/
static void KTRIE_FREE(void* p)
{
if ( p )
snort_free(p);
}
/*
* Local/Tmp nocase array
*/
static THREAD_LOCAL uint8_t Tnocase[65*1024];
/*
** Case Translation Table
*/
static uint8_t xlatcase[256];
/*
*
*/
void KTrie_init_xlatcase()
{
for (int i=0; i<256; i++)
{
xlatcase[ i ] = (uint8_t)tolower(i);
}
}
/*
*
*/
static inline void ConvertCaseEx(uint8_t* d, const uint8_t* s, int m)
{
int i;
for ( i=0; i < m; i++ )
{
d[i] = xlatcase[ s[i] ];
}
}
/*
*
*/
KTRIE_STRUCT* KTrieNew(int method, const MpseAgent* agent)
{
KTRIE_STRUCT* ts = (KTRIE_STRUCT*)KTRIE_MALLOC(sizeof(*ts));
ts->memory = sizeof(*ts);
ts->nchars = 0;
ts->npats = 0;
ts->end_states = 0;
ts->method = method; /* - old method, 1 = queue */
ts->agent = agent;
return ts;
}
int KTriePatternCount(KTRIE_STRUCT* k)
{
return k->npats;
}
/*
* Deletes memory that was used in creating trie
* and nodes
*/
void KTrieDelete(KTRIE_STRUCT* k)
{
if ( !k )
return;
KTRIEPATTERN* p = k->patrn;
KTRIEPATTERN* pnext = nullptr;
while ( p )
{
pnext = p->next;
if (k->agent)
{
if (p->user)
k->agent->user_free(p->user);
if (p->rule_option_tree)
k->agent->tree_free(&p->rule_option_tree);
if (p->neg_list)
k->agent->list_free(&p->neg_list);
}
KTRIE_FREE(p->P);
KTRIE_FREE(p->Pcase);
KTRIE_FREE(p);
p = pnext;
}
for ( int i = 0; i < KTRIE_ROOT_NODES; i++ )
KTrieFree(k->root[i]);
KTRIE_FREE(k);
}
/*
* Recursively delete all nodes in trie
*/
static void KTrieFree(KTRIENODE* n)
{
if ( !n )
return;
KTrieFree(n->child);
KTrieFree(n->sibling);
KTRIE_FREE(n);
}
/*
*
*/
static KTRIEPATTERN* KTrieNewPattern(const uint8_t* P, unsigned n)
{
if (n < 1)
return nullptr;
KTRIEPATTERN* p = (KTRIEPATTERN*)KTRIE_MALLOC(sizeof(*p));
/* Save as a nocase string */
p->P = (uint8_t*)KTRIE_MALLOC(n);
ConvertCaseEx(p->P, P, n);
/* Save Case specific version */
p->Pcase = (uint8_t*)KTRIE_MALLOC(n);
memcpy(p->Pcase, P, n);
p->n = n;
p->next = nullptr;
return p;
}
/*
* Add Pattern info to the list of patterns
*/
int KTrieAddPattern(
KTRIE_STRUCT* ts, const uint8_t* P, unsigned n,
bool nocase, bool negative, void* user)
{
KTRIEPATTERN* pnew;
if ( !ts->patrn )
{
pnew = ts->patrn = KTrieNewPattern(P, n);
if ( !pnew )
return -1;
}
else
{
pnew = KTrieNewPattern(P, n);
if ( !pnew )
return -1;
pnew->next = ts->patrn; /* insert at head of list */
ts->patrn = pnew;
}
pnew->nocase = nocase;
pnew->negative = negative;
pnew->user = user;
pnew->mnext = nullptr;
ts->npats++;
ts->memory += sizeof(KTRIEPATTERN) + 2 * n; /* Case and nocase */
return 0;
}
/*
*
*/
static KTRIENODE* KTrieCreateNode(KTRIE_STRUCT* ts)
{
KTRIENODE* t = (KTRIENODE*)KTRIE_MALLOC(sizeof(*t));
ts->memory += sizeof(*t);
return t;
}
/*
* Insert a Pattern in the Trie
*/
static int KTrieInsert(KTRIE_STRUCT* ts, KTRIEPATTERN* px)
{
int type = 0;
int n = px->n;
uint8_t* P = px->P;
KTRIENODE* root;
/* Make sure we at least have a root character for the tree */
if ( !ts->root[*P] )
{
ts->root[*P] = root = KTrieCreateNode(ts);
if ( !root )
return -1;
root->edge = *P;
}
else
{
root = ts->root[*P];
}
/* Walk existing Patterns */
while ( n )
{
if ( root->edge == *P )
{
P++;
n--;
if ( n && root->child )
{
root=root->child;
}
else /* cannot continue */
{
type = 0; /* Expand the tree via the child */
break;
}
}
else
{
if ( root->sibling )
{
root=root->sibling;
}
else /* cannot continue */
{
type = 1; /* Expand the tree via the sibling */
break;
}
}
}
/*
* Add the next char of the Keyword, if any
*/
if ( n )
{
if ( type == 0 )
{
/*
* Start with a new child to finish this Keyword
*/
root->child= KTrieCreateNode(ts);
if ( !root->child )
return -1;
root=root->child;
root->edge = *P;
P++;
n--;
ts->nchars++;
}
else
{
/*
* Start a new sibling branch to finish this Keyword
*/
root->sibling= KTrieCreateNode(ts);
if ( !root->sibling )
return -1;
root=root->sibling;
root->edge = *P;
P++;
n--;
ts->nchars++;
}
}
/*
* Finish the keyword as child nodes
*/
while ( n )
{
root->child = KTrieCreateNode(ts);
if ( !root->child )
return -1;
root=root->child;
root->edge = *P;
P++;
n--;
ts->nchars++;
}
if ( root->pkeyword )
{
px->mnext = root->pkeyword; /* insert duplicates at front of list */
root->pkeyword = px;
ts->duplicates++;
}
else
{
root->pkeyword = px;
ts->end_states++;
}
return 0;
}
/*
*
*/
static void Build_Bad_Character_Shifts(KTRIE_STRUCT* kt)
{
KTRIEPATTERN* plist;
/* Calc the min pattern size */
kt->bcSize = 32000;
for ( plist=kt->patrn; plist; plist=plist->next )
{
if ( plist->n < kt->bcSize )
{
kt->bcSize = plist->n; /* smallest pattern size */
}
}
/*
* Initialize the Bad Character shift table.
*/
for ( int i = 0; i < KTRIE_ROOT_NODES; i++ )
{
kt->bcShift[i] = (unsigned short)kt->bcSize;
}
/*
* Finish the Bad character shift table
*/
for ( plist=kt->patrn; plist; plist=plist->next )
{
for ( int k=0; k<kt->bcSize; k++ )
{
int shift = kt->bcSize - 1 - k;
int cindex = plist->P[ k ];
if ( shift < kt->bcShift[ cindex ] )
{
kt->bcShift[ cindex ] = (unsigned short)shift;
}
}
}
}
static int KTrieBuildMatchStateNode(
snort::SnortConfig* sc, KTRIENODE* root, KTRIE_STRUCT* ts)
{
int cnt = 0;
KTRIEPATTERN* p;
if (!root)
return 0;
/* each and every prefix match at this root*/
if (root->pkeyword)
{
for (p = root->pkeyword; p; p = p->mnext)
{
if (p->user)
{
if (p->negative)
{
ts->agent->negate_list(p->user, &root->pkeyword->neg_list);
}
else
{
ts->agent->build_tree(sc, p->user, &root->pkeyword->rule_option_tree);
}
}
cnt++;
}
/* Last call to finalize the tree for this root */
ts->agent->build_tree(sc, nullptr, &root->pkeyword->rule_option_tree);
}
/* for child of this root */
if (root->child)
{
cnt += KTrieBuildMatchStateNode(sc, root->child, ts);
}
/* 1st sibling of this root -- other siblings will be processed from
* within the processing for root->sibling. */
if (root->sibling)
{
cnt += KTrieBuildMatchStateNode(sc, root->sibling, ts);
}
return cnt;
}
static int KTrieBuildMatchStateTrees(snort::SnortConfig* sc, KTRIE_STRUCT* ts)
{
int cnt = 0;
/* Find the states that have a MatchList */
for (int i = 0; i < KTRIE_ROOT_NODES; i++)
{
KTRIENODE* root = ts->root[i];
/* each and every prefix match at this root*/
if ( root and ts->agent )
{
cnt += KTrieBuildMatchStateNode(sc, root, ts);
}
}
return cnt;
}
/*
* Build the Keyword TRIE
*
*/
static inline int _KTrieCompile(KTRIE_STRUCT* ts)
{
KTRIEPATTERN* p;
/*
static int tmem=0; // unused
*/
/*
* Build the Keyword TRIE
*/
for ( p=ts->patrn; p; p=p->next )
{
if ( KTrieInsert(ts, p) )
return -1;
}
/*
* Build A Setwise Bad Character Shift Table
*/
Build_Bad_Character_Shifts(ts);
/*
tmem += ts->memory;
printf(" Compile stats: %d patterns, %d chars, %d duplicate patterns, %d bytes, %d total-bytes\n",ts->npats,ts->nchars,ts->duplicates,ts->memory,tmem);
*/
return 0;
}
int KTrieCompile(snort::SnortConfig* sc, KTRIE_STRUCT* ts)
{
int rval;
if ((rval = _KTrieCompile(ts)))
return rval;
if ( ts->agent )
KTrieBuildMatchStateTrees(sc, ts);
return 0;
}
void sfksearch_print_qinfo()
{
}
/*
* Search - Algorithm
*
* This routine will log any substring of T that matches a keyword,
* and processes all prefix matches. This is used for generic
* pattern searching with a set of keywords and a body of text.
*
*
*
* kt- Trie Structure
* T - nocase text
* Tc- case specific text
* n - text length
*
* returns:
* # pattern matches
*/
static inline int KTriePrefixMatch(
KTRIE_STRUCT* kt, const uint8_t* T, const uint8_t*, const uint8_t* bT, int n,
MpseMatch match, void* context)
{
KTRIENODE* root = kt->root[ *T ];
int nfound = 0;
KTRIEPATTERN* pk;
int index;
///T este varianta nocase (tolower pe tot). argumentul urmator (nefolosit) e cel default.
if (kt->can_debug) {
printf("LOWMEM KTriePrefixMatch entry, n = %d\n", n);
}
/* Check if any keywords start with this character */
if ( !root ) {
return 0;
}
while ( n )
{
if ( root->edge == *T )
{
T++;
n--;
pk = root->pkeyword;
if (pk)
{
index = (int)(T - bT);
nfound++;
if (kt->can_debug) {
///print the pattern that matched:
printf("LOWMEM: user %p, pattern len %d, index %d, nocase %d, negative %d, n %d. P nocase = ", pk->user, pk->n, index, pk->nocase, pk->negative, n);
for (int _ = 0; _ < pk->n; _++) printf("%d ", pk->P[_]);
printf("\n");
}
if (match (pk->user, pk->rule_option_tree, index, context, pk->neg_list) > 0)
{
printf("LOWMEM match quits early.\n");
return nfound;
}
}
if ( n && root->child )
{
root = root->child;
}
else /* cannot continue -- match is over */
{
break;
}
}
else
{
if ( root->sibling )
{
root = root->sibling;
}
else /* cannot continue */
{
break;
}
}
}
return nfound;
}
/*
*
*/
static inline int KTrieSearchNoBC(
KTRIE_STRUCT* ks, const uint8_t* Tx, int n, MpseMatch match, void* context)
{
int nfound = 0;
const uint8_t* T, * bT;
ConvertCaseEx(Tnocase, Tx, n);
T = Tnocase;
bT = T;
for (; n>0; n--, T++, Tx++ )
{
nfound += KTriePrefixMatch(ks, T, Tx, bT, n, match, context);
}
return nfound;
}
/*
*
*/
static inline int KTrieSearchBC(
KTRIE_STRUCT* ks, const uint8_t* Tx, int n, MpseMatch match, void* context)
{
const uint8_t* Tend;
const uint8_t* T, * bT;
int nfound = 0;
short* bcShift = (short*)ks->bcShift;
int bcSize = ks->bcSize;
ConvertCaseEx(Tnocase, Tx, n);
T = Tnocase;
bT = T;
Tend = T + n - bcSize;
bcSize--;
for (; T <= Tend; n--, T++, Tx++ )
{
int tshift;
while ( (tshift = bcShift[ *( T + bcSize ) ]) > 0 )
{
T += tshift;
Tx += tshift;
if ( T > Tend )
return nfound;
}
nfound += KTriePrefixMatch(ks, T, Tx, bT, n, match, context);
}
return nfound;
}
int KTrieSearch(
KTRIE_STRUCT* ks, const uint8_t* T, int n, MpseMatch match, void* context)
{
if ( ks->bcSize < 3 )
return KTrieSearchNoBC(ks, T, n, match, context);
else
return KTrieSearchBC(ks, T, n, match, context);
}
//--------------------------------------------------------------------------
// Copyright (C) 2014-2024 Cisco and/or its affiliates. All rights reserved.
// Copyright (C) 2003-2013 Sourcefire, Inc.
// Copyright (C) 2001 Marc Norton
//
// This program is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License Version 2 as published
// by the Free Software Foundation. You may not use, modify or distribute
// this program under any other version of the GNU General Public License.
//
// This program is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// General Public License for more details.
//
// You should have received a copy of the GNU General Public License along
// with this program; if not, write to the Free Software Foundation, Inc.,
// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
//--------------------------------------------------------------------------
#ifndef SFKSEARCH_H
#define SFKSEARCH_H
// ksearch.h - Trie based multi-pattern matcher
#include <cstdint>
#include "search_engines/search_common.h"
namespace snort
{
struct SnortConfig;
}
struct KTRIEPATTERN
{
KTRIEPATTERN* next; /* global list of all patterns*/
KTRIEPATTERN* mnext; /* matching list of duplicate keywords*/
uint8_t* P; /* no case*/
uint8_t* Pcase; /* case sensitive*/
void* user;
void* rule_option_tree;
void* neg_list;
int n;
int nocase;
int negative;
};
struct KTRIENODE
{
int edge; /* character*/
KTRIENODE* sibling;
KTRIENODE* child;
KTRIEPATTERN* pkeyword;
};
#define KTRIE_ROOT_NODES 256
struct KTRIE_STRUCT
{
KTRIEPATTERN* patrn; /* List of patterns, built as they are added*/
KTRIENODE* root[KTRIE_ROOT_NODES]; /* KTrie nodes*/
const struct MpseAgent* agent;
int memory;
int nchars;
int npats;
int duplicates;
int method;
int end_states; /* should equal npats - duplicates*/
int bcSize;
unsigned short bcShift[KTRIE_ROOT_NODES];
///not originally here:
bool can_debug;
};
void KTrie_init_xlatcase();
KTRIE_STRUCT* KTrieNew(int method, const MpseAgent*);
int KTrieAddPattern(
KTRIE_STRUCT*, const uint8_t* P, unsigned n,
bool nocase, bool negative, void* id);
int KTrieCompile(snort::SnortConfig*, KTRIE_STRUCT*);
int KTrieSearch(KTRIE_STRUCT*, const uint8_t* T, int n, MpseMatch, void* context);
unsigned int KTrieMemUsed();
void KTrieInitMemUsed();
void KTrieDelete(KTRIE_STRUCT*);
int KTriePatternCount(KTRIE_STRUCT*);
void sfksearch_print_qinfo();
#endif
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment