Skip to content

Instantly share code, notes, and snippets.

Last active December 1, 2016 11:33
Show Gist options
  • Save vlad902/9ad8b8f784892abeb1d113f0cd63dcfd to your computer and use it in GitHub Desktop.
Save vlad902/9ad8b8f784892abeb1d113f0cd63dcfd to your computer and use it in GitHub Desktop.
Random Octopus helper steps
* Find code paths that go from a source function to a destination function
* (only detects direct calls)
// Public entry point for the path search.
//   srcFunc - value compared against the current function to detect the start of a path
//   dstFunc - function the search begins from (the search is delegated to the
//             recursive helper, which works backwards from here)
// The initial path handed to the helper contains only dstFunc.
allPathsThatCanReach = { srcFunc, dstFunc ->
__allPathsThatCanReach(srcFunc, dstFunc, [dstFunc])
// Recursive helper behind allPathsThatCanReach.
//   srcFunc - function at which a path is considered complete
//   dstFunc - function currently being examined
//   path    - list accumulated so far; each recursion prepends one more element
__allPathsThatCanReach = { srcFunc, dstFunc, path ->
// Base case: the source has been reached, so the accumulated path is returned
// wrapped in a single-element list.
if (srcFunc == dstFunc) {
return [ path ];
// Titan will default to using the type index instead of the code index, but
// there are WAY more Callee nodes than funcName nodes (usually) so the barrier
// will force the use of the better index
g.V().has('code', dstFunc).barrier().has('type', 'Callee')
// For each Callee node, look up the vertex whose '_key' equals the node's
// 'functionId' property (presumably the function containing the call -- TODO confirm).
.values('functionId').flatMap { g.V().has('_key', it.get()) }
// Drop vertices whose 'code' already appears in path (presumably to avoid
// revisiting functions / looping on recursion -- TODO confirm).
.has('code', P.without(*path))
// Recurse from each remaining element, prepending it to the path.
.map { __allPathsThatCanReach(srcFunc, it.get(), [it.get()] + path) }
// Registers a custom traversal step named 'reversewalkcfg'.
// NOTE(review): only the first line of the step body is visible in this copy;
// the remainder of the definition is missing, so its full behavior cannot be
// described from here.
addStep('reversewalkcfg', {
// Stores an empty list under the 'seen' side-effect key (presumably used to
// track already-visited nodes -- TODO confirm against the full definition).
.sideEffect{ it.sideEffects('seen',[]) }
// Find the variable declaration for the current vertex, based on its 'code'
// name and a reverse CFG search.
addStep('gotoVariableDecl', {
// Strip off any modifiers possibly hanging around on the symbol:
// "& " and "* " are removed, then everything from the first " ->", " ." or
// " [" onwards is discarded (e.g. "& foo -> bar" becomes "foo").
// The result is stored in varName for use later in this step.
.sideEffect { varName = it.get().value('code').replace("& ", "").replace("* ", "").split(" (->|\\.|\\[)")[0] }
// This flatMap magically makes this result consistent and not buggy.
// Presumably this is because the has('code', varName) evaluation is lazy
// and varName has changed by the time this eval has been made. Is there
// a better way of doing this?
.flatMap {
// NOTE(review): the start of the expression that takes the three has('type', ...)
// alternatives below is not visible in this copy.
has('type', 'Parameter'),
has('type', 'ClassDefStatement'),
has('type', 'IdentifierDeclStatement'))
// Among the AST nodes of the matched declaration, keep those whose 'code'
// equals the stripped variable name.
.astNodes().has('code', varName)
// Find a struct/union ClassDef based on the variable declaration.
//   declaration - the declaration to resolve (as named by the closure parameter)
// Returns null when the declared type is missing or mentions neither "struct"
// nor "union"; otherwise returns the first result of
// getNodesWithTypeAndCode("ClassDef", <stripped type name>).
getClassDef = { declaration ->
// TODO: Typedef support requires a parser addition
// TODO: We throw away inline ClassDefStatements right now, only do proper
// parsing for Parameter/IdentifierDeclStatements
// NOTE(review): the expression that produces `type` is only partly visible in
// this copy; it selects among 'ParameterType' / 'IdentifierDeclType' nodes.
type =
has('type', 'ParameterType'),
has('type', 'IdentifierDeclType'))
// Optimization: bail out early unless the type string mentions struct or union
if (type == null || !type.contains("struct") && !type.contains("union")) {
return null;
// Turn 'const struct foo **' into 'foo': strip leading "struct "/"union "/"const "
// prefixes, then a trailing space followed by any number of '*'.
// TODO: Strip cast prefixes as well
type = type.replaceAll("^((struct|union|const) )+", "").replaceAll(' \\**$', "")
// Only the first matching ClassDef is returned.
return getNodesWithTypeAndCode("ClassDef", type)[0]
// Example: look for likely cases where a memory leak exists: copyout() is called
// with a struct that contains a union or a variable whose name includes 'pad'
// or 'reserved'.
// NOTE(review): the head of the traversal and several combinators are missing
// from this copy, so the notes below cover only the visible steps.
search = { list ->
// Record the first statement of the current vertex as endNode.
.sideEffect { endNode = g.V(it.get()).statements()[0] }
// Record the variable name: the vertex's 'code' with any "& " removed.
.sideEffect { var = it.get().value('code').replace("& ", "") }
// Record the first statement of the current vertex as startNode (presumably
// evaluated at a different vertex than endNode -- TODO confirm).
.sideEffect { startNode = g.V(it.get()).statements()[0] }
// Resolve the current declaration to its struct/union ClassDef (may be null).
.map { getClassDef(it.get()) }
// Go out to leaf declarations (e.g. struct { struct { int f; }} )
// Has a declaration with 'reserve' or 'pad' in the name
__.out('DECLARES').has('identifier', textRegex('.*(reserve|pad).*')),
// Has a type of union
__.out('DECLARES').has('baseType', textContains('union')),
// Has an anonymous type--most likely to be a union, joern parser
// doesn't currently expose this, see
__.has('code', '<unnamed>')
// Find if there are paths from declaration to copyout where the variable
// is not cleared using bzero or copyin.
.map {
// NOTE(review): the meaning of the 'FAKEFAKE' argument is not evident from
// this file -- confirm against the cfgPaths definition.
cfgPaths('FAKEFAKE', { vertex, symName ->
// Matches a node whose code contains 'bzero' and that has an AST node equal to var
has('code', textContains('bzero'))
.astNodes().has('code', var),
// Matches a node whose code contains 'copyin' and that has an AST node equal to var
has('code', textContains('copyin'))
.astNodes().has('code', var))
// Only the first path found between startNode and endNode is kept.
}, startNode, endNode)[0]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment