// Random Octopus helper steps
/**
 * Enumerate the call chains that lead from a source function to a
 * destination function. Only direct calls are detected.
 *
 * @param srcFunc function name (matched against the 'code' property) at
 *                which every chain must begin
 * @param dstFunc function name at which every chain must end
 * @return a list of paths; each path is a list of function names ordered
 *         from srcFunc to dstFunc. Empty when no chain is found.
 */
allPathsThatCanReach = { srcFunc, dstFunc ->
    // The search runs backwards from the destination, so the path is seeded
    // with dstFunc; __allPathsThatCanReach prepends callers as it recurses.
    def seedPath = [dstFunc]
    return __allPathsThatCanReach(srcFunc, dstFunc, seedPath)
}
/**
 * Recursive worker behind allPathsThatCanReach. Walks the call graph
 * backwards: at each level it looks up the functions that contain a call to
 * dstFunc and recurses with each of them as the new destination.
 *
 * @param srcFunc function name at which a chain is considered complete
 * @param dstFunc function name whose callers are looked up at this level
 * @param path    chain collected so far, with dstFunc as its first element
 * @return list of complete paths (empty when no caller leads back to srcFunc)
 */
__allPathsThatCanReach = { srcFunc, dstFunc, path ->
    // Reached the source: the accumulated chain is one complete path.
    if (srcFunc == dstFunc) {
        return [path]
    }

    // Titan will default to using the type index instead of the code index,
    // but there are (usually) far more Callee nodes than nodes carrying a
    // given function name, so the barrier forces the use of the better index.
    def callSites = g.V().has('code', dstFunc).barrier().has('type', 'Callee')

    // Resolve every call site to the vertex whose _key equals its functionId
    // (presumably the enclosing function -- confirm against the schema) and
    // drop functions that are already on the path so that call cycles
    // cannot recurse forever.
    def callerNames = callSites
        .values('functionId').flatMap { g.V().has('_key', it.get()) }
        .has('code', P.without(*path))
        .values('code')

    // Each caller yields a list of paths; unfold() flattens those lists into
    // a single stream of paths.
    return callerNames
        .map {
            def caller = it.get()
            __allPathsThatCanReach(srcFunc, caller, [caller] + path)
        }
        .unfold()
        .toList()
}
////////////////////////////// | |
// Reference:
// https://github.com/timhemel/joern-pubs/blob/master/source/tutorial/cfg_traversals.rst
//
// reversewalkcfg: emits the incoming vertex and then the vertices reached by
// repeatedly following FLOWS_TO edges backwards. Every vertex that is
// expanded is aggregated into the 'seen' side effect, and vertices already
// in 'seen' are filtered out so that loops in the CFG terminate.
addStep('reversewalkcfg', {
    delegate
        .sideEffect { traverser -> traverser.sideEffects('seen', []) }
        .emit()
        .repeat(
            aggregate('seen')
                .in('FLOWS_TO')
                .where(P.without('seen'))
        )
})
// gotoVariableDecl: find the variable declaration for the current vertex
// (matched by the name in its 'code' property) using a reverse CFG search.
// Emits the de-duplicated statements that contain a matching declaration
// node (Parameter, ClassDefStatement or IdentifierDeclStatement).
addStep('gotoVariableDecl', {
    delegate
        // The lookup is built inside flatMap so that it is constructed once
        // per traverser. The variable name is a local of this closure, so
        // has('code', varName) always sees the value derived from the
        // traverser being processed, and nothing is shared between
        // traversers or written to the script binding.
        .flatMap {
            // Strip off any modifiers possibly hanging around on the symbol
            // ("& ", "* ") and keep only the text before the first member
            // access or index (" ->", " .", " [").
            def varName = it.get().value('code')
                .replace("& ", "")
                .replace("* ", "")
                .split(" (->|\\.|\\[)")[0]

            g.V(it.get())
                .statements().reversewalkcfg()
                .or(
                    has('type', 'Parameter'),
                    has('type', 'ClassDefStatement'),
                    has('type', 'IdentifierDeclStatement'))
                .astNodes().has('code', varName)
        }
        .statements().dedup()
})
// Find a struct/union ClassDef based on the variable declaration.
//
// declaration: the declaration vertex (or id) to inspect.
// Returns the first ClassDef node whose code equals the bare type name, or
// null when the declared type is missing or is not a struct/union.
getClassDef = { declaration ->
    // TODO: Typedef support requires a parser addition
    // TODO: We throw away inline ClassDefStatements right now, only do proper
    // parsing for Parameter/IdentifierDeclStatements

    // Declared with 'def' so the value stays local to this call instead of
    // being stored in (and overwriting) a script-level 'type' binding.
    def declaredType =
        g.V(declaration)
            .astNodes().or(
                has('type', 'ParameterType'),
                has('type', 'IdentifierDeclType'))
            .values('code')[0]

    // Optimization: skip the ClassDef lookup unless the type mentions a
    // struct or a union.
    if (declaredType == null ||
            (!declaredType.contains("struct") && !declaredType.contains("union"))) {
        return null
    }

    // Turn 'const struct foo **' into 'foo'
    // TODO: Strip cast prefixes as well
    def className = declaredType
        .replaceAll("^((struct|union|const) )+", "")
        .replaceAll(' \\**$', "")
    return getNodesWithTypeAndCode("ClassDef", className)[0]
}
// Example: look for likely cases where memory is leaked through copyout():
// it is called with a struct that contains a union, or a member whose name
// includes 'pad' or 'reserve', and the variable is not cleared beforehand.
//
// list: the call vertices (or ids) to inspect, e.g. getCallsTo('copyout').
// Returns a traversal that yields, for each suspicious call, the first
// element of the cfgPaths() result between the declaration and the call.
search = { list ->
    // State handed from the sideEffect steps to the final map step. Declared
    // locally so it is captured by the closures below rather than stored as
    // script-level bindings that outlive the call.
    def endNode = null    // statement containing the call under inspection
    def argCode = null    // code of argument '0', with "& " removed
    def startNode = null  // statement containing the variable declaration

    g.V(*list)
        .as('copy')
        .sideEffect { endNode = g.V(it.get()).statements()[0] }
        .ithArguments('0')
        .sideEffect { argCode = it.get().value('code').replace("& ", "") }
        .gotoVariableDecl()
        .as('decl')
        .sideEffect { startNode = g.V(it.get()).statements()[0] }
        .map { getClassDef(it.get()) }
        // Go out to leaf declarations (e.g. struct { struct { int f; }} )
        .repeat(out('IS_CLASS_OF')).emit()
        .or(
            // Has a declaration with 'reserve' or 'pad' in the name
            __.out('DECLARES').has('identifier', textRegex('.*(reserve|pad).*')),
            // Has a type of union
            __.out('DECLARES').has('baseType', textContains('union')),
            // Has an anonymous type--most likely to be a union, joern parser
            // doesn't currently expose this, see
            // https://github.com/octopus-platform/joern/issues/133
            __.has('code', '<unnamed>')
        )
        .select('copy')
        .dedup()
        // Find if there are paths from declaration to copyout where the
        // variable is not cleared using bzero or copyin. The closure passed
        // to cfgPaths matches statements that clear the variable: bzero with
        // it as argument '0', or copyin with it as argument '1'.
        .map {
            cfgPaths('FAKEFAKE', { vertex, symName ->
                g.V(vertex).or(
                    has('code', textContains('bzero'))
                        .astNodes().ithArguments('0')
                        .astNodes().has('code', argCode),
                    has('code', textContains('copyin'))
                        .astNodes().ithArguments('1')
                        .astNodes().has('code', argCode))
            }, startNode, endNode)[0]
        }
}

// Run the example against every call to copyout().
search(getCallsTo('copyout'))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment