Last active
December 1, 2016 11:33
-
-
Save vlad902/9ad8b8f784892abeb1d113f0cd63dcfd to your computer and use it in GitHub Desktop.
Random Octopus helper steps
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Find code paths that go from a source function to a destination function.
 * Only direct calls are detected.
 *
 * srcFunc / dstFunc are matched against the 'code' property of graph nodes by
 * the recursive worker. This entry point only seeds that worker with a path
 * holding the destination alone.
 */
allPathsThatCanReach = { srcFunc, dstFunc ->
  def seedPath = [dstFunc]
  return __allPathsThatCanReach(srcFunc, dstFunc, seedPath)
}
/**
 * Recursive worker behind allPathsThatCanReach.
 *
 * srcFunc - name of the function the paths must start from
 * dstFunc - name of the function currently being expanded
 * path    - chain built so far; dstFunc is its first element
 *
 * Returns a list of paths, each path being a list of function names.
 */
__allPathsThatCanReach = { srcFunc, dstFunc, path ->
  // Walked all the way back to the source: the chain is one finished path.
  if (srcFunc == dstFunc) {
    return [path]
  }

  // Titan will default to using the type index instead of the code index, but
  // there are WAY more Callee nodes than funcName nodes (usually), so the
  // barrier forces the use of the better index.
  def callerNames = g.V().has('code', dstFunc).barrier().has('type', 'Callee')
    .values('functionId').flatMap { fid -> g.V().has('_key', fid.get()) }
    // Skip functions already on the path so recursive call cycles terminate.
    .has('code', P.without(*path))
    .values('code')
    .toList()

  // Expand every caller in turn and splice the resulting path lists together.
  return callerNames.collectMany { caller ->
    __allPathsThatCanReach(srcFunc, caller, [caller] + path)
  }
}
//////////////////////////////
// Custom step 'reversewalkcfg', see
// https://github.com/timhemel/joern-pubs/blob/master/source/tutorial/cfg_traversals.rst
//
// Starting from each incoming vertex, follows FLOWS_TO edges backwards and
// emits the start vertex plus every vertex reached. The 'seen' side effect is
// reset to an empty list per incoming traverser and collects visited vertices;
// vertices already in 'seen' are filtered out (presumably so that cycles in
// the CFG do not loop forever).
addStep('reversewalkcfg', {
  delegate
    .sideEffect { traverser -> traverser.sideEffects('seen', []) }
    .emit()
    .repeat(aggregate('seen').in('FLOWS_TO').where(P.without('seen')))
})
// Custom step 'gotoVariableDecl': find the variable declaration for the
// current vertex (based on its 'code' name) using a reverse CFG search.
//
// For each incoming vertex, the enclosing statement is walked backwards
// through the CFG; statements of type Parameter, ClassDefStatement or
// IdentifierDeclStatement are kept when one of their AST nodes has a 'code'
// equal to the variable name. The step emits the matching statements,
// de-duplicated.
addStep('gotoVariableDecl', {
  delegate
    .flatMap { traverser ->
      // Strip off any modifiers possibly hanging around on the symbol
      // ("& x", "* x") and keep only the base name before "->", "." or "[".
      //
      // The name is a closure-local on purpose: the inner traversal is built
      // here, per traverser, so has('code', varName) always sees the name that
      // belongs to this traverser. Assigning an undeclared variable in a
      // separate sideEffect step would store it in the script binding, where
      // it is shared by every traverser and any other script code.
      def varName = traverser.get().value('code')
        .replace("& ", "").replace("* ", "").split(" (->|\\.|\\[)")[0]

      g.V(traverser.get())
        .statements().reversewalkcfg()
        .or(
          has('type', 'Parameter'),
          has('type', 'ClassDefStatement'),
          has('type', 'IdentifierDeclStatement'))
        .astNodes().has('code', varName)
    }
    .statements().dedup()
})
// Find a struct/union ClassDef based on the variable declaration.
//
// declaration - the declaration vertex (or id) handed to g.V()
//
// Returns the first ClassDef node whose code equals the bare type name, or
// null when the declaration has no ParameterType/IdentifierDeclType node or
// its type does not mention "struct" or "union".
getClassDef = { declaration ->
  // TODO: Typedef support requires a parser addition
  // TODO: We throw away inline ClassDefStatements right now, only do proper
  //       parsing for Parameter/IdentifierDeclStatements

  // Declared with 'def' so the value stays local to this call instead of
  // being written into the script binding as a global named 'type'.
  def typeCode = g.V(declaration)
    .astNodes().or(
      has('type', 'ParameterType'),
      has('type', 'IdentifierDeclType'))
    .values('code')[0]

  // Optimization: skip the ClassDef lookup for anything that is not a
  // struct/union type.
  if (typeCode == null || (!typeCode.contains("struct") && !typeCode.contains("union"))) {
    return null
  }

  // Turn 'const struct foo **' into 'foo'
  // TODO: Strip cast prefixes as well
  def className = typeCode
    .replaceAll("^((struct|union|const) )+", "")
    .replaceAll(' \\**$', "")

  return getNodesWithTypeAndCode("ClassDef", className)[0]
}
// Example: look for likely cases where a memory leak exists: copyout() is
// called with a struct that contains a union, or a member whose name includes
// 'reserve' or 'pad'.
//
// list - the call nodes to inspect (spread into g.V()).
//
// NOTE: endNode, var and startNode are deliberately script-level variables;
// they are written by the sideEffect steps and read again further down the
// chain, after the traversal has moved on to other vertices.
search = { list ->
  // Predicate handed to cfgPaths: matches a vertex whose code contains bzero
  // with the variable in argument 0, or copyin with the variable in
  // argument 1. 'var' is looked up when the predicate runs, not when it is
  // defined. (Presumably cfgPaths uses this to discard paths on which the
  // variable gets cleared - confirm against cfgPaths.)
  def clearsVariable = { vertex, symName ->
    g.V(vertex).or(
      has('code', textContains('bzero'))
        .astNodes().ithArguments('0')
        .astNodes().has('code', var),
      has('code', textContains('copyin'))
        .astNodes().ithArguments('1')
        .astNodes().has('code', var))
  }

  g.V(*list)
    .as('copy')
    // Remember the statement containing the call as the end of the CFG search.
    .sideEffect { call -> endNode = g.V(call.get()).statements()[0] }
    .ithArguments('0')
    // Name of the variable passed as first argument, without an address-of.
    .sideEffect { arg -> var = arg.get().value('code').replace("& ", "") }
    .gotoVariableDecl()
    .as('decl')
    // Remember the declaring statement as the start of the CFG search.
    .sideEffect { decl -> startNode = g.V(decl.get()).statements()[0] }
    .map { decl -> getClassDef(decl.get()) }
    // Go out to leaf declarations (e.g. struct { struct { int f; }} )
    .repeat(out('IS_CLASS_OF')).emit()
    .or(
      // Has a declaration with 'reserve' or 'pad' in the name
      __.out('DECLARES').has('identifier', textRegex('.*(reserve|pad).*')),
      // Has a type of union
      __.out('DECLARES').has('baseType', textContains('union')),
      // Has an anonymous type--most likely to be a union; the joern parser
      // doesn't currently expose this, see
      // https://github.com/octopus-platform/joern/issues/133
      __.has('code', '<unnamed>')
    )
    .select('copy')
    .dedup()
    // Find if there are paths from declaration to copyout where the variable
    // is not cleared using bzero or copyin.
    .map { cfgPaths('FAKEFAKE', clearsVariable, startNode, endNode)[0] }
}

search(getCallsTo('copyout'))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment