Skip to content

Instantly share code, notes, and snippets.

@vlad902
Last active December 1, 2016 11:33
Show Gist options
  • Save vlad902/9ad8b8f784892abeb1d113f0cd63dcfd to your computer and use it in GitHub Desktop.
Save vlad902/9ad8b8f784892abeb1d113f0cd63dcfd to your computer and use it in GitHub Desktop.
Random Octopus helper steps
/**
 * Enumerate the call chains that lead from one function to another.
 * Only direct calls are followed.
 *
 * srcFunc - name of the function where the chains begin
 * dstFunc - name of the function where the chains end
 *
 * Returns whatever __allPathsThatCanReach produces when seeded with a path
 * that contains only dstFunc.
 */
allPathsThatCanReach = { srcFunc, dstFunc ->
    def initialPath = [dstFunc]
    return __allPathsThatCanReach(srcFunc, dstFunc, initialPath)
}
/**
 * Recursive worker behind allPathsThatCanReach: walks the call graph
 * backwards, from dstFunc towards srcFunc.
 *
 * srcFunc - function name at which the search stops
 * dstFunc - function name whose callers are looked up on this step
 * path    - function names collected so far; its first element is dstFunc
 *
 * Returns a list of paths, each path being a list of function names.
 */
__allPathsThatCanReach = { srcFunc, dstFunc, path ->
    if (srcFunc != dstFunc) {
        // Left to itself Titan picks the type index rather than the code index.
        // There are (usually) far more Callee nodes than nodes carrying a given
        // function name, so the barrier forces the better index to be used.
        def calleeNodes = g.V().has('code', dstFunc).barrier().has('type', 'Callee')

        // Resolve each call site to the function that contains it, dropping
        // functions that are already part of the current path.
        def callerNames = calleeNodes
            .values('functionId').flatMap { id -> g.V().has('_key', id.get()) }
            .has('code', P.without(*path))
            .values('code')

        // Recurse once per caller; every recursive call yields a list of
        // paths, which unfold() flattens into a single stream of paths.
        return callerNames
            .map { caller -> __allPathsThatCanReach(srcFunc, caller.get(), [caller.get()] + path) }
            .unfold()
            .toList()
    }

    // The source function has been reached: the accumulated path is the result.
    return [ path ]
}
//////////////////////////////
// https://github.com/timhemel/joern-pubs/blob/master/source/tutorial/cfg_traversals.rst
// Custom step 'reversewalkcfg': starting from the incoming vertices, follow
// FLOWS_TO edges backwards and emit the vertices encountered along the way.
addStep('reversewalkcfg', {
delegate
// Set the 'seen' side effect to an empty list before walking
// (presumably a per-traversal reset -- confirm against the sideEffects API)
.sideEffect{ it.sideEffects('seen',[]) }
// emit() is placed in front of repeat(); in Gremlin this also emits the
// starting vertices, not only the ones reached by the loop
.emit()
.repeat(
// Record the current vertex in 'seen', step to the vertices that flow
// into it, and keep only those that have not been recorded yet
aggregate('seen')
.in('FLOWS_TO')
.where(P.without('seen'))
)
})
// Custom step 'gotoVariableDecl': find the declaration of the variable named
// by the current vertex's 'code' property, using a reverse CFG search.
//
// For every incoming vertex the step emits the (de-duplicated) statements
// that enclose a Parameter, ClassDefStatement or IdentifierDeclStatement AST
// node whose 'code' equals the stripped variable name.
addStep('gotoVariableDecl', {
    delegate
        .flatMap {
            // Strip off any modifiers possibly hanging around on the symbol
            // ("& ", "* ") and cut the expression at the first " ->", " ." or
            // " [" so that only the base variable name remains.
            //
            // The name is kept in a closure-local variable on purpose. It used
            // to be assigned to an undeclared (script-global) variable in a
            // separate sideEffect step; because traversals are evaluated
            // lazily, that shared variable could already have been overwritten
            // by the time has('code', varName) was evaluated.
            def varName = it.get().value('code')
                .replace("& ", "")
                .replace("* ", "")
                .split(" (->|\\.|\\[)")[0]

            // Walk the CFG backwards from the enclosing statement and keep the
            // declaration-like statements that mention the variable.
            g.V(it.get())
                .statements().reversewalkcfg()
                .or(
                    has('type', 'Parameter'),
                    has('type', 'ClassDefStatement'),
                    has('type', 'IdentifierDeclStatement'))
                .astNodes().has('code', varName)
        }
        .statements().dedup()
})
// Find a struct/union ClassDef based on the variable declaration.
//
// declaration - declaration vertex (or id) handed to g.V(); its ParameterType /
//               IdentifierDeclType AST node supplies the type text
//
// Returns the first ClassDef node whose code equals the bare type name, or
// null when no type text is found or it mentions neither "struct" nor "union".
getClassDef = { declaration ->
    // TODO: Typedef support requires a parser addition
    // TODO: We throw away inline ClassDefStatements right now, only do proper
    // parsing for Parameter/IdentifierDeclStatements

    // Declared with 'def' so the scratch value stays local to this call
    // instead of leaking into (and being overwritten in) the script binding.
    def typeCode =
        g.V(declaration)
            .astNodes().or(
                has('type', 'ParameterType'),
                has('type', 'IdentifierDeclType'))
            .values('code')[0]

    // Optimization: skip the ClassDef lookup for anything that cannot be a
    // struct or union type.
    if (typeCode == null || !(typeCode.contains("struct") || typeCode.contains("union"))) {
        return null
    }

    // Turn 'const struct foo **' into 'foo'
    // TODO: Strip cast prefixes as well
    def className = typeCode
        .replaceAll("^((struct|union|const) )+", "")
        .replaceAll(' \\**$', "")

    return getNodesWithTypeAndCode("ClassDef", className)[0]
}
// Example: look for likely cases where a memory leak exists: copyout() is
// called with a struct that contains a union, or a member whose name includes
// 'pad' or 'reserve'.
//
// list - vertices to start from (this script passes getCallsTo('copyout'))
//
// NOTE(review): endNode, var and startNode are assigned without 'def' inside
// sideEffect closures and read again in the final map closure, so they are
// shared script-level variables. The result relies on each traverser reaching
// the final map before a later traverser overwrites them -- confirm.
search = { list ->
g.V(*list)
.as('copy')
// Remember the statement enclosing the call; handed to cfgPaths below as its
// last argument
.sideEffect { endNode = g.V(it.get()).statements()[0] }
.ithArguments('0')
// Code of argument '0' with any "& " removed; matched against the bzero/copyin
// arguments below
.sideEffect { var = it.get().value('code').replace("& ", "") }
.gotoVariableDecl()
.as('decl')
// Remember the declaration's statement; handed to cfgPaths below ahead of endNode
.sideEffect { startNode = g.V(it.get()).statements()[0] }
// NOTE(review): getClassDef returns null for non-struct/union declarations;
// how the following steps treat a null traverser is not visible here
.map { getClassDef(it.get()) }
// Go out to leaf declarations (e.g. struct { struct { int f; }} )
.repeat(out('IS_CLASS_OF')).emit()
.or(
// Has a declaration with 'reserve' or 'pad' in the name
__.out('DECLARES').has('identifier', textRegex('.*(reserve|pad).*')),
// Has a type of union
__.out('DECLARES').has('baseType', textContains('union')),
// Has an anonymous type -- most likely to be a union; the joern parser
// doesn't currently expose this, see
// https://github.com/octopus-platform/joern/issues/133
__.has('code', '<unnamed>')
)
.select('copy')
.dedup()
// Find if there are paths from declaration to copyout where the variable
// is not cleared using bzero or copyin.
// The closure below ignores its symName parameter, so 'FAKEFAKE' is presumably
// just a placeholder symbol -- confirm against cfgPaths.
.map {
cfgPaths('FAKEFAKE', { vertex, symName ->
g.V(vertex).or(
// bzero whose argument '0' mentions the variable
has('code', textContains('bzero'))
.astNodes().ithArguments('0')
.astNodes().has('code', var),
// copyin whose argument '1' mentions the variable
has('code', textContains('copyin'))
.astNodes().ithArguments('1')
.astNodes().has('code', var))
}, startNode, endNode)[0]
}
}
// Run the example query over every call to copyout().
search.call(getCallsTo('copyout'))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment