Navigation Menu

Skip to content

Instantly share code, notes, and snippets.

@vlad902
Created November 13, 2016 22:31
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save vlad902/b0c6b1133a78fc4e8ddd76f4abfc034c to your computer and use it in GitHub Desktop.
Save vlad902/b0c6b1133a78fc4e8ddd76f4abfc034c to your computer and use it in GitHub Desktop.
Octopus CFG walking code with very crude alias and taint analysis.
// Simplify the argument, e.g. '( struct foo* ) & bar' to 'bar'
strip = { traversal ->
if (traversal.clone().values('type')[0] == 'CastExpression') {
traversal = traversal.ithChildren('1')
}
traversal.values('code')[0].replace("& ", "").replace("* ", "")
}
// { 'aliasing_function': [ 'src_arg', 'dst_arg' ], ... }
ALIASING_FUNCTIONS = [
'memcpy': [ '0', '1' ],
'bcopy': [ '1', '0'],
'copyin': [ '1', '0' ]
]
ALIASING_FUNCTIONS_KEYS = ALIASING_FUNCTIONS.keySet().toList()
// { 'overwriting_function': 'overwritten_arg', ... }
OVERWRITING_FUNCTIONS = [
'bzero': '0',
'memset': '0'
]
OVERWRITING_FUNCTIONS_KEYS = OVERWRITING_FUNCTIONS.keySet().toList()
// Very crude alias analysis, returns a map of the form:
// { "overwritten_variable": "variables_it_was_overwritten_with (if any)" }
aliasAnalysis = { curNode ->
retval = [:];
g.V(curNode)
.astNodes().has('type', 'Callee')
.has('code', P.within(*OVERWRITING_FUNCTIONS_KEYS))
.sideEffect { childNum = OVERWRITING_FUNCTIONS[it.get().value('code')] }
.in(AST_EDGE).sideEffect {
var = g.V(it.get()).ithArguments(childNum);
retval.put(strip(var), '')
}.iterate();
g.V(curNode)
.astNodes().has('type', 'Callee')
.has('code', P.within(*ALIASING_FUNCTIONS_KEYS))
.sideEffect { childNums = ALIASING_FUNCTIONS[it.get().value('code')] }
.in(AST_EDGE).sideEffect {
from = g.V(it.get()).ithArguments(childNums[0]);
to = g.V(it.get()).ithArguments(childNums[1]);
retval.put(strip(from), strip(to))
}.iterate();
// Find new assignments involving variables of interest
// TODO: this is going to have tons of FPs from call expressions, filter them out,
// create a verison of astNodes() that allows filtering as you go down
g.V(curNode)
.astNodes()
.has('type', 'AssignmentExpression').has('operator', '=')
.sideEffect {
from = g.V(it.get()).ithChildren('0');
to = g.V(it.get()).ithChildren('1');
retval.put(strip(from), strip(to))
}.iterate();
retval
}
TERMINATION_FUNCTION_MAP = [
'malloc': '0',
'realloc': '1'
]
TERMINATION_FUNCTION_NAMES = TERMINATION_FUNCTION_MAP.keySet().toList()
terminationCriteria = { curNode, taintList, sanitizedList, path ->
if (path.size() > 30) {
println("Path too long " + path.toString());
return []; // Terminate search (unsuccessfully)
}
interesting = g.V(curNode)
.astNodes().has('type', 'Callee')
.or(
__.has('code', 'malloc').in(AST_EDGE).ithArguments('0')
.not(has('code', P.within(*sanitizedList.toList())))
.astNodes().has('type', 'MultiplicativeExpression')
.astNodes().has('code', P.within(*taintList.toList())),
__.has('code', 'copyout').in(AST_EDGE).ithArguments('2')
.not(has('code', P.within(*sanitizedList.toList())))
.astNodes().has('type', 'MultiplicativeExpression')
.astNodes().has('code', P.within(*taintList.toList())),
__.has('code', 'realloc').in(AST_EDGE).ithArguments('1')
.not(has('code', P.within(*sanitizedList.toList())))
.astNodes().has('code', P.within(*taintList.toList()))
)[0]
if (interesting == null) {
return null; // Continue search
}
return path; // Terminate search (successfully!)
}
sanitizes = { curNode ->
if (curNode.value('type') != 'Condition') {
return [];
}
return g.V(curNode)
.astNodes().has('type', 'RelationalExpression').has('operator', P.within('>', '>='))
.ithChildren('0')
.has('type', P.within('Identifier', 'MemberAccess', 'PtrMemberAccess'))
.values('code').toList()
}
/*
* @param visited Map from visited node IDs to the taint, sanitized lists when those
* nodes were visited to avoid searching if we've already searched down
* that path.
*/
__cfgWalkWithAliasAnalysis = { curNode, taintList, sanitizedList, sanitizes,
terminationCriteria, path, visited ->
// Expand sanitized, taint lists
for (aliased in aliasAnalysis(curNode)) {
if (sanitizedList.contains(aliased.value)) {
sanitizedList.add(aliased.key);
} else if (taintList.contains(aliased.value) || taintList.contains(aliased.value.split(" (->|\\.|\\[)")[0])) {
taintList.add(aliased.key);
} else {
sanitizedList.remove(aliased.key);
taintList.remove(aliased.key);
}
}
sanitizedList.addAll(sanitizes(curNode));
taintList.removeAll(sanitizedList);
println(String.format("At node %d with taint/sanitized lists %s/%s.",
curNode.id(), taintList.toString(), sanitizedList.toString()));
// Fail if no more tainted variables are being tracked.
if (taintList.size() == 0) {
println("Taint list empty " + curNode.toString());
return [];
}
// Check terminationCriteria
// terminationCriteria returns null to continue, or a list otherwise
// (this list is either [] or path, indicating failure or success)
termination = terminationCriteria(curNode, taintList, sanitizedList, path);
if (termination != null) {
return termination;
}
visited.put(curNode.id(), [sanitized:sanitizedList, taint:taintList]);
// Sort children so that we search those with the most incoming CFG edges
// first. This is an optimization for 'visited' (TODO: explain the
// rationale about ordering and avoiding long paths for visited.)
children = g.V(curNode).out(CFG_EDGE);
try {
children = children.order().by(__.in(CFG_EDGE).count(), Order.decr).toList()
} catch(UnsupportedOperationException e) {
// Workaround for https://issues.apache.org/jira/browse/TINKERPOP-1216
// due to old Tinkerpop3 version w/ octopus :(
}
for (child in children) {
// Do not visit already visited nodes
if (path.contains(child)) {
continue;
} else if (visited.get(child.id()) &&
taintList == visited.get(child.id())['taint'] &&
sanitizedList == visited.get(child.id())['sanitized']) {
continue;
}
result = __cfgWalkWithAliasAnalysis(child, taintList.clone(), sanitizedList.clone(), sanitizes, terminationCriteria, path + [curNode], visited);
if (result != []) {
return result;
}
}
return [];
}
cfgWalkWithAliasAnalysis = { startNode, taintedVariable ->
__cfgWalkWithAliasAnalysis(startNode, [ taintedVariable ] as Set, [] as Set,
sanitizes, terminationCriteria, [], [:])
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment