Created
November 13, 2016 22:31
-
-
Save vlad902/b0c6b1133a78fc4e8ddd76f4abfc034c to your computer and use it in GitHub Desktop.
Octopus CFG walking code with very crude alias and taint analysis.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Simplify the argument, e.g. '( struct foo* ) & bar' to 'bar' | |
strip = { traversal -> | |
if (traversal.clone().values('type')[0] == 'CastExpression') { | |
traversal = traversal.ithChildren('1') | |
} | |
traversal.values('code')[0].replace("& ", "").replace("* ", "") | |
} | |
// { 'aliasing_function': [ 'src_arg', 'dst_arg' ], ... } | |
ALIASING_FUNCTIONS = [ | |
'memcpy': [ '0', '1' ], | |
'bcopy': [ '1', '0'], | |
'copyin': [ '1', '0' ] | |
] | |
ALIASING_FUNCTIONS_KEYS = ALIASING_FUNCTIONS.keySet().toList() | |
// { 'overwriting_function': 'overwritten_arg', ... } | |
OVERWRITING_FUNCTIONS = [ | |
'bzero': '0', | |
'memset': '0' | |
] | |
OVERWRITING_FUNCTIONS_KEYS = OVERWRITING_FUNCTIONS.keySet().toList() | |
// Very crude alias analysis, returns a map of the form: | |
// { "overwritten_variable": "variables_it_was_overwritten_with (if any)" } | |
aliasAnalysis = { curNode -> | |
retval = [:]; | |
g.V(curNode) | |
.astNodes().has('type', 'Callee') | |
.has('code', P.within(*OVERWRITING_FUNCTIONS_KEYS)) | |
.sideEffect { childNum = OVERWRITING_FUNCTIONS[it.get().value('code')] } | |
.in(AST_EDGE).sideEffect { | |
var = g.V(it.get()).ithArguments(childNum); | |
retval.put(strip(var), '') | |
}.iterate(); | |
g.V(curNode) | |
.astNodes().has('type', 'Callee') | |
.has('code', P.within(*ALIASING_FUNCTIONS_KEYS)) | |
.sideEffect { childNums = ALIASING_FUNCTIONS[it.get().value('code')] } | |
.in(AST_EDGE).sideEffect { | |
from = g.V(it.get()).ithArguments(childNums[0]); | |
to = g.V(it.get()).ithArguments(childNums[1]); | |
retval.put(strip(from), strip(to)) | |
}.iterate(); | |
// Find new assignments involving variables of interest | |
// TODO: this is going to have tons of FPs from call expressions, filter them out, | |
// create a verison of astNodes() that allows filtering as you go down | |
g.V(curNode) | |
.astNodes() | |
.has('type', 'AssignmentExpression').has('operator', '=') | |
.sideEffect { | |
from = g.V(it.get()).ithChildren('0'); | |
to = g.V(it.get()).ithChildren('1'); | |
retval.put(strip(from), strip(to)) | |
}.iterate(); | |
retval | |
} | |
TERMINATION_FUNCTION_MAP = [ | |
'malloc': '0', | |
'realloc': '1' | |
] | |
TERMINATION_FUNCTION_NAMES = TERMINATION_FUNCTION_MAP.keySet().toList() | |
terminationCriteria = { curNode, taintList, sanitizedList, path -> | |
if (path.size() > 30) { | |
println("Path too long " + path.toString()); | |
return []; // Terminate search (unsuccessfully) | |
} | |
interesting = g.V(curNode) | |
.astNodes().has('type', 'Callee') | |
.or( | |
__.has('code', 'malloc').in(AST_EDGE).ithArguments('0') | |
.not(has('code', P.within(*sanitizedList.toList()))) | |
.astNodes().has('type', 'MultiplicativeExpression') | |
.astNodes().has('code', P.within(*taintList.toList())), | |
__.has('code', 'copyout').in(AST_EDGE).ithArguments('2') | |
.not(has('code', P.within(*sanitizedList.toList()))) | |
.astNodes().has('type', 'MultiplicativeExpression') | |
.astNodes().has('code', P.within(*taintList.toList())), | |
__.has('code', 'realloc').in(AST_EDGE).ithArguments('1') | |
.not(has('code', P.within(*sanitizedList.toList()))) | |
.astNodes().has('code', P.within(*taintList.toList())) | |
)[0] | |
if (interesting == null) { | |
return null; // Continue search | |
} | |
return path; // Terminate search (successfully!) | |
} | |
sanitizes = { curNode -> | |
if (curNode.value('type') != 'Condition') { | |
return []; | |
} | |
return g.V(curNode) | |
.astNodes().has('type', 'RelationalExpression').has('operator', P.within('>', '>=')) | |
.ithChildren('0') | |
.has('type', P.within('Identifier', 'MemberAccess', 'PtrMemberAccess')) | |
.values('code').toList() | |
} | |
/* | |
* @param visited Map from visited node IDs to the taint, sanitized lists when those | |
* nodes were visited to avoid searching if we've already searched down | |
* that path. | |
*/ | |
__cfgWalkWithAliasAnalysis = { curNode, taintList, sanitizedList, sanitizes, | |
terminationCriteria, path, visited -> | |
// Expand sanitized, taint lists | |
for (aliased in aliasAnalysis(curNode)) { | |
if (sanitizedList.contains(aliased.value)) { | |
sanitizedList.add(aliased.key); | |
} else if (taintList.contains(aliased.value) || taintList.contains(aliased.value.split(" (->|\\.|\\[)")[0])) { | |
taintList.add(aliased.key); | |
} else { | |
sanitizedList.remove(aliased.key); | |
taintList.remove(aliased.key); | |
} | |
} | |
sanitizedList.addAll(sanitizes(curNode)); | |
taintList.removeAll(sanitizedList); | |
println(String.format("At node %d with taint/sanitized lists %s/%s.", | |
curNode.id(), taintList.toString(), sanitizedList.toString())); | |
// Fail if no more tainted variables are being tracked. | |
if (taintList.size() == 0) { | |
println("Taint list empty " + curNode.toString()); | |
return []; | |
} | |
// Check terminationCriteria | |
// terminationCriteria returns null to continue, or a list otherwise | |
// (this list is either [] or path, indicating failure or success) | |
termination = terminationCriteria(curNode, taintList, sanitizedList, path); | |
if (termination != null) { | |
return termination; | |
} | |
visited.put(curNode.id(), [sanitized:sanitizedList, taint:taintList]); | |
// Sort children so that we search those with the most incoming CFG edges | |
// first. This is an optimization for 'visited' (TODO: explain the | |
// rationale about ordering and avoiding long paths for visited.) | |
children = g.V(curNode).out(CFG_EDGE); | |
try { | |
children = children.order().by(__.in(CFG_EDGE).count(), Order.decr).toList() | |
} catch(UnsupportedOperationException e) { | |
// Workaround for https://issues.apache.org/jira/browse/TINKERPOP-1216 | |
// due to old Tinkerpop3 version w/ octopus :( | |
} | |
for (child in children) { | |
// Do not visit already visited nodes | |
if (path.contains(child)) { | |
continue; | |
} else if (visited.get(child.id()) && | |
taintList == visited.get(child.id())['taint'] && | |
sanitizedList == visited.get(child.id())['sanitized']) { | |
continue; | |
} | |
result = __cfgWalkWithAliasAnalysis(child, taintList.clone(), sanitizedList.clone(), sanitizes, terminationCriteria, path + [curNode], visited); | |
if (result != []) { | |
return result; | |
} | |
} | |
return []; | |
} | |
cfgWalkWithAliasAnalysis = { startNode, taintedVariable -> | |
__cfgWalkWithAliasAnalysis(startNode, [ taintedVariable ] as Set, [] as Set, | |
sanitizes, terminationCriteria, [], [:]) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment