Skip to content

Instantly share code, notes, and snippets.

@kmark
Created August 21, 2015 15:50
Show Gist options
  • Star 4 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kmark/a01c1463242e435f6cb5 to your computer and use it in GitHub Desktop.
Save kmark/a01c1463242e435f6cb5 to your computer and use it in GitHub Desktop.
ProFinder - The ProGuard obfuscation tracker.
#!/usr/bin/env php
<?php
/*******************************************************************************************************************
* ProFinder v1.0 *
* http://forum.xda-developers.com/android/software/profinder-proguard-obfuscation-tracker-t3183647 *
*******************************************************************************************************************
* Copyright 2015 Kevin Mark *
* *
* Licensed under the Apache License, Version 2.0 (the "License"); *
* you may not use this file except in compliance with the License. *
* You may obtain a copy of the License at *
* *
* http://www.apache.org/licenses/LICENSE-2.0 *
* *
* Unless required by applicable law or agreed to in writing, software *
* distributed under the License is distributed on an "AS IS" BASIS, *
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
* See the License for the specific language governing permissions and *
* limitations under the License. *
*******************************************************************************************************************
* REQUIRED ARGUMENTS: *
* -c, --classes A comma-separated list of old classes to find in the new directory *
* -o, --old Path of the old version's smali directory *
* -n, --new Path of the new version's smali directory *
* *
* OPTIONS: *
* -l, --sig-len The minimum number of characters a particular signature string can be (default: 3) *
* -m, --sig-match The minimum pct of matches for a successful signature class match (default: 0.70) *
* -r, --sig-occr The minimum number of signature strings an old class must have (default: 3) *
* -t, --tolerance Only apply the diff method on new classes +/- this number of lines (default: 150) *
* *
* FLAGS: *
* -d, --always-try-diff Use the diff method even if signature matching was successful (default: off) *
* -v, --verbose Enable additional descriptive log output (default: off) *
*******************************************************************************************************************/
// Parse the command line options and place them into their respective constants.
$opts = getopt('c:dl:m:n:o:r:t:v', [ 'classes:', 'always-try-diff', 'sig-len:', 'sig-match:', 'new:', 'old:', 'sig-occr:', 'tolerance:', 'verbose' ]);
// getopt() can return false on failure; treat that as "no options given" so the lookups below keep working
// and the required-argument messages are shown instead of a type error.
if($opts === false) {
    $opts = [];
}
define('VERBOSE', array_key_exists('v', $opts) || array_key_exists('verbose', $opts));
println('ProFinder v1.0');
verbose(' with PHP %s', PHP_VERSION);
println('-------------');
// Tolerate whitespace around the commas ("a, b") and drop empty entries ("a,,b" or a trailing comma),
// neither of which could ever name a class file.
$oldClasses = array_values(array_filter(
    array_map('trim', explode(',',
        getSingleOpt($opts, 'c', 'classes', 'Specify a comma separated class list with -c or --classes'))),
    'strlen'
));
// Strip trailing directory separators from a smali directory argument. The class file lookup appends
// DIRECTORY_SEPARATOR to OLD_PATH and spliceClassPath() skips strlen(NEW_PATH) + 1 characters, so a
// path such as "smali/" would otherwise produce a double separator and a shifted class name.
$trimPath = function($path) {
    $trimmed = rtrim($path, '/' . DIRECTORY_SEPARATOR);
    // A path made up solely of separators (e.g. "/") must not collapse into an empty string
    return $trimmed === '' ? $path : $trimmed;
};
define('OLD_PATH', $trimPath(getSingleOpt($opts, 'o', 'old', 'Specify an old smali directory with -o or --old')));
define('NEW_PATH', $trimPath(getSingleOpt($opts, 'n', 'new', 'Specify a new smali directory with -n or --new')));
define('LINE_TOLERANCE', (int)getSingleOpt($opts, 't', 'tolerance', false, 150));
define('SIGNATURE_STR_MIN_LENGTH', (int)getSingleOpt($opts, 'l', 'sig-len', false, 3));
define('SIGNATURE_STR_MIN_OCCURRENCE', (int)getSingleOpt($opts, 'r', 'sig-occr', false, 3));
define('SIGNATURE_STR_MATCH_THRESHOLD', (double)getSingleOpt($opts, 'm', 'sig-match', false, 0.70));
define('ALWAYS_TRY_DIFF', array_key_exists('d', $opts) || array_key_exists('always-try-diff', $opts));
// Check the config constants for proper values or exit
sanityCheck();
// Get the smali statistics for both our old and new smali directories
verbose('Calculating old smali statistics...');
$oldSmaliStats = getSmaliStats(OLD_PATH);
verbose('Calculating new smali statistics...');
$newSmaliStats = getSmaliStats(NEW_PATH);
verbose('%d old classes, %d new classes', count($oldSmaliStats), count($newSmaliStats));
// Iterate over each old class we need to find in the new smali directory
foreach($oldClasses as $class) {
    verbose('-------------');
    verbose('Finding potential matches for %s...', $class);
    // Create the fully qualified old class file path from just the name
    $classPath = OLD_PATH . DIRECTORY_SEPARATOR . $class . '.smali';
    // A mistyped or missing class has no statistics; report it and move on instead of operating on null
    if(!isset($oldSmaliStats[$classPath])) {
        println('%s could not be found in the old smali directory.', $class);
        continue;
    }
    // A shortcut to the statistics for the old class: [0] = line count, [1] = signature strings
    $target = $oldSmaliStats[$classPath];
    // Arrays to store the matches for both methods
    $sigMatches = [];
    $diffMatches = [];
    // Always try the signature string method first. It's much faster than the diff method.
    $totalSigStrings = count($target[1]);
    verbose('This class has %d signature strings.', $totalSigStrings);
    // Only attempt the signature string method if we have enough signatures worth comparing
    if($totalSigStrings >= SIGNATURE_STR_MIN_OCCURRENCE) {
        verbose('Finding classes with matching signature strings...');
        $sigMatches = getSigMatches($target[1], $newSmaliStats);
        $totalSigMatches = count($sigMatches);
        verbose('Found %d classes with at least one common signature.', $totalSigMatches);
        // Only inspect the best match when one exists; with no matches there is no index 0 to read and a
        // match threshold of 0 must not be treated as satisfied by "nothing".
        if($totalSigMatches > 0) {
            // Cannot divide by zero: sanityCheck confirms SIGNATURE_STR_MIN_OCCURRENCE > 0 and we only get
            // here when $totalSigStrings is at least that large.
            $sigMatchPct = $sigMatches[0][1] / $totalSigStrings;
            // Have we found enough common strings to be considered a match?
            if($sigMatchPct >= SIGNATURE_STR_MATCH_THRESHOLD) {
                println('%s -> %s', $class, spliceClassPath($sigMatches[0][0]));
                verbose(' - %.2f%% signature match', $sigMatchPct * 100.00);
                if($totalSigMatches > 1) {
                    // List the second best match since it might be useful if our best match is incorrect
                    verbose(' - Next best match is %s with a %.2f%% signature match.', spliceClassPath($sigMatches[1][0]), $sigMatches[1][1] / $totalSigStrings * 100.00);
                }
                // Successful match. If ALWAYS_TRY_DIFF is false we will move onto the next old class
                if(!ALWAYS_TRY_DIFF) {
                    continue;
                }
            } else {
                // No new files meet the match threshold. Output the closest match anyway.
                verbose('Closest match is %s with %d intersecting signatures.', spliceClassPath($sigMatches[0][0]), $sigMatches[0][1]);
            }
        }
    }
    // If we've made it this far either ALWAYS_TRY_DIFF is true or we couldn't find a good signature match
    // Apply the diff | wc -l method
    verbose('Finding syntactically similar classes...');
    $diffMatches = getDiffMatches($classPath, $target[0], $newSmaliStats);
    $totalDiffMatches = count($diffMatches);
    verbose('%d classes fall within the line tolerance.', $totalDiffMatches);
    if($totalDiffMatches > 0) {
        println('%s -> %s', $class, spliceClassPath($diffMatches[0][0]));
        verbose(' - %d differences.', $diffMatches[0][1]);
        if($totalDiffMatches > 1) {
            verbose(' - Next best match is %s with %d differences.', spliceClassPath($diffMatches[1][0]), $diffMatches[1][1]);
        }
    } else {
        // ¯\_(ツ)_/¯
        println('No matching class could be located.');
    }
}
// Rank the new classes by how many of the target's signature strings they share.
// $targetSigs    - signature strings of the old class
// $newSmaliStats - map of path => [line count, signature strings] for the new classes
// Returns a list of [path, shared count] pairs, highest count first. Classes that share
// nothing with the target are left out, so the result may be empty.
function getSigMatches($targetSigs, $newSmaliStats) {
    $ranked = [];
    foreach($newSmaliStats as $candidatePath => $candidateStats) {
        // Every target signature that also appears in the candidate counts once
        $shared = array_intersect($targetSigs, $candidateStats[1]);
        $sharedCount = count($shared);
        if($sharedCount <= 0) {
            continue;
        }
        $ranked[] = [$candidatePath, $sharedCount];
    }
    if(count($ranked) === 0) {
        return $ranked;
    }
    // Descending by shared count: a negative result places $left ahead of $right
    usort($ranked, function($left, $right) {
        return $right[1] - $left[1];
    });
    return $ranked;
}
// Collect the new classes whose line count lies within LINE_TOLERANCE of the target and measure
// how much each one differs from the target file.
// $targetPath    - path of the old class file handed to diff
// $targetLines   - line count of the old class file
// $newSmaliStats - map of path => [line count, signature strings] for the new classes
// Returns a list of [path, difference count] pairs, fewest differences first.
function getDiffMatches($targetPath, $targetLines, $newSmaliStats) {
    $fewestLines = $targetLines - LINE_TOLERANCE;
    $mostLines = $targetLines + LINE_TOLERANCE;
    // The old file is the same for every comparison, so quote it once
    $quotedTarget = escapeshellarg($targetPath);
    $candidates = [];
    foreach($newSmaliStats as $candidatePath => $candidateStats) {
        $lineCount = $candidateStats[0];
        if($lineCount >= $fewestLines && $lineCount <= $mostLines) {
            // Side-by-side diff with common lines suppressed, piped to wc -l: the count of differing lines
            $command = 'diff -y --suppress-common-lines ' . $quotedTarget . ' ' . escapeshellarg($candidatePath) . ' | wc -l';
            $candidates[] = [$candidatePath, (int)trim(shell_exec($command))];
        }
    }
    if(count($candidates) === 0) {
        return $candidates;
    }
    // Ascending by difference count: a negative result places $left ahead of $right
    usort($candidates, function($left, $right) {
        return $left[1] - $right[1];
    });
    return $candidates;
}
// Recursively calculate the statistics needed to perform the class matching analysis.
// $path - directory to scan for .smali files
// Returns an array keyed by each .smali file's pathname whose values are
// [0 => number of newline characters in the file, 1 => array of signature strings].
function getSmaliStats($path) {
    $stats = [];
    $dir = new RecursiveDirectoryIterator($path);
    // Recursively loop over every file
    foreach(new RecursiveIteratorIterator($dir) as $file) {
        /** @var SplFileInfo $file */
        // Not .smali? Not interested.
        if($file->getExtension() !== 'smali') {
            continue;
        }
        // Open the file and read all of it to memory
        $handle = $file->openFile();
        $size = $handle->getSize();
        // SplFileObject::fread() rejects a length of zero, so an empty file is treated as an empty string
        // instead of being passed to fread()
        $contents = $size > 0 ? $handle->fread($size) : '';
        // Close file
        $handle = null;
        // A failed read yields false; fall back to an empty string so the counting below gets a string
        if($contents === false) {
            $contents = '';
        }
        // Use the pathname as the index since it should be unique and conveniently contains the class name
        $stats[$file->getPathname()] = [
            // [0] = number of lines in the file
            substr_count($contents, "\n"),
            // [1] = array of signature strings found in the file
            getSigStrings($contents)
        ];
    }
    return $stats;
}
// Use a regular expression to get an array of signature strings from the given smali contents.
// A signature string is the literal of a const-string (or const-string/jumbo) instruction that is at
// least SIGNATURE_STR_MIN_LENGTH characters long.
// $contents - the full text of a smali file
// Returns the list of matched string literals (without the surrounding quotes), or an empty array.
function getSigStrings($contents) {
    // (?:\/jumbo)? also accepts the jumbo form of the opcode, which carries the same kind of literal.
    // \r? lets the end-of-line anchor match in files with CRLF line endings; without it the closing
    // quote is followed by "\r" rather than the line end and no instruction would ever match.
    $pattern = '/^\\s*const-string(?:\\/jumbo)? .+, "(.{' . SIGNATURE_STR_MIN_LENGTH . ',})"\\r?$/m';
    if(preg_match_all($pattern, $contents, $matches)) {
        return $matches[1];
    }
    return [];
}
// Print a printf-style formatted message followed by a newline.
// $format - the format string; any further arguments are substituted into it
function println($format) {
    $pieces = func_get_args();
    // Terminate the line by extending the format string itself
    $pieces[0] = $format . "\n";
    echo call_user_func_array('sprintf', $pieces);
}
// Forward a printf-style message to println(), but only when the VERBOSE constant is truthy.
// $format - the format string; any further arguments are passed along unchanged
function verbose($format) {
    if(!VERBOSE) {
        return;
    }
    call_user_func_array('println', func_get_args());
}
// Extract a single option value, preferring the short form over the long form.
// $options - the array produced by getopt()
// $short   - the short option name (without the dash)
// $long    - the long option name (without the dashes)
// $die     - message to print before exiting with status 1 when the option is missing,
//            or false if the option is optional
// $default - value returned when the option is missing and $die is false
// Returns the option's value, or $default. When an option was given more than once the last
// occurrence is returned, so callers always receive a single value.
function getSingleOpt($options, $short, $long, $die = false, $default = null) {
    foreach([$short, $long] as $name) {
        if(!array_key_exists($name, $options)) {
            continue;
        }
        $value = $options[$name];
        // getopt() hands back an array when an option is repeated; honour the last occurrence
        // rather than passing an array on to explode() or the scalar casts at the call sites
        if(is_array($value)) {
            $value = end($value);
        }
        return $value;
    }
    if($die !== false) {
        println($die);
        exit(1);
    }
    return $default;
}
// Reduce a smali file path to the class name it represents by dropping the leading NEW_PATH
// (plus the separator that follows it) and the trailing ".smali" extension.
// $class - path of a smali file located beneath NEW_PATH
function spliceClassPath($class) {
    // Skip the directory prefix and the one separator character after it
    $prefixLength = strlen(NEW_PATH) + 1;
    $extensionLength = strlen('.smali');
    return substr($class, $prefixLength, -$extensionLength);
}
// Validate the runtime configuration constants. The first rule that is violated has its message
// printed and the script exits with status 1; if every rule passes the function simply returns.
function sanityCheck() {
    // Each rule pairs an error message with a closure that reports whether the rule is violated.
    // The closures run lazily and in order, so only the first problem is ever reported.
    $rules = [
        ['The old smali path is not a directory.', function() {
            return !is_dir(OLD_PATH);
        }],
        ['The new smali path is not a directory.', function() {
            return !is_dir(NEW_PATH);
        }],
        ['The line tolerance must be a non-negative whole number.', function() {
            return LINE_TOLERANCE < 0;
        }],
        ['The minimum signature string length must be greater than zero.', function() {
            return SIGNATURE_STR_MIN_LENGTH < 1;
        }],
        ['The minimum signature occurrence must be greater than zero.', function() {
            return SIGNATURE_STR_MIN_OCCURRENCE < 1;
        }],
        ['The minimum signature match threshold must be from 0 (0%%) to 1 (100%%) in decimal form.', function() {
            return SIGNATURE_STR_MATCH_THRESHOLD > 1 || SIGNATURE_STR_MATCH_THRESHOLD < 0;
        }],
    ];
    foreach($rules as $rule) {
        list($message, $isViolated) = $rule;
        if($isViolated()) {
            println($message);
            exit(1);
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment