Created
March 4, 2024 19:40
-
-
Save mblackman/2f95a93466f31c1e20e5e9491766b2e0 to your computer and use it in GitHub Desktop.
Compare IPA File Contents
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
#
# Compare the contents of two IPA archives.
#
# Usage: <this script> IPA1 IPA2
#
# Both archives are extracted beneath .tmp/ next to this script, and all
# reports are written beneath output/ next to this script.
#
# The interpreter is bash (not sh) so that bash-only constructs are available
# to the script.
set -e

# --- Input Parameters ---
# Default to empty so a missing argument can be detected with a -z test.
IPA1=${1:-}
IPA2=${2:-}

# --- Constants ---
# Absolute directory containing this script; quoted so paths with spaces work.
THIS_DIR=$(cd "$(dirname "$0")" && pwd)
TMP=${THIS_DIR}/.tmp
OUTPUT_DIR=${THIS_DIR}/output
OUTPUT_FILE=${OUTPUT_DIR}/output.txt
COMMON_FILES=${OUTPUT_DIR}/common_files.txt

# Per-IPA extraction directories and the Payload folder inside each.
DIR1=${TMP}/ipa1_$(basename "$IPA1")
DIR2=${TMP}/ipa2_$(basename "$IPA2")
PATH1=${DIR1}/Payload/
PATH2=${DIR2}/Payload/

# Full per-IPA file lists. They are first written to the current working
# directory under these names and later moved to ${OUTPUT_DIR}/<same name>.
FILE_LIST_1=filelist1.txt
FILE_LIST_2=filelist2.txt

# Lists of files present in only one IPA. These names must differ from
# FILE_LIST_1/FILE_LIST_2: the full lists are moved into OUTPUT_DIR under
# their own names and would otherwise overwrite these reports.
IPA_UNIQUE_1=${OUTPUT_DIR}/unique_to_ipa1.txt
IPA_UNIQUE_2=${OUTPUT_DIR}/unique_to_ipa2.txt

# Destinations for the detailed per-file comparison results.
TEXT_DIR="${OUTPUT_DIR}/text_matches"
BINARY_DIR="${OUTPUT_DIR}/binary_matches"
# --- Helper Functions --- | |
# Print the command-line synopsis to stderr and terminate with status 1.
# Diagnostics go to stderr so they never mix with data written to stdout.
usage() {
  echo "Usage: $0 IPA1 IPA2" >&2
  exit 1
}
# --- Input Validation ---
# Both arguments are mandatory.
if [ -z "$IPA1" ] || [ -z "$IPA2" ]; then
  usage
fi

# Each argument must name an existing regular file. A plain existence test
# (-e) would also accept a directory, which cannot be unzipped later on.
# The error is reported on stderr before the usage text is shown.
for ipa in "$IPA1" "$IPA2"; do
  if ! [ -f "$ipa" ]; then
    echo "ipa file '$ipa' is not found!" >&2
    usage
  fi
done
# --- Setup ---

# Unpack archive $1 into directory $2; on failure print a message naming the
# archive and terminate the script with status 1.
extract_ipa() {
  if ! unzip -q "$1" -d "$2"; then
    echo "Error extracting $1"
    exit 1
  fi
}

# Make sure the scratch area exists, then rebuild the output tree from
# scratch so results of a previous run cannot leak into this one.
mkdir -p "$TMP"
rm -rf "$OUTPUT_DIR"
mkdir -p "$OUTPUT_DIR"
mkdir -p "$TEXT_DIR" "$BINARY_DIR"

# Remove any earlier extraction of the same archives before unpacking again.
rm -rf "$DIR1" "$DIR2"
extract_ipa "$IPA1" "$DIR1"
extract_ipa "$IPA2" "$DIR2"
# --- Generate Output ---
# Begin the report with a fresh file: a title, the current date, the two
# archive arguments as given, and a blank separator line. The single
# truncating redirect replaces any previous content of the report.
{
  echo "IPA Comparison Results:"
  echo "Date: $(date)"
  echo "IPA1: $IPA1"
  echo "IPA2: $IPA2"
  echo ""
} > "$OUTPUT_FILE"
# --- File Comparison and Statistics ---

# Print every regular file found under the *.app bundles of directory $1, as
# paths relative to the bundle root, sorted byte-wise.
# Sorting is required because comm only works on sorted input. Changing into
# the directory first keeps the (arbitrary) absolute path out of the sed
# pattern, so special characters in it cannot break the substitution.
list_app_files() {
  (cd "$1" && find ./*.app -type f) | sed 's|^\./[^/]*/||' | LC_ALL=C sort
}

# Print the number of lines in file $1 without the padding some wc
# implementations (BSD/macOS) put in front of the count.
count_lines() {
  wc -l < "$1" | tr -d ' '
}

# Print 100 * $1 / $2 with two decimals. Prints 0.00 when $2 is not a
# positive integer, which avoids a division by zero for empty file lists.
percent_of() {
  if [ "${2:-0}" -gt 0 ] 2>/dev/null; then
    LC_ALL=C awk -v n="$1" -v d="$2" 'BEGIN { printf "%.2f", 100 * n / d }'
  else
    printf '0.00'
  fi
}

list_app_files "$PATH1" > "$FILE_LIST_1"
list_app_files "$PATH2" > "$FILE_LIST_2"

# Find common files (comm runs in the same locale the lists were sorted in)
LC_ALL=C comm -12 "$FILE_LIST_1" "$FILE_LIST_2" > "$COMMON_FILES"
num_common_files=$(count_lines "$COMMON_FILES")

# Find files unique to each IPA
LC_ALL=C comm -23 "$FILE_LIST_1" "$FILE_LIST_2" > "$IPA_UNIQUE_1"
LC_ALL=C comm -13 "$FILE_LIST_1" "$FILE_LIST_2" > "$IPA_UNIQUE_2"
num_ipa1_unique=$(count_lines "$IPA_UNIQUE_1")
num_ipa2_unique=$(count_lines "$IPA_UNIQUE_2")

# Total file counts (approximate if files in multiple .app directories)
num_files_ipa1=$(count_lines "$FILE_LIST_1")
num_files_ipa2=$(count_lines "$FILE_LIST_2")

# Percentage calculations.
# The share of common files is taken over the union of both lists (each
# common path counted once), so two identical IPAs report 100%.
num_union=$((num_files_ipa1 + num_files_ipa2 - num_common_files))
percentage_common=$(percent_of "$num_common_files" "$num_union")
percentage_ipa1_unique=$(percent_of "$num_ipa1_unique" "$num_files_ipa1")
percentage_ipa2_unique=$(percent_of "$num_ipa2_unique" "$num_files_ipa2")

# Add findings to output
echo "---- File Overlap Statistics ----" >> "$OUTPUT_FILE"
echo "Files common to both IPAs: $num_common_files ($percentage_common%)" >> "$OUTPUT_FILE"
echo "Files unique to $IPA1: $num_ipa1_unique ($percentage_ipa1_unique%)" >> "$OUTPUT_FILE"
echo "Files unique to $IPA2: $num_ipa2_unique ($percentage_ipa2_unique%)" >> "$OUTPUT_FILE"

# Clean files by moving output to output directory
mv "$FILE_LIST_1" "${OUTPUT_DIR}/$FILE_LIST_1"
mv "$FILE_LIST_2" "${OUTPUT_DIR}/$FILE_LIST_2"
# --- Detailed Content Comparison ---
# Take the first directory named *.app that find reports beneath each
# Payload folder as the bundle to compare.
APP_PATH1=$(find "$PATH1" -type d -name "*.app" | head -n1)
APP_PATH2=$(find "$PATH2" -type d -name "*.app" | head -n1)

# Ensure the paths were found: if either lookup came back empty, record the
# failure in the report and stop with status 1.
for app_path in "$APP_PATH1" "$APP_PATH2"; do
  if [ -z "$app_path" ]; then
    echo "Error: .app directory not found." >> "$OUTPUT_FILE"
    exit 1
  fi
done

# Write the heading line of each detailed result file.
printf '%s\n' "---- Plaintext file matches ----" >> "$TEXT_DIR/output.txt"
printf '%s\n' "---- Binary hash matches ----" >> "$BINARY_DIR/output.txt"
# Find line-by-line matches in plaintext files and detect identical binary
# files.

# Compare one file that exists in both bundles.
# Arguments: $1 - path of the file relative to the .app root
# Globals:   APP_PATH1, APP_PATH2 (read); TEXT_DIR, BINARY_DIR (written into)
# Only POSIX sh syntax is used here, so the function works under any sh.
# Variables are not declared local because POSIX sh has no such keyword.
compare_common_file() {
  rel_path=$1
  case "$rel_path" in
    *.txt | *.plist | *.xml)
      # Lines of the second file that also occur in the first one. awk exits
      # 0 whether or not anything matched, so its output (not its status) is
      # what decides. A read error is treated as "nothing shared".
      shared_lines=$(awk 'NR==FNR{a[$0];next} ($0 in a)' \
        "$APP_PATH1/$rel_path" "$APP_PATH2/$rel_path") || shared_lines=""
      if [ -n "$shared_lines" ]; then
        printf '%s\n' "$rel_path" >> "$TEXT_DIR/output.txt"
        # Flatten the relative path into a single file name.
        flat_name=$(printf '%s' "$rel_path" | tr '/' '_')
        printf '%s\n' "$shared_lines" \
          >> "${TEXT_DIR}/shared_content_${flat_name}"
      fi
      ;;
    *)
      # A byte-wise comparison answers the same question as comparing
      # checksums, without needing md5sum to be installed.
      if cmp -s "$APP_PATH1/$rel_path" "$APP_PATH2/$rel_path"; then
        printf '%s\n' "$rel_path" >> "$BINARY_DIR/output.txt"
      fi
      ;;
  esac
}

while IFS= read -r file; do
  compare_common_file "$file"
done < "${COMMON_FILES}"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
On macOS, you may need to install a package that provides the `md5sum` command, which this script uses to compare files.