Skip to content

Instantly share code, notes, and snippets.

@mblackman
Created March 4, 2024 19:40
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mblackman/2f95a93466f31c1e20e5e9491766b2e0 to your computer and use it in GitHub Desktop.
Save mblackman/2f95a93466f31c1e20e5e9491766b2e0 to your computer and use it in GitHub Desktop.
Compare IPA File Contents
#!/bin/sh
# Compare the file contents of two IPA archives.
#
# Usage: <this script> IPA1 IPA2
#
# Scratch data is kept in .tmp/ and results in output/, both located next to
# this script.
set -e
# --- Input Parameters ---
IPA1=$1
IPA2=$2
# --- Constants ---
# Directory containing this script. Every expansion is quoted so a path with
# spaces survives, and CDPATH is cleared so `cd` cannot echo the directory name
# into the captured output.
THIS_DIR=$(CDPATH= cd -- "$(dirname -- "$0")" && pwd)
TMP=${THIS_DIR}/.tmp
OUTPUT_DIR=${THIS_DIR}/output
OUTPUT_FILE=${OUTPUT_DIR}/output.txt
COMMON_FILES=${OUTPUT_DIR}/common_files.txt
# Extraction directory for each archive, named after the archive file.
DIR1=${TMP}/ipa1_$(basename "$IPA1")
DIR2=${TMP}/ipa2_$(basename "$IPA2")
# Trailing slash is intentional: later path handling appends "/" to these.
PATH1=${DIR1}/Payload/
PATH2=${DIR2}/Payload/
# Full per-IPA file lists. They are first written to the current working
# directory under these names and moved into OUTPUT_DIR once the comparison
# is finished.
FILE_LIST_1=filelist1.txt
FILE_LIST_2=filelist2.txt
# Lists of files present in only one IPA. These must not be named
# ${OUTPUT_DIR}/filelistN.txt: that is where the full lists are moved to, and
# the move would overwrite the unique lists.
IPA_UNIQUE_1=${OUTPUT_DIR}/unique_to_ipa1.txt
IPA_UNIQUE_2=${OUTPUT_DIR}/unique_to_ipa2.txt
TEXT_DIR="${OUTPUT_DIR}/text_matches"
BINARY_DIR="${OUTPUT_DIR}/binary_matches"
# --- Helper Functions ---
# Print the command-line synopsis on stdout and terminate with exit status 1.
usage() {
  printf 'Usage: %s IPA1 IPA2\n' "$0"
  exit 1
}
# --- Input Validation ---
# Both archive paths are mandatory; show the synopsis when either is missing.
if ! { [ -n "$IPA1" ] && [ -n "$IPA2" ]; }; then
  usage
fi
# Each supplied path has to exist. The first one that does not is reported
# and the script stops via usage.
for ipa_arg in "$IPA1" "$IPA2"; do
  if [ ! -e "$ipa_arg" ]; then
    echo "ipa file '$ipa_arg' is not found!"
    usage
  fi
done
# --- Setup ---
# Make sure the scratch area exists, then rebuild the output tree from scratch
# so results of an earlier run cannot leak into this one.
mkdir -p "$TMP"
rm -rf "$OUTPUT_DIR"
mkdir -p "$OUTPUT_DIR" "$TEXT_DIR" "$BINARY_DIR"
# Drop any previous extraction of the same archives before unpacking again.
rm -rf "$DIR1" "$DIR2"
if ! unzip -q "$IPA1" -d "$DIR1"; then
  echo "Error extracting $IPA1"
  exit 1
fi
if ! unzip -q "$IPA2" -d "$DIR2"; then
  echo "Error extracting $IPA2"
  exit 1
fi
# --- Generate Output ---
# Start the report afresh with a header naming the run time and both inputs,
# followed by a blank separator line.
{
  echo "IPA Comparison Results:"
  echo "Date: $(date)"
  echo "IPA1: $IPA1"
  echo "IPA2: $IPA2"
  echo ""
} > "$OUTPUT_FILE"
# --- File Comparison and Statistics ---

# Print every regular file inside the *.app bundles directly under directory
# $1, as a path relative to its bundle, one per line, sorted in the C locale.
# Sorting is required because comm(1) only works on sorted input. Paths are
# made relative by running find from inside the bundle, so no directory name
# ever has to be embedded in a regular expression.
# NOTE: file names containing newlines are not supported by this line format.
list_app_files() {
  for app_dir in "$1"/*.app; do
    # An unmatched glob stays literal; skip it (and anything not a directory).
    [ -d "$app_dir" ] || continue
    (cd "$app_dir" && find . -type f) | sed 's|^\./||'
  done | LC_ALL=C sort
}

# Print the number of lines in file $1 with no surrounding whitespace
# (BSD wc pads its output with spaces).
count_lines() {
  wc -l < "$1" | tr -d '[:space:]'
}

# Print $1 as a percentage of $2 with two decimals. Prints 0.00 when $2 is
# zero instead of failing on a division by zero.
percent() {
  awk -v part="$1" -v whole="$2" 'BEGIN {
    if (whole + 0 == 0) {
      print "0.00"
    } else {
      printf "%.2f\n", part * 100 / whole
    }
  }'
}

list_app_files "$PATH1" > "$FILE_LIST_1"
list_app_files "$PATH2" > "$FILE_LIST_2"
# comm must use the same collation order the lists were sorted with.
# Find common files
LC_ALL=C comm -12 "$FILE_LIST_1" "$FILE_LIST_2" > "$COMMON_FILES"
# Find files unique to each IPA
LC_ALL=C comm -23 "$FILE_LIST_1" "$FILE_LIST_2" > "$IPA_UNIQUE_1"
LC_ALL=C comm -13 "$FILE_LIST_1" "$FILE_LIST_2" > "$IPA_UNIQUE_2"
num_common_files=$(count_lines "$COMMON_FILES")
num_ipa1_unique=$(count_lines "$IPA_UNIQUE_1")
num_ipa2_unique=$(count_lines "$IPA_UNIQUE_2")
# Total file counts (approximate if files in multiple .app directories)
num_files_ipa1=$(count_lines "$FILE_LIST_1")
num_files_ipa2=$(count_lines "$FILE_LIST_2")
# Common files are measured against the number of distinct paths across both
# IPAs (each shared path counted once), so two identical IPAs report 100%.
num_distinct_files=$((num_files_ipa1 + num_files_ipa2 - num_common_files))
percentage_common=$(percent "$num_common_files" "$num_distinct_files")
percentage_ipa1_unique=$(percent "$num_ipa1_unique" "$num_files_ipa1")
percentage_ipa2_unique=$(percent "$num_ipa2_unique" "$num_files_ipa2")
# Add findings to output
echo "---- File Overlap Statistics ----" >> "$OUTPUT_FILE"
echo "Files common to both IPAs: $num_common_files ($percentage_common%)" >> "$OUTPUT_FILE"
echo "Files unique to $IPA1: $num_ipa1_unique ($percentage_ipa1_unique%)" >> "$OUTPUT_FILE"
echo "Files unique to $IPA2: $num_ipa2_unique ($percentage_ipa2_unique%)" >> "$OUTPUT_FILE"
# Clean files by moving output to output directory
mv "$FILE_LIST_1" "${OUTPUT_DIR}/$FILE_LIST_1"
mv "$FILE_LIST_2" "${OUTPUT_DIR}/$FILE_LIST_2"
# --- Detailed Content Comparison ---

# Print the first *.app directory located directly under directory $1, or
# nothing when there is none. Only the top level is searched so a bundle
# nested inside another bundle is never picked, and the glob's sorted order
# makes the choice deterministic.
first_app_dir() {
  for candidate in "$1"/*.app; do
    if [ -d "$candidate" ]; then
      printf '%s\n' "$candidate"
      return 0
    fi
  done
  return 0
}

APP_PATH1=$(first_app_dir "$PATH1")
APP_PATH2=$(first_app_dir "$PATH2")
# Ensure the paths were found
if [ -z "$APP_PATH1" ] || [ -z "$APP_PATH2" ]; then
  # Record the failure in the report and also show it on stderr.
  echo "Error: .app directory not found." | tee -a "$OUTPUT_FILE" >&2
  exit 1
fi
echo "---- Plaintext file matches ----" >> "$TEXT_DIR/output.txt"
echo "---- Binary hash matches ----" >> "$BINARY_DIR/output.txt"
# For every path present in both IPAs: text-like files are checked for lines
# they share, everything else is checked for being byte-for-byte identical.
# Only POSIX sh constructs are used, matching the script's #!/bin/sh shebang.
while IFS= read -r file; do
  case "$file" in
    *.txt | *.plist | *.xml)
      # Flatten the relative path into a single file name.
      flat_name=$(printf '%s' "$file" | tr '/' '_')
      shared_file="${TEXT_DIR}/shared_content_${flat_name}"
      # awk exits 0 even when nothing matches, so the result file must be
      # non-empty for the pair to count as a match. Running awk inside the
      # `if` keeps an unreadable file from aborting the script under set -e.
      if awk 'NR==FNR{a[$0];next} ($0 in a)' "$APP_PATH1/$file" "$APP_PATH2/$file" > "$shared_file" \
        && [ -s "$shared_file" ]; then
        echo "$file" >> "$TEXT_DIR/output.txt"
      else
        rm -f "$shared_file"
      fi
      ;;
    *)
      # cmp -s succeeds only when both files exist and are identical. Unlike
      # comparing md5sum output, it needs no extra package and cannot report
      # a match because the hashing tool is missing and both hashes are empty.
      if cmp -s "$APP_PATH1/$file" "$APP_PATH2/$file"; then
        echo "$file" >> "$BINARY_DIR/output.txt"
      fi
      ;;
  esac
done < "${COMMON_FILES}"
@mblackman
Copy link
Author

On macOS, the `md5sum` command used by this script is not installed by default. You can install it with Homebrew:

brew install md5sha1sum

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment