mblackman/compare-ipa.sh

## compare-ipa.sh
#!/usr/bin/env bash
#
# compare-ipa.sh - compare the contents of two iOS .ipa archives.
#
# Usage: compare-ipa.sh IPA1 IPA2
#
# bash rather than sh: the script has been written against bash and is not
# guaranteed to be clean POSIX sh.

set -e

# --- Input Parameters ---
# Paths of the two .ipa archives to compare (checked further down).
IPA1=$1
IPA2=$2

# --- Constants ---
# Directory holding this script. Every expansion is quoted so that a path
# containing spaces survives, CDPATH is neutralised so cd prints nothing, and
# '&&' keeps pwd from reporting the wrong directory after a failed cd.
THIS_DIR=$(CDPATH='' cd -- "$(dirname -- "$0")" && pwd)

# Scratch area for the unpacked archives, and the directory for all reports.
TMP="${THIS_DIR}/.tmp"
OUTPUT_DIR="${THIS_DIR}/output"
OUTPUT_FILE="${OUTPUT_DIR}/output.txt"
COMMON_FILES="${OUTPUT_DIR}/common_files.txt"

# Extraction directory of each archive and the Payload directory inside it.
DIR1="${TMP}/ipa1_$(basename -- "$IPA1")"
DIR2="${TMP}/ipa2_$(basename -- "$IPA2")"
PATH1="${DIR1}/Payload/"
PATH2="${DIR2}/Payload/"

# Bare names of the complete per-IPA file lists; the finished lists end up in
# OUTPUT_DIR under these names.
FILE_LIST_1=filelist1.txt
FILE_LIST_2=filelist2.txt

# Lists of files found in only one of the archives. They need names of their
# own: pointing them at OUTPUT_DIR/filelist{1,2}.txt makes them share a path
# with the complete lists, and one of the two gets overwritten.
IPA_UNIQUE_1="${OUTPUT_DIR}/unique_to_ipa1.txt"
IPA_UNIQUE_2="${OUTPUT_DIR}/unique_to_ipa2.txt"

# Per-category result directories for the content comparison.
TEXT_DIR="${OUTPUT_DIR}/text_matches"
BINARY_DIR="${OUTPUT_DIR}/binary_matches"

# --- Helper Functions ---
#######################################
# Print the command-line synopsis and terminate the script.
# Arguments: none
# Outputs:   one "Usage: ..." line on stderr (it is a diagnostic, so it must
#            not end up in captured or redirected stdout)
# Returns:   never returns; exits with status 1
#######################################
usage() {
  printf 'Usage: %s IPA1 IPA2\n' "$0" >&2
  exit 1
}

# --- Comparison helpers ---
# Written in portable sh (no [[ ]], no ${var//...}) so the result does not
# depend on which shell ends up interpreting the script.

# Print the first *.app directory directly inside Payload directory $1, or
# nothing when there is none.
first_app_dir() {
  for app_candidate in "${1%/}"/*.app; do
    if [ -d "$app_candidate" ]; then
      printf '%s\n' "$app_candidate"
      return 0
    fi
  done
  return 0
}

# Print every regular file below directory $1, relative to $1. The output is
# sorted in the C locale because comm(1) only works on sorted input.
list_files() {
  (cd "$1" && find . -type f) | sed 's|^\./||' | LC_ALL=C sort
}

# Print the number of lines in file $1 as a bare integer (some wc
# implementations pad the number with blanks).
count_lines() {
  echo $(( $(wc -l < "$1") ))
}

# Print 100 * $1 / $2 with two decimals; prints 0.00 when $2 is zero.
percent() {
  awk -v part="$1" -v whole="$2" \
    'BEGIN { printf "%.2f\n", (whole > 0) ? part * 100 / whole : 0 }'
}

# --- Input Validation ---
# Both archive paths are mandatory and must exist before any work starts.
if [ -z "$IPA1" ] || [ -z "$IPA2" ]; then
  usage
fi

for ipa in "$IPA1" "$IPA2"; do
  if ! [ -e "$ipa" ]; then
    echo "ipa file '$ipa' is not found!" >&2
    usage
  fi
done

# --- Setup ---
# Start from an empty output directory and fresh extraction directories.
# ${var:?} aborts instead of running rm -rf on an empty path.
mkdir -p "$TMP"
rm -rf -- "${OUTPUT_DIR:?}"
mkdir -p "$OUTPUT_DIR" "$TEXT_DIR" "$BINARY_DIR"

rm -rf -- "${DIR1:?}" "${DIR2:?}"
unzip -q "$IPA1" -d "$DIR1" || { echo "Error extracting $IPA1" >&2; exit 1; }
unzip -q "$IPA2" -d "$DIR2" || { echo "Error extracting $IPA2" >&2; exit 1; }

# --- Generate Output ---
{
  echo "IPA Comparison Results:"
  echo "Date: $(date)"
  echo "IPA1: $IPA1"
  echo "IPA2: $IPA2"
  echo ""
} > "$OUTPUT_FILE"

# --- Locate the application bundles ---
# Done before any statistics: everything below needs both bundles.
APP_PATH1=$(first_app_dir "$PATH1")
APP_PATH2=$(first_app_dir "$PATH2")

if [ -z "$APP_PATH1" ] || [ -z "$APP_PATH2" ]; then
  # Recorded in the report and shown to the user.
  echo "Error: .app directory not found." | tee -a "$OUTPUT_FILE" >&2
  exit 1
fi

# --- File Comparison and Statistics ---
# Complete file lists, written straight to their final location.
file_list_1="${OUTPUT_DIR}/${FILE_LIST_1}"
file_list_2="${OUTPUT_DIR}/${FILE_LIST_2}"

# The "unique" lists must not share a path with the complete lists, otherwise
# one overwrites the other; fall back to dedicated names if they collide.
unique_list_1=$IPA_UNIQUE_1
unique_list_2=$IPA_UNIQUE_2
if [ "$unique_list_1" = "$file_list_1" ]; then
  unique_list_1="${OUTPUT_DIR}/unique_to_ipa1.txt"
fi
if [ "$unique_list_2" = "$file_list_2" ]; then
  unique_list_2="${OUTPUT_DIR}/unique_to_ipa2.txt"
fi

# Paths are relative to each bundle, so differently named .app directories
# still line up.
list_files "$APP_PATH1" > "$file_list_1"
list_files "$APP_PATH2" > "$file_list_2"

# Same locale as the sort above, so comm agrees on the ordering.
LC_ALL=C comm -12 "$file_list_1" "$file_list_2" > "$COMMON_FILES"
LC_ALL=C comm -23 "$file_list_1" "$file_list_2" > "$unique_list_1"
LC_ALL=C comm -13 "$file_list_1" "$file_list_2" > "$unique_list_2"

num_common_files=$(count_lines "$COMMON_FILES")
num_ipa1_unique=$(count_lines "$unique_list_1")
num_ipa2_unique=$(count_lines "$unique_list_2")
num_files_ipa1=$(count_lines "$file_list_1")
num_files_ipa2=$(count_lines "$file_list_2")

# Common files are reported as a share of all distinct paths (the union), so
# two identical archives yield 100%. Unique files are a share of their own
# archive.
num_distinct_files=$(( num_files_ipa1 + num_files_ipa2 - num_common_files ))
percentage_common=$(percent "$num_common_files" "$num_distinct_files")
percentage_ipa1_unique=$(percent "$num_ipa1_unique" "$num_files_ipa1")
percentage_ipa2_unique=$(percent "$num_ipa2_unique" "$num_files_ipa2")

{
  echo "---- File Overlap Statistics ----"
  echo "Files common to both IPAs: $num_common_files ($percentage_common%)"
  echo "Files unique to $IPA1: $num_ipa1_unique ($percentage_ipa1_unique%)"
  echo "Files unique to $IPA2: $num_ipa2_unique ($percentage_ipa2_unique%)"
} >> "$OUTPUT_FILE"

# --- Detailed Content Comparison ---
echo "---- Plaintext file matches ----" >> "$TEXT_DIR/output.txt"
echo "---- Binary hash matches ----" >> "$BINARY_DIR/output.txt"

# Scratch file for the shared lines of the file pair being examined.
shared_scratch="${TMP}/shared_lines.tmp"

# Text-like files: collect the lines of the second file that also occur in the
# first. Everything else: report the pair when the two files are identical.
while IFS= read -r file; do
  case "$file" in
    *.txt | *.plist | *.xml)
      # awk exits 0 whether or not anything matched, so the decision is made
      # on the captured output, not on the exit status.
      if awk 'NR==FNR { seen[$0]; next } ($0 in seen)' \
        "$APP_PATH1/$file" "$APP_PATH2/$file" > "$shared_scratch" \
        && [ -s "$shared_scratch" ]; then
        echo "$file" >> "$TEXT_DIR/output.txt"
        # Flatten the relative path into a single file name.
        flat_name=$(printf '%s' "$file" | tr '/' '_')
        cat "$shared_scratch" >> "${TEXT_DIR}/shared_content_${flat_name}"
      fi
      ;;
    *)
      # A byte-for-byte comparison gives the same answer as comparing
      # checksums and does not need md5sum, which macOS does not ship.
      if cmp -s "$APP_PATH1/$file" "$APP_PATH2/$file"; then
        echo "$file" >> "$BINARY_DIR/output.txt"
      fi
      ;;
  esac
done < "${COMMON_FILES}"

rm -f -- "$shared_scratch"
	#!/usr/bin/env bash
	#
	# compare-ipa.sh - compare the contents of two iOS .ipa archives.
	#
	# Usage: compare-ipa.sh IPA1 IPA2
	#
	# bash rather than sh: the script has been written against bash and is not
	# guaranteed to be clean POSIX sh.

	set -e

	# --- Input Parameters ---
	# Paths of the two .ipa archives to compare (checked further down).
	IPA1=$1
	IPA2=$2

	# --- Constants ---
	# Directory holding this script. Every expansion is quoted so that a path
	# containing spaces survives, CDPATH is neutralised so cd prints nothing, and
	# '&&' keeps pwd from reporting the wrong directory after a failed cd.
	THIS_DIR=$(CDPATH='' cd -- "$(dirname -- "$0")" && pwd)

	# Scratch area for the unpacked archives, and the directory for all reports.
	TMP="${THIS_DIR}/.tmp"
	OUTPUT_DIR="${THIS_DIR}/output"
	OUTPUT_FILE="${OUTPUT_DIR}/output.txt"
	COMMON_FILES="${OUTPUT_DIR}/common_files.txt"

	# Extraction directory of each archive and the Payload directory inside it.
	DIR1="${TMP}/ipa1_$(basename -- "$IPA1")"
	DIR2="${TMP}/ipa2_$(basename -- "$IPA2")"
	PATH1="${DIR1}/Payload/"
	PATH2="${DIR2}/Payload/"

	# Bare names of the complete per-IPA file lists; the finished lists end up in
	# OUTPUT_DIR under these names.
	FILE_LIST_1=filelist1.txt
	FILE_LIST_2=filelist2.txt

	# Lists of files found in only one of the archives. They need names of their
	# own: pointing them at OUTPUT_DIR/filelist{1,2}.txt makes them share a path
	# with the complete lists, and one of the two gets overwritten.
	IPA_UNIQUE_1="${OUTPUT_DIR}/unique_to_ipa1.txt"
	IPA_UNIQUE_2="${OUTPUT_DIR}/unique_to_ipa2.txt"

	# Per-category result directories for the content comparison.
	TEXT_DIR="${OUTPUT_DIR}/text_matches"
	BINARY_DIR="${OUTPUT_DIR}/binary_matches"

	# --- Helper Functions ---
	#######################################
	# Print the command-line synopsis and terminate the script.
	# Arguments: none
	# Outputs:   one "Usage: ..." line on stderr (it is a diagnostic, so it must
	#            not end up in captured or redirected stdout)
	# Returns:   never returns; exits with status 1
	#######################################
	usage() {
	  printf 'Usage: %s IPA1 IPA2\n' "$0" >&2
	  exit 1
	}

	# --- Comparison helpers ---
	# Written in portable sh (no [[ ]], no ${var//...}) so the result does not
	# depend on which shell ends up interpreting the script.

	# Print the first *.app directory directly inside Payload directory $1, or
	# nothing when there is none.
	first_app_dir() {
	  for app_candidate in "${1%/}"/*.app; do
	    if [ -d "$app_candidate" ]; then
	      printf '%s\n' "$app_candidate"
	      return 0
	    fi
	  done
	  return 0
	}

	# Print every regular file below directory $1, relative to $1. The output is
	# sorted in the C locale because comm(1) only works on sorted input.
	list_files() {
	  (cd "$1" && find . -type f) | sed 's|^\./||' | LC_ALL=C sort
	}

	# Print the number of lines in file $1 as a bare integer (some wc
	# implementations pad the number with blanks).
	count_lines() {
	  echo $(( $(wc -l < "$1") ))
	}

	# Print 100 * $1 / $2 with two decimals; prints 0.00 when $2 is zero.
	percent() {
	  awk -v part="$1" -v whole="$2" \
	    'BEGIN { printf "%.2f\n", (whole > 0) ? part * 100 / whole : 0 }'
	}

	# --- Input Validation ---
	# Both archive paths are mandatory and must exist before any work starts.
	if [ -z "$IPA1" ] || [ -z "$IPA2" ]; then
	  usage
	fi

	for ipa in "$IPA1" "$IPA2"; do
	  if ! [ -e "$ipa" ]; then
	    echo "ipa file '$ipa' is not found!" >&2
	    usage
	  fi
	done

	# --- Setup ---
	# Start from an empty output directory and fresh extraction directories.
	# ${var:?} aborts instead of running rm -rf on an empty path.
	mkdir -p "$TMP"
	rm -rf -- "${OUTPUT_DIR:?}"
	mkdir -p "$OUTPUT_DIR" "$TEXT_DIR" "$BINARY_DIR"

	rm -rf -- "${DIR1:?}" "${DIR2:?}"
	unzip -q "$IPA1" -d "$DIR1" || { echo "Error extracting $IPA1" >&2; exit 1; }
	unzip -q "$IPA2" -d "$DIR2" || { echo "Error extracting $IPA2" >&2; exit 1; }

	# --- Generate Output ---
	{
	  echo "IPA Comparison Results:"
	  echo "Date: $(date)"
	  echo "IPA1: $IPA1"
	  echo "IPA2: $IPA2"
	  echo ""
	} > "$OUTPUT_FILE"

	# --- Locate the application bundles ---
	# Done before any statistics: everything below needs both bundles.
	APP_PATH1=$(first_app_dir "$PATH1")
	APP_PATH2=$(first_app_dir "$PATH2")

	if [ -z "$APP_PATH1" ] || [ -z "$APP_PATH2" ]; then
	  # Recorded in the report and shown to the user.
	  echo "Error: .app directory not found." | tee -a "$OUTPUT_FILE" >&2
	  exit 1
	fi

	# --- File Comparison and Statistics ---
	# Complete file lists, written straight to their final location.
	file_list_1="${OUTPUT_DIR}/${FILE_LIST_1}"
	file_list_2="${OUTPUT_DIR}/${FILE_LIST_2}"

	# The "unique" lists must not share a path with the complete lists, otherwise
	# one overwrites the other; fall back to dedicated names if they collide.
	unique_list_1=$IPA_UNIQUE_1
	unique_list_2=$IPA_UNIQUE_2
	if [ "$unique_list_1" = "$file_list_1" ]; then
	  unique_list_1="${OUTPUT_DIR}/unique_to_ipa1.txt"
	fi
	if [ "$unique_list_2" = "$file_list_2" ]; then
	  unique_list_2="${OUTPUT_DIR}/unique_to_ipa2.txt"
	fi

	# Paths are relative to each bundle, so differently named .app directories
	# still line up.
	list_files "$APP_PATH1" > "$file_list_1"
	list_files "$APP_PATH2" > "$file_list_2"

	# Same locale as the sort above, so comm agrees on the ordering.
	LC_ALL=C comm -12 "$file_list_1" "$file_list_2" > "$COMMON_FILES"
	LC_ALL=C comm -23 "$file_list_1" "$file_list_2" > "$unique_list_1"
	LC_ALL=C comm -13 "$file_list_1" "$file_list_2" > "$unique_list_2"

	num_common_files=$(count_lines "$COMMON_FILES")
	num_ipa1_unique=$(count_lines "$unique_list_1")
	num_ipa2_unique=$(count_lines "$unique_list_2")
	num_files_ipa1=$(count_lines "$file_list_1")
	num_files_ipa2=$(count_lines "$file_list_2")

	# Common files are reported as a share of all distinct paths (the union), so
	# two identical archives yield 100%. Unique files are a share of their own
	# archive.
	num_distinct_files=$(( num_files_ipa1 + num_files_ipa2 - num_common_files ))
	percentage_common=$(percent "$num_common_files" "$num_distinct_files")
	percentage_ipa1_unique=$(percent "$num_ipa1_unique" "$num_files_ipa1")
	percentage_ipa2_unique=$(percent "$num_ipa2_unique" "$num_files_ipa2")

	{
	  echo "---- File Overlap Statistics ----"
	  echo "Files common to both IPAs: $num_common_files ($percentage_common%)"
	  echo "Files unique to $IPA1: $num_ipa1_unique ($percentage_ipa1_unique%)"
	  echo "Files unique to $IPA2: $num_ipa2_unique ($percentage_ipa2_unique%)"
	} >> "$OUTPUT_FILE"

	# --- Detailed Content Comparison ---
	echo "---- Plaintext file matches ----" >> "$TEXT_DIR/output.txt"
	echo "---- Binary hash matches ----" >> "$BINARY_DIR/output.txt"

	# Scratch file for the shared lines of the file pair being examined.
	shared_scratch="${TMP}/shared_lines.tmp"

	# Text-like files: collect the lines of the second file that also occur in the
	# first. Everything else: report the pair when the two files are identical.
	while IFS= read -r file; do
	  case "$file" in
	    *.txt | *.plist | *.xml)
	      # awk exits 0 whether or not anything matched, so the decision is made
	      # on the captured output, not on the exit status.
	      if awk 'NR==FNR { seen[$0]; next } ($0 in seen)' \
	        "$APP_PATH1/$file" "$APP_PATH2/$file" > "$shared_scratch" \
	        && [ -s "$shared_scratch" ]; then
	        echo "$file" >> "$TEXT_DIR/output.txt"
	        # Flatten the relative path into a single file name.
	        flat_name=$(printf '%s' "$file" | tr '/' '_')
	        cat "$shared_scratch" >> "${TEXT_DIR}/shared_content_${flat_name}"
	      fi
	      ;;
	    *)
	      # A byte-for-byte comparison gives the same answer as comparing
	      # checksums and does not need md5sum, which macOS does not ship.
	      if cmp -s "$APP_PATH1/$file" "$APP_PATH2/$file"; then
	        echo "$file" >> "$BINARY_DIR/output.txt"
	      fi
	      ;;
	  esac
	done < "${COMMON_FILES}"

	rm -f -- "$shared_scratch"