Skip to content

Instantly share code, notes, and snippets.

@psarna

psarna/squash.sh Secret

Last active September 5, 2025 08:58
Show Gist options
  • Select an option

  • Save psarna/e3a93715e7e1bb07993f782f26fde578 to your computer and use it in GitHub Desktop.

Select an option

Save psarna/e3a93715e7e1bb07993f782f26fde578 to your computer and use it in GitHub Desktop.
#!/bin/bash
set -euo pipefail
# Squash a contiguous range of layers of a container image into a single layer.
# Usage: ./squash IMAGE START_LAYER END_LAYER OUTPUT_DIR
#   IMAGE        image reference; "docker://" is prepended when it has no "://"
#   START_LAYER  zero-based index of the first layer to merge
#   END_LAYER    zero-based index of the last layer to merge (inclusive)
#   OUTPUT_DIR   directory that receives the resulting image layout
if [[ $# -ne 4 ]]; then
  echo "Usage: $0 IMAGE START_LAYER END_LAYER OUTPUT_DIR" >&2
  echo "Example: $0 docker://alpine:latest 1 3 ./output" >&2
  exit 1
fi
image="$1"
start_layer="$2"
end_layer="$3"
output_dir="$4"
# The indices are used in arithmetic contexts and jq filters further down;
# anything other than plain digits would be evaluated as an expression there.
if [[ ! "$start_layer" =~ ^[0-9]+$ || ! "$end_layer" =~ ^[0-9]+$ ]]; then
  echo "Error: START_LAYER and END_LAYER must be non-negative integers" >&2
  exit 1
fi
# Force base 10 so that a value such as "08" is not rejected as invalid octal.
start_layer=$((10#$start_layer))
end_layer=$((10#$end_layer))
# Add docker:// prefix if not present
if [[ ! "$image" =~ :// ]]; then
  image="docker://$image"
fi
workdir=$(mktemp -d)
# Single quotes defer expansion until the trap fires, and the inner double
# quotes keep a scratch path containing spaces or glob characters intact.
trap 'rm -rf -- "$workdir"' EXIT
echo "Downloading image: $image"
skopeo copy "$image" "oci:$workdir/image:latest"
# Get manifest info: the index points at the manifest blob, which lists the layers.
manifest_digest=$(jq -r '.manifests[0].digest' "$workdir/image/index.json" | cut -d: -f2)
manifest_file="$workdir/image/blobs/sha256/$manifest_digest"
total_layers=$(jq '.layers | length' "$manifest_file")
echo "Total layers: $total_layers"
# Indices are zero-based. A negative start has to be rejected explicitly:
# it would pass the upper-bound checks and jq would then read negative
# array indices as counting from the end of the layer list.
if (( start_layer < 0 || start_layer >= total_layers \
      || end_layer >= total_layers || start_layer > end_layer )); then
  echo "Error: Invalid layer range $start_layer-$end_layer (total: $total_layers)" >&2
  exit 1
fi
echo "Squashing layers $start_layer to $end_layer"
# Resolve every layer index in the requested range to its blob file and
# remember the paths, in order, in layers_to_merge.
layers_to_merge=()
i=$((start_layer))
while (( i <= end_layer )); do
  digest_ref=$(jq -r --argjson idx "$i" '.layers[$idx].digest' "$manifest_file")
  # Keep only the hex part after the "algorithm:" prefix.
  digest_hex=$(cut -d: -f2 <<<"$digest_ref")
  layers_to_merge+=("$workdir/image/blobs/sha256/$digest_hex")
  printf ' Layer %s: %s...\n' "$i" "${digest_hex:0:12}"
  i=$((i + 1))
done
# Merge layers
echo "Merging layers..."
final_dir="$workdir/final"
mkdir -p "$final_dir"
# Extract every selected layer into its own scratch directory (layer_0, layer_1, ...)
for i in "${!layers_to_merge[@]}"; do
  layer_dir="$workdir/layer_$i"
  mkdir -p "$layer_dir"
  # Detect the compression from the blob content. -b leaves the file name out
  # of the output, so a scratch path that happens to contain "gzip" or
  # "Zstandard" cannot be mistaken for the detected type.
  layer_type=$(file -b "${layers_to_merge[$i]}")
  case "$layer_type" in
    *gzip*)
      tar -xzf "${layers_to_merge[$i]}" -C "$layer_dir"
      ;;
    *Zstandard*)
      tar --zstd -xf "${layers_to_merge[$i]}" -C "$layer_dir"
      ;;
    *)
      # Anything else is handed to tar as-is.
      tar -xf "${layers_to_merge[$i]}" -C "$layer_dir"
      ;;
  esac
done
# Report whether an earlier extracted layer already provides an identical copy
# of a file.
# NOTE: the check is currently switched off. The function returns 1 right
# away, so everything after the first `return` is never executed.
# Arguments (only read by the disabled code):
#   $1 - path of the file relative to the layer root
#   $2 - path of the file inside the current extracted layer
#   $3 - index of the current layer
# Returns: 0 when an identical file is found in an earlier layer, 1 otherwise
#          (always 1 while the check is disabled).
file_exists_in_previous_layers() {
  # disable for now
  return 1

  local rel_path="$1"
  local candidate="$2"
  local layer_index="$3"
  local candidate_size earlier_dir earlier_file earlier_size ancestor idx

  candidate_size=$(stat -c%s "$candidate" 2>/dev/null || echo "0")

  # Walk the earlier layers from the closest one down to layer 0.
  idx=$((layer_index - 1))
  while (( idx >= 0 )); do
    earlier_dir="$workdir/layer_$idx"

    # A whiteout for this exact path means the file was deleted there.
    if [[ -f "$earlier_dir/$(dirname "$rel_path")/.wh.$(basename "$rel_path")" ]]; then
      return 1
    fi

    # An opaque marker in any ancestor directory means it was cleared there.
    ancestor="$(dirname "$rel_path")"
    until [[ "$ancestor" == "." || "$ancestor" == "/" ]]; do
      if [[ -f "$earlier_dir/$ancestor/.wh..wh..opq" ]]; then
        return 1
      fi
      ancestor="$(dirname "$ancestor")"
    done

    # The first earlier layer that has the file decides the answer.
    earlier_file="$earlier_dir/$rel_path"
    if [[ -f "$earlier_file" ]]; then
      earlier_size=$(stat -c%s "$earlier_file" 2>/dev/null || echo "0")
      # Cheap size comparison first, byte comparison only when sizes agree.
      if [[ "$candidate_size" == "$earlier_size" ]] && cmp -s "$candidate" "$earlier_file"; then
        return 0
      fi
      return 1
    fi

    idx=$((idx - 1))
  done

  # No earlier layer contains the file.
  return 1
}
# Apply one extracted layer on top of the merged tree.
# Globals:   final_dir (read) - root of the merged tree
# Arguments: $1 - directory holding the extracted layer
#            $2 - index of the layer within layers_to_merge
# Removals (opaque markers, whiteouts) are applied before the layer's own
# content is added, so they only ever hit content from earlier layers.
# Individual mkdir/cp/rm failures are skipped instead of aborting the run;
# the failing tool prints its own error message.
# NOTE(review): directories are created with mkdir -p, so their mode and
# ownership are not carried over from the layer.
apply_layer() {
  local layer_dir="$1"
  local layer_index="$2"
  local entry rel_path name target

  # Opaque markers: drop whatever earlier layers put into the directory and
  # keep the marker itself in the merged tree.
  while IFS= read -r -d '' entry; do
    rel_path="${entry#"$layer_dir"/}"
    target="$final_dir/$(dirname -- "$rel_path")"
    if [[ -d "$target" ]]; then
      find "$target" -mindepth 1 -maxdepth 1 -exec rm -rf -- {} + || :
    fi
    mkdir -p -- "$target" || :
    touch -- "$final_dir/$rel_path" || :
  done < <(find "$layer_dir" -name ".wh..wh..opq" -print0 2>/dev/null)

  # Whiteouts: remove the named entry and keep the marker in the merged tree.
  while IFS= read -r -d '' entry; do
    rel_path="${entry#"$layer_dir"/}"
    name="${rel_path##*/}"
    name="${name#.wh.}"
    # A marker that names nothing, "." or ".." has no valid target; removing
    # it would delete the containing directory instead.
    case "$name" in
      '' | . | ..) continue ;;
    esac
    target="$final_dir/$(dirname -- "$rel_path")"
    rm -rf -- "$target/$name" 2>/dev/null || :
    mkdir -p -- "$target" || :
    touch -- "$final_dir/$rel_path" || :
  done < <(find "$layer_dir" -name ".wh.*" ! -name ".wh..wh..opq" -print0 2>/dev/null)

  # Directories. -mindepth 1 skips the layer root itself: its path has no
  # trailing slash, so the prefix strip would leave the full scratch path and
  # recreate it inside the merged tree.
  while IFS= read -r -d '' entry; do
    target="$final_dir/${entry#"$layer_dir"/}"
    # A directory replaces a file or symlink left at the same path.
    if [[ -L "$target" || ( -e "$target" && ! -d "$target" ) ]]; then
      rm -f -- "$target" || :
    fi
    mkdir -p -- "$target" || :
  done < <(find "$layer_dir" -mindepth 1 -type d -print0 2>/dev/null)

  # Regular files (excluding whiteout markers)
  while IFS= read -r -d '' entry; do
    rel_path="${entry#"$layer_dir"/}"
    target="$final_dir/$rel_path"
    # Check if file already exists in previous layers with same content
    if file_exists_in_previous_layers "$rel_path" "$entry" "$layer_index"; then
      echo " Skipping identical file: $rel_path"
      continue
    fi
    # Without this, cp would write through an older symlink or copy the file
    # into an older directory of the same name.
    if [[ -L "$target" || -d "$target" ]]; then
      rm -rf -- "$target" || :
    fi
    cp -a -- "$entry" "$target" || continue
    # The path exists again, so a whiteout recorded for it is obsolete.
    rm -f -- "$final_dir/$(dirname -- "$rel_path")/.wh.${rel_path##*/}" 2>/dev/null || :
  done < <(find "$layer_dir" -type f ! -name ".wh.*" -print0 2>/dev/null)

  # Symlinks and other special files
  while IFS= read -r -d '' entry; do
    rel_path="${entry#"$layer_dir"/}"
    target="$final_dir/$rel_path"
    if [[ -L "$target" || -d "$target" ]]; then
      rm -rf -- "$target" || :
    fi
    cp -a -- "$entry" "$target" || continue
    rm -f -- "$final_dir/$(dirname -- "$rel_path")/.wh.${rel_path##*/}" 2>/dev/null || :
  done < <(find "$layer_dir" ! -type d ! -type f ! -name ".wh.*" -print0 2>/dev/null)

  return 0
}

# Apply layers in order
for i in "${!layers_to_merge[@]}"; do
  apply_layer "$workdir/layer_$i" "$i"
done
# Pack the merged tree into an uncompressed tar archive.
merged_layer="$workdir/merged.tar"
printf '%s\n' "Creating merged layer tar..."
tar -C "$final_dir" -cf "$merged_layer" .
# Produce the gzip-compressed form that is stored as the layer blob.
compressed_layer="$workdir/merged.tar.gz"
printf '%s\n' "Compressing merged layer..."
gzip -c "$merged_layer" > "$compressed_layer"
# The uncompressed hash is kept for the config's diff_ids, the compressed
# hash names the blob and goes into the manifest.
uncompressed_hash=$(sha256sum "$merged_layer" | cut -d' ' -f1)
compressed_hash=$(sha256sum "$compressed_layer" | cut -d' ' -f1)
compressed_size=$(stat -c '%s' "$compressed_layer")
printf 'New layer: %s... (%s bytes)\n' "${compressed_hash:0:12}" "$compressed_size"
# Store the compressed archive in the blob directory under its own digest.
cp "$compressed_layer" "$workdir/image/blobs/sha256/$compressed_hash"
# Update manifest
echo "Updating manifest..."
config_digest=$(jq -r '.config.digest' "$manifest_file" | cut -d: -f2)
config_file="$workdir/image/blobs/sha256/$config_digest"
# Replace the descriptors of the squashed range with one descriptor for the
# merged blob: everything before start_layer, the new layer, everything after
# end_layer. All values reach jq as bound variables rather than being pasted
# into the filter text, and the splice is a single jq invocation.
echo "Creating new layers array..."
updated_manifest=$(jq \
  --argjson start "$((start_layer))" \
  --argjson end "$((end_layer))" \
  --arg digest "sha256:$compressed_hash" \
  --argjson size "$compressed_size" \
  '.layers = (
     .layers[:$start]
     + [{mediaType: "application/vnd.oci.image.layer.v1.tar+gzip", digest: $digest, size: $size}]
     + .layers[($end + 1):]
   )' \
  "$manifest_file")
# Update config similarly: the diff_ids of the squashed range are replaced by
# the digest of the uncompressed merged tar.
echo "Creating new diff_ids array..."
# Number of layers folded into the merged one.
layer_count=$((end_layer - start_layer + 1))
updated_config=$(jq \
  --argjson start "$((start_layer))" \
  --argjson end "$((end_layer))" \
  --arg diff_id "sha256:$uncompressed_hash" \
  '.rootfs.diff_ids = (
     .rootfs.diff_ids[:$start]
     + [$diff_id]
     + .rootfs.diff_ids[($end + 1):]
   )' \
  "$config_file")
# Handle history if it exists
if jq -e '.history' "$config_file" > /dev/null; then
echo "Updating history..."
jq '.history' "$config_file" > "$workdir/orig_history.json"
echo "[]" > "$workdir/new_history.json"
# Add history before start_layer
for ((i=0; i<start_layer; i++)); do
hist=$(jq ".[$i]" "$workdir/orig_history.json")
jq ". += [\$hist]" --argjson hist "$hist" "$workdir/new_history.json" > "$workdir/tmp.json"
mv "$workdir/tmp.json" "$workdir/new_history.json"
done
# Add new history entry
new_hist='{"created_by": "'$layer_count' layers", "empty_layer": false}'
jq ". += [\$hist]" --argjson hist "$new_hist" "$workdir/new_history.json" > "$workdir/tmp.json"
mv "$workdir/tmp.json" "$workdir/new_history.json"
# Add history after end_layer
total_history=$(jq 'length' "$workdir/orig_history.json")
for ((i=end_layer+1; i<total_history; i++)); do
hist=$(jq ".[$i]" "$workdir/orig_history.json")
jq ". += [\$hist]" --argjson hist "$hist" "$workdir/new_history.json" > "$workdir/tmp.json"
mv "$workdir/tmp.json" "$workdir/new_history.json"
done
new_history=$(cat "$workdir/new_history.json")
updated_config=$(echo "$updated_config" | jq --argjson history "$new_history" '.history = $history')
fi
# Write the new config, store it as a blob named after its digest.
blob_dir="$workdir/image/blobs/sha256"
config_out="$workdir/new_config.json"
printf '%s\n' "$updated_config" > "$config_out"
new_config_hash=$(sha256sum "$config_out" | cut -d' ' -f1)
new_config_size=$(stat -c '%s' "$config_out")
cp "$config_out" "$blob_dir/$new_config_hash"
# Point the manifest at the new config blob.
updated_manifest=$(jq \
  --arg digest "sha256:$new_config_hash" \
  --argjson size "$new_config_size" \
  '.config.digest = $digest | .config.size = $size' \
  <<<"$updated_manifest")
# Write the new manifest and store it as a blob in the same way.
manifest_out="$workdir/new_manifest.json"
printf '%s\n' "$updated_manifest" > "$manifest_out"
new_manifest_hash=$(sha256sum "$manifest_out" | cut -d' ' -f1)
new_manifest_size=$(stat -c '%s' "$manifest_out")
cp "$manifest_out" "$blob_dir/$new_manifest_hash"
# Make the first index entry reference the new manifest; the index is
# rewritten through a temporary file and then moved into place.
index_file="$workdir/image/index.json"
jq \
  --arg digest "sha256:$new_manifest_hash" \
  --argjson size "$new_manifest_size" \
  '.manifests[0].digest = $digest | .manifests[0].size = $size' \
  "$index_file" > "$index_file.tmp"
mv "$index_file.tmp" "$index_file"
# Copy to output: any existing OUTPUT_DIR is replaced by the rewritten layout.
echo "Saving to: $output_dir"
# ${output_dir:?} aborts with an error when the path is empty, and "--" keeps
# a path that starts with "-" from being parsed as an option by rm or cp.
rm -rf -- "${output_dir:?}"
cp -r -- "$workdir/image" "$output_dir"
echo "Success! You can now push with:"
echo " skopeo copy oci:$output_dir docker://your-registry/squashed:tag"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment