-
-
Save psarna/e3a93715e7e1bb07993f782f26fde578 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Squash a contiguous range of layers of a container image into a single layer.
#
# Usage: ./squash IMAGE START_LAYER END_LAYER OUTPUT_DIR
#   IMAGE        image reference; "docker://" is prepended when it has no "://"
#   START_LAYER  zero-based index of the first layer to squash
#   END_LAYER    zero-based index of the last layer to squash (inclusive)
#   OUTPUT_DIR   directory that receives the resulting OCI image layout
set -euo pipefail

if [[ $# -ne 4 ]]; then
  echo "Usage: $0 IMAGE START_LAYER END_LAYER OUTPUT_DIR" >&2
  echo "Example: $0 docker://alpine:latest 1 3 ./output" >&2
  exit 1
fi

image="$1"
start_layer="$2"
end_layer="$3"
output_dir="$4"

# The indices are later used in arithmetic and spliced into jq filters, so
# anything but a plain non-negative integer must be rejected up front. A
# negative value would otherwise pass the range check and make jq count from
# the end of the layer list.
for layer_index in "$start_layer" "$end_layer"; do
  if [[ ! "$layer_index" =~ ^[0-9]+$ ]]; then
    echo "Error: layer index must be a non-negative integer, got '$layer_index'" >&2
    exit 1
  fi
done
# Force base 10 so that a value such as "08" is not parsed as invalid octal.
start_layer=$((10#$start_layer))
end_layer=$((10#$end_layer))

# Add docker:// prefix if not present
if [[ ! "$image" =~ :// ]]; then
  image="docker://$image"
fi
# Scratch area for the downloaded image and all intermediate files.
workdir=$(mktemp -d)
# Single quotes defer expansion until the trap fires, and the inner double
# quotes keep a path containing spaces or glob characters in one piece.
trap 'rm -rf -- "$workdir"' EXIT

echo "Downloading image: $image"
skopeo copy "$image" "oci:$workdir/image:latest"

# Get manifest info: index.json points at the manifest blob, which in turn
# lists the layers.
manifest_digest=$(jq -r '.manifests[0].digest' "$workdir/image/index.json" | cut -d: -f2)
manifest_file="$workdir/image/blobs/sha256/$manifest_digest"
total_layers=$(jq '.layers | length' "$manifest_file")
echo "Total layers: $total_layers"

# Layer indices are zero-based, so the largest valid index is total_layers - 1.
if [[ $start_layer -ge $total_layers || $end_layer -ge $total_layers || $start_layer -gt $end_layer ]]; then
  echo "Error: Invalid layer range $start_layer-$end_layer (total: $total_layers)" >&2
  exit 1
fi
echo "Squashing layers $start_layer to $end_layer"

# Collect the blob path of every layer in the requested range.
layers_to_merge=()
for ((i = start_layer; i <= end_layer; i++)); do
  layer_digest=$(jq -r ".layers[$i].digest" "$manifest_file" | cut -d: -f2)
  layer_file="$workdir/image/blobs/sha256/$layer_digest"
  layers_to_merge+=("$layer_file")
  echo " Layer $i: ${layer_digest:0:12}..."
done

# Merge layers
echo "Merging layers..."
final_dir="$workdir/final"
mkdir -p "$final_dir"

# Extract each layer into its own directory ($workdir/layer_<n>, numbered from
# 0 within the selected range).
for i in "${!layers_to_merge[@]}"; do
  layer_dir="$workdir/layer_$i"
  mkdir -p "$layer_dir"

  # "file -b" omits the file name from its output, so a temp path that happens
  # to contain "gzip" cannot be mistaken for a compression type. Detection is
  # best effort: if it fails, fall through to plain "tar -x".
  layer_type=$(file -b -- "${layers_to_merge[$i]}" 2>/dev/null || true)

  # -p keeps the permission bits recorded in the archive instead of filtering
  # them through the current umask (which is what tar does for non-root users).
  case "$layer_type" in
    *gzip*)
      tar -xzpf "${layers_to_merge[$i]}" -C "$layer_dir"
      ;;
    *Zstandard*)
      tar --zstd -xpf "${layers_to_merge[$i]}" -C "$layer_dir"
      ;;
    *)
      tar -xpf "${layers_to_merge[$i]}" -C "$layer_dir"
      ;;
  esac
done
#######################################
# Report whether a file is byte-identical to the copy visible in the layers
# extracted before the current one.
#
# NOTE: the check is switched off. The function returns 1 right away, so the
# comparison logic below is never reached and callers never skip a file.
#
# Globals:   workdir (read)
# Arguments: $1 - path of the file relative to the layer root
#            $2 - path of the file inside the current extracted layer
#            $3 - index of the current layer
# Returns:   0 if an identical copy is visible in an earlier layer, 1 otherwise
#######################################
file_exists_in_previous_layers() {
  # disable for now
  return 1

  local wanted="$1"
  local candidate="$2"
  local layer_no="$3"

  local candidate_size
  candidate_size=$(stat -c%s "$candidate" 2>/dev/null || echo "0")

  local parent leaf
  parent=$(dirname "$wanted")
  leaf=$(basename "$wanted")

  local older_root older_copy older_size ancestor

  # Walk the earlier layers from the most recent one downwards.
  for ((j = layer_no - 1; j >= 0; j--)); do
    older_root="$workdir/layer_$j"
    older_copy="$older_root/$wanted"

    # A whiteout for this path means the file was deleted in that layer.
    if [[ -f "$older_root/$parent/.wh.$leaf" ]]; then
      return 1
    fi

    # An opaque marker on any ancestor directory hides everything below it.
    ancestor="$parent"
    while [[ "$ancestor" != "." && "$ancestor" != "/" ]]; do
      if [[ -f "$older_root/$ancestor/.wh..wh..opq" ]]; then
        return 1
      fi
      ancestor=$(dirname "$ancestor")
    done

    # Nothing at this path in this layer: keep looking further down.
    [[ -f "$older_copy" ]] || continue

    # The first layer that has the file decides: equal size and equal bytes
    # means identical, anything else means the current file is a new version.
    older_size=$(stat -c%s "$older_copy" 2>/dev/null || echo "0")
    if [[ "$candidate_size" == "$older_size" ]] && cmp -s "$candidate" "$older_copy"; then
      return 0
    fi
    return 1
  done

  # File not found in any previous layer
  return 1
}
# Apply layers in order (lowest first) so that later layers override earlier
# ones. Whiteout and opaque markers are kept in the merged tree because layers
# below the squashed range may still contain the paths they hide.
#
# Every loop ends in "|| :" on purpose: it stops one bad entry from aborting
# the whole merge under "set -e". Copy failures are reported on stderr rather
# than being dropped silently.
for i in "${!layers_to_merge[@]}"; do
  layer_dir="$workdir/layer_$i"

  # Directories and opaque markers in one pass. find lists a directory before
  # its contents, so an opaque directory is emptied before this layer's own
  # subdirectories are created inside it (clearing afterwards would delete
  # them and lose the files they are about to receive).
  find "$layer_dir" -type d -print0 2>/dev/null | while IFS= read -r -d '' dir; do
    # The layer root maps onto the merge root itself; stripping "$layer_dir/"
    # from it would leave the absolute temp path and recreate it in the image.
    if [[ "$dir" == "$layer_dir" ]]; then
      target="$final_dir"
    else
      target="$final_dir/${dir#"$layer_dir"/}"
    fi

    # A real directory replaces a symlink or file an earlier layer left at
    # this path; descending through such a symlink could leave the merge tree.
    if [[ -L "$target" || ( -e "$target" && ! -d "$target" ) ]]; then
      rm -f -- "$target"
    fi

    if [[ -e "$dir/.wh..wh..opq" ]]; then
      # Opaque directory: discard what earlier layers put here, keep the marker.
      if [[ -d "$target" ]]; then
        find "$target" -mindepth 1 -maxdepth 1 -exec rm -rf -- {} +
      fi
      mkdir -p "$target"
      touch "$target/.wh..wh..opq"
    else
      mkdir -p "$target"
    fi
  done || :

  # Process whiteouts: remove the hidden entry, keep the whiteout marker.
  find "$layer_dir" -name ".wh.*" ! -name ".wh..wh..opq" -print0 2>/dev/null | while IFS= read -r -d '' whiteout; do
    whiteout_path="${whiteout#"$layer_dir"/}"
    marker="$final_dir/$whiteout_path"
    hidden_name="${whiteout_path##*/}"
    hidden_name="${hidden_name#.wh.}"
    # An empty, "." or ".." remainder would make rm target the parent directory.
    case "$hidden_name" in
      "" | "." | "..") continue ;;
    esac
    rm -rf -- "$(dirname "$marker")/$hidden_name" 2>/dev/null || :
    mkdir -p "$(dirname "$marker")"
    touch "$marker"
  done || :

  # Copy regular files (excluding whiteouts)
  find "$layer_dir" -type f ! -name ".wh.*" -print0 2>/dev/null | while IFS= read -r -d '' file; do
    rel_path="${file#"$layer_dir"/}"
    target="$final_dir/$rel_path"

    # Check if file already exists in previous layers with same content
    if file_exists_in_previous_layers "$rel_path" "$file" "$i"; then
      echo " Skipping identical file: $rel_path"
      continue
    fi

    # Replace, rather than write through or into, a symlink or directory that
    # an earlier layer left at this path.
    if [[ -L "$target" || -d "$target" ]]; then
      rm -rf -- "$target"
    fi
    cp -a "$file" "$target" || echo "Warning: failed to copy $rel_path" >&2

    # The file is present again, so a whiteout from an earlier layer is stale.
    rm -f -- "$final_dir/$(dirname "$rel_path")/.wh.$(basename "$rel_path")" 2>/dev/null || :
  done || :

  # Copy symlinks and other special files
  find "$layer_dir" ! -type d ! -type f ! -name ".wh.*" -print0 2>/dev/null | while IFS= read -r -d '' special; do
    rel_path="${special#"$layer_dir"/}"
    target="$final_dir/$rel_path"

    # Whatever an earlier layer left at this path is replaced outright.
    if [[ -e "$target" || -L "$target" ]]; then
      rm -rf -- "$target"
    fi
    cp -a "$special" "$target" || echo "Warning: failed to copy $rel_path" >&2

    # Remove whiteout for this item if it exists
    rm -f -- "$final_dir/$(dirname "$rel_path")/.wh.$(basename "$rel_path")" 2>/dev/null || :
  done || :
done

# mkdir -p created the directories with default permissions. Copy the modes
# (and, where permitted, the ownership) from the extracted layers now that all
# content is in place, so a read-only directory cannot block the copies above.
# Layers are visited lowest first so the most recent layer wins; -depth handles
# children before their parents.
for i in "${!layers_to_merge[@]}"; do
  layer_dir="$workdir/layer_$i"
  find "$layer_dir" -mindepth 1 -depth -type d -print0 2>/dev/null | while IFS= read -r -d '' dir; do
    target="$final_dir/${dir#"$layer_dir"/}"
    if [[ -d "$target" && ! -L "$target" ]]; then
      chmod --reference="$dir" -- "$target" 2>/dev/null || :
      chown --reference="$dir" -- "$target" 2>/dev/null || :
    fi
  done || :
done
# Pack the merged tree into an uncompressed tarball. The archive is rooted at
# the merge directory, so member names start with "./".
merged_layer="$workdir/merged.tar"
echo "Creating merged layer tar..."
tar -C "$final_dir" -cf "$merged_layer" .

# Gzip the tarball into a second file; the uncompressed one is still needed
# for hashing below.
compressed_layer="$workdir/merged.tar.gz"
echo "Compressing merged layer..."
gzip --stdout "$merged_layer" > "$compressed_layer"

# Hash both forms and record the size of the compressed one.
uncompressed_hash=$(sha256sum "$merged_layer" | awk '{print $1}')
compressed_hash=$(sha256sum "$compressed_layer" | awk '{print $1}')
compressed_size=$(stat --format=%s "$compressed_layer")
echo "New layer: ${compressed_hash:0:12}... ($compressed_size bytes)"

# Store the compressed layer in the blob directory under its own digest.
blob_store="$workdir/image/blobs/sha256"
cp "$compressed_layer" "$blob_store/$compressed_hash"
# Update manifest
echo "Updating manifest..."
config_digest=$(jq -r '.config.digest' "$manifest_file" | cut -d: -f2)
config_file="$workdir/image/blobs/sha256/$config_digest"

# Descriptor for the merged layer blob.
new_layer=$(jq -n \
  --arg mediaType "application/vnd.oci.image.layer.v1.tar+gzip" \
  --arg digest "sha256:$compressed_hash" \
  --argjson size "$compressed_size" \
  '{mediaType: $mediaType, digest: $digest, size: $size}')

# Replace the descriptors of layers start_layer..end_layer with the single
# merged descriptor, keeping everything before and after untouched. All values
# reach jq as data (--argjson) instead of being pasted into the filter text,
# and one jq run replaces the per-element loop. $((...)) guarantees jq is
# handed a plain decimal number.
echo "Creating new layers array..."
updated_manifest=$(jq \
  --argjson first "$((start_layer))" \
  --argjson last "$((end_layer))" \
  --argjson merged "$new_layer" \
  '.layers |= (.[0:$first] + [$merged] + .[($last + 1):])' \
  "$manifest_file")
# Update config similarly: the diff_ids of the squashed layers are replaced by
# the digest of the uncompressed merged tarball. diff_ids has one entry per
# layer, so the layer indices can be used directly.
echo "Creating new diff_ids array..."
layer_count=$((end_layer - start_layer + 1))
updated_config=$(jq \
  --argjson first "$((start_layer))" \
  --argjson last "$((end_layer))" \
  --arg diff_id "sha256:$uncompressed_hash" \
  '.rootfs.diff_ids |= (.[0:$first] + [$diff_id] + .[($last + 1):])' \
  "$config_file")

# Handle history if it exists.
#
# History is NOT indexed like the layer list: entries flagged
# "empty_layer": true produced no layer. Layer indices are therefore mapped to
# history positions by counting only the entries that are not flagged. The
# span from the first to the last squashed layer (including any empty-layer
# entries between them) collapses into one new entry. If the history has too
# few layer-producing entries to do the mapping, fall back to slicing by layer
# index.
if jq -e '.history | type == "array"' "$config_file" > /dev/null; then
  echo "Updating history..."
  updated_config=$(printf '%s\n' "$updated_config" | jq \
    --argjson first "$((start_layer))" \
    --argjson last "$((end_layer))" \
    --arg created_by "$layer_count layers" \
    '
    {created_by: $created_by, empty_layer: false} as $entry
    | .history |= (
        [range(length) as $k | select(.[$k].empty_layer != true) | $k] as $layer_pos
        | if ($layer_pos | length) > $last then
            $layer_pos[$first] as $from
            | $layer_pos[$last] as $to
            | .[0:$from] + [$entry] + .[($to + 1):]
          else
            .[0:$first] + [$entry] + .[($last + 1):]
          end
      )
    ')
fi
# Every blob is stored under its own SHA-256 digest in this directory.
blob_dir="$workdir/image/blobs/sha256"

# Write the rewritten config to disk, then file it as a blob.
config_out="$workdir/new_config.json"
printf '%s\n' "$updated_config" > "$config_out"
new_config_hash=$(sha256sum "$config_out" | awk '{print $1}')
new_config_size=$(stat --format=%s "$config_out")
cp "$config_out" "$blob_dir/$new_config_hash"

# Point the manifest at the new config blob.
updated_manifest=$(jq \
  --arg digest "sha256:$new_config_hash" \
  --argjson size "$new_config_size" \
  '.config.digest = $digest | .config.size = $size' <<< "$updated_manifest")

# Write the rewritten manifest to disk, then file it as a blob.
manifest_out="$workdir/new_manifest.json"
printf '%s\n' "$updated_manifest" > "$manifest_out"
new_manifest_hash=$(sha256sum "$manifest_out" | awk '{print $1}')
new_manifest_size=$(stat --format=%s "$manifest_out")
cp "$manifest_out" "$blob_dir/$new_manifest_hash"

# Point the first index entry at the new manifest; the index is rewritten via
# a temporary file and then moved into place.
index_file="$workdir/image/index.json"
jq \
  --arg digest "sha256:$new_manifest_hash" \
  --argjson size "$new_manifest_size" \
  '.manifests[0].digest = $digest | .manifests[0].size = $size' \
  "$index_file" > "$index_file.tmp"
mv "$index_file.tmp" "$index_file"

# Replace whatever is at the output path with the finished image directory.
echo "Saving to: $output_dir"
rm -rf "$output_dir"
cp -r "$workdir/image" "$output_dir"

echo "Success! You can now push with:"
echo " skopeo copy oci:$output_dir docker://your-registry/squashed:tag"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment