Skip to content

Instantly share code, notes, and snippets.

@dnebing
Created September 16, 2024 03:19
Show Gist options
  • Save dnebing/8b1ffdd8dc1c27cc5874dd4f968877fe to your computer and use it in GitHub Desktop.
Save dnebing/8b1ffdd8dc1c27cc5874dd4f968877fe to your computer and use it in GitHub Desktop.
This script processes the separate per-object OpenAPI YAML files and merges them into a single YAML file that is ready for client generation.
#!/usr/bin/env bash
# Strict mode: exit on error (-e), error on unset variables (-u),
# and fail a pipeline if any stage fails (pipefail).
set -euo pipefail
# Global associative arrays shared across functions:
#   file_object_map         - input file path -> derived object name
#   operationId_counts      - operationId -> number of occurrences seen
#   operationId_occurrences - operationId -> "file|method|path;" records
declare -A file_object_map
declare -A operationId_counts
declare -A operationId_occurrences
# Indexed array of input YAML files collected from the command line
declare -a input_files
# Path of the merged output YAML file (set by parse_args via -o/--output)
output_file=""
# Print command-line usage information to stdout and exit with status 1.
usage() {
  cat <<EOF
Usage: $0 -o <output.yaml> <input1.yaml> <input2.yaml> ...

Options:
 -o, --output <output.yaml> Specify the output YAML file for joined content.

Example:
 $0 -o joined.yaml file1.yaml file2.yaml file3.yaml
EOF
  exit 1
}
# Parse command-line arguments into the globals output_file / input_files.
# A single -o/--output flag supplies the merged output file; every other
# non-flag argument is collected as an input file. Exits via usage() when
# the output is missing or duplicated, a flag is unknown, no inputs were
# given, or the output file also appears among the inputs.
parse_args() {
  local have_output=false
  while (( $# > 0 )); do
    case "$1" in
      -o|--output)
        # Value must exist and must not look like another flag.
        if [[ -z "${2:-}" || "$2" =~ ^- ]]; then
          echo "Error: Argument for $1 is missing." >&2
          usage
        fi
        if [[ "$have_output" == true ]]; then
          echo "Error: Multiple -o/--output arguments provided." >&2
          usage
        fi
        output_file="$2"
        have_output=true
        shift 2
        ;;
      -*)
        echo "Unknown option: $1" >&2
        usage
        ;;
      *)
        input_files+=("$1")
        shift
        ;;
    esac
  done
  if [[ "$have_output" == false ]]; then
    echo "Error: Output file not specified. Use -o or --output to specify the output YAML file." >&2
    usage
  fi
  if (( ${#input_files[@]} == 0 )); then
    echo "Error: No input YAML files provided." >&2
    usage
  fi
  # The output file must never be processed as an input.
  local candidate
  for candidate in "${input_files[@]}"; do
    if [[ "$candidate" == "$output_file" ]]; then
      echo "Error: Output file '$output_file' cannot be one of the input files." >&2
      exit 1
    fi
  done
}
# Verify that every external tool this script depends on is installed.
# When any are missing, print a per-tool install hint and exit 1.
check_commands() {
  local -a needed=("gawk" "gsed" "yq" "npx")
  local tool missing=""
  for tool in "${needed[@]}"; do
    command -v "$tool" >/dev/null 2>&1 || missing+="$tool "
  done
  [[ -z "$missing" ]] && return 0
  echo "Error: The following required commands are missing:"
  # Word-splitting on $missing is intentional: it is a space-joined list.
  for tool in $missing; do
    case "$tool" in
      gawk) echo " - gawk: Install it using 'brew install gawk'" ;;
      gsed) echo " - gsed: Install it using 'brew install gnu-sed'" ;;
      yq)   echo " - yq: Install it using 'brew install yq'" ;;
      npx)  echo " - npx: Install it by installing Node.js from 'brew install node'" ;;
      *)    echo " - $tool" ;;
    esac
  done
  exit 1
}
# Succeed (status 0) when the given file is the configured output file,
# which must never be processed as an input.
is_excluded() {
  [[ "$1" == "$output_file" ]]
}
# Walk every configured input file, skipping the output file, and run the
# per-file normalization pipeline (process_file) on each one.
process_files() {
  local yaml
  echo "Processing the following YAML files:"
  for yaml in "${input_files[@]}"; do
    if is_excluded "$yaml"; then
      echo " - $yaml (Excluded)"
    else
      process_file "$yaml"
    fi
  done
  echo "Done"
}
# Run the full normalization pipeline on a single input YAML file:
# derive and record the object name, compute the server base path,
# replace a placeholder title, normalize the server URL, drop
# /openapi.* paths, prefix every path with the base path, and patch in
# missing response descriptions and parameter schemas.
# Warns (without failing) when the file does not exist.
process_file() {
  local file="$1"
  # object_name/base_path previously leaked as globals; they are only
  # used here (object_name is persisted via the global file_object_map).
  local object_name base_path
  if [ -f "$file" ]; then
    echo " - $file"
    object_name=$(extract_object_name "$file")
    echo " Object Name: $object_name"
    file_object_map["$file"]="$object_name"
    base_path=$(extract_base_path "$file")
    echo " Base Path: $base_path"
    fix_title "$file" "$object_name"
    remove_base_path "$file"
    remove_openapi_paths "$file"
    prefix_paths "$file" "$base_path"
    add_default_response_description "$file"
    add_parameter_type_string "$file"
  else
    echo "Warning: File '$file' does not exist." >&2
  fi
}
# Convert a file name like "purchase-order.yaml" into a display name like
# "Purchase Order": split the basename on hyphens and capitalize each word
# (first letter upper, rest lower), joining with single spaces.
# Implemented with bash parameter expansion so gawk is not needed here;
# like the old awk version, runs of hyphens collapse to a single space.
extract_object_name() {
  local base word
  base=$(basename "$1" .yaml)
  local -a words parts=()
  IFS='-' read -ra words <<< "$base"
  for word in "${words[@]}"; do
    [[ -n "$word" ]] || continue  # skip empty fields from repeated hyphens
    word=${word,,}                # lowercase the whole word first
    parts+=("${word^}")           # then capitalize its first letter
  done
  echo "${parts[*]-}"             # join with spaces (default IFS)
}
# Set .info.title to the derived object name, but only when the current
# title is the generic placeholder "Object".
# The name is passed through an environment variable and yq's strenv()
# instead of being spliced into the expression, so quotes or other
# yq-special characters in the name cannot break the eval string.
fix_title() {
  local file="$1"
  local object_name="$2"
  if [[ $(yq eval '.info.title' "$file") == "Object" ]]; then
    name="$object_name" yq eval --inplace '.info.title = strenv(name)' "$file"
  fi
}
# Extract the path component of the first server URL (e.g. "/o/c/books"
# from "http://localhost:8080/o/c/books/"), trimming one trailing slash.
extract_base_path() {
  local file="$1"
  local url
  url=$(yq eval '.servers[0].url' "$file")
  _strip_scheme_host "$url"
}
# Helper: drop the "scheme://host[:port]" prefix from a URL and trim one
# trailing slash. Pure-bash replacement for the previous gsed pipeline;
# non-URL input (e.g. "null" when no servers entry exists) passes through
# unchanged apart from the trailing-slash trim, matching the old behavior.
_strip_scheme_host() {
  local url="$1"
  local path="$1"
  if [[ "$url" =~ ^https?://[^/]+(.*)$ ]]; then
    path="${BASH_REMATCH[1]}"
  fi
  printf '%s\n' "${path%/}"
}
# Normalize the first server URL. Despite the name, this does not merely
# strip the base path: the yq sub() rewrites the whole matched
# "scheme://host/anything" string to a bare "http://localhost:8080/",
# so from here on the paths themselves must carry the base path
# (which is what prefix_paths does next in the pipeline).
remove_base_path() {
local file="$1"
yq eval --inplace '.servers[0].url |= sub("https?://[^/]+/[^ ]*", "http://localhost:8080/")' "$file"
}
# Delete a single entry from .paths in the given file.
# The key is passed via an environment variable and yq's strenv() rather
# than spliced into the expression, so path strings containing quotes or
# other yq-special characters cannot break out of the eval string.
remove_path() {
  local file="$1"
  p="$2" yq eval --inplace 'del(.paths[strenv(p)])' "$file"
}
# Remove every path whose key contains "/openapi." — the spec's own
# /openapi.{type} endpoints, which must not appear in the merged client.
# Keys are read line-by-line so path keys containing whitespace survive
# intact (the previous unquoted for-loop word-split them).
remove_openapi_paths() {
  local file="$1"
  local path
  local -a keys=()
  # Collect keys first so the in-place yq edits don't race the reader.
  while IFS= read -r path; do
    [[ -n "$path" ]] && keys+=("$path")
  done < <(yq eval '.paths | keys | .[]' "$file")
  for path in "${keys[@]}"; do
    if [[ "$path" == */openapi.* ]]; then
      remove_path "$file" "$path"
    fi
  done
}
# Prefix every key under .paths with the server base path, printing one
# progress dot per rewritten path.
# Keys are read line-by-line so keys containing spaces are handled as a
# single key. In particular, keys yq prints with a leading "? " (YAML
# complex-key notation) keep that prefix: the old word-splitting loop
# could never match the \?* branch because "? /x" split into two tokens.
prefix_paths() {
  local file="$1"
  local base_path="$2"
  local path new_path
  local -a keys=()
  # Collect keys first so the in-place yq edits don't race the reader.
  while IFS= read -r path; do
    [[ -n "$path" ]] && keys+=("$path")
  done < <(yq eval '.paths | keys | .[]' "$file")
  if (( ${#keys[@]} == 0 )); then
    return
  fi
  printf "Path Update: "
  for path in "${keys[@]}"; do
    if [[ "$path" == \?* ]]; then
      # Preserve the "? " marker and prefix the remainder.
      new_path="? ${base_path}${path:2}"
    else
      new_path="${base_path}${path}"
    fi
    printf "."
    yq eval --inplace '.paths["'"$new_path"'"] = .paths["'"$path"'"] | del(.paths["'"$path"'"])' "$file"
  done
  # At least one key exists here, so a dot was always printed.
  echo ""
}
# Ensure every operation that declares a "default" response also carries a
# description (OpenAPI requires one), inserting the literal text
# "default response" where missing. Prints one progress dot per
# path/method pair.
# NOTE(review): `for path in $paths` word-splits on whitespace, so a path
# key containing spaces would be processed as multiple bogus keys; "$path"
# is also interpolated directly into yq expressions, so keys containing
# double quotes would break the expression — confirm keys are always
# space- and quote-free here.
add_default_response_description() {
local file="$1"
printf "Desc Update: "
local paths
paths=$(yq eval '.paths | keys | .[]' "$file")
# Outer loop: each path key; inner loop: each HTTP method beneath it.
for path in $paths; do
local methods
methods=$(yq eval ".paths[\"$path\"] | keys | .[]" "$file")
for method in $methods; do
printf "."
# yq prints "null" when the addressed node is absent.
local has_default_response
has_default_response=$(yq eval ".paths[\"$path\"].$method.responses.default" "$file")
local has_description
has_description=$(yq eval ".paths[\"$path\"].$method.responses.default.description" "$file")
# Only patch operations that HAVE a default response but LACK a description.
if [[ "$has_default_response" != "null" && "$has_description" == "null" ]]; then
echo ""
echo "Adding default response description for $path [$method]"
yq eval --inplace ".paths[\"$path\"].$method.responses.default.description = \"default response\"" "$file"
fi
done
done
echo ""
}
# Ensure every operation parameter has a schema: any parameter lacking one
# gets a minimal {"type": "string"} schema so client generators do not
# reject untyped parameters. Prints one progress dot per path/method pair.
# NOTE(review): as in add_default_response_description, `for path in
# $paths` word-splits and "$path" is interpolated into yq expressions —
# assumes path keys contain no whitespace or quotes; verify.
add_parameter_type_string() {
local file="$1"
printf "Parm Update: "
local paths
paths=$(yq eval '.paths | keys | .[]' "$file")
for path in $paths; do
local methods
methods=$(yq eval ".paths[\"$path\"] | keys | .[]" "$file")
for method in $methods; do
printf "."
# Number of declared parameters for this operation; 0 when the
# parameters node is absent (yq errors are suppressed).
local param_count
param_count=$(yq eval ".paths[\"$path\"].$method.parameters | length" "$file" 2>/dev/null || echo 0)
if [[ "$param_count" -gt 0 ]]; then
for ((i=0; i<param_count; i++)); do
local param
param=$(yq eval ".paths[\"$path\"].$method.parameters[$i].name" "$file")
# "null" means the parameter has no schema node at all.
local has_schema
has_schema=$(yq eval ".paths[\"$path\"].$method.parameters[$i].schema" "$file" 2>/dev/null || echo "null")
if [[ "$has_schema" == "null" ]]; then
yq eval --inplace '.paths["'"$path"'"].'"$method"'.parameters['"$i"'].schema = {"type": "string"}' "$file"
echo ""
echo "Adding schema type: string to parameter $param in $path [$method]"
fi
done
fi
done
done
echo ""
}
# Unify .components.schemas definitions that appear in more than one file:
# for each such component, deep-merge all of its definitions with yq's
# `*` operator (later files' fields win) and write the merged definition
# back into every file that carried the component, so the later join does
# not see conflicting schemas.
# NOTE(review): merged_def and temp_merged are not declared local, so they
# leak into the global scope; temp_merged is assigned but never read.
# File lists are space-joined strings, so file names containing spaces
# would be mishandled — assumed not to occur here.
unify_components() {
local files=("$@")
local temp_dir
temp_dir=$(mktemp -d)
declare -A component_files
# Pass 1: map each component name to the space-joined list of files
# that define it (output file excluded).
for file in "${files[@]}"; do
if is_excluded "$file"; then
continue
fi
local components
components=$(yq eval '.components.schemas | keys | .[]' "$file" 2>/dev/null || true)
for comp in $components; do
component_files["$comp"]+="$file "
done
done
# Pass 2: merge and rewrite components that appear in multiple files.
for comp in "${!component_files[@]}"; do
# Intentionally unquoted: split the space-joined file list into an array.
local files_with_comp=(${component_files["$comp"]})
if [ "${#files_with_comp[@]}" -lt 2 ]; then
continue # Only process components in multiple files
fi
echo "Processing component: $comp"
local defs=()
# Extract every file's definition of this component to its own temp file.
for f in "${files_with_comp[@]}"; do
yq eval ".components.schemas.$comp" "$f" > "$temp_dir/$comp-$(basename "$f").yaml"
defs+=("$temp_dir/$comp-$(basename "$f").yaml")
done
# Bail out of the merge when any extracted definition is empty.
local empty_def=false
for def in "${defs[@]}"; do
if [ ! -s "$def" ]; then
empty_def=true
break
fi
done
if [ "$empty_def" = true ]; then
echo "Warning: One or more definitions for component '$comp' are empty. Skipping merge."
continue
fi
# Left-fold the definitions pairwise with yq's deep-merge operator (*).
merged_def=$(yq eval '.' "${defs[0]}")
for ((i=1; i<${#defs[@]}; i++)); do
temp_merged="$temp_dir/merged_$comp.yaml"
echo "$merged_def" > "$temp_dir/base.yaml"
yq eval-all 'select(fileIndex == 0) * select(fileIndex == 1)' "$temp_dir/base.yaml" "${defs[$i]}" > "$temp_dir/merged_$comp.yaml"
merged_def=$(yq eval '.' "$temp_dir/merged_$comp.yaml")
done
# Persist the final merged definition for the load() below.
echo "$merged_def" > "$temp_dir/merged_$comp.yaml"
# Pass 3: rewrite the component in each file that differs from the merge.
for f in "${files_with_comp[@]}"; do
if is_excluded "$f"; then
continue
fi
local current_def
current_def=$(yq eval ".components.schemas.$comp" "$f")
local merged_def_trimmed
merged_def_trimmed=$(yq eval '.' "$temp_dir/merged_$comp.yaml")
if [ "$current_def" != "$merged_def_trimmed" ]; then
echo "Updating component '$comp' in $f"
# load() imports the merged YAML verbatim, avoiding quoting issues.
yq eval --inplace ".components.schemas[\"$comp\"] = load(\"$temp_dir/merged_$comp.yaml\")" "$f"
fi
done
done
rm -rf "$temp_dir"
echo "Components unified across all YAML files."
}
# Resolve local "#/components/schemas/<name>" $refs: any file that
# references a schema it does not define gets a copy of that schema from
# whichever input file defined it first. Copying a schema can introduce
# new $refs, so the scan repeats until a fixed point (no additions) or
# max_iterations is reached.
# NOTE(review): definitions are indexed from the FIRST file that declares
# each component — assumes unify_components already made duplicates
# identical, so "first occurrence" is safe; verify call order in main.
resolve_refs() {
local files=("$@")
local temp_dir
temp_dir=$(mktemp -d)
declare -A component_definitions
# Pre-index all component definitions across every input file.
for file in "${files[@]}"; do
if is_excluded "$file"; then
continue
fi
local components
components=$(yq eval '.components.schemas | keys | .[]' "$file" 2>/dev/null || true)
for comp in $components; do
# Store the component definition only once (first occurrence)
if [[ -z "${component_definitions[$comp]:-}" ]]; then
component_definitions["$comp"]=$(yq eval ".components.schemas.$comp" "$file")
fi
done
done
local added_component=true
local iteration=0
local max_iterations=10
# Iterate to a fixed point: copied schemas may themselves contain $refs.
while [ "$added_component" = true ] && [ "$iteration" -lt "$max_iterations" ]; do
iteration=$((iteration + 1))
echo "Resolve_refs Iteration: $iteration"
added_component=false
for file in "${files[@]}"; do
if is_excluded "$file"; then
continue
fi
echo "Resolving \$refs in $file"
# Every $ref in the file that targets a local schema component.
local refs
refs=$(yq eval '.. | select(has("$ref")) | ."$ref"' "$file" | grep '^#/components/schemas/' || true)
for ref in $refs; do
# Strip the "#/components/schemas/" prefix to get the bare name.
local comp
comp=$(echo "$ref" | sed -E 's|#/components/schemas/||')
# "null" means the current file lacks this component.
local has_component
has_component=$(yq eval ".components.schemas[\"$comp\"]" "$file" 2>/dev/null || echo "null")
if [[ "$has_component" == "null" ]]; then
# Copy the definition in from the pre-built index, if we have it.
if [[ -n "${component_definitions[$comp]:-}" ]]; then
echo "Adding missing component '$comp' to $file"
# Write the definition to a temp file and import it with load()
# so the YAML is inserted verbatim, avoiding quoting problems.
echo "${component_definitions[$comp]}" > "$temp_dir/$comp.yaml"
yq eval --inplace ".components.schemas[\"$comp\"] = load(\"$temp_dir/$comp.yaml\")" "$file"
added_component=true
else
echo "Warning: Component '$comp' referenced in $file but not found in any provided files."
fi
fi
done
done
if [ "$added_component" = false ]; then
echo "No new components added in this iteration."
break
fi
done
if [ "$iteration" -ge "$max_iterations" ]; then
echo "Reached maximum iterations ($max_iterations) while resolving \$refs."
fi
rm -rf "$temp_dir"
echo "All \$refs resolved."
}
# Make operationIds unique across all input files. Pass 1 inventories
# every operationId with its (file, method, path). Any id seen more than
# once is rewritten in EVERY occurrence to
# "<method><ObjectName><rest-of-id>", where ObjectName comes from
# file_object_map (populated by process_file) with spaces removed.
# NOTE(review): two duplicate occurrences in the same file with the same
# method receive the same new id, so a collision could survive — confirm
# this cannot happen for the expected inputs.
# NOTE(review): temp_file records every occurrence but is never read back;
# operationId_counts/operationId_occurrences (globals) carry the data.
resolve_duplicate_operation_ids() {
local files=("$@")
local temp_file
temp_file=$(mktemp)
echo "Collecting all operationIds from provided YAML files..."
# Pass 1: collect every operationId and where it occurs.
for file in "${files[@]}"; do
if is_excluded "$file"; then
continue
fi
echo " Processing file: $file"
# All path keys in this file (empty when .paths is absent).
local paths
paths=$(yq eval '.paths | keys | .[]' "$file" 2>/dev/null || true)
for path in $paths; do
# All HTTP methods declared under this path.
local methods
methods=$(yq eval ".paths[\"$path\"] | keys | .[]" "$file" 2>/dev/null || true)
for method in $methods; do
# The operation's id, or "null" when absent.
local operation_id
operation_id=$(yq eval ".paths[\"$path\"].$method.operationId" "$file" 2>/dev/null || echo "null")
# Skip operations with no usable operationId.
if [[ "$operation_id" != "null" && -n "$operation_id" ]]; then
# Record occurrence as CSV: operationId,file,method,path
echo "$operation_id,$file,$method,$path" >> "$temp_file"
# Count occurrences, defaulting the counter to 0 when unset.
operationId_counts["$operation_id"]=$(( ${operationId_counts["$operation_id"]:-0} + 1 ))
# Append a "file|method|path;" record for later rewriting.
operationId_occurrences["$operation_id"]+="$file|$method|$path;"
fi
done
done
done
# Pass 2: any id with count > 1 is a duplicate.
echo "Identifying duplicate operationIds..."
declare -A duplicate_operationIds
for opId in "${!operationId_counts[@]}"; do
if [ "${operationId_counts[$opId]}" -gt 1 ]; then
duplicate_operationIds["$opId"]=1
fi
done
if [ "${#duplicate_operationIds[@]}" -eq 0 ]; then
echo "No duplicate operationIds found."
rm "$temp_file"
return
fi
echo "Found duplicate operationIds:"
for dup in "${!duplicate_operationIds[@]}"; do
echo " - $dup"
done
# Pass 3: rewrite every occurrence of each duplicate id.
for dup in "${!duplicate_operationIds[@]}"; do
echo "Resolving duplicate operationId: $dup"
# Split the ";"-joined occurrence records.
IFS=';' read -ra occs <<< "${operationId_occurrences["$dup"]}"
for occ in "${occs[@]}"; do
# Skip empty occurrences (possible trailing ;)
[[ -z "$occ" ]] && continue
# Unpack the "file|method|path" record.
IFS='|' read -r file method path <<< "$occ"
# Object name derived from the file (set earlier by process_file).
local object_name="${file_object_map["$file"]}"
# Collapse "Purchase Order" -> "PurchaseOrder" for use in the id.
object_name="${object_name// /}"
# Lowercased HTTP method used as the id prefix.
local method_lower
method_lower=$(echo "$method" | awk '{print tolower($0)}')
# If the id already begins with the method name (case-insensitively),
# splice the object name in after it; otherwise prepend both.
local new_operation_id
if [[ "${dup,,}" == "${method_lower}"* ]]; then
# Strip the leading method (GNU sed's /I = case-insensitive).
rest=$(echo "$dup" | sed -E "s/^${method_lower}//I")
# e.g. getById -> getPurchaseOrderById
new_operation_id="${method_lower}${object_name}${rest}"
else
# e.g. fetchAll (post) -> postPurchaseOrderfetchAll
new_operation_id="${method_lower}${object_name}${dup}"
fi
echo " - Updating operationId in file: $file, path: $path, method: $method"
echo " Old operationId: $dup"
echo " New operationId: $new_operation_id"
# Rewrite the operationId in place; path/method are interpolated into
# the yq expression, so they must not contain double quotes.
yq eval --inplace ".paths[\"$path\"][\"$method\"].operationId = \"${new_operation_id}\"" "$file"
done
done
rm "$temp_file"
echo "Duplicate operationIds have been resolved."
}
# Merge the processed YAML files into a single spec with the Redocly CLI,
# deleting any stale output file first so the join starts clean.
join_yaml_files() {
  local target="$1"
  shift
  # Remaining arguments ("$@") are the input files to join.
  if [ -f "$target" ]; then
    echo "Output file '$target' already exists. Deleting it before joining."
    rm "$target"
  fi
  echo "Joining YAML files into '$target'..."
  npx @redocly/cli join -o "$target" "$@"
  echo "YAML files have been successfully joined into '$target'."
}
# Main execution flow: parse arguments, verify tooling, normalize each
# input file, unify shared components, resolve $refs and duplicate
# operationIds, then join everything into the output file.
main() {
  parse_args "$@"
  check_commands
  # Per-file normalization pipeline
  process_files "${input_files[@]}"
  # Cross-file consistency passes
  unify_components "${input_files[@]}"
  resolve_refs "${input_files[@]}"
  resolve_duplicate_operation_ids "${input_files[@]}"
  # Final merge into the requested output file
  join_yaml_files "$output_file" "${input_files[@]}"
  # Was "Ready to begin processing." — misleading at the END of the run.
  echo "Processing complete."
}
# Entry point: forward all command-line arguments to main.
main "$@"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment