Created
September 16, 2024 03:19
-
-
Save dnebing/8b1ffdd8dc1c27cc5874dd4f968877fe to your computer and use it in GitHub Desktop.
This script processes the separate OpenAPI YAML files and merges them into a single YAML file that is ready for generating a client.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
#
# Normalize a set of per-object OpenAPI YAML files (titles, server URLs,
# path prefixes, shared components, duplicate operationIds) and join
# them into a single spec ready for client generation.

# Enable strict mode: abort on errors, unset variables, and pipeline failures.
set -euo pipefail

# Declare associative arrays shared by the functions below.
declare -A file_object_map          # input file -> derived object name
declare -A operationId_counts       # operationId -> occurrence count
declare -A operationId_occurrences  # operationId -> "file|method|path;" list

# Declare indexed arrays for input files
declare -a input_files

# Output file variable, set by parse_args (-o/--output).
output_file=""
# Print the usage help text to stdout, then terminate with status 1.
usage() {
  printf '%s\n' \
    "Usage: $0 -o <output.yaml> <input1.yaml> <input2.yaml> ..." \
    "" \
    "Options:" \
    " -o, --output <output.yaml> Specify the output YAML file for joined content." \
    "" \
    "Example:" \
    " $0 -o joined.yaml file1.yaml file2.yaml file3.yaml"
  exit 1
}
# Parse command-line arguments into the globals output_file and
# input_files.  Any invalid combination exits via usage().
parse_args() {
  local have_output=false
  while (( $# > 0 )); do
    case "$1" in
      -o|--output)
        # The option value must exist and must not look like another flag.
        if [[ -z "${2:-}" || "$2" =~ ^- ]]; then
          echo "Error: Argument for $1 is missing." >&2
          usage
        fi
        if [[ "$have_output" == true ]]; then
          echo "Error: Multiple -o/--output arguments provided." >&2
          usage
        fi
        output_file="$2"
        have_output=true
        shift 2
        ;;
      -*)
        echo "Unknown option: $1" >&2
        usage
        ;;
      *)
        input_files+=("$1")
        shift
        ;;
    esac
  done

  if [[ "$have_output" != true ]]; then
    echo "Error: Output file not specified. Use -o or --output to specify the output YAML file." >&2
    usage
  fi
  if (( ${#input_files[@]} == 0 )); then
    echo "Error: No input YAML files provided." >&2
    usage
  fi

  # The output file must never also be consumed as an input.
  local candidate
  for candidate in "${input_files[@]}"; do
    if [[ "$candidate" == "$output_file" ]]; then
      echo "Error: Output file '$output_file' cannot be one of the input files." >&2
      exit 1
    fi
  done
}
# Verify that every external tool this script depends on is installed.
# On any missing tool, print install hints to stderr and exit 1.
check_commands() {
  local -a missing=()
  local -a required_commands=("gawk" "gsed" "yq" "npx")
  local cmd
  for cmd in "${required_commands[@]}"; do
    # 'command -v' is the portable "is this tool installed?" probe.
    if ! command -v "$cmd" >/dev/null 2>&1; then
      missing+=("$cmd")
    fi
  done
  if (( ${#missing[@]} > 0 )); then
    # Diagnostics belong on stderr, not stdout (the original echoed to stdout).
    echo "Error: The following required commands are missing:" >&2
    for cmd in "${missing[@]}"; do
      case "$cmd" in
        gawk)
          echo " - gawk: Install it using 'brew install gawk'" >&2
          ;;
        gsed)
          echo " - gsed: Install it using 'brew install gnu-sed'" >&2
          ;;
        yq)
          echo " - yq: Install it using 'brew install yq'" >&2
          ;;
        npx)
          echo " - npx: Install it by installing Node.js from 'brew install node'" >&2
          ;;
        *)
          echo " - $cmd" >&2
          ;;
      esac
    done
    exit 1
  fi
}
# Succeed when the given file is the configured output file — the one
# file every processing pass deliberately skips.
is_excluded() {
  [[ "$1" == "$output_file" ]]
}
# Walk every input file: announce and skip the excluded output file,
# hand everything else to process_file.
process_files() {
  local f
  echo "Processing the following YAML files:"
  for f in "${input_files[@]}"; do
    if ! is_excluded "$f"; then
      process_file "$f"
    else
      echo " - $f (Excluded)"
    fi
  done
  echo "Done"
}
# Run the full per-file normalization pipeline on one YAML file:
# derive the object name and base path, record them, then rewrite the
# title, server URL, paths, responses, and parameters in place.
# A missing file only produces a warning on stderr.
process_file() {
  local file="$1"
  if [[ ! -f "$file" ]]; then
    echo "Warning: File '$file' does not exist." >&2
    return
  fi
  echo " - $file"
  # object_name/base_path are intentionally global — other passes read
  # file_object_map which is populated here.
  object_name=$(extract_object_name "$file")
  echo " Object Name: $object_name"
  file_object_map["$file"]="$object_name"
  base_path=$(extract_base_path "$file")
  echo " Base Path: $base_path"
  fix_title "$file" "$object_name"
  remove_base_path "$file"
  remove_openapi_paths "$file"
  prefix_paths "$file" "$base_path"
  add_default_response_description "$file"
  add_parameter_type_string "$file"
}
# Derive a human-readable object name from a YAML filename:
# "user-account.yaml" -> "User Account".
# Implemented in pure bash (split on '-', Title-Case each word) instead
# of forking gawk, removing the gawk dependency for this step.
extract_object_name() {
  local stem
  stem=$(basename "$1" .yaml)
  local -a words=()
  # Split the stem on '-' into its component words.
  IFS='-' read -ra words <<< "$stem"
  local result="" part
  for part in "${words[@]}"; do
    # Skip empty fragments from doubled/leading hyphens, matching the
    # old gawk field behavior.
    [[ -n "$part" ]] || continue
    part="${part,,}"   # lowercase the whole word...
    part="${part^}"    # ...then capitalize its first letter
    result+="${result:+ }$part"
  done
  echo "$result"
}
# Overwrite the spec's info.title with the derived object name, but
# only when the generator left the placeholder title "Object".
fix_title() {
  local file="$1"
  local object_name="$2"
  if [[ $(yq eval '.info.title' "$file") == "Object" ]]; then
    # Pass the name via the environment (yq's strenv) instead of
    # splicing it into the expression text, so quotes or other special
    # characters in the name cannot break the yq program.
    title="$object_name" yq eval --inplace '.info.title = strenv(title)' "$file"
  fi
}
# Extract the path component (e.g. "/o/thing/v1.0") from the first
# server URL, with any single trailing slash removed.  Uses bash's own
# regex and parameter expansion instead of GNU sed, so this step no
# longer depends on gsed.
extract_base_path() {
  local file="$1"
  local url
  url=$(yq eval '.servers[0].url' "$file")
  local base_path="$url"
  # Strip the scheme and host ("https://host[:port]") when present;
  # mirrors the old gsed 's|^https?://[^/]+||'.
  if [[ "$url" =~ ^https?://[^/]+(.*)$ ]]; then
    base_path="${BASH_REMATCH[1]}"
  fi
  # Drop one trailing slash, mirroring the old gsed 's|/$||'.
  base_path="${base_path%/}"
  echo "$base_path"
}
# Replace the first server URL's scheme+host+path with a fixed local
# development URL, so the joined spec points at localhost.
# NOTE(review): the sub() pattern requires a "/" after the host, so a
# bare "https://host" URL (no path) would be left untouched — confirm
# the generated specs always include a path component.
remove_base_path() {
  local file="$1"
  yq eval --inplace '.servers[0].url |= sub("https?://[^/]+/[^ ]*", "http://localhost:8080/")' "$file"
}
# Delete a single key from the .paths map of the given file.
remove_path() {
  local file="$1"
  local path_to_remove="$2"
  # Pass the key via the environment (yq's strenv) rather than splicing
  # it into the program text, so quotes or other special characters in
  # the path cannot break the yq expression.
  p="$path_to_remove" yq eval --inplace 'del(.paths[strenv(p)])' "$file"
}
# Remove every .paths entry that ends with /openapi.{type} — the spec
# self-description endpoints, which don't belong in a generated client.
remove_openapi_paths() {
  local file="$1"
  local paths path
  paths=$(yq eval '.paths | keys | .[]' "$file")
  [[ -n "$paths" ]] || return 0
  # Iterate line-by-line instead of word-splitting "$paths", so keys
  # containing spaces or glob characters survive intact.
  while IFS= read -r path; do
    if [[ "$path" == */openapi.* ]]; then
      remove_path "$file" "$path"
    fi
  done <<< "$paths"
}
# Prefix every key under .paths with the extracted base path, printing
# one progress dot per key, so each file's routes stay unique after the
# final join.
prefix_paths() {
  local file="$1"
  local base_path="$2"
  local paths
  paths=$(yq eval '.paths | keys | .[]' "$file")
  # Nothing to do for a spec with no paths.
  if [ -z "$paths" ]; then
    return
  fi
  printf "Path Update: "
  local printed_dot=false
  # NOTE(review): word-splitting "$paths" assumes path keys contain no
  # whitespace — true for typical OpenAPI paths, but unverified here.
  for path in $paths; do
    if [[ "$path" == \?* ]]; then
      # Keys yq emits with a leading "?" (presumably its complex-key
      # marker) keep that marker ahead of the new prefix; ${path:2}
      # drops the marker and the following character. TODO confirm
      # against actual yq key output.
      new_path="? ${base_path}${path:2}"
    else
      new_path="${base_path}${path}"
    fi
    printf "."
    printed_dot=true
    # Copy the entry to its prefixed key, then delete the original.
    yq eval --inplace '.paths["'"$new_path"'"] = .paths["'"$path"'"] | del(.paths["'"$path"'"])' "$file"
  done
  # Terminate the dot line only if at least one dot was printed.
  if [ "$printed_dot" = true ]; then
    echo ""
  fi
}
# Ensure every operation's "default" response carries a description
# (some generators reject a default response without one), printing a
# progress dot per method examined.
add_default_response_description() {
  local file="$1"
  printf "Desc Update: "
  local paths
  paths=$(yq eval '.paths | keys | .[]' "$file")
  # NOTE(review): word-splitting $paths and interpolating ".$method"
  # assumes whitespace-free path keys and plain method names
  # (get/post/...) — typical for OpenAPI, but unverified here.
  for path in $paths; do
    local methods
    methods=$(yq eval ".paths[\"$path\"] | keys | .[]" "$file")
    for method in $methods; do
      printf "."
      local has_default_response
      has_default_response=$(yq eval ".paths[\"$path\"].$method.responses.default" "$file")
      local has_description
      has_description=$(yq eval ".paths[\"$path\"].$method.responses.default.description" "$file")
      # Patch only when a default response exists but lacks a description.
      if [[ "$has_default_response" != "null" && "$has_description" == "null" ]]; then
        # Newline first so the message doesn't run into the dots.
        echo ""
        echo "Adding default response description for $path [$method]"
        yq eval --inplace ".paths[\"$path\"].$method.responses.default.description = \"default response\"" "$file"
      fi
    done
  done
  echo ""
}
# Ensure every operation parameter carries a schema; any parameter
# missing one gets a default {"type": "string"}.  Prints a progress dot
# per method examined.
add_parameter_type_string() {
  local file="$1"
  printf "Parm Update: "
  local paths
  paths=$(yq eval '.paths | keys | .[]' "$file")
  # NOTE(review): word-splitting $paths and interpolating ".$method"
  # assumes whitespace-free path keys and plain method names — typical
  # for OpenAPI, but unverified here.
  for path in $paths; do
    local methods
    methods=$(yq eval ".paths[\"$path\"] | keys | .[]" "$file")
    for method in $methods; do
      printf "."
      local param_count
      # "|| echo 0" covers operations without a parameters array; a
      # literal "null" result also compares as 0 in the -gt test below.
      param_count=$(yq eval ".paths[\"$path\"].$method.parameters | length" "$file" 2>/dev/null || echo 0)
      if [[ "$param_count" -gt 0 ]]; then
        for ((i=0; i<param_count; i++)); do
          local param
          param=$(yq eval ".paths[\"$path\"].$method.parameters[$i].name" "$file")
          local has_schema
          has_schema=$(yq eval ".paths[\"$path\"].$method.parameters[$i].schema" "$file" 2>/dev/null || echo "null")
          # Only parameters with no schema at all are patched.
          if [[ "$has_schema" == "null" ]]; then
            yq eval --inplace '.paths["'"$path"'"].'"$method"'.parameters['"$i"'].schema = {"type": "string"}' "$file"
            # Newline first so the message doesn't run into the dots.
            echo ""
            echo "Adding schema type: string to parameter $param in $path [$method]"
          fi
        done
      fi
    done
  done
  echo ""
}
# Unify components across all YAML files: for every schema name that
# appears in more than one input file, merge all of its definitions
# (left-to-right with yq's "*" deep-merge) and write the merged result
# back into each file that differs from it, so the later join does not
# produce conflicting component definitions.
# NOTE(review): temp_dir is removed only on normal completion — an
# early failure under 'set -e' leaks it; a 'trap ... EXIT' would cover
# that.
unify_components() {
  local files=("$@")
  local temp_dir
  temp_dir=$(mktemp -d)
  declare -A component_files
  # Collect components and map each name to the files defining it.
  for file in "${files[@]}"; do
    if is_excluded "$file"; then
      continue
    fi
    local components
    # "|| true" keeps files with no .components.schemas from aborting.
    components=$(yq eval '.components.schemas | keys | .[]' "$file" 2>/dev/null || true)
    for comp in $components; do
      component_files["$comp"]+="$file "
    done
  done
  # Iterate over components present in multiple files.
  for comp in "${!component_files[@]}"; do
    # Intentional word-splitting of the space-separated file list;
    # assumes input filenames contain no spaces.
    local files_with_comp=(${component_files["$comp"]})
    if [ "${#files_with_comp[@]}" -lt 2 ]; then
      continue # Only process components in multiple files
    fi
    echo "Processing component: $comp"
    local defs=()
    for f in "${files_with_comp[@]}"; do
      # Extract each file's definition of the component to its own file.
      yq eval ".components.schemas.$comp" "$f" > "$temp_dir/$comp-$(basename "$f").yaml"
      defs+=("$temp_dir/$comp-$(basename "$f").yaml")
    done
    # Bail out of the merge if any extracted definition is empty.
    local empty_def=false
    for def in "${defs[@]}"; do
      if [ ! -s "$def" ]; then
        empty_def=true
        break
      fi
    done
    if [ "$empty_def" = true ]; then
      echo "Warning: One or more definitions for component '$comp' are empty. Skipping merge."
      continue
    fi
    # Sequentially merge definitions: later files' keys win on conflict.
    merged_def=$(yq eval '.' "${defs[0]}")
    for ((i=1; i<${#defs[@]}; i++)); do
      temp_merged="$temp_dir/merged_$comp.yaml"
      echo "$merged_def" > "$temp_dir/base.yaml"
      yq eval-all 'select(fileIndex == 0) * select(fileIndex == 1)' "$temp_dir/base.yaml" "${defs[$i]}" > "$temp_dir/merged_$comp.yaml"
      merged_def=$(yq eval '.' "$temp_dir/merged_$comp.yaml")
    done
    # Write merged_def to a temporary file for the load() below.
    echo "$merged_def" > "$temp_dir/merged_$comp.yaml"
    # Compare each file's definition and update only where it differs.
    for f in "${files_with_comp[@]}"; do
      if is_excluded "$f"; then
        continue
      fi
      local current_def
      current_def=$(yq eval ".components.schemas.$comp" "$f")
      local merged_def_trimmed
      merged_def_trimmed=$(yq eval '.' "$temp_dir/merged_$comp.yaml")
      if [ "$current_def" != "$merged_def_trimmed" ]; then
        echo "Updating component '$comp' in $f"
        # Use load() to splice in the merged component from the temp file.
        yq eval --inplace ".components.schemas[\"$comp\"] = load(\"$temp_dir/merged_$comp.yaml\")" "$f"
      fi
    done
  done
  rm -rf "$temp_dir"
  echo "Components unified across all YAML files."
}
# Resolve $refs by copying missing component definitions: any schema a
# file references via "#/components/schemas/<name>" but does not define
# is copied in from whichever input file defines it first.  Repeats
# until a pass adds nothing (copied components can themselves contain
# new $refs), capped at max_iterations passes.
resolve_refs() {
  local files=("$@")
  local temp_dir
  temp_dir=$(mktemp -d)
  declare -A component_definitions
  # Pre-index all component definitions across the inputs.
  for file in "${files[@]}"; do
    if is_excluded "$file"; then
      continue
    fi
    local components
    components=$(yq eval '.components.schemas | keys | .[]' "$file" 2>/dev/null || true)
    for comp in $components; do
      # Store the component definition only once (first occurrence wins).
      if [[ -z "${component_definitions[$comp]:-}" ]]; then
        component_definitions["$comp"]=$(yq eval ".components.schemas.$comp" "$file")
      fi
    done
  done
  local added_component=true
  local iteration=0
  local max_iterations=10
  while [ "$added_component" = true ] && [ "$iteration" -lt "$max_iterations" ]; do
    iteration=$((iteration + 1))
    echo "Resolve_refs Iteration: $iteration"
    added_component=false
    for file in "${files[@]}"; do
      if is_excluded "$file"; then
        continue
      fi
      echo "Resolving \$refs in $file"
      # Extract all $ref values anywhere in the document that point
      # into #/components/schemas/; "|| true" covers files without any.
      local refs
      refs=$(yq eval '.. | select(has("$ref")) | ."$ref"' "$file" | grep '^#/components/schemas/' || true)
      for ref in $refs; do
        # Extract the component name from the $ref.
        local comp
        comp=$(echo "$ref" | sed -E 's|#/components/schemas/||')
        # Check if the component is already defined in the current file.
        local has_component
        has_component=$(yq eval ".components.schemas[\"$comp\"]" "$file" 2>/dev/null || echo "null")
        if [[ "$has_component" == "null" ]]; then
          # Check if we have the component definition in our index.
          if [[ -n "${component_definitions[$comp]:-}" ]]; then
            echo "Adding missing component '$comp' to $file"
            # Write the indexed definition to a temp file and splice it
            # in with yq's load() to sidestep quoting issues.
            echo "${component_definitions[$comp]}" > "$temp_dir/$comp.yaml"
            yq eval --inplace ".components.schemas[\"$comp\"] = load(\"$temp_dir/$comp.yaml\")" "$file"
            added_component=true
          else
            echo "Warning: Component '$comp' referenced in $file but not found in any provided files."
          fi
        fi
      done
    done
    # Redundant with the while condition, but emits a log line when a
    # pass makes no progress.
    if [ "$added_component" = false ]; then
      echo "No new components added in this iteration."
      break
    fi
  done
  if [ "$iteration" -ge "$max_iterations" ]; then
    echo "Reached maximum iterations ($max_iterations) while resolving \$refs."
  fi
  rm -rf "$temp_dir"
  echo "All \$refs resolved."
}
# Find operationIds that occur more than once across all inputs and
# rename every occurrence to a unique "<method><ObjectName><Rest>" id,
# rewriting each YAML file in place.
# Reads/writes the globals: file_object_map (read; filled by
# process_file), operationId_counts and operationId_occurrences
# (written here).
resolve_duplicate_operation_ids() {
  local files=("$@")
  local temp_file
  temp_file=$(mktemp)
  echo "Collecting all operationIds from provided YAML files..."
  # First pass: collect all operationIds.
  for file in "${files[@]}"; do
    if is_excluded "$file"; then
      continue
    fi
    echo " Processing file: $file"
    # Extract all paths; "|| true" covers files with no .paths map.
    local paths
    paths=$(yq eval '.paths | keys | .[]' "$file" 2>/dev/null || true)
    for path in $paths; do
      # Extract all methods for the path.
      local methods
      methods=$(yq eval ".paths[\"$path\"] | keys | .[]" "$file" 2>/dev/null || true)
      for method in $methods; do
        # Extract operationId (yields the string "null" when absent).
        local operation_id
        operation_id=$(yq eval ".paths[\"$path\"].$method.operationId" "$file" 2>/dev/null || echo "null")
        if [[ "$operation_id" != "null" && -n "$operation_id" ]]; then
          # Audit record "operationId,file,method,path"; written but
          # never re-read — kept only as a debugging trace.
          echo "$operation_id,$file,$method,$path" >> "$temp_file"
          # Increment count with a default of 0 if not yet seen.
          operationId_counts["$operation_id"]=$(( ${operationId_counts["$operation_id"]:-0} + 1 ))
          # Append this occurrence as "file|method|path;".
          operationId_occurrences["$operation_id"]+="$file|$method|$path;"
        fi
      done
    done
  done
  # Identify duplicate operationIds.
  echo "Identifying duplicate operationIds..."
  declare -A duplicate_operationIds
  for opId in "${!operationId_counts[@]}"; do
    if [ "${operationId_counts[$opId]}" -gt 1 ]; then
      duplicate_operationIds["$opId"]=1
    fi
  done
  if [ "${#duplicate_operationIds[@]}" -eq 0 ]; then
    echo "No duplicate operationIds found."
    rm "$temp_file"
    return
  fi
  echo "Found duplicate operationIds:"
  for dup in "${!duplicate_operationIds[@]}"; do
    echo " - $dup"
  done
  # Rename every occurrence of each duplicate operationId.
  for dup in "${!duplicate_operationIds[@]}"; do
    echo "Resolving duplicate operationId: $dup"
    # Unpack the "file|method|path;" occurrence list.
    IFS=';' read -ra occs <<< "${operationId_occurrences["$dup"]}"
    for occ in "${occs[@]}"; do
      # Skip empty occurrences (possible trailing ;).
      [[ -z "$occ" ]] && continue
      IFS='|' read -r file method path <<< "$occ"
      # Object name from the earlier process_file pass, spaces removed.
      local object_name="${file_object_map["$file"]}"
      object_name="${object_name// /}"
      # Lowercase via bash case conversion instead of forking awk.
      local method_lower="${method,,}"
      local new_operation_id
      if [[ "${dup,,}" == "${method_lower}"* ]]; then
        # The id starts with the HTTP method (case-insensitively), so
        # strip it by length.  This replaces the previous
        # `sed -E "s/^${method_lower}//I"`: the `I` flag is a GNU-sed
        # extension and fails on BSD sed, which is what plain `sed`
        # resolves to on the macOS hosts this script targets.
        local rest="${dup:${#method_lower}}"
        new_operation_id="${method_lower}${object_name}${rest}"
      else
        # Otherwise prepend method and object name to the original id.
        new_operation_id="${method_lower}${object_name}${dup}"
      fi
      echo " - Updating operationId in file: $file, path: $path, method: $method"
      echo " Old operationId: $dup"
      echo " New operationId: $new_operation_id"
      # Rewrite the operationId in place.
      yq eval --inplace ".paths[\"$path\"][\"$method\"].operationId = \"${new_operation_id}\"" "$file"
    done
  done
  rm "$temp_file"
  echo "Duplicate operationIds have been resolved."
}
# Merge the processed specs into one file with Redocly CLI's join
# command, first removing any stale output from a previous run.
join_yaml_files() {
  local target="$1"
  shift
  local sources=("$@")
  # A leftover output from an earlier run must not survive into this one.
  if [[ -f "$target" ]]; then
    echo "Output file '$target' already exists. Deleting it before joining."
    rm "$target"
  fi
  echo "Joining YAML files into '$target'..."
  npx @redocly/cli join -o "$target" "${sources[@]}"
  echo "YAML files have been successfully joined into '$target'."
}
# Main execution flow: parse arguments, verify tooling, normalize each
# input spec in place, reconcile shared components / $refs / duplicate
# operationIds, then join everything into the output file.
main() {
  parse_args "$@"
  check_commands
  # Per-file normalization passes.
  process_files "${input_files[@]}"
  unify_components "${input_files[@]}"
  resolve_refs "${input_files[@]}"
  resolve_duplicate_operation_ids "${input_files[@]}"
  # Merge everything into the single output spec.
  join_yaml_files "$output_file" "${input_files[@]}"
  # Fixed message: the original printed "Ready to begin processing."
  # here, after all processing had already finished.
  echo "Processing complete."
}
# Entry point: run the main function with all CLI arguments.
main "$@"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment