Skip to content

Instantly share code, notes, and snippets.

@flash1293
Created January 26, 2024 16:38
Show Gist options
  • Save flash1293/edfc0a2824e1ca6768708be634004b15 to your computer and use it in GitHub Desktop.
Save flash1293/edfc0a2824e1ca6768708be634004b15 to your computer and use it in GitHub Desktop.
airbyte-lib connector migration
#!/bin/bash
# Lists every source connector that is certified and already ships a run.py.
# All paths are relative to the current working directory.

certified_connectors=()

for connector_path in airbyte-integrations/connectors/source-*; do
  # Only directories are considered (this also skips an unmatched glob)
  [ -d "$connector_path" ] || continue

  connector_name=${connector_path##*/}
  # The package subfolder is the folder name with dashes turned into underscores
  package_name=${connector_name//-/_}

  # A run.py inside the snake_cased subfolder is required
  [ -f "$connector_path/$package_name/run.py" ] || continue

  # Keep only connectors whose metadata.yaml contains "supportLevel: certified"
  grep -q "supportLevel: certified" "$connector_path/metadata.yaml" || continue

  certified_connectors+=("$connector_name")
done

# Report the collected connectors, one per line
echo "Certified Connectors with run.py:"
for connector_name in "${certified_connectors[@]}"; do
  echo "$connector_name"
done
#!/bin/bash
# Migrates Python source connectors so their entry point lives in
# <package>/run.py and is exposed as a console script:
#   1. adds an entry_points section to setup.py,
#   2. copies main.py to <package>/run.py, turning the __main__ guard into run(),
#   3. rewrites main.py into a thin wrapper that imports and calls run().
# Run it from the directory that contains the source-* connector folders.

# Initialize arrays to keep track of processed and skipped directories
processed_dirs=()
skipped_no_main=()
skipped_no_setup=()
skipped_run_exists=()

#######################################
# Edit a file in place with sed.
# `sed -i ''` is only understood by BSD sed; an attached backup suffix is
# accepted by both BSD and GNU sed, so use that and remove the backup.
# Arguments: $1 - sed script, $2 - file to edit
# Returns:   non-zero if sed fails
#######################################
_sed_in_place() {
  sed -i.migration-bak -e "$1" "$2" || return
  rm -f -- "$2.migration-bak"
}

#######################################
# Migrate one connector directory and record the outcome.
# Globals:   processed_dirs, skipped_no_setup, skipped_no_main,
#            skipped_run_exists (appended to)
# Arguments: $1 - connector directory as produced by the glob (trailing slash)
# Outputs:   progress messages on stdout, failures on stderr
# Returns:   0 when migrated or skipped, 1 when a migration step failed
#######################################
migrate_connector() {
  local dir=$1
  local folder_name=${dir%/} # Remove trailing slash
  local snake_cased_folder_name=${folder_name//-/_}
  local package_dir="$folder_name/$snake_cased_folder_name"

  echo "Processing directory: $dir"

  # Check for setup.py
  if [[ ! -f "$folder_name/setup.py" ]]; then
    echo "setup.py not found in $dir"
    skipped_no_setup+=("$dir")
    return 0
  fi

  # Check for main.py
  if [[ ! -f "$folder_name/main.py" ]]; then
    echo "main.py not found in $dir"
    skipped_no_main+=("$dir")
    return 0
  fi

  # Check if run.py already exists
  if [[ -f "$package_dir/run.py" ]]; then
    echo "run.py already exists in $folder_name/$snake_cased_folder_name"
    skipped_run_exists+=("$dir")
    return 0
  fi

  # Step 1: Add entry_points to setup.py (appended after each "setup(" line).
  # The text lines must stay at column 0: sed copies them verbatim.
  local entry_points_script="/setup(/a \\
entry_points={\\
\"console_scripts\": [\\
\"${folder_name}=${snake_cased_folder_name}.run:run\",\\
],\\
},"
  if ! _sed_in_place "$entry_points_script" "$folder_name/setup.py"; then
    echo "failed to update $folder_name/setup.py" >&2
    return 1
  fi

  # Step 2: Create run.py and copy contents from main.py
  if ! { mkdir -p "$package_dir" \
    && cp "$folder_name/main.py" "$package_dir/run.py" \
    && _sed_in_place 's/if __name__ == "__main__":/def run():/' "$package_dir/run.py"; }; then
    # Do not touch main.py when its contents were not safely moved to run.py
    echo "failed to create $package_dir/run.py" >&2
    return 1
  fi

  # Step 3: Modify main.py so it only delegates to run()
  printf '#\n# Copyright (c) 2023 Airbyte, Inc., all rights reserved.\n#\n\nfrom %s.run import run\n\nif __name__ == "__main__":\n run()\n' \
    "$snake_cased_folder_name" > "$folder_name/main.py"

  processed_dirs+=("$dir")
}

# Loop through all directories starting with source-
for dir in source-*/; do
  # Without a match the glob stays literal; skip it instead of reporting a
  # bogus "source-*/" directory as skipped.
  [[ -d "$dir" ]] || continue
  migrate_connector "$dir"
done
skipped_no_setup_python=()
skipped_no_setup_other=()
skipped_no_main_python=()
skipped_no_main_other=()

#######################################
# Function to categorize directories based on metadata.yaml.
# A directory counts as python/low-code when its metadata.yaml contains
# "language:python" or "language:low-code" and does not contain
# "language:java"; everything else (including a missing metadata.yaml)
# is put into the "other" array.
# Arguments:
#   $1 - directory to categorize
#   $2 - name of the array receiving python/low-code directories
#   $3 - name of the array receiving all other directories
# Returns: 0 on success, 2 if the chosen array name is not a valid identifier
#######################################
categorize_based_on_metadata() {
  local dir=$1
  local metadata_file="$dir/metadata.yaml"
  local target=$3 # Default: the "other" array

  if [[ -f "$metadata_file" ]] \
    && grep -qE 'language:(python|low-code)' "$metadata_file" \
    && ! grep -q 'language:java' "$metadata_file"; then
    target=$2 # python/low-code array
  fi

  # The array name is passed to eval, so insist on a plain identifier.
  if [[ ! $target =~ ^[A-Za-z_][A-Za-z0-9_]*$ ]]; then
    printf 'categorize_based_on_metadata: invalid array name: %s\n' "$target" >&2
    return 2
  fi

  # Only the array name is interpolated; "$dir" is expanded by eval itself,
  # so quotes or spaces in the path cannot break or inject into the command.
  eval "$target+=(\"\$dir\")"
}
# Split the "setup.py not found" skips into python/low-code and other
for skipped_dir in "${skipped_no_setup[@]}"; do
  categorize_based_on_metadata "$skipped_dir" "skipped_no_setup_python" "skipped_no_setup_other"
done

# Split the "main.py not found" skips the same way
for skipped_dir in "${skipped_no_main[@]}"; do
  categorize_based_on_metadata "$skipped_dir" "skipped_no_main_python" "skipped_no_main_other"
done

# Print a heading ($1) followed by the remaining arguments, one per line.
# With no further arguments a single empty line follows the heading.
print_dir_list() {
  echo "$1"
  shift
  printf '%s\n' "$@"
}

# Print processed and skipped directories
print_dir_list "Processed directories:" "${processed_dirs[@]}"
print_dir_list "Skipped directories (setup.py not found):" "${skipped_no_setup[@]}"
print_dir_list "Skipped directories (main.py not found):" "${skipped_no_main[@]}"
print_dir_list "Skipped directories (run.py already exists):" "${skipped_run_exists[@]}"
print_dir_list "Skipped directories (setup.py not found, language:python/low-code):" "${skipped_no_setup_python[@]}"
print_dir_list "Skipped directories (setup.py not found, other languages):" "${skipped_no_setup_other[@]}"
print_dir_list "Skipped directories (main.py not found, language:python/low-code):" "${skipped_no_main_python[@]}"
print_dir_list "Skipped directories (main.py not found, other languages):" "${skipped_no_main_other[@]}"
source-amazon-ads
source-amplitude
source-bing-ads
source-chargebee
source-file
source-github
source-google-analytics-v4
source-google-search-console
source-greenhouse
source-harvest
source-hubspot
source-intercom
source-jira
source-klaviyo
source-monday
source-notion
source-pinterest
source-recharge
source-sendgrid
source-sentry
source-snapchat-marketing
source-surveymonkey
source-tiktok-marketing
source-twilio
#!/bin/bash
# Validates the connectors listed in a file and records the outcome per
# connector in the lists initialized below.
# Usage: pass the path of a file containing one connector directory name
# (relative to ./airbyte-integrations/connectors/) per line.

# Check if a file path is provided
if [[ -z "${1:-}" ]]; then
  echo "Please provide a file path." >&2
  exit 1
fi

# Fail early on an unusable list; otherwise the read loop would fail on its
# redirection and the script would go on to report empty results.
if [[ ! -r "$1" || -d "$1" ]]; then
  echo "Cannot read connector list: $1" >&2
  exit 1
fi

# Initialize lists
succeeded=()
failed_ci_credentials=()
failed_validate_source=()
spec_only_succeeded_due_to_no_secrets=()
succeeded_spec_only=()
succeeded_spec_only_community=()
#######################################
# Validate a single connector and record the outcome in the result lists.
# Connectors without "supportLevel: certified" in metadata.yaml only get the
# install check. Certified connectors get their secrets fetched through
# ci_credentials and a full validation, falling back to the install check.
# Globals (appended to): succeeded, succeeded_spec_only,
#   succeeded_spec_only_community, spec_only_succeeded_due_to_no_secrets,
#   failed_ci_credentials, failed_validate_source
# Arguments: $1 - connector directory name below ./airbyte-integrations/connectors/
# Outputs:   progress on stdout; tool output in debug-*-<name>.txt files in
#            the current directory
#######################################
validate_connector() {
  local directory=$1
  local connector_dir="./airbyte-integrations/connectors/$directory"
  local ci_credentials_failed=false
  local first_secret_file

  echo "Processing directory: $directory"

  # Check if supportLevel is certified
  if ! grep -q "supportLevel: certified" "$connector_dir/metadata.yaml"; then
    # Run only --validate-install-only for community connectors
    if airbyte-lib-validate-source --connector-dir "$connector_dir/" --validate-install-only > "debug-validate-spec-$directory.txt" 2>&1; then
      succeeded_spec_only_community+=("$directory")
    else
      echo "validate-source --validate-install-only failed for $directory. Check debug-validate-spec-$directory.txt for details."
    fi
    return 0
  fi

  if ! VERSION=dev ci_credentials "$directory" write-to-storage > "debug-ci-credentials-$directory.txt" 2>&1; then
    ci_credentials_failed=true
    echo "ci_credentials failed for $directory. Check debug-ci-credentials-$directory.txt for details."
  fi

  # Check for first secret file; a missing secrets/ folder simply means "none"
  first_secret_file=$(find "$connector_dir/secrets/" -type f 2> /dev/null | head -n 1)
  if [[ -z "$first_secret_file" ]]; then
    echo "No secret file found in $directory/secrets."
  fi

  if [[ "$ci_credentials_failed" == true ]]; then
    if [[ -n "$first_secret_file" ]]; then
      # Previously this case fell through every branch and the connector
      # ended up in no list at all; record the ci_credentials failure.
      failed_ci_credentials+=("$directory")
      return 0
    fi
    if ! airbyte-lib-validate-source --connector-dir "$connector_dir/" --validate-install-only > "debug-validate-spec-$directory.txt" 2>&1; then
      failed_ci_credentials+=("$directory")
      echo "validate-source also failed for $directory. Check debug-validate-spec-$directory.txt for details."
    else
      spec_only_succeeded_due_to_no_secrets+=("$directory")
    fi
    return 0
  fi

  if airbyte-lib-validate-source --connector-dir "$connector_dir/" --sample-config "$first_secret_file" > "debug-validate-full-$directory.txt" 2>&1; then
    succeeded+=("$directory")
  # Retry with --validate-install-only and capture output
  elif airbyte-lib-validate-source --connector-dir "$connector_dir/" --validate-install-only > "debug-validate-spec-$directory.txt" 2>&1; then
    succeeded_spec_only+=("$directory")
  else
    failed_validate_source+=("$directory")
    echo "validate-source failed for $directory. Check debug-validate-full-$directory.txt for details."
  fi
}

#######################################
# Validate every connector named in a list file (one name per line).
# Arguments: $1 - path of the list file
#######################################
validate_connectors_from_file() {
  local list_file=$1
  local directory

  # The list is read on fd 3 so that the tools started inside the loop do not
  # share (and possibly consume) it through stdin. The `|| [[ -n ... ]]` part
  # keeps a last line that has no trailing newline.
  while IFS= read -r directory <&3 || [[ -n "$directory" ]]; do
    directory="${directory%/}"
    # Blank lines are not connectors
    [[ -n "$directory" ]] || continue

    validate_connector "$directory"

    # Clean up after every connector, whichever path it took
    rm -rf .venv-source-*
  done 3< "$list_file"
}

# Read each line from the file
validate_connectors_from_file "$1"
#######################################
# Insert a remoteRegistries/pypi section into a connector's metadata.yaml,
# directly before every line containing "registries:" and indented to the
# same depth as that line (nested levels use two more spaces each).
# Arguments:
#   $1 - connector directory name below ./airbyte-integrations/connectors/
#   $2 - value written after "enabled: "
#   $3 - optional comment written on its own line above packageName; a
#        trailing backslash-newline (as callers pass it) is stripped
# Returns: 0 on success, 1 if metadata.yaml is missing or cannot be rewritten
#######################################
add_remote_registries() {
  local directory="$1"
  local enable="$2"
  local todo_comment="$3"
  local connector_name
  connector_name=$(basename "$directory") || return 1

  # Edit metadata.yaml
  local metadata_file="./airbyte-integrations/connectors/$directory/metadata.yaml"
  if [[ ! -f "$metadata_file" ]]; then
    printf 'add_remote_registries: %s not found\n' "$metadata_file" >&2
    return 1
  fi

  # Callers end the comment with a sed-style continuation ("\" + newline);
  # the comment is emitted as its own line here, so drop that suffix.
  todo_comment=${todo_comment%$'\\\n'}

  local rendered
  rendered=$(mktemp "${TMPDIR:-/tmp}/metadata.XXXXXX") || return 1

  # Values travel through the environment: awk -v would interpret backslash
  # escapes in them. awk is used instead of `sed -i ''`, which only BSD sed
  # accepts.
  local status=0
  if REMOTE_ENABLED="$enable" \
    REMOTE_TODO="$todo_comment" \
    REMOTE_PACKAGE="airbyte-$connector_name" \
    awk '
      /registries:/ {
        # Leading whitespace of the matched line
        indent = $0
        sub(/[^ \t].*$/, "", indent)
        print indent "remoteRegistries:"
        print indent "  pypi:"
        print indent "    enabled: " ENVIRON["REMOTE_ENABLED"]
        if (ENVIRON["REMOTE_TODO"] != "") {
          print indent "    " ENVIRON["REMOTE_TODO"]
        }
        print indent "    packageName: " ENVIRON["REMOTE_PACKAGE"]
      }
      { print }
    ' "$metadata_file" > "$rendered"; then
    # Write back through the existing file so its mode is preserved
    cat -- "$rendered" > "$metadata_file" || status=1
  else
    status=1
  fi

  rm -f -- "$rendered"
  return "$status"
}
# Comment passed for connectors that stay disabled. It ends with a backslash
# followed by a newline, exactly as add_remote_registries receives it.
disabled_todo_comment=$'# TODO: Set enabled=true after `airbyte-lib-validate-source` is passing.\\\n'

# Process the succeeded list, then the succeeded_spec_only_community list
for connector in "${succeeded[@]}" "${succeeded_spec_only_community[@]}"; do
  add_remote_registries "$connector" "true" ""
done

# Process succeeded_spec_only and spec_only_succeeded_due_to_no_secrets lists
for connector in "${succeeded_spec_only[@]}" "${spec_only_succeeded_due_to_no_secrets[@]}"; do
  connector_metadata="./airbyte-integrations/connectors/$connector/metadata.yaml"
  # Only connectors marked "supportLevel: certified" are left disabled
  if ! grep -q "supportLevel: certified" "$connector_metadata"; then
    add_remote_registries "$connector" "true" ""
  else
    add_remote_registries "$connector" "false" "$disabled_todo_comment"
  fi
done

# Process failed lists
for connector in "${failed_ci_credentials[@]}" "${failed_validate_source[@]}"; do
  add_remote_registries "$connector" "false" "$disabled_todo_comment"
done

# Print an empty line, the heading ($1), then each remaining argument on its
# own line.
print_result_list() {
  printf '\n%s\n' "$1"
  shift
  local entry
  for entry in "$@"; do
    echo "$entry"
  done
}

# Print results
print_result_list "Succeeded:" "${succeeded[@]}"
print_result_list "Succeeded with Spec Only due to full validation failing:" "${succeeded_spec_only[@]}"
print_result_list "Spec Only Succeeded Due to No Secrets:" "${spec_only_succeeded_due_to_no_secrets[@]}"
print_result_list "Spec Only Succeeded Due to community connector:" "${succeeded_spec_only_community[@]}"
print_result_list "Failed ci_credentials:" "${failed_ci_credentials[@]}"
print_result_list "Failed validate-source:" "${failed_validate_source[@]}"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment