Skip to content

Instantly share code, notes, and snippets.

@Praveen005
Last active July 22, 2024 12:41
Show Gist options
  • Save Praveen005/e0eef2aa39a1d3497745127df471bbea to your computer and use it in GitHub Desktop.
Save Praveen005/e0eef2aa39a1d3497745127df471bbea to your computer and use it in GitHub Desktop.
Improving retries and error handling in Civo TF Provider, Issue: #263
package utils
import "strings"
// IsRetryableError reports whether err should be treated as transient.
//
// The error's message is searched for each entry of RetryableErrors; the
// first entry found as a substring makes the error retryable. A nil error is
// never retryable, which also keeps the function from panicking on
// err.Error() when callers pass nil.
func IsRetryableError(err error) bool {
	if err == nil {
		return false
	}
	// Render the message once instead of on every iteration.
	msg := err.Error()
	for _, code := range RetryableErrors {
		if strings.Contains(msg, code) {
			return true
		}
	}
	return false
}
// RetryableErrors lists the error-code substrings that IsRetryableError
// searches for in an error's message. An error whose text contains any one of
// these entries is classified as retryable.
//
// NOTE(review): a few spellings look unusual (for example "OutOFCapacityError",
// "OpenstackNnetworkDestroyFailedError", "DatabaseInstanceAlreadyinRescueStateError").
// Presumably they mirror the codes emitted by the API verbatim; confirm
// against the upstream definitions before "correcting" them, because matching
// is done on the exact substring.
var RetryableErrors = []string{
// Network and Connection Issues
"TimeoutError",
"RegionUnavailable",
"OpenstackConnectionFailedError",
"DatabaseConnectionFailedError",
// Temporary Resource Issues
"OutOFCapacityError",
"DatabaseClusterPoolNoSufficientInstancesAvailable",
// Transient API Issues
"ResponseDecodeFailed",
"CivoStatsdRecordFailedError",
// Concurrent Operation Conflicts
"DatabaseInstanceBuildError",
"DatabaseSnapshotCreateAlreadyInProcessError",
"CannotScaleAlreadyRescalingClusterError",
// Database Operation Failures
"DatabaseCreatingAccountError",
"DatabaseUpdatingAccountError",
"DatabaseAccountStatsError",
"DatabaseActionListingError",
"DatabaseActionCreateError",
"DatabaseApiKeyCreateError",
"DatabaseAuditLogListingError",
"DatabaseChangeAPIKeyError",
"DatabaseChargeListingError",
"DatabaseListingDNSDomainsError",
"DatabaseListingFirewallsError",
"DatabaseInstanceListError",
"DatabaseListingAccountsError",
"DatabaseListingMembershipsError",
"DatabaseNetworksListError",
"DatabaseSizesListError",
"DatabaseSnapshotsListError",
"DatabaseTeamListingError",
"DatabaseNamespacesListError",
"DatabaseFirewallCreateError",
"DatabaseNetworkCreateError",
// OpenStack Operation Failures
"OpenstackCreatingProjectError",
"OpenstackCreatingUserError",
"OpenstackFirewallCreateError",
"OpenstackInstanceCreateError",
"OpenstackIPCreateError",
"OpenstackNetworkCreateFailedError",
"OpenstackNetworkEnsureConfiguredError",
"OpenstackPublicIPConnectError",
"OpenstackQuotaApplyError",
"OpenstackSSHKeyUploadError",
// NOTE(review): from here on the entries are no longer OpenStack-only; they
// mix Database, OpenStack, parameter, network and marshaling codes without
// their own section headings.
"CannotGetConsoleError",
"DatabaseCannotManageClusterInstanceError",
"DatabaseCannotMoveIPError",
"DatabaseInstanceAlreadyinRescueStateError",
"DatabaseInstanceCreateError",
"DatabaseLoadBalancerSaveError",
"DatabaseLoadBalancerUpdateError",
"DatabaseNetworkSaveError",
"DatabaseQuotaLockFailedError",
"DatabaseSnapshotCreateError",
"DatabaseVolumeStillAttachedCannotResizeError",
"InstanceStateMustBeActiveOrShutoffError",
"InternalServerError",
"OpenstackInstanceRebootError",
"OpenstackInstanceRebuildError",
"OpenstackInstanceResizeError",
"OpenstackInstanceRestoreError",
"OpenstackInstanceStartError",
"OpenstackInstanceStopError",
"OpenstackURLNovaError",
"OpenstackUrlGlanceError",
"ParameterDateRangeError",
"ParameterTimeValueError",
"DatabaseVolumeDeleteFailedError",
"DatabaseWebhookDestroyError",
"OpenstackFirewallDestroyError",
"OpenstackInstanceDestroyError",
"OpenstackNnetworkDestroyFailedError",
"OpenstackSnapshotDestroyError",
"OpenstackProjectDestroyError",
"OpenstackUserDestroyError",
"DatabaseNamespaceDeleteLastError",
"DatabaseNamespaceDeleteWithInstanceError",
"DatabaseNamespaceSaveError",
"DatabaseTemplateSaveFailedError",
"DatabaseAPIkeyDestroyError",
"NetworkCreateDefaultError",
"NetworkDeleteDefaultError",
"MarshalingObjectsToJsonError",
"DatabaseTemplateParseRequestError",
"DatabaseDnsDomainCreateError",
"DatabaseDnsDomainUpdateError",
"DatabaseDNSRecordCreateError",
"DatabaseDNSRecordUpdateError",
"DatabaseNamespaceCreateError",
// More errors can be added or removed if need be
}
/*
Non-Retryable Errors:
**************
Authentication and Authorization Issues:
--------------
"DisabledServiceError"
"NoAPIKeySuppliedError"
"AuthenticationFailedError"
"DatabaseAccountAccessDeniedError"
"DatabaseMembershipsSuspendedError"
"AuthenticationInvalidKeyError"
"AuthenticationAccessDeniedError"
"AccountNotEnabledIncCardError"
"AccountNotEnabledWithoutCardError"
"AuthenticationError"
Resource Not Found:
--------------
"DatabaseAccountNotFoundError"
"DatabaseApiKeyNotFoundError"
"DatabaseBlueprintNotFoundError"
"DatabaseDNSDomainNotFoundError"
"DatabaseDNSRecordNotFoundError"
"DatabaseFirewallNotFoundError"
"DatabaseInstanceNotFoundError"
"DatabaseKubernetesApplicationNotFound"
"DatabaseKubernetesClusterNoPoolsError"
"DatabaseKubernetesClusterNotFound"
"DatabaseKubernetesNodeNotFound"
"DatabaseClusterPoolNotFound"
"DatabaseClusterPoolInstanceNotFound"
"DatabaseMembershipsNotFoundError"
"DatabaseLoadBalancerNotFoundError"
"DatabaseNetworkNotFoundError"
"DatabaseQuotaNotFoundError"
"DatabaseServiceNotFoundError"
"DatabaseSnapshotNotFoundError"
"DatabaseSSHKeyNotFoundError"
"DatabaseTeamNotFoundError"
"DatabaseTemplateNotFoundError"
"DatabaseUserNotFoundError"
"DatabaseVolumeNotFoundError"
"DatabaseWebhookNotFoundError"
"DatabaseDiskImageNotFoundError"
"DatabaseNamespaceNotFoundError"
Invalid Parameters or States:
--------------
"MultipleMatchesError"
"ZeroMatchesError"
"IDisEmptyError"
"VolumeInvalidSizeError"
"ParameterEmptyVolumeIDError"
"ParameterEmptyOpenstackVolumeIDError"
"DatabaseFirewallRulesInvalidParams"
"DatabaseKubernetesClusterInvalid"
"DatabaseKubernetesApplicationInvalidPlan"
"DatabaseMembershipsInvalidInvitationError"
"DatabaseMembershipsInvalidStatusError"
"DatabaseImageIDInvalidError"
"DatabaseVolumeIDInvalidError"
"ParameterDateRangeTooLongError"
"ParameterDnsRecordTypeError"
"ParameterDNSRecordCnameApexError"
"ParameterPublicKeyEmptyError"
"ParameterIDMissingError"
"ParameterIDToIntegerError"
"ParameterImageAndVolumeIDMissingError"
"ParameterLabelInvalidError"
"ParameterNameInvalidError"
"ParameterPrivateIPMissingError"
"ParameterPublicIPMissingError"
"ParameterSizeMissingError"
"ParameterVolumeSizeIncorrectError"
"ParameterVolumeSizeMustIncreaseError"
"ParameterSnapshotMissingError"
"ParameterSnapshotIncorrectFormatError"
"ParameterStartPortMissingError"
"ParameterValueMissingError"
"DatabaseDNSDomainInvalidError"
"DatabaseKubernetesClusterInvalidVersionError"
"KubernetesClusterInvalidNameError"
Duplicate Resources:
--------------
"DatabaseApiKeyDuplicateError"
"DatabaseDnsDomainDuplicateNameError"
"DatabaseFirewallDuplicateNameError"
"FirewallDuplicateError"
"DatabaseInstanceDuplicateError"
"DatabaseInstanceDuplicateNameError"
"DatabaseKubernetesClusterDuplicate"
"DatabaseLoadBalancerDuplicateError"
"DatabaseLoadBalancerExistsError"
"DatabaseNetworkExistsError"
"DatabaseNetworkDuplicateNameError"
"DatabaseSSHKeyDuplicateNameError"
"SSHKeyDuplicateError"
"DatabaseUserAlreadyExistsError"
"DatabaseVolumeDuplicateNameError"
"DatabaseFirewallExistsError"
"DatabaseNamespaceExistsError"
"DatabaseNamespaceDuplicateNameError"
"DatabaseTemplateExistsError"
Operation Not Allowed:
--------------
"CannotRescueNewVolumeError"
"CannotRestoreNewVolumeError"
"DatabaseAccountDestroyError"
"DatabaseBlueprintDeleteFailedError"
"DatabaseBlueprintCreateError"
"DatabaseBlueprintUpdateError"
"DatabaseFirewallMismatchError"
"DatabaseFirewallSaveFailedError"
"DatabaseFirewallDeleteFailedError"
"DatabaseFirewallRuleCreateError"
"DatabaseFirewallRuleDeleteFailedError"
"DatabaseInstanceBuildMultipleWithExistingPublicIPError"
"DatabaseInstanceSnapshotTooBigError"
"DatabaseInstanceNotInOpenStackError"
"DatabaseClusterPoolInstanceDeleteFailed"
"DatabaseMembershipCannotDeleteError"
"DatabaseMembershipsGrantAccessError"
"DatabaseLoadBalancerDeleteError"
"DatabaseNetworkDeleteLastError"
"DatabaseNetworkDeleteWithInstanceError"
"DatabaseNetworkInUseByVolumes"
"DatabasePrivateIPFromPublicIPError"
"DatabaseQuotaUpdateError"
"DatabaseSnapshotCannotDeleteInUseError"
"DatabaseSnapshotCannotReplaceError"
"DatabaseSnapshotCreateInstanceNotFoundError"
"DatabaseSSHKeyDestroyError"
"DatabaseSSHKeyCreateError"
"DatabaseSSHKeyUpdateError"
"DatabaseTeamCannotDeleteError"
"DatabaseTeamCreateError"
"DatabaseTeamMembershipCreateError"
"DatabaseTemplateDestroyError"
"DatabaseTemplateUpdateError"
"DatabaseTemplateWouldConflictError"
"DatabaseUserNewError"
"DatabaseUserConfirmedError"
"DatabaseUserNoChangeStatusError"
"DatabaseUserUpdateError"
"DatabaseCreatingUserError"
"DatabaseVolumeCannotMultipleAttachError"
"DatabaseVolumeNotAttachedError"
"DatabaseWebhookUpdateError"
"DatabaseWebhookWouldConflictError"
"OpenstackFirewallRuleDestroyError"
"OpenstackInstanceSetFirewallError"
"CannotResizeVolumeError"
"DatabaseDiskImageNotImplementedError"
Rate Limiting and Quota Issues:
--------------
"QuotaLimitReachedError"
Security-Related Errors:
--------------
"DatabaseUserPasswordInvalidError"
"DatabaseUserPasswordSecuringFailedError"
"DatabaseUserLoginFailedError"
"DatabaseUserSuspendedError"
Generic Errors:
--------------
"Error"
"UnknownError"
OpenStack-Specific Errors:
--------------
"OpenstackInstanceFindError"
"OpenstackProjectFindError"
Database-Specific Errors:
--------------
"DatabaseOldInstanceFindError"
"DatabaseIPFindError"
"DatabaseFirewallRulesFindError"
"DatabaseNamespaceLookupError"
"DatabaseNetworkLookupError"
*/
// Usage excerpt: create an instance through utils.RetryWithTimeout with a
// two-minute timeout, so that errors the helper classifies as retryable are
// attempted again instead of failing immediately.
// NOTE(review): this is a fragment of a larger function; ctx, apiClient,
// config and d come from the enclosing scope that is not shown here.
var instance *civogo.Instance
err := utils.RetryWithTimeout(ctx, 2*time.Minute, "create instance", func() error {
// Local err for this attempt; instance is assigned in the outer scope so the
// result stays available after the helper returns.
var err error
instance, err = apiClient.CreateInstance(config)
if err != nil {
return err
}
// Record the ID of the newly created instance on the resource data.
d.SetId(instance.ID)
return nil
})
if err != nil {
return diag.Errorf("[ERR] failed to create a new instance: %s", err)
}
// Usage excerpt: create a network through utils.RetryWithTimeout with a
// two-minute timeout, so that errors the helper classifies as retryable are
// attempted again instead of failing immediately.
// NOTE(review): this is a fragment of a larger function; ctx, apiClient,
// configs and d come from the enclosing scope that is not shown here.
var network *civogo.NetworkResult
err := utils.RetryWithTimeout(ctx, 2*time.Minute, "create network", func() error {
// Local err for this attempt; network is assigned in the outer scope so the
// result stays available after the helper returns.
var err error
network, err = apiClient.CreateNetwork(configs)
if err != nil {
return err
}
// Record the ID of the newly created network on the resource data.
d.SetId(network.ID)
return nil
})
if err != nil {
return diag.Errorf("[ERR] failed to create a new network: %s", err)
}
package utils
import (
"context"
"encoding/json"
"fmt"
"log"
"math"
"regexp"
"sync"
"time"
"github.com/hashicorp/terraform-plugin-sdk/v2/helper/retry"
)
// CustomError carries the subset of an API error payload that is surfaced to
// the caller: the value of the JSON "code" key and the value of the JSON
// "reason" key.
type CustomError struct {
	Code   string `json:"code"`
	Reason string `json:"reason"`
}

// Compile-time check that *CustomError satisfies the error interface.
var _ error = (*CustomError)(nil)

// Error renders the error as "<Code> - <Reason>".
func (ce *CustomError) Error() string {
	code, reason := ce.Code, ce.Reason
	return fmt.Sprintf("%s - %s", code, reason)
}
// State behind getJSONRegex: the compiled pattern, the error produced while
// compiling it, and the sync.Once that guards the single compilation.
var (
	jsonRegex *regexp.Regexp
	once      sync.Once
	regexErr  error
)

// getJSONRegex returns the pattern used to locate a brace-delimited span in a
// string, together with any compilation error. The pattern is compiled on the
// first call only; later calls return the stored results.
func getJSONRegex() (*regexp.Regexp, error) {
	compile := func() {
		compiled, compileErr := regexp.Compile(`\{.*\}`)
		jsonRegex, regexErr = compiled, compileErr
	}
	once.Do(compile)
	return jsonRegex, regexErr
}
// extractJSON returns the portion of s matched by the pattern from
// getJSONRegex. It fails when the pattern could not be compiled or when s
// contains no match.
func extractJSON(s string) (string, error) {
	pattern, compileErr := getJSONRegex()
	if compileErr != nil {
		return "", fmt.Errorf("failed to compile regex: %v", compileErr)
	}
	if found := pattern.FindString(s); found != "" {
		return found, nil
	}
	return "", fmt.Errorf("no JSON object found in the string")
}
// ParseErrorResponse extracts the JSON object embedded in errorMsg and decodes
// it into a CustomError.
//
// It returns an error when no JSON object can be extracted, when the extracted
// text does not decode, or when the decoded object carries neither a code nor
// a reason. The last case matters because an empty CustomError renders as
// " - ", which would hide the original message from whoever reports it.
// Underlying errors are wrapped with %w so callers can inspect them with
// errors.Is / errors.As; the rendered messages are unchanged.
func ParseErrorResponse(errorMsg string) (*CustomError, error) {
	jsonStr, err := extractJSON(errorMsg)
	if err != nil {
		return nil, fmt.Errorf("failed to extract JSON: %w", err)
	}

	var customErr CustomError
	if err := json.Unmarshal([]byte(jsonStr), &customErr); err != nil {
		return nil, fmt.Errorf("failed to parse error response: %w", err)
	}

	// Valid JSON that is not an API error payload (for example "{}") must not
	// be reported as a successful parse.
	if customErr.Code == "" && customErr.Reason == "" {
		return nil, fmt.Errorf("error response contains neither code nor reason")
	}
	return &customErr, nil
}
// RetryWithTimeout runs f through retry.RetryContext with the given ctx and
// timeout, logging progress under taskName.
//
// Each attempt is classified as follows: a nil result ends the loop
// successfully, an error accepted by IsRetryableError is reported as
// retryable, and any other error is reported as non-retryable.
//
// When the loop ends with an error, the function tries to reduce it to a
// CustomError via ParseErrorResponse; if that parsing fails, the error is
// returned as received. On success it returns nil.
func RetryWithTimeout(ctx context.Context, timeout time.Duration, taskName string, f func() error) error {
	log.Printf("[DEBUG] Starting retry loop for task: %s\n", taskName)
	startTime := time.Now()

	// elapsedSeconds reports the time since startTime, rounded to whole seconds.
	elapsedSeconds := func() int {
		return int(math.Round(time.Since(startTime).Seconds()))
	}

	// attempt performs one invocation of f and classifies its outcome.
	attempt := func() *retry.RetryError {
		taskErr := f()
		switch {
		case taskErr == nil:
			log.Printf("[DEBUG] Task '%s' succeeded\n", taskName)
			return nil
		case IsRetryableError(taskErr):
			log.Printf("[DEBUG] Task '%s' encountered a retryable error: %v. Retrying... (Elapsed time: %ds)\n", taskName, taskErr, elapsedSeconds())
			return retry.RetryableError(taskErr)
		default:
			log.Printf("[DEBUG] Task '%s' encountered a non-retryable error: %v\n", taskName, taskErr)
			return retry.NonRetryableError(taskErr)
		}
	}

	retryErr := retry.RetryContext(ctx, timeout, attempt)
	if retryErr == nil {
		log.Printf("[INFO] Task '%s' completed successfully (Elapsed time: %ds)\n", taskName, elapsedSeconds())
		return nil
	}

	elapsed := elapsedSeconds()
	// Prefer the compact code/reason form when the message embeds one;
	// otherwise hand back the error unchanged.
	errResp := retryErr
	if customErr, parseErr := ParseErrorResponse(retryErr.Error()); parseErr == nil {
		errResp = customErr
	}
	log.Printf("[ERROR] Task '%s' failed after multiple retries (Elapsed time: %ds): %v\n", taskName, elapsed, errResp)
	return errResp
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment