Skip to content

Instantly share code, notes, and snippets.

@kwilczynski
Last active August 28, 2023 14:38
Show Gist options
  • Star 5 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save kwilczynski/f6e626990d6d2395b42a12721b165b86 to your computer and use it in GitHub Desktop.
Save kwilczynski/f6e626990d6d2395b42a12721b165b86 to your computer and use it in GitHub Desktop.
Parse S3 URL into bucket, key, region, version ID, etc. Useful when using AWS SDK for Go.
http://s3.amazonaws.com/bucket/key (for a bucket created in the US East (N. Virginia) region)
https://s3.amazonaws.com/bucket/key
http://s3-region.amazonaws.com/bucket/key
https://s3-region.amazonaws.com/bucket/key
http://s3.region.amazonaws.com/bucket/key
https://s3.region.amazonaws.com/bucket/key
http://s3.dualstack.region.amazonaws.com/bucket/key (for requests using IPv4 or IPv6)
https://s3.dualstack.region.amazonaws.com/bucket/key
http://bucket.s3.amazonaws.com/key
http://bucket.s3-region.amazonaws.com/key
http://bucket.s3.region.amazonaws.com/key
http://bucket.s3.dualstack.region.amazonaws.com/key (for requests using IPv4 or IPv6)
http://bucket.s3-website-region.amazonaws.com/key (if static website hosting is enabled on the bucket)
http://bucket.s3-website.region.amazonaws.com/key (if static website hosting is enabled on the bucket)
http://bucket.s3-accelerated.amazonaws.com/key (where the file transfer exits Amazon's network at the last possible moment so as to give the fastest possible transfer speed and lowest latency)
http://bucket.s3-accelerated.dualstack.amazonaws.com/key
http://bucket/key (where bucket is a DNS CNAME record pointing to bucket.s3.amazonaws.com)
Host-Style Naming: http://bucket.s3-region.amazonaws.com
Path-Style Naming: http://s3-region.amazonaws.com/bucket
package main
import (
"errors"
"fmt"
"net/url"
"regexp"
"strings"
"github.com/davecgh/go-spew/spew"
)
// DefaultRegion contains a default region for an S3 bucket, used when a
// region cannot be determined, for example when the s3:// scheme is used or
// when a path-style URL has been given without the region component in the
// fully-qualified domain name.
const DefaultRegion = "us-east-1"
var (
	// ErrBucketNotFound is returned for an s3:// URL that has no host
	// component to use as the bucket name.
	ErrBucketNotFound = errors.New("bucket name could not be found")
	// ErrHostnameNotFound is returned for an http:// or https:// URL whose
	// host is empty.
	ErrHostnameNotFound = errors.New("hostname could not be found")
	// ErrInvalidS3Endpoint reports that the given URL is not recognised as
	// an S3 endpoint, e.g. because its host does not match s3URLPattern.
	ErrInvalidS3Endpoint = errors.New("an invalid S3 endpoint URL")
	// Pattern used to parse multiple path and host style S3 endpoint URLs.
	//
	// Capture groups:
	//  1. optional host prefix in front of "s3", including its trailing
	//     dot (the bucket name for host-style URLs);
	//  2. optional endpoint type: accelerated, dualstack or website;
	//  3. the DNS label that follows, which is the region when one is
	//     present, and otherwise "amazonaws" (or "dualstack" for
	//     accelerated dualstack hosts).
	//
	// The match is anchored at the start of the host only; nothing in the
	// pattern requires the rest of the host to be amazonaws.com.
	//
	// NOTE(review): AWS appears to spell the transfer acceleration
	// endpoint "s3-accelerate" rather than "s3-accelerated" - confirm
	// against the AWS documentation before relying on group 2 for it.
	s3URLPattern = regexp.MustCompile(`^(.+\.)?s3[.-](?:(accelerated|dualstack|website)[.-])?([a-z0-9-]+)\.`)
)
// S3URIOpt is a functional option: a function that mutates the S3URI it is
// given. Values are produced by the With* constructors and collected by
// NewS3URI.
type S3URIOpt func(*S3URI)
// WithScheme returns an option that sets the Scheme field of the target
// S3URI to a pointer to a fresh copy of s.
func WithScheme(s string) S3URIOpt {
	setScheme := func(target *S3URI) {
		target.Scheme = String(s)
	}
	return setScheme
}
// WithBucket returns an option that sets the Bucket field of the target
// S3URI to a pointer to a fresh copy of s.
func WithBucket(s string) S3URIOpt {
	setBucket := func(target *S3URI) {
		target.Bucket = String(s)
	}
	return setBucket
}
// WithKey returns an option that sets the Key field of the target S3URI to
// a pointer to a fresh copy of s.
func WithKey(s string) S3URIOpt {
	setKey := func(target *S3URI) {
		target.Key = String(s)
	}
	return setKey
}
// WithVersionID returns an option that sets the VersionID field of the
// target S3URI to a pointer to a fresh copy of s.
func WithVersionID(s string) S3URIOpt {
	setVersionID := func(target *S3URI) {
		target.VersionID = String(s)
	}
	return setVersionID
}
// WithRegion returns an option that sets the Region field of the target
// S3URI to a pointer to a fresh copy of s.
func WithRegion(s string) S3URIOpt {
	setRegion := func(target *S3URI) {
		target.Region = String(s)
	}
	return setRegion
}
// WithNormalizedKey returns an option that records, in the unexported
// normalize field of the target S3URI, whether key normalization is wanted.
func WithNormalizedKey(b bool) S3URIOpt {
	setNormalize := func(target *S3URI) {
		target.normalize = Bool(b)
	}
	return setNormalize
}
// S3URI holds the components extracted from an S3 endpoint URL. Every
// exported field is a pointer; the string fields are nil when the
// corresponding component was not found, while reset points the boolean
// fields at false.
type S3URI struct {
	// uri is the URL that was most recently handed to parse; see URI.
	uri *url.URL
	// options are the option functions supplied to NewS3URI.
	options []S3URIOpt
	// normalize is set by WithNormalizedKey; when true, parse strips a
	// trailing slash from Key.
	normalize *bool
	// HostStyle is true when the bucket name was taken from the host
	// prefix in front of "s3".
	HostStyle *bool
	// PathStyle is true when the host has no such prefix and the bucket
	// is read from the first path segment instead.
	PathStyle *bool
	// Accelerated is true when the host carries the "accelerated" marker.
	Accelerated *bool
	// DualStack is true when the host carries the "dualstack" marker.
	DualStack *bool
	// Website is true when the host carries the "website" marker.
	Website *bool
	// Scheme is the URL scheme: "s3", "http" or "https".
	Scheme *string
	// Bucket is the bucket name.
	Bucket *string
	// Key is the object key, or nil when the URL names only a bucket.
	Key *string
	// VersionID is the value of the versionId query parameter of an
	// http(s) URL, or nil when it is absent or empty.
	VersionID *string
	// Region is the region found in the host, or DefaultRegion when the
	// host names none. It is left nil for accelerated endpoints.
	Region *string
}
// NewS3URI allocates an S3URI that carries the given options and has every
// other field at its zero value.
func NewS3URI(opts ...S3URIOpt) *S3URI {
	s3u := new(S3URI)
	s3u.options = opts
	return s3u
}
// Reset clears the receiver in place by delegating to reset and returns the
// receiver so that calls can be chained.
func (s3u *S3URI) Reset() *S3URI {
	reset(s3u)
	return s3u
}
// Parse fills the receiver from v, which must be a string or a *url.URL;
// any other type yields an error. The work is done by parse.
func (s3u *S3URI) Parse(v interface{}) (*S3URI, error) {
	parsed, err := parse(s3u, v)
	return parsed, err
}
// ParseURL fills the receiver from an already parsed URL. The work is done
// by parse.
func (s3u *S3URI) ParseURL(u *url.URL) (*S3URI, error) {
	parsed, err := parse(s3u, u)
	return parsed, err
}
// ParseString fills the receiver from the textual form of a URL. The work
// is done by parse.
func (s3u *S3URI) ParseString(s string) (*S3URI, error) {
	parsed, err := parse(s3u, s)
	return parsed, err
}
// URI returns the URL stored in the receiver's uri field, which is nil
// until a parse has stored one.
func (s3u *S3URI) URI() *url.URL {
	stored := s3u.uri
	return stored
}
// Parse is a convenience wrapper that parses v, a string or a *url.URL,
// with a freshly created S3URI that has no options.
func Parse(v interface{}) (*S3URI, error) {
	s3u := NewS3URI()
	return s3u.Parse(v)
}
// ParseURL is a convenience wrapper that parses u with a freshly created
// S3URI that has no options.
func ParseURL(u *url.URL) (*S3URI, error) {
	s3u := NewS3URI()
	return s3u.ParseURL(u)
}
// ParseString is a convenience wrapper that parses s with a freshly created
// S3URI that has no options.
func ParseString(s string) (*S3URI, error) {
	s3u := NewS3URI()
	return s3u.ParseString(s)
}
// MustParse wraps a call to one of the parse functions, e.g.
// MustParse(ParseString(raw)): it returns s3u when err is nil and panics
// with err otherwise.
func MustParse(s3u *S3URI, err error) *S3URI {
	if err == nil {
		return s3u
	}
	panic(err)
}
// Validate reports whether v, a string or a *url.URL, parses without error
// using a fresh S3URI.
func Validate(v interface{}) bool {
	if _, err := NewS3URI().Parse(v); err != nil {
		return false
	}
	return true
}
// ValidateURL reports whether u parses without error using a fresh S3URI.
func ValidateURL(u *url.URL) bool {
	if _, err := NewS3URI().Parse(u); err != nil {
		return false
	}
	return true
}
// ValidateString reports whether s parses without error using a fresh
// S3URI.
func ValidateString(s string) bool {
	if _, err := NewS3URI().Parse(s); err != nil {
		return false
	}
	return true
}
// parse fills s3u from an S3 endpoint URL and returns it.
//
// The input s may be a string, which is parsed with url.Parse, or a non-nil
// *url.URL, which is stored as-is and never modified. Any other type is an
// error. Three URL families are understood:
//
//   - s3://bucket/key, where the host is the bucket and the region is
//     always DefaultRegion;
//   - path style, where the bucket is the first path segment, e.g.
//     https://s3.region.amazonaws.com/bucket/key;
//   - host style, where the bucket is the host prefix in front of "s3",
//     e.g. https://bucket.s3.region.amazonaws.com/key.
//
// Any previous state of s3u is discarded, except for the options given to
// NewS3URI: those are kept and applied as overrides once the URL itself has
// been parsed, followed by the optional key normalization.
//
// On failure the returned *S3URI is nil and the error says why; s3u itself
// may already have been reset by then.
func parse(s3u *S3URI, s interface{}) (*S3URI, error) {
	const (
		// Used to denote type of the S3 bucket.
		accelerated = "accelerated"
		dualStack   = "dualstack"
		website     = "website"
		// Part of the amazonaws.com domain name. Captured in place of the
		// region when no region could be ascertained from the S3 endpoint
		// URL.
		amazonAWS = "amazonaws"
		// Part of the query parameters. Used when retrieving S3
		// object (key) of a particular version.
		versionID = "versionId"
	)

	var (
		u   *url.URL
		err error
	)
	switch s := s.(type) {
	case string:
		u, err = url.Parse(s)
	case *url.URL:
		u = s
	default:
		return nil, fmt.Errorf("unable to parse unknown type: %T", s)
	}
	if err != nil {
		return nil, fmt.Errorf("unable to parse given S3 endpoint URL: %w", err)
	}
	// A nil *url.URL would otherwise be dereferenced below.
	if u == nil {
		return nil, ErrInvalidS3Endpoint
	}

	// reset replaces the whole struct, so carry the configured options
	// across it; otherwise they would be lost before they could be applied.
	opts := s3u.options
	reset(s3u)
	s3u.options = opts
	s3u.uri = u

	switch u.Scheme {
	case "s3", "http", "https":
		s3u.Scheme = String(u.Scheme)
	default:
		return nil, fmt.Errorf("unable to parse schema type: %s", u.Scheme)
	}

	// Work on a local copy of the path without its leading slash, so that
	// the URL owned by the caller (and stored in s3u.uri) stays untouched.
	path := strings.TrimPrefix(u.Path, "/")

	// Handle S3 endpoint URL with the scheme s3:// that is neither
	// the host style nor the path style.
	if u.Scheme == "s3" {
		if u.Host == "" {
			return nil, ErrBucketNotFound
		}
		s3u.Bucket = String(u.Host)
		if path != "" {
			s3u.Key = String(path)
		}
		s3u.Region = String(DefaultRegion)
		return applyS3URIOptions(s3u), nil
	}

	if u.Host == "" {
		return nil, ErrHostnameNotFound
	}
	// FindStringSubmatch returns nil when there is no match, and one entry
	// per capture group (plus the whole match) otherwise.
	matches := s3URLPattern.FindStringSubmatch(u.Host)
	if matches == nil {
		return nil, ErrInvalidS3Endpoint
	}
	prefix := matches[1]
	usage := matches[2] // Type of the S3 bucket.
	region := matches[3]

	if prefix == "" {
		// Path style: "<bucket>" or "<bucket>/<key>". A path that is only
		// "<bucket>/" leaves the key unset.
		s3u.PathStyle = Bool(true)
		if path != "" {
			bucket, key := path, ""
			if i := strings.Index(path, "/"); i != -1 {
				bucket, key = path[:i], path[i+1:]
			}
			s3u.Bucket = String(bucket)
			if key != "" {
				s3u.Key = String(key)
			}
		}
	} else {
		// Host style: the prefix is "<bucket>." and the whole path is the
		// key.
		s3u.HostStyle = Bool(true)
		s3u.Bucket = String(strings.TrimSuffix(prefix, "."))
		if path != "" {
			s3u.Key = String(path)
		}
	}

	// An S3 bucket can be either accelerated or website endpoint,
	// but not both.
	switch usage {
	case accelerated:
		s3u.Accelerated = Bool(true)
	case website:
		s3u.Website = Bool(true)
	}
	// An accelerated S3 bucket can also be dualstack.
	if usage == dualStack || region == dualStack {
		s3u.DualStack = Bool(true)
	}
	// Accelerated endpoints, including the accelerated dualstack form
	// <BUCKET>.s3-accelerated.dualstack.amazonaws.com/<KEY>, carry no
	// region, and there is no way to ascertain it solely from the S3
	// endpoint URL, so the region is left unset for them.
	if usage != accelerated {
		s3u.Region = String(DefaultRegion)
		if region != amazonAWS {
			s3u.Region = String(region)
		}
	}

	// Query string used when requesting a particular version of a given
	// S3 object (key).
	if v := u.Query().Get(versionID); v != "" {
		s3u.VersionID = String(v)
	}

	return applyS3URIOptions(s3u), nil
}

// applyS3URIOptions finishes a parse: it runs the configured options, which
// serve as overrides for bucket name, key, version ID, etc., and then, when
// key normalization was requested, removes one trailing slash from the key,
// so that "key/" becomes "key" and "a/complex/key/" becomes
// "a/complex/key". An empty key is left as it is.
func applyS3URIOptions(s3u *S3URI) *S3URI {
	for _, o := range s3u.options {
		o(s3u)
	}
	if BoolValue(s3u.normalize) && s3u.Key != nil {
		s3u.Key = String(strings.TrimSuffix(*s3u.Key, "/"))
	}
	return s3u
}
// reset overwrites *s3u with a blank S3URI whose five boolean fields each
// point at their own false value; every other field, options included,
// ends up nil. The same pointer is returned.
func reset(s3u *S3URI) *S3URI {
	var blank S3URI
	flags := []**bool{
		&blank.HostStyle,
		&blank.PathStyle,
		&blank.Accelerated,
		&blank.DualStack,
		&blank.Website,
	}
	for _, flag := range flags {
		*flag = Bool(false)
	}
	*s3u = blank
	return s3u
}
// String returns a pointer to a newly allocated copy of s.
func String(s string) *string {
	p := new(string)
	*p = s
	return p
}
// Bool returns a pointer to a newly allocated copy of b.
func Bool(b bool) *bool {
	p := new(bool)
	*p = b
	return p
}
// StringValue dereferences s, treating a nil pointer as the empty string.
func StringValue(s *string) string {
	if s == nil {
		return ""
	}
	return *s
}
// BoolValue dereferences b, treating a nil pointer as false.
func BoolValue(b *bool) bool {
	if b == nil {
		return false
	}
	return *b
}
// main is a demonstration driver. It reuses a single S3URI to parse a
// series of sample s3://, path-style and host-style URLs, printing the
// values returned by ParseString and dumping the resulting struct with
// spew after each one. It finishes by printing the result of Validate for
// one accepted and one rejected URL, and of ParseString for a URL with an
// unsupported scheme. The commented-out lines are further experiments that
// are left disabled.
func main() {
	s3u := NewS3URI()
	// fmt.Println(s3u.ParseString("s3://test123"))
	// fmt.Println(s3u.URI())
	// spew.Dump(s3u)
	// s3u.Bucket = String("test")
	// fmt.Println(s3u.URI().String())
	// spew.Dump(s3u)
	fmt.Println(s3u.ParseString("s3://test123/"))
	spew.Dump(s3u)
	fmt.Println(s3u.ParseString("s3://test123/key456"))
	spew.Dump(s3u)
	fmt.Println(s3u.ParseString("s3://test123/key456/"))
	spew.Dump(s3u)
	// fmt.Println(s3u.ParseString("https://s3.amazonaws.com/test123"))
	// fmt.Println(s3u.ParseString("https://s3.amazonaws.com/test123/"))
	// fmt.Println(s3u.ParseString("https://s3.amazonaws.com/test123/key456"))
	// fmt.Println(s3u.ParseString("https://s3.amazonaws.com/test123/key456/"))
	fmt.Println(s3u.ParseString("https://s3-eu-west-1.amazonaws.com/test123/key456/"))
	spew.Dump(s3u)
	fmt.Println(s3u.ParseString("https://s3.eu-west-1.amazonaws.com/test123/key456/"))
	spew.Dump(s3u)
	fmt.Println(s3u.ParseString("https://s3.dualstack.eu-west-1.amazonaws.com/test123/key456/"))
	spew.Dump(s3u)
	fmt.Println(s3u.ParseString("https://test123.s3-website-eu-west-1.amazonaws.com/key456/"))
	spew.Dump(s3u)
	fmt.Println(s3u.ParseString("https://test123.s3-accelerated.amazonaws.com/key456/"))
	spew.Dump(s3u)
	fmt.Println(s3u.ParseString("https://test123.s3-accelerated.dualstack.amazonaws.com/key456/"))
	spew.Dump(s3u)
	// fmt.Println(s3u.ParseString("https://test123.s3.amazonaws.com/"))
	// fmt.Println(s3u.ParseString("https://test123.s3.amazonaws.com/key456"))
	// fmt.Println(s3u.ParseString("https://test123.s3.amazonaws.com/key456"))
	// fmt.Println(s3u.ParseString("https://google.com")) // invalid S3 endpoint
	// fmt.Println(s3u.ParseString("https://test123.s3.amazonaws.com/key456?versionId=123456&x=1&y=2&y=3;z"))
	// fmt.Println(*s3u.Bucket, *s3u.Key, *s3u.Region, *s3u.PathStyle, *s3u.VersionID)
	// fmt.Println(s3u.URI().Scheme)
	// fmt.Println(s3u.ParseString("https://s3-eu-west-1.amazonaws.com/test123/key456?t=this+is+a+simple+%26+short+test."))
	// u, _ := url.Parse("s3://test123/key456")
	// fmt.Println(s3u.Parse(u))
	// fmt.Println(MustParse(s3u.ParseString("s3://test123/key456")))
	// // Will panic: no hostname
	// // fmt.Println(MustParse(s3u.ParseString("")))
	// s3u = NewS3URI(
	// WithRegion("eu-west-1"),
	// WithVersionID("12341234"),
	// WithNormalizedKey(true),
	// )
	// spew.Dump(s3u.URI())
	// fmt.Println(s3u.ParseString("https://test123.s3.amazonaws.com/key456/?versionId=123456&x=1&y=2&y=3;z"))
	// fmt.Println(*s3u.Bucket, *s3u.Key, *s3u.Region, *s3u.PathStyle, *s3u.VersionID)
	// fmt.Println(s3u.URI().Scheme)
	// spew.Dump(s3u.URI())
	fmt.Println(Validate("https://test123.s3-accelerated.dualstack.amazonaws.com/key456/"))
	fmt.Println(Validate("ftp://google.com/"))
	fmt.Println(ParseString("ftp://google.com/"))
}
&{0x432c00 [] <nil> 0x40e194 0x40e195 0x40e196 0x40e197 0x40e198 0x40c1e8 0x40c1f0 <nil> <nil> 0x40c1f8} <nil>
(*main.S3URI)(0x432bc0)({
uri: (*url.URL)(0x432c00)(s3://test123/),
options: ([]main.S3URIOpt) <nil>,
normalize: (*bool)(<nil>),
HostStyle: (*bool)(0x40e194)(false),
PathStyle: (*bool)(0x40e195)(false),
Accelerated: (*bool)(0x40e196)(false),
DualStack: (*bool)(0x40e197)(false),
Website: (*bool)(0x40e198)(false),
Scheme: (*string)(0x40c1e8)((len=2) "s3"),
Bucket: (*string)(0x40c1f0)((len=7) "test123"),
Key: (*string)(<nil>),
VersionID: (*string)(<nil>),
Region: (*string)(0x40c1f8)((len=9) "us-east-1")
})
&{0x432c80 [] <nil> 0x40e48d 0x40e48e 0x40e48f 0x40e4a0 0x40e4a1 0x40c230 0x40c238 0x40c240 <nil> 0x40c248} <nil>
(*main.S3URI)(0x432bc0)({
uri: (*url.URL)(0x432c80)(s3://test123/key456),
options: ([]main.S3URIOpt) <nil>,
normalize: (*bool)(<nil>),
HostStyle: (*bool)(0x40e48d)(false),
PathStyle: (*bool)(0x40e48e)(false),
Accelerated: (*bool)(0x40e48f)(false),
DualStack: (*bool)(0x40e4a0)(false),
Website: (*bool)(0x40e4a1)(false),
Scheme: (*string)(0x40c230)((len=2) "s3"),
Bucket: (*string)(0x40c238)((len=7) "test123"),
Key: (*string)(0x40c240)((len=6) "key456"),
VersionID: (*string)(<nil>),
Region: (*string)(0x40c248)((len=9) "us-east-1")
})
&{0x432d00 [] <nil> 0x40e75d 0x40e75e 0x40e75f 0x40e770 0x40e771 0x40c260 0x40c268 0x40c270 <nil> 0x40c278} <nil>
(*main.S3URI)(0x432bc0)({
uri: (*url.URL)(0x432d00)(s3://test123/key456/),
options: ([]main.S3URIOpt) <nil>,
normalize: (*bool)(<nil>),
HostStyle: (*bool)(0x40e75d)(false),
PathStyle: (*bool)(0x40e75e)(false),
Accelerated: (*bool)(0x40e75f)(false),
DualStack: (*bool)(0x40e770)(false),
Website: (*bool)(0x40e771)(false),
Scheme: (*string)(0x40c260)((len=2) "s3"),
Bucket: (*string)(0x40c268)((len=7) "test123"),
Key: (*string)(0x40c270)((len=7) "key456/"),
VersionID: (*string)(<nil>),
Region: (*string)(0x40c278)((len=9) "us-east-1")
})
&{0x432d80 [] <nil> 0x40ea28 0x40ea2d 0x40ea2a 0x40ea2b 0x40ea2c 0x40c288 0x40c290 0x40c298 <nil> 0x40c2a8} <nil>
(*main.S3URI)(0x432bc0)({
uri: (*url.URL)(0x432d80)(https://s3-eu-west-1.amazonaws.com/test123/key456/),
options: ([]main.S3URIOpt) <nil>,
normalize: (*bool)(<nil>),
HostStyle: (*bool)(0x40ea28)(false),
PathStyle: (*bool)(0x40ea2d)(true),
Accelerated: (*bool)(0x40ea2a)(false),
DualStack: (*bool)(0x40ea2b)(false),
Website: (*bool)(0x40ea2c)(false),
Scheme: (*string)(0x40c288)((len=5) "https"),
Bucket: (*string)(0x40c290)((len=7) "test123"),
Key: (*string)(0x40c298)((len=7) "key456/"),
VersionID: (*string)(<nil>),
Region: (*string)(0x40c2a8)((len=9) "eu-west-1")
})
&{0x432e00 [] <nil> 0x40ece8 0x40eced 0x40ecea 0x40eceb 0x40ecec 0x40c2b8 0x40c2c0 0x40c2c8 <nil> 0x40c2d8} <nil>
(*main.S3URI)(0x432bc0)({
uri: (*url.URL)(0x432e00)(https://s3.eu-west-1.amazonaws.com/test123/key456/),
options: ([]main.S3URIOpt) <nil>,
normalize: (*bool)(<nil>),
HostStyle: (*bool)(0x40ece8)(false),
PathStyle: (*bool)(0x40eced)(true),
Accelerated: (*bool)(0x40ecea)(false),
DualStack: (*bool)(0x40eceb)(false),
Website: (*bool)(0x40ecec)(false),
Scheme: (*string)(0x40c2b8)((len=5) "https"),
Bucket: (*string)(0x40c2c0)((len=7) "test123"),
Key: (*string)(0x40c2c8)((len=7) "key456/"),
VersionID: (*string)(<nil>),
Region: (*string)(0x40c2d8)((len=9) "eu-west-1")
})
&{0x432e80 [] <nil> 0x40efa8 0x40efad 0x40efaa 0x40efae 0x40efac 0x40c2e8 0x40c2f0 0x40c2f8 <nil> 0x40c308} <nil>
(*main.S3URI)(0x432bc0)({
uri: (*url.URL)(0x432e80)(https://s3.dualstack.eu-west-1.amazonaws.com/test123/key456/),
options: ([]main.S3URIOpt) <nil>,
normalize: (*bool)(<nil>),
HostStyle: (*bool)(0x40efa8)(false),
PathStyle: (*bool)(0x40efad)(true),
Accelerated: (*bool)(0x40efaa)(false),
DualStack: (*bool)(0x40efae)(true),
Website: (*bool)(0x40efac)(false),
Scheme: (*string)(0x40c2e8)((len=5) "https"),
Bucket: (*string)(0x40c2f0)((len=7) "test123"),
Key: (*string)(0x40c2f8)((len=7) "key456/"),
VersionID: (*string)(<nil>),
Region: (*string)(0x40c308)((len=9) "eu-west-1")
})
&{0x432f00 [] <nil> 0x40f26d 0x40f269 0x40f26a 0x40f26b 0x40f26e 0x40c318 0x40c320 0x40c328 <nil> 0x40c338} <nil>
(*main.S3URI)(0x432bc0)({
uri: (*url.URL)(0x432f00)(https://test123.s3-website-eu-west-1.amazonaws.com/key456/),
options: ([]main.S3URIOpt) <nil>,
normalize: (*bool)(<nil>),
HostStyle: (*bool)(0x40f26d)(true),
PathStyle: (*bool)(0x40f269)(false),
Accelerated: (*bool)(0x40f26a)(false),
DualStack: (*bool)(0x40f26b)(false),
Website: (*bool)(0x40f26e)(true),
Scheme: (*string)(0x40c318)((len=5) "https"),
Bucket: (*string)(0x40c320)((len=7) "test123"),
Key: (*string)(0x40c328)((len=7) "key456/"),
VersionID: (*string)(<nil>),
Region: (*string)(0x40c338)((len=9) "eu-west-1")
})
&{0x432f80 [] <nil> 0x40f52d 0x40f529 0x40f52e 0x40f52b 0x40f52c 0x40c348 0x40c350 0x40c358 <nil> <nil>} <nil>
(*main.S3URI)(0x432bc0)({
uri: (*url.URL)(0x432f80)(https://test123.s3-accelerated.amazonaws.com/key456/),
options: ([]main.S3URIOpt) <nil>,
normalize: (*bool)(<nil>),
HostStyle: (*bool)(0x40f52d)(true),
PathStyle: (*bool)(0x40f529)(false),
Accelerated: (*bool)(0x40f52e)(true),
DualStack: (*bool)(0x40f52b)(false),
Website: (*bool)(0x40f52c)(false),
Scheme: (*string)(0x40c348)((len=5) "https"),
Bucket: (*string)(0x40c350)((len=7) "test123"),
Key: (*string)(0x40c358)((len=7) "key456/"),
VersionID: (*string)(<nil>),
Region: (*string)(<nil>)
})
&{0x433000 [] <nil> 0x40f7d5 0x40f7d1 0x40f7d6 0x40f7d7 0x40f7d4 0x40c368 0x40c370 0x40c378 <nil> <nil>} <nil>
(*main.S3URI)(0x432bc0)({
uri: (*url.URL)(0x433000)(https://test123.s3-accelerated.dualstack.amazonaws.com/key456/),
options: ([]main.S3URIOpt) <nil>,
normalize: (*bool)(<nil>),
HostStyle: (*bool)(0x40f7d5)(true),
PathStyle: (*bool)(0x40f7d1)(false),
Accelerated: (*bool)(0x40f7d6)(true),
DualStack: (*bool)(0x40f7d7)(true),
Website: (*bool)(0x40f7d4)(false),
Scheme: (*string)(0x40c368)((len=5) "https"),
Bucket: (*string)(0x40c370)((len=7) "test123"),
Key: (*string)(0x40c378)((len=7) "key456/"),
VersionID: (*string)(<nil>),
Region: (*string)(<nil>)
})
true
false
<nil> unable to parse schema type: ftp
@kwilczynski
Copy link
Author

kwilczynski commented Dec 9, 2019

@benpate
Copy link

benpate commented Aug 17, 2022

Hey, I found my way to this old code, and it looks like it solves a big problem I'm facing. Thank you for posting! Is it OK to use this as the starting point for a small Go module? Or, is there a better way that someone has already published?

@kwilczynski
Copy link
Author

Hi @benpate, welcome to this ancient Gist!

Hey, I found my way to this old code, and it looks like it solves a big problem I'm facing. Thank you for posting! Is it OK to use this as the starting point for a small Go module?

Of course. Feel free to use it any way you like. I hope that this actually works, especially as I put it together in one go to illustrate how to deal with this to a friend of mine.

There might be... bugs? 😄

Or, is there a better way that someone has already published?

I am not sure, to be honest. I have had to look for anything like that recently - and back then there wasn't anything, hence the example here.

A small background of why this came to be: a friend of mine needed to handle S3 Bucket both name (key, rather) creation as well as parsing of existing buckets in something he was working on.

He complained that there isn't anything like it in the Go SDK, and so then I put this together based on the approach in the Java SDK that he found (see link above).

@benpate
Copy link

benpate commented Aug 22, 2022

Awesome. Thank you. If I end up with anything interesting, I'll try to link back to it here :)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment