Skip to content

Instantly share code, notes, and snippets.

@kennykarnama
Forked from kwilczynski/buckets.txt
Created June 23, 2021 13:10
Show Gist options
  • Save kennykarnama/6d15490b9505c6ce4a5dca4e0d7fd15b to your computer and use it in GitHub Desktop.
Save kennykarnama/6d15490b9505c6ce4a5dca4e0d7fd15b to your computer and use it in GitHub Desktop.
Parse S3 URL into bucket, key, region, version ID, etc. Useful when using AWS SDK for Go.
http://s3.amazonaws.com/bucket/key (for a bucket created in the US East (N. Virginia) region)
https://s3.amazonaws.com/bucket/key
http://s3-region.amazonaws.com/bucket/key
https://s3-region.amazonaws.com/bucket/key
http://s3.region.amazonaws.com/bucket/key
https://s3.region.amazonaws.com/bucket/key
http://s3.dualstack.region.amazonaws.com/bucket/key (for requests using IPv4 or IPv6)
https://s3.dualstack.region.amazonaws.com/bucket/key
http://bucket.s3.amazonaws.com/key
http://bucket.s3-region.amazonaws.com/key
http://bucket.s3.region.amazonaws.com/key
http://bucket.s3.dualstack.region.amazonaws.com/key (for requests using IPv4 or IPv6)
http://bucket.s3-website-region.amazonaws.com/key (if static website hosting is enabled on the bucket)
http://bucket.s3-website.region.amazonaws.com/key (if static website hosting is enabled on the bucket)
http://bucket.s3-accelerated.amazonaws.com/key (where the file transfer exits Amazon's network at the last possible moment so as to give the fastest possible transfer speed and lowest latency)
http://bucket.s3-accelerated.dualstack.amazonaws.com/key
http://bucket/key (where bucket is a DNS CNAME record pointing to bucket.s3.amazonaws.com)
Host-Style Naming: http://bucket.s3-region.amazonaws.com
Path-Style Naming: http://s3-region.amazonaws.com/bucket
package main
import (
"errors"
"fmt"
"net/url"
"regexp"
"strings"
"github.com/davecgh/go-spew/spew"
)
// DefaultRegion contains a default region for an S3 bucket, used when a
// region cannot be determined, for example when the s3:// scheme is used or
// when an http(s) URL has been given without the region component in the
// fully-qualified domain name.
const DefaultRegion = "us-east-1"
var (
// ErrBucketNotFound is returned when an s3:// URL has an empty host
// component, i.e. no bucket name.
ErrBucketNotFound = errors.New("bucket name could not be found")
// ErrHostnameNotFound is returned when an http:// or https:// URL has an
// empty host component.
ErrHostnameNotFound = errors.New("hostname could not be found")
// ErrInvalidS3Endpoint is returned when the host of an http:// or https://
// URL does not match s3URLPattern.
ErrInvalidS3Endpoint = errors.New("an invalid S3 endpoint URL")
// Pattern used to parse multiple path and host style S3 endpoint URLs.
// It is matched against the host only. Submatch 1 is the optional
// "<bucket>." prefix (present for host style URLs), submatch 2 is the
// optional usage token (accelerated, dualstack or website), and submatch 3
// is the DNS label that follows, which is either the region or a
// non-region label such as "amazonaws" or "dualstack".
// NOTE(review): the pattern does not require the host to belong to the
// amazonaws.com domain, so a host such as "s3.example.com" matches too.
s3URLPattern = regexp.MustCompile(`^(.+\.)?s3[.-](?:(accelerated|dualstack|website)[.-])?([a-z0-9-]+)\.`)
)
// S3URIOpt is a functional option that modifies the S3URI it is applied to.
// Options are collected by NewS3URI.
type S3URIOpt func(*S3URI)
// WithScheme returns an option that sets the Scheme field to s when it is
// applied to an S3URI.
func WithScheme(s string) S3URIOpt {
	return S3URIOpt(func(target *S3URI) {
		target.Scheme = String(s)
	})
}
// WithBucket returns an option that sets the Bucket field to s when it is
// applied to an S3URI.
func WithBucket(s string) S3URIOpt {
	return S3URIOpt(func(target *S3URI) {
		target.Bucket = String(s)
	})
}
// WithKey returns an option that sets the Key field to s when it is applied
// to an S3URI.
func WithKey(s string) S3URIOpt {
	return S3URIOpt(func(target *S3URI) {
		target.Key = String(s)
	})
}
// WithVersionID returns an option that sets the VersionID field to s when it
// is applied to an S3URI.
func WithVersionID(s string) S3URIOpt {
	return S3URIOpt(func(target *S3URI) {
		target.VersionID = String(s)
	})
}
// WithRegion returns an option that sets the Region field to s when it is
// applied to an S3URI.
func WithRegion(s string) S3URIOpt {
	return S3URIOpt(func(target *S3URI) {
		target.Region = String(s)
	})
}
// WithNormalizedKey returns an option that sets the unexported normalize flag
// to b when it is applied to an S3URI. The flag is consulted by parse to
// decide whether a trailing slash is removed from the key.
func WithNormalizedKey(b bool) S3URIOpt {
	return S3URIOpt(func(target *S3URI) {
		target.normalize = Bool(b)
	})
}
// S3URI holds the components extracted from an S3 location, given either as
// an s3:// URL or as a path style or host style http(s) endpoint URL. Fields
// are pointers so that "not present" (nil) can be told apart from an empty
// or false value.
type S3URI struct {
// uri is the URL most recently handed to parse; it is returned by URI.
uri *url.URL
// options are the functional options collected by NewS3URI.
options []S3URIOpt
// normalize, when true, makes parse remove a trailing slash from Key.
// It is set through WithNormalizedKey.
normalize *bool
// HostStyle is true when the bucket name is part of the host name.
HostStyle *bool
// PathStyle is true when the host name carries no bucket prefix, so the
// bucket name is taken from the first path segment.
PathStyle *bool
// Accelerated is true when the host contains the "accelerated" usage token.
Accelerated *bool
// DualStack is true when the host contains the "dualstack" token.
DualStack *bool
// Website is true when the host contains the "website" usage token.
Website *bool
// Scheme is the URL scheme: "s3", "http" or "https".
Scheme *string
// Bucket is the bucket name; nil when none could be extracted.
Bucket *string
// Key is the object key; nil when the URL carries no key.
Key *string
// VersionID is the value of the "versionId" query parameter, if any.
VersionID *string
// Region is the bucket region, falling back to DefaultRegion when the URL
// does not name one. It is left nil for accelerated endpoints.
Region *string
}
// NewS3URI allocates an empty S3URI that remembers the given options. No
// other field is initialised.
func NewS3URI(opts ...S3URIOpt) *S3URI {
	s3u := new(S3URI)
	s3u.options = opts
	return s3u
}
// Reset reinitialises the receiver through reset and returns the receiver so
// that calls can be chained.
func (s3u *S3URI) Reset() *S3URI {
	reset(s3u)
	return s3u
}
// Parse parses v, which must be either a string or a *url.URL, into the
// receiver. It returns the receiver on success, or nil and an error when v
// has an unsupported type or is not a recognised S3 URL.
func (s3u *S3URI) Parse(v interface{}) (*S3URI, error) {
return parse(s3u, v)
}
// ParseURL parses the already parsed URL u into the receiver. It returns the
// receiver on success, or nil and an error when u is not a recognised S3 URL.
func (s3u *S3URI) ParseURL(u *url.URL) (*S3URI, error) {
return parse(s3u, u)
}
// ParseString parses the URL string s into the receiver. It returns the
// receiver on success, or nil and an error when s cannot be parsed as a URL
// or is not a recognised S3 URL.
func (s3u *S3URI) ParseString(s string) (*S3URI, error) {
return parse(s3u, s)
}
// URI returns the URL stored by the most recent parse, or nil when nothing
// has been stored yet.
func (s3u *S3URI) URI() *url.URL {
return s3u.uri
}
// Parse parses v, which must be either a string or a *url.URL, using a
// freshly created S3URI without options.
func Parse(v interface{}) (*S3URI, error) {
	s3u := NewS3URI()
	return s3u.Parse(v)
}
// ParseURL parses the already parsed URL u using a freshly created S3URI
// without options.
func ParseURL(u *url.URL) (*S3URI, error) {
	s3u := NewS3URI()
	return s3u.ParseURL(u)
}
// ParseString parses the URL string s using a freshly created S3URI without
// options.
func ParseString(s string) (*S3URI, error) {
	s3u := NewS3URI()
	return s3u.ParseString(s)
}
// MustParse wraps the result of one of the parse functions: it returns s3u
// unchanged when err is nil and panics with err otherwise.
func MustParse(s3u *S3URI, err error) *S3URI {
	if err == nil {
		return s3u
	}
	panic(err)
}
// Validate reports whether v, a string or a *url.URL, parses without error
// as an S3 URL.
func Validate(v interface{}) bool {
	if _, err := NewS3URI().Parse(v); err != nil {
		return false
	}
	return true
}
// ValidateURL reports whether u parses without error as an S3 URL.
func ValidateURL(u *url.URL) bool {
	if _, err := NewS3URI().Parse(u); err != nil {
		return false
	}
	return true
}
// ValidateString reports whether s parses without error as an S3 URL.
func ValidateString(s string) bool {
	if _, err := NewS3URI().Parse(s); err != nil {
		return false
	}
	return true
}
// parse populates s3u from an S3 location given either as a string or as a
// *url.URL. Three forms are understood:
//
//   - s3://bucket/key
//   - path style: http(s)://s3[.-]region.amazonaws.com/bucket/key
//   - host style: http(s)://bucket.s3[.-]region.amazonaws.com/key
//
// Any state left in s3u by a previous call is discarded first, except for
// the options collected by NewS3URI, which are applied as overrides once the
// URL itself has been parsed. The URL passed in is stored as-is and is never
// modified.
//
// On success the populated s3u is returned. On failure the result is nil and
// the error is ErrBucketNotFound, ErrHostnameNotFound, ErrInvalidS3Endpoint,
// or an error describing an unsupported argument type, an unsupported scheme
// or a url.Parse failure.
func parse(s3u *S3URI, s interface{}) (*S3URI, error) {
	var (
		u   *url.URL
		err error
	)

	switch s := s.(type) {
	case string:
		u, err = url.Parse(s)
	case *url.URL:
		u = s
	default:
		return nil, fmt.Errorf("unable to parse unknown type: %T", s)
	}
	if err != nil {
		return nil, fmt.Errorf("unable to parse given S3 endpoint URL: %w", err)
	}
	// A nil *url.URL would otherwise be dereferenced below.
	if u == nil {
		return nil, ErrInvalidS3Endpoint
	}

	// Clearing the previous result must not drop the options handed to
	// NewS3URI, otherwise the override loop below would never run.
	opts := s3u.options
	reset(s3u)
	s3u.options = opts
	s3u.uri = u

	switch u.Scheme {
	case "s3":
		s3u.Scheme = String(u.Scheme)
		err = parseBucketURI(s3u, u)
	case "http", "https":
		s3u.Scheme = String(u.Scheme)
		err = parseEndpointURL(s3u, u)
	default:
		return nil, fmt.Errorf("unable to parse schema type: %s", u.Scheme)
	}
	if err != nil {
		return nil, err
	}

	// Apply options that serve as overrides after the initial parsing
	// is completed. This allows for bucket name, key, version ID, etc.,
	// to be overridden at the parsing stage.
	for _, o := range s3u.options {
		if o != nil {
			o(s3u)
		}
	}

	// Remove a trailing slash from the key name, so that "key/" becomes
	// "key" and "a/complex/key/" becomes "a/complex/key". TrimSuffix is
	// also safe for an empty key, which an option may have set.
	if BoolValue(s3u.normalize) && s3u.Key != nil {
		s3u.Key = String(strings.TrimSuffix(StringValue(s3u.Key), "/"))
	}

	return s3u, nil
}

// parseBucketURI fills in bucket, key and region from an s3://bucket/key
// URL, which is neither the host style nor the path style.
func parseBucketURI(s3u *S3URI, u *url.URL) error {
	if u.Host == "" {
		return ErrBucketNotFound
	}
	s3u.Bucket = String(u.Host)

	if key := strings.TrimPrefix(u.Path, "/"); key != "" {
		s3u.Key = String(key)
	}

	// The s3:// form carries no region information.
	s3u.Region = String(DefaultRegion)

	return nil
}

// parseEndpointURL fills in style, bucket, key, usage flags, region and
// version ID from a path style or host style http(s) S3 endpoint URL.
func parseEndpointURL(s3u *S3URI, u *url.URL) error {
	const (
		// Used to denote type of the S3 bucket.
		accelerated = "accelerated"
		dualStack   = "dualstack"
		website     = "website"

		// Part of the amazonaws.com domain name. Captured in place of
		// the region when the S3 endpoint URL does not name one.
		amazonAWS = "amazonaws"

		// Part of the query parameters. Used when retrieving S3
		// object (key) of a particular version.
		versionID = "versionId"
	)

	if u.Host == "" {
		return ErrHostnameNotFound
	}

	matches := s3URLPattern.FindStringSubmatch(u.Host)
	if matches == nil {
		return ErrInvalidS3Endpoint
	}
	prefix, usage, region := matches[1], matches[2], matches[3]

	// Work on a copy of the path: u belongs to the caller and is also
	// what URI() returns, so it must not be rewritten.
	objectPath := strings.TrimPrefix(u.Path, "/")

	if prefix == "" {
		// Path style: the first path segment is the bucket and the
		// remainder, if any, is the key.
		s3u.PathStyle = Bool(true)
		if objectPath != "" {
			bucket, key := objectPath, ""
			if i := strings.Index(objectPath, "/"); i != -1 {
				bucket, key = objectPath[:i], objectPath[i+1:]
			}
			s3u.Bucket = String(bucket)
			if key != "" {
				s3u.Key = String(key)
			}
		}
	} else {
		// Host style: the bucket is the host prefix without its
		// trailing dot and the whole path is the key.
		s3u.HostStyle = Bool(true)
		s3u.Bucket = String(strings.TrimSuffix(prefix, "."))
		if objectPath != "" {
			s3u.Key = String(objectPath)
		}
	}

	// An S3 bucket can be either accelerated or website endpoint,
	// but not both.
	switch usage {
	case accelerated:
		s3u.Accelerated = Bool(true)
	case website:
		s3u.Website = Bool(true)
	}

	// An accelerated S3 bucket can also be dualstack, in which case
	// "dualstack" is captured where the region would normally be.
	if usage == dualStack || region == dualStack {
		s3u.DualStack = Bool(true)
	}

	// Accelerated endpoints, including the accelerated dualstack form
	// <BUCKET>.s3-accelerated.dualstack.amazonaws.com/<KEY>, do not
	// name a region, so there is no way to ascertain it from the URL
	// alone and Region is left unset.
	if usage != accelerated {
		if region == amazonAWS {
			region = DefaultRegion
		}
		s3u.Region = String(region)
	}

	// Query string used when requesting a particular version of a given
	// S3 object (key).
	if v := u.Query().Get(versionID); v != "" {
		s3u.VersionID = String(v)
	}

	return nil
}
// reset clears every parse result held by s3u and points the boolean flags
// at fresh false values, then returns s3u. The options collected by
// NewS3URI are configuration rather than parse results, so they are carried
// over; dropping them would silently disable the overrides on every
// subsequent parse.
func reset(s3u *S3URI) *S3URI {
	*s3u = S3URI{
		options:     s3u.options,
		HostStyle:   Bool(false),
		PathStyle:   Bool(false),
		Accelerated: Bool(false),
		DualStack:   Bool(false),
		Website:     Bool(false),
	}
	return s3u
}
// String returns a pointer to a new string variable holding a copy of s.
func String(s string) *string {
	p := new(string)
	*p = s
	return p
}
// Bool returns a pointer to a new bool variable holding b.
func Bool(b bool) *bool {
	p := new(bool)
	*p = b
	return p
}
// StringValue returns the string s points to, or the empty string when s is
// nil.
func StringValue(s *string) string {
	if s == nil {
		return ""
	}
	return *s
}
// BoolValue returns the bool b points to, or false when b is nil.
func BoolValue(b *bool) bool {
	if b == nil {
		return false
	}
	return *b
}
// main is a small demonstration: it parses a series of S3 URLs into one
// reused S3URI value, printing the parse result and dumping the value after
// each one, and then prints the outcome of a few validation calls.
func main() {
	s3u := NewS3URI()

	// Every entry is parsed in order into the same S3URI value.
	endpoints := []string{
		"s3://test123/",
		"s3://test123/key456",
		"s3://test123/key456/",
		"https://s3-eu-west-1.amazonaws.com/test123/key456/",
		"https://s3.eu-west-1.amazonaws.com/test123/key456/",
		"https://s3.dualstack.eu-west-1.amazonaws.com/test123/key456/",
		"https://test123.s3-website-eu-west-1.amazonaws.com/key456/",
		"https://test123.s3-accelerated.amazonaws.com/key456/",
		"https://test123.s3-accelerated.dualstack.amazonaws.com/key456/",
	}
	for _, endpoint := range endpoints {
		fmt.Println(s3u.ParseString(endpoint))
		spew.Dump(s3u)
	}

	fmt.Println(Validate("https://test123.s3-accelerated.dualstack.amazonaws.com/key456/"))
	fmt.Println(Validate("ftp://google.com/"))
	fmt.Println(ParseString("ftp://google.com/"))

	// Further examples, currently disabled:
	//
	// fmt.Println(s3u.ParseString("s3://test123"))
	// fmt.Println(s3u.URI())
	// spew.Dump(s3u)
	// s3u.Bucket = String("test")
	// fmt.Println(s3u.URI().String())
	// spew.Dump(s3u)
	//
	// fmt.Println(s3u.ParseString("https://s3.amazonaws.com/test123"))
	// fmt.Println(s3u.ParseString("https://s3.amazonaws.com/test123/"))
	// fmt.Println(s3u.ParseString("https://s3.amazonaws.com/test123/key456"))
	// fmt.Println(s3u.ParseString("https://s3.amazonaws.com/test123/key456/"))
	//
	// fmt.Println(s3u.ParseString("https://test123.s3.amazonaws.com/"))
	// fmt.Println(s3u.ParseString("https://test123.s3.amazonaws.com/key456"))
	// fmt.Println(s3u.ParseString("https://test123.s3.amazonaws.com/key456"))
	// fmt.Println(s3u.ParseString("https://google.com")) // invalid S3 endpoint
	// fmt.Println(s3u.ParseString("https://test123.s3.amazonaws.com/key456?versionId=123456&x=1&y=2&y=3;z"))
	// fmt.Println(*s3u.Bucket, *s3u.Key, *s3u.Region, *s3u.PathStyle, *s3u.VersionID)
	// fmt.Println(s3u.URI().Scheme)
	// fmt.Println(s3u.ParseString("https://s3-eu-west-1.amazonaws.com/test123/key456?t=this+is+a+simple+%26+short+test."))
	// u, _ := url.Parse("s3://test123/key456")
	// fmt.Println(s3u.Parse(u))
	// fmt.Println(MustParse(s3u.ParseString("s3://test123/key456")))
	// // Will panic: parsing fails
	// // fmt.Println(MustParse(s3u.ParseString("")))
	//
	// s3u = NewS3URI(
	// 	WithRegion("eu-west-1"),
	// 	WithVersionID("12341234"),
	// 	WithNormalizedKey(true),
	// )
	// spew.Dump(s3u.URI())
	// fmt.Println(s3u.ParseString("https://test123.s3.amazonaws.com/key456/?versionId=123456&x=1&y=2&y=3;z"))
	// fmt.Println(*s3u.Bucket, *s3u.Key, *s3u.Region, *s3u.PathStyle, *s3u.VersionID)
	// fmt.Println(s3u.URI().Scheme)
	// spew.Dump(s3u.URI())
}
&{0x432c00 [] <nil> 0x40e194 0x40e195 0x40e196 0x40e197 0x40e198 0x40c1e8 0x40c1f0 <nil> <nil> 0x40c1f8} <nil>
(*main.S3URI)(0x432bc0)({
uri: (*url.URL)(0x432c00)(s3://test123/),
options: ([]main.S3URIOpt) <nil>,
normalize: (*bool)(<nil>),
HostStyle: (*bool)(0x40e194)(false),
PathStyle: (*bool)(0x40e195)(false),
Accelerated: (*bool)(0x40e196)(false),
DualStack: (*bool)(0x40e197)(false),
Website: (*bool)(0x40e198)(false),
Scheme: (*string)(0x40c1e8)((len=2) "s3"),
Bucket: (*string)(0x40c1f0)((len=7) "test123"),
Key: (*string)(<nil>),
VersionID: (*string)(<nil>),
Region: (*string)(0x40c1f8)((len=9) "us-east-1")
})
&{0x432c80 [] <nil> 0x40e48d 0x40e48e 0x40e48f 0x40e4a0 0x40e4a1 0x40c230 0x40c238 0x40c240 <nil> 0x40c248} <nil>
(*main.S3URI)(0x432bc0)({
uri: (*url.URL)(0x432c80)(s3://test123/key456),
options: ([]main.S3URIOpt) <nil>,
normalize: (*bool)(<nil>),
HostStyle: (*bool)(0x40e48d)(false),
PathStyle: (*bool)(0x40e48e)(false),
Accelerated: (*bool)(0x40e48f)(false),
DualStack: (*bool)(0x40e4a0)(false),
Website: (*bool)(0x40e4a1)(false),
Scheme: (*string)(0x40c230)((len=2) "s3"),
Bucket: (*string)(0x40c238)((len=7) "test123"),
Key: (*string)(0x40c240)((len=6) "key456"),
VersionID: (*string)(<nil>),
Region: (*string)(0x40c248)((len=9) "us-east-1")
})
&{0x432d00 [] <nil> 0x40e75d 0x40e75e 0x40e75f 0x40e770 0x40e771 0x40c260 0x40c268 0x40c270 <nil> 0x40c278} <nil>
(*main.S3URI)(0x432bc0)({
uri: (*url.URL)(0x432d00)(s3://test123/key456/),
options: ([]main.S3URIOpt) <nil>,
normalize: (*bool)(<nil>),
HostStyle: (*bool)(0x40e75d)(false),
PathStyle: (*bool)(0x40e75e)(false),
Accelerated: (*bool)(0x40e75f)(false),
DualStack: (*bool)(0x40e770)(false),
Website: (*bool)(0x40e771)(false),
Scheme: (*string)(0x40c260)((len=2) "s3"),
Bucket: (*string)(0x40c268)((len=7) "test123"),
Key: (*string)(0x40c270)((len=7) "key456/"),
VersionID: (*string)(<nil>),
Region: (*string)(0x40c278)((len=9) "us-east-1")
})
&{0x432d80 [] <nil> 0x40ea28 0x40ea2d 0x40ea2a 0x40ea2b 0x40ea2c 0x40c288 0x40c290 0x40c298 <nil> 0x40c2a8} <nil>
(*main.S3URI)(0x432bc0)({
uri: (*url.URL)(0x432d80)(https://s3-eu-west-1.amazonaws.com/test123/key456/),
options: ([]main.S3URIOpt) <nil>,
normalize: (*bool)(<nil>),
HostStyle: (*bool)(0x40ea28)(false),
PathStyle: (*bool)(0x40ea2d)(true),
Accelerated: (*bool)(0x40ea2a)(false),
DualStack: (*bool)(0x40ea2b)(false),
Website: (*bool)(0x40ea2c)(false),
Scheme: (*string)(0x40c288)((len=5) "https"),
Bucket: (*string)(0x40c290)((len=7) "test123"),
Key: (*string)(0x40c298)((len=7) "key456/"),
VersionID: (*string)(<nil>),
Region: (*string)(0x40c2a8)((len=9) "eu-west-1")
})
&{0x432e00 [] <nil> 0x40ece8 0x40eced 0x40ecea 0x40eceb 0x40ecec 0x40c2b8 0x40c2c0 0x40c2c8 <nil> 0x40c2d8} <nil>
(*main.S3URI)(0x432bc0)({
uri: (*url.URL)(0x432e00)(https://s3.eu-west-1.amazonaws.com/test123/key456/),
options: ([]main.S3URIOpt) <nil>,
normalize: (*bool)(<nil>),
HostStyle: (*bool)(0x40ece8)(false),
PathStyle: (*bool)(0x40eced)(true),
Accelerated: (*bool)(0x40ecea)(false),
DualStack: (*bool)(0x40eceb)(false),
Website: (*bool)(0x40ecec)(false),
Scheme: (*string)(0x40c2b8)((len=5) "https"),
Bucket: (*string)(0x40c2c0)((len=7) "test123"),
Key: (*string)(0x40c2c8)((len=7) "key456/"),
VersionID: (*string)(<nil>),
Region: (*string)(0x40c2d8)((len=9) "eu-west-1")
})
&{0x432e80 [] <nil> 0x40efa8 0x40efad 0x40efaa 0x40efae 0x40efac 0x40c2e8 0x40c2f0 0x40c2f8 <nil> 0x40c308} <nil>
(*main.S3URI)(0x432bc0)({
uri: (*url.URL)(0x432e80)(https://s3.dualstack.eu-west-1.amazonaws.com/test123/key456/),
options: ([]main.S3URIOpt) <nil>,
normalize: (*bool)(<nil>),
HostStyle: (*bool)(0x40efa8)(false),
PathStyle: (*bool)(0x40efad)(true),
Accelerated: (*bool)(0x40efaa)(false),
DualStack: (*bool)(0x40efae)(true),
Website: (*bool)(0x40efac)(false),
Scheme: (*string)(0x40c2e8)((len=5) "https"),
Bucket: (*string)(0x40c2f0)((len=7) "test123"),
Key: (*string)(0x40c2f8)((len=7) "key456/"),
VersionID: (*string)(<nil>),
Region: (*string)(0x40c308)((len=9) "eu-west-1")
})
&{0x432f00 [] <nil> 0x40f26d 0x40f269 0x40f26a 0x40f26b 0x40f26e 0x40c318 0x40c320 0x40c328 <nil> 0x40c338} <nil>
(*main.S3URI)(0x432bc0)({
uri: (*url.URL)(0x432f00)(https://test123.s3-website-eu-west-1.amazonaws.com/key456/),
options: ([]main.S3URIOpt) <nil>,
normalize: (*bool)(<nil>),
HostStyle: (*bool)(0x40f26d)(true),
PathStyle: (*bool)(0x40f269)(false),
Accelerated: (*bool)(0x40f26a)(false),
DualStack: (*bool)(0x40f26b)(false),
Website: (*bool)(0x40f26e)(true),
Scheme: (*string)(0x40c318)((len=5) "https"),
Bucket: (*string)(0x40c320)((len=7) "test123"),
Key: (*string)(0x40c328)((len=7) "key456/"),
VersionID: (*string)(<nil>),
Region: (*string)(0x40c338)((len=9) "eu-west-1")
})
&{0x432f80 [] <nil> 0x40f52d 0x40f529 0x40f52e 0x40f52b 0x40f52c 0x40c348 0x40c350 0x40c358 <nil> <nil>} <nil>
(*main.S3URI)(0x432bc0)({
uri: (*url.URL)(0x432f80)(https://test123.s3-accelerated.amazonaws.com/key456/),
options: ([]main.S3URIOpt) <nil>,
normalize: (*bool)(<nil>),
HostStyle: (*bool)(0x40f52d)(true),
PathStyle: (*bool)(0x40f529)(false),
Accelerated: (*bool)(0x40f52e)(true),
DualStack: (*bool)(0x40f52b)(false),
Website: (*bool)(0x40f52c)(false),
Scheme: (*string)(0x40c348)((len=5) "https"),
Bucket: (*string)(0x40c350)((len=7) "test123"),
Key: (*string)(0x40c358)((len=7) "key456/"),
VersionID: (*string)(<nil>),
Region: (*string)(<nil>)
})
&{0x433000 [] <nil> 0x40f7d5 0x40f7d1 0x40f7d6 0x40f7d7 0x40f7d4 0x40c368 0x40c370 0x40c378 <nil> <nil>} <nil>
(*main.S3URI)(0x432bc0)({
uri: (*url.URL)(0x433000)(https://test123.s3-accelerated.dualstack.amazonaws.com/key456/),
options: ([]main.S3URIOpt) <nil>,
normalize: (*bool)(<nil>),
HostStyle: (*bool)(0x40f7d5)(true),
PathStyle: (*bool)(0x40f7d1)(false),
Accelerated: (*bool)(0x40f7d6)(true),
DualStack: (*bool)(0x40f7d7)(true),
Website: (*bool)(0x40f7d4)(false),
Scheme: (*string)(0x40c368)((len=5) "https"),
Bucket: (*string)(0x40c370)((len=7) "test123"),
Key: (*string)(0x40c378)((len=7) "key456/"),
VersionID: (*string)(<nil>),
Region: (*string)(<nil>)
})
true
false
<nil> unable to parse schema type: ftp
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment