Last active
January 14, 2021 02:31
-
-
Save dnafication/097dedf048abdc1c92a8c1f890ced2f7 to your computer and use it in GitHub Desktop.
Iterator pattern for S3 objects
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import * as AWS from 'aws-sdk'; | |
import {GetObjectRequest, ListObjectsV2Request} from 'aws-sdk/clients/s3'; | |
// Global AWS SDK configuration: set the region on the SDK-wide config object
// before any service client is constructed.
AWS.config.update({
  region: 'ap-southeast-2',
});
/**
 * One listed S3 object, reduced to the two fields this file keeps from a
 * ListObjectsV2 result entry.
 */
interface S3Key {
  /** Object key, as reported in the listing entry's `Key` field. */
  key: string;
  /** Entity tag, as reported in the listing entry's `ETag` field. */
  etag: string;
}
/**
 * Async-iterable view over the S3 objects in one bucket whose keys share a
 * prefix.
 *
 * Usage is two-phase: call `loadKeys()` to list the matching keys, then
 * iterate with `for await...of`; each iteration step downloads one object
 * with `getObject`. Iterating before `loadKeys()` has been called yields
 * nothing, because `keysList` starts empty.
 */
class S3FileIterator {
  private readonly s3 = new AWS.S3({apiVersion: '2006-03-01'});

  /**
   * Sets the maximum number of keys returned per ListObjectsV2 response.
   * By default the API returns up to 1,000 key names.
   */
  maxKeys = 1000;
  bucketName: string;
  prefix: string;
  /** Keys collected by the most recent successful `loadKeys()` call. */
  keysList: S3Key[] = [];

  /**
   * @param bucketName Bucket to list and read objects from.
   * @param prefix Key prefix passed to ListObjectsV2 as `Prefix`.
   */
  constructor(bucketName: string, prefix: string) {
    this.bucketName = bucketName;
    this.prefix = prefix;
  }

  /**
   * Lists every key matching the prefix, following continuation tokens until
   * the listing is no longer truncated, and stores the result in `keysList`.
   *
   * Should be called after the object is created and before iterating.
   * Each call replaces `keysList` rather than appending to it, so calling it
   * again refreshes the list instead of duplicating entries. If a request
   * rejects, `keysList` is left untouched.
   *
   * @returns The freshly loaded list (the same array as `keysList`).
   */
  async loadKeys(): Promise<S3Key[]> {
    const found: S3Key[] = [];
    let continuationToken: string | undefined;

    do {
      const listObjectParams: ListObjectsV2Request = {
        Bucket: this.bucketName,
        MaxKeys: this.maxKeys,
        Prefix: this.prefix,
        // Only send the token on follow-up pages.
        ...(continuationToken ? {ContinuationToken: continuationToken} : {}),
      };
      const data = await this.s3.listObjectsV2(listObjectParams).promise();

      for (const item of data.Contents ?? []) {
        // Entries missing either field are skipped.
        if (item.Key && item.ETag) {
          found.push({etag: item.ETag, key: item.Key});
        }
      }

      // Advance the paging state on every response, even one without
      // `Contents`. A truncated page that carries no token also ends the
      // loop, since re-requesting without a token would repeat the same page.
      continuationToken = data.IsTruncated
        ? data.NextContinuationToken
        : undefined;
    } while (continuationToken);

    this.keysList = found;
    return this.keysList;
  }

  /**
   * The Symbol.asyncIterator well-known symbol specifies the default
   * AsyncIterator for an object. If this property is set on an object, it is
   * an async iterable and can be used in a for await...of loop.
   *
   * Each `next()` call downloads the object for the next entry in `keysList`.
   * The cursor only advances after a successful download, so a rejected
   * `next()` can be retried for the same key.
   */
  [Symbol.asyncIterator]() {
    let index = 0;
    return {
      next: async () => {
        const entry = this.keysList[index];
        if (entry === undefined) {
          // `done`/`value` are both spelled out with literal types so the
          // two result shapes form a discriminated union.
          return {value: undefined, done: true as const};
        }
        const getObjectParams: GetObjectRequest = {
          Bucket: this.bucketName,
          Key: entry.key,
        };
        const object = await this.s3.getObject(getObjectParams).promise();
        index += 1;
        return {value: object, done: false as const};
      },
    };
  }
}
// Instantiate the iterable for one bucket/prefix pair.
const myS3Files = new S3FileIterator('bucket-name', 'filePrefix');

/**
 * Loads the matching keys, then walks the objects one at a time and logs each
 * object's ETag.
 */
async function main(): Promise<void> {
  // Load all keys up front.
  // TODO: load only as required (anyone got good ideas?)
  await myS3Files.loadKeys();

  // Best part: iterate through the filtered S3 objects.
  for await (const s3File of myS3Files) {
    console.log(s3File?.ETag);
  }
}

// Handle rejection explicitly: log the failure and report a non-zero exit
// code instead of leaving an unhandled promise rejection.
main().catch((error: unknown) => {
  console.error(error);
  process.exitCode = 1;
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment