Skip to content

Instantly share code, notes, and snippets.

Last active May 14, 2022 23:15
Show Gist options
  • Save pfeilbr/24e18abf1ae051a90589ae16488cadb8 to your computer and use it in GitHub Desktop.
Save pfeilbr/24e18abf1ae051a90589ae16488cadb8 to your computer and use it in GitHub Desktop.
fetch all aws directory api metadata (arch diagrams, products, blog posts, builders library articles, etc.)
// fetch all aws directory api metadata
(async () => {
// AWS directory identifiers to crawl. Each entry is an object so callers can
// read `entry.directoryId` (this is how `main` consumes the list).
const directories = [
  { "directoryId": "event-content" },
  { "directoryId": "amazon-redwood" },
  { "directoryId": "aws-products" },
  { "directoryId": "free-tier-products" },
  { "directoryId": "blog-posts" },
  { "directoryId": "whats-new" },
  { "directoryId": "security-bulletins" },
  { "directoryId": "media-resources" },
];
/**
 * Log a value to the console as pretty-printed (2-space indented) JSON.
 *
 * @param {*} o - Value to serialize with JSON.stringify and print.
 */
const l = (o) => {
  console.log(JSON.stringify(o, null, 2));
};
/**
 * Return a promise that resolves after `ms` milliseconds (via setTimeout).
 * Await it to pause between requests.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>}
 */
const sleep = (ms) => {
  return new Promise((resolve) => setTimeout(resolve, ms));
};
/**
 * Fetch a URL and parse the response body as JSON.
 *
 * @param {string} url - URL to request.
 * @returns {Promise<*>} The parsed JSON body.
 * @throws {Error} When the response status is not in the 2xx range.
 */
const fetchJSON = async (url) => {
  const resp = await fetch(url);
  // fetch() only rejects on network failure; an HTTP error status must be
  // checked explicitly, otherwise an error page gets parsed as if it were data.
  if (!resp.ok) {
    throw new Error(`Request failed with HTTP ${resp.status} for ${url}`);
  }
  return resp.json();
};
/**
 * Fetch the search response for a directory without paging parameters; the
 * caller reads `metadata.count` and `metadata.totalHits` from the result.
 *
 * @param {string} directoryId - Directory identifier, e.g. "blog-posts".
 * @returns {Promise<object>} Parsed JSON response.
 */
const fetchDirectoryMetadata = async (directoryId) => {
  // NOTE(review): the template starts directly with the directory id — the
  // endpoint/base-URL prefix appears to be missing from this copy and must be
  // restored before this request can succeed.
  const metadataURL = `${directoryId}&item.locale=en_US`;
  return fetchJSON(metadataURL);
};
/**
 * Fetch the result pages of a directory sequentially, one request at a time.
 *
 * @param {string} directoryId - Directory identifier, e.g. "blog-posts".
 * @param {object} metadata - Metadata response; `metadata.metadata.count` is
 *   used as the page size and `metadata.metadata.pageCount` as the number of
 *   pages to request.
 * @returns {Promise<object[]>} The page responses that contained items, in
 *   page order.
 */
const fetchDirectoryContent = async (directoryId, metadata) => {
  const { count, pageCount } = metadata.metadata;
  // NOTE(review): as with the metadata URL, the endpoint prefix appears to be
  // missing from this template in this copy.
  const urlTemplate = `${directoryId}&size=${count}&item.locale=en_US&page=`;
  const pages = [];
  for (let pageIndex = 0; pageIndex < pageCount; pageIndex += 1) {
    const data = await fetchJSON(`${urlTemplate}${pageIndex}`);
    if (data.items.length === 0) {
      // appears max page is 1000. this is based on
      // "directoryId=blog-posts,action=break,pageIndex=1000,pageIndexes.length=2077"
      // An empty page means there is nothing further to read, so stop early
      // instead of requesting the remaining page indexes.
      break;
    }
    pages.push(data);
  }
  return pages;
};
/**
 * Fetch every page of a directory: read its metadata, derive the page count
 * from `totalHits` and `count`, then download the pages.
 *
 * @param {string} directoryId - Directory identifier, e.g. "blog-posts".
 * @returns {Promise<object[]>} Page responses from fetchDirectoryContent.
 */
const fetchDirectory = async (directoryId) => {
  const metadata = await fetchDirectoryMetadata(directoryId);
  const { totalHits, count } = metadata.metadata;
  // A page size of 0 would make totalHits / count evaluate to Infinity (or NaN
  // for 0 / 0) and yield an unusable page count; treat it as "no pages".
  metadata.metadata.pageCount = count > 0 ? Math.ceil(totalHits / count) : 0;
  return fetchDirectoryContent(directoryId, metadata);
};
/**
 * Prompt for a save location with `window.showSaveFilePicker()` and write a
 * small sample JSON document ({"hello": "world"}) to the chosen file.
 *
 * @returns {Promise<void>}
 */
const saveFile = async () => {
  // create a new handle
  const newHandle = await window.showSaveFilePicker();
  // create a FileSystemWritableFileStream to write to
  const writableStream = await newHandle.createWritable();
  const obj = { hello: 'world' };
  const blob = new Blob([JSON.stringify(obj, null, 2)], { type: 'application/json' });
  // write our file
  await writableStream.write(blob);
  // close the file and write the contents to disk.
  await writableStream.close();
};
/**
 * Entry point: crawl the configured directories and report how many items
 * each one returned. Errors are logged, not rethrown.
 *
 * @returns {Promise<void>}
 */
const main = async () => {
  //await saveFile();
  try {
    // slice(0, 1) limits the run to the first configured directory only.
    for (const directory of directories.slice(0, 1)) {
      const data = await fetchDirectory(directory.directoryId);
      const totalItems = data.reduce((previous, current) => {
        return previous + current.items.length;
      }, 0);
      // NOTE(review): the original computed totalItems without any visible
      // use; it is reported here so the result of the crawl is observable.
      l({ directoryId: directory.directoryId, totalItems });
      //await sleep(1000)
    }
  } catch (e) {
    // NOTE(review): the original catch body is not present in this copy;
    // logging keeps failures visible instead of silently swallowing them.
    console.error(e);
  }
};
// Run the crawl.
// NOTE(review): the async IIFE opened with `(async () => {` is never closed in
// this copy — a trailing `})();` appears to be missing after this call.
await main();
// Example request: list the objects in a bucket with S3 ListObjectsV2.
// Every 'STRING_VALUE' is a placeholder that must be replaced before use.
// NOTE(review): `s3` is not defined in the visible code — presumably an S3
// client instance created elsewhere; confirm.
const params = {
  Bucket: 'STRING_VALUE', /* required */
  ContinuationToken: 'STRING_VALUE',
  Delimiter: 'STRING_VALUE',
  // Enum values must be string literals; the bare identifiers `url` and
  // `requester` would throw a ReferenceError when this object is evaluated.
  EncodingType: 'url',
  ExpectedBucketOwner: 'STRING_VALUE',
  FetchOwner: true, // was `true || false`, which always evaluates to true
  RequestPayer: 'requester',
  StartAfter: 'STRING_VALUE',
};
s3.listObjectsV2(params, function (err, data) {
  if (err) console.log(err, err.stack); // an error occurred
  else console.log(data); // successful response
});
// Legacy entry point: repeatedly requests `baseURL + page`, parses each
// response as JSON, and keeps looping while the response reports a
// `metadata.count` greater than zero.
// NOTE(review): this copy is incomplete — the body of `if (more)`, the closing
// braces and any use of `output` are missing. As shown, `page` is never
// incremented and nothing is ever added to `items`.
const oldMain = async () => {
// NOTE(review): the base URL is an empty template literal in this copy.
const baseURL = ``
let page = 0;
let more = true;
let items = [];
while (more) {
const resp = await fetch(baseURL + `${page}`)
const data = await resp.json();
// A response with metadata.count of 0 ends the loop.
more = data.metadata.count > 0;
if (more) {
// Pretty-printed JSON of whatever has been collected in `items`.
const output = JSON.stringify(items, null, 2);
* all blogs (<>)
* template URL - ""
* all events content (<>) page=0...N
* template URL ""
* Builders Library -
* whats new, item.directoryId=whats-new
* item.directoryId=security-bulletins
* item.directoryId=aws-products
* item.directoryId=blog-posts
* item.directoryId=media-resources
* item.directoryId=free-tier-products
* use <> for searching
* step fn processing logic - need to figure out how to not download everything each run. way to download only new or changed items since last run
* define work by getting metadata for the number of results via https://...?item.directoryId=${directoryId}&item.locale=en_US&page=0. generate a single SQS message for each unique URL
* use returned `metadata.count` for `size` query string parameter
* &sort_by=item.[dateCreated|dateUpdated]&sort_order=desc
* lambda subscription to SQS. process sequentially. set batch size to >1 initially to see if throttling occurs. can always set batch size to 1
# source: <>
# One-liner to retrieve a list of all AWS products, sorted by product category (requires jq). Worked as of 2022-01-03. Page format tends to change a lot...
# NOTE(review): the URL argument passed to curl below is an empty string in this copy — supply the intended endpoint before running.
curl --silent --compressed \
'' \
| jq -r '.items[].item | .additionalFields.productCategory + " | " + .additionalFields.productName' \
| sort
* <> - good reference project that "Downloads AWS documents, currently whitepapers, from AWS documentation website."
* <> - An AWS CDK application that sends AWS new service and feature release announcements to a Slack channel of your choice
querying various content types in the directory by querystring value
"contentType": "AWS Solution",
"contentType": "Pattern",
"contentType": "Reference Architecture Diagram",
"contentType": "Guide",
"contentType": multi-valued,
"contentType": "Whitepaper",
-- general response shape
"items": [],
"metadata": {
"count": 0,
"totalHits": 299
"fieldTypes": {
"updateDate": "Date",
"imageSrcUrl": "URL",
"featureFlag": "Text",
"description": "LongText",
"sortDate": "Date",
"docTitle": "Text",
"primaryURL": "URL",
"datePublished": "Date",
"publishedText": "Text",
"footerInfoSubtext": "Text",
"subHeadline": "Text",
"enableShare": "Boolean",
"category": "Text",
"contentType": "Text"
--- example item (items[0].item)
"item": {
"id": "whitepapers#image-moderation-chatbot",
"locale": "en_US",
"directoryId": "whitepapers",
"name": "image-moderation-chatbot",
"author": "julicoll",
"createdBy": "julicoll",
"lastUpdatedBy": "julicoll",
"numImpressions": 0,
"score": 0,
"dateCreated": "2019-06-25T17:21:57+0000",
"dateUpdated": "2021-07-29T17:01:46+0000",
"additionalFields": {
"datePublished": "2018-12-05",
"publishedText": "December 2018",
"description": "Shows you how to build a serverless chatbot on AWS that monitors your chat channels and removes images containing suggestive or explicit content.<p><a href=\"\" target=\"_blank\" rel=\"noopener\">Code</a></p><p class=\"m-subheadline\">Media Services | Serverless</p>",
"docTitle": "Image Moderation Chatbot",
"sortDate": "2018-12-05",
"enableShare": "1",
"contentType": "Reference Architecture Diagram",
"primaryURL": ""
"tags": [
"id": "GLOBAL#content-type#reference-arch-diagram",
"locale": "en_US",
"tagNamespaceId": "GLOBAL#content-type",
"name": "Reference Architecture Diagram",
"description": "Reference Architecture Diagram",
"createdBy": "jenbar",
"lastUpdatedBy": "jenbar",
"dateCreated": "2020-04-29T05:19:31+0000",
"dateUpdated": "2022-02-03T03:31:09+0000"
"id": "GLOBAL#methodology#serverless",
"locale": "en_US",
"tagNamespaceId": "GLOBAL#methodology",
"name": "Serverless",
"description": "Serverless",
"createdBy": "jenbar",
"lastUpdatedBy": "jenbar",
"dateCreated": "2020-06-05T07:06:34+0000",
"dateUpdated": "2022-02-03T03:32:11+0000"
"id": "GLOBAL#tech-category#media-services",
"locale": "en_US",
"tagNamespaceId": "GLOBAL#tech-category",
"name": "Media Services",
"description": "Media Services",
"createdBy": "jarfaa",
"lastUpdatedBy": "jenbar",
"dateCreated": "2020-07-17T03:06:10+0000",
"dateUpdated": "2022-02-03T03:35:28+0000"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment