@v-stickykeys
Created April 8, 2022 15:26
const AWS = require('aws-sdk')
const s3 = new AWS.S3()
// Migration within a single bucket:
//   SOURCE_BUCKET=... node s3-shard-migration.js
// Migration from a source bucket to a separate target bucket:
//   SOURCE_BUCKET=... TARGET_BUCKET=... node s3-shard-migration.js
// TODO consider retry/backoff/connect config again
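// Credentials and region are resolved by the AWS SDK's default provider chain
// (e.g. AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY / AWS_REGION env vars, or a shared
// credentials file). Example invocation, with placeholder bucket names:
//   AWS_REGION=us-east-1 SOURCE_BUCKET=old-blocks TARGET_BUCKET=new-blocks node s3-shard-migration.js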
const SOURCE_BUCKET = process.env.SOURCE_BUCKET
const TARGET_BUCKET = process.env.TARGET_BUCKET || SOURCE_BUCKET
// Skip a key if it already exists at the sharded location in the target bucket
const SKIP_IF_EXIST = true
// Page size for listObjectsV2 (S3 returns at most 1000 keys per request)
const KEYS_PER_ROUND = 1000
// Map a flat block key to the "next-to-last/2" sharded layout:
// the shard is the two characters preceding the last character of the key
const mapKey = (key) => {
  const offset = key.length - 3
  const shard = key.slice(offset, offset + 2)
  const id = key.split('/').pop()
  return `ipfs/blocks/${shard}/${id}`
}
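// Illustration with a hypothetical flat key (the block id here is made up;
// the shard is the two characters before the id's final character):
//   mapKey('ipfs/blocks/CIQABCDEFG') === 'ipfs/blocks/EF/CIQABCDEFG'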
// Copy an object from SOURCE_BUCKET/key to TARGET_BUCKET/newKey
const copy = async (key, newKey) => {
  const copyParams = {
    Bucket: TARGET_BUCKET,
    CopySource: `${SOURCE_BUCKET}/${key}`,
    Key: newKey,
    MetadataDirective: 'COPY',
    ACL: 'authenticated-read'
  }
  return new Promise((resolve, reject) => {
    s3.copyObject(copyParams, function (err, data) {
      if (err) {
        console.log(err, err.stack)
        console.log(`ERROR: ${SOURCE_BUCKET}/${key} -> ${TARGET_BUCKET}/${newKey}`)
        // Settle the promise so a single failed copy does not hang the whole batch
        resolve()
      } else {
        console.log(`COPY: ${SOURCE_BUCKET}/${key} -> ${TARGET_BUCKET}/${newKey}`)
        resolve(data)
      }
    })
  })
}
// Determine whether the key already exists in the target bucket
const exist = async (key) => {
  const headParams = {
    Bucket: TARGET_BUCKET,
    Key: key
  }
  return new Promise((resolve) => {
    s3.headObject(headParams, function (err) {
      // A headObject error (404/403) is treated as "does not exist"
      resolve(!err)
    })
  })
}
// Migrate an array of keys in parallel
const migrate = async (keys) => Promise.all(
  keys.map(async key => {
    // Skip "folder" placeholder keys (S3 lists them alongside real objects)
    if (key.slice(-1) === '/') return
    const newKey = mapKey(key)
    console.log(`ATTEMPT: ${key} -> ${newKey}`)
    if (SKIP_IF_EXIST && await exist(newKey)) {
      console.log(`EXIST: ${SOURCE_BUCKET}/${key} -> ${TARGET_BUCKET}/${newKey}`)
      return
    }
    return copy(key, newKey)
  })
)
// Keep only flat (unsharded) block keys, i.e. 'ipfs/blocks/<id>' with exactly three
// path segments; already-sharded keys and the SHARDING/_README metadata files are skipped
const filterKeys = (key) => {
  if (key.includes('SHARDING')) return false
  if (key.includes('README')) return false
  return key.split('/').length === 3
}
// Get one page of existing keys from the source bucket
const getKeys = async (continuationToken) => {
  const params = {
    Bucket: SOURCE_BUCKET,
    MaxKeys: KEYS_PER_ROUND,
    Prefix: 'ipfs/blocks/'
  }
  if (continuationToken) params.ContinuationToken = continuationToken
  const listPromise = new Promise((resolve, reject) => {
    s3.listObjectsV2(params, function (err, data) {
      if (err) {
        console.log(err, err.stack)
        reject(err)
      } else {
        resolve(data)
      }
    })
  })
  const list = await listPromise
  const res = {}
  res.keys = list.Contents.map(val => val.Key).filter(key => filterKeys(key))
  if (list.NextContinuationToken) res.ContinuationToken = list.NextContinuationToken
  return res
}
// List keys and migrate them page by page: at most 1000 keys come back per call,
// so loop on the continuation token while more keys are available
const run = async () => {
  let res = await getKeys()
  let continuationToken = res.ContinuationToken
  await migrate(res.keys)
  while (continuationToken) {
    res = await getKeys(continuationToken)
    continuationToken = res.ContinuationToken
    await migrate(res.keys)
    // Brief pause between pages to ease the request rate on S3
    await new Promise(resolve => setTimeout(resolve, 1000))
  }
  console.log('Migration Complete')
}
run()