@jonathangaldino
Created February 26, 2024 12:48
Chunk by chunk, from a web address to S3
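The script below streams each image from its source URL directly into S3, chunk by chunk, without buffering the whole file in memory: axios fetches the image with `responseType: 'stream'` and the resulting readable stream is handed straight to `s3.upload` as the object body. Documents are read from MongoDB in small batches so only a couple of transfers are in flight at a time. The connection string, database, collection, bucket name, and the `oldImageURL` field are placeholders; swap in your own values and S3 configuration.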
const axios = require('axios');
const { MongoClient } = require('mongodb');
const { S3 } = require('aws-sdk');
// Download an image and stream it straight to S3
async function downloadAndUploadImage(imageURL, s3, s3BucketName) {
  try {
    // Request the image as a stream so it is never fully buffered in memory
    const response = await axios.get(imageURL, { responseType: 'stream' });

    const s3Params = {
      Bucket: s3BucketName,
      Key: 'images/' + imageURL.substring(imageURL.lastIndexOf('/') + 1),
      Body: response.data
    };

    await s3.upload(s3Params).promise();
    console.log(`Uploaded image from ${imageURL} to S3`);
  } catch (error) {
    console.error(`Error downloading/uploading image from ${imageURL}: ${error.message}`);
  }
}
// Process a batch of documents, downloading/uploading their images
async function processDocuments(docs, s3, s3BucketName) {
  const promises = [];

  for (const doc of docs) {
    const imageURL = doc.oldImageURL;
    promises.push(downloadAndUploadImage(imageURL, s3, s3BucketName));

    // Limit concurrency: wait once two uploads are in flight
    if (promises.length >= 2) {
      await Promise.all(promises);
      promises.length = 0;
    }
  }

  // Wait for any remaining uploads
  await Promise.all(promises);
}
// Main function: connect to MongoDB, fetch documents, and start processing
async function main() {
  const mongoClient = new MongoClient('mongodb://localhost:27017', { useNewUrlParser: true, useUnifiedTopology: true });
  const s3 = new S3(/* S3 configuration */);
  const s3BucketName = 'your-s3-bucket-name';

  try {
    await mongoClient.connect();
    const db = mongoClient.db('your-database-name');
    const collection = db.collection('your-collection-name');

    const cursor = collection.find(/* your query to filter documents */);

    // Fetch and process documents in batches of 2
    while (await cursor.hasNext()) {
      const docs = [];
      while (docs.length < 2 && (await cursor.hasNext())) {
        docs.push(await cursor.next());
      }
      await processDocuments(docs, s3, s3BucketName);
    }

    console.log('All documents processed successfully.');
  } catch (error) {
    console.error('Error:', error);
  } finally {
    await mongoClient.close();
  }
}
main();
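
The gist uses the v2 `aws-sdk` client. If you are on the v3 SDK, roughly the same streaming upload can be done with `@aws-sdk/lib-storage`. This is a minimal sketch of an equivalent helper, assuming the same bucket/key naming as above; it is not part of the original gist.

// Sketch: the same download-and-upload helper with AWS SDK v3 (assumption, not from the original gist)
const { S3Client } = require('@aws-sdk/client-s3');
const { Upload } = require('@aws-sdk/lib-storage');
const axios = require('axios');

async function downloadAndUploadImageV3(imageURL, bucketName) {
  const s3Client = new S3Client({ /* region, credentials, ... */ });

  // Stream the image body; lib-storage's Upload handles the chunked/multipart transfer
  const response = await axios.get(imageURL, { responseType: 'stream' });

  const upload = new Upload({
    client: s3Client,
    params: {
      Bucket: bucketName,
      Key: 'images/' + imageURL.substring(imageURL.lastIndexOf('/') + 1),
      Body: response.data,
    },
  });

  await upload.done();
  console.log(`Uploaded image from ${imageURL} to S3`);
}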