Skip to content

Instantly share code, notes, and snippets.

@Stanback
Created October 15, 2014 18:42
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Stanback/574e6380e5a3722afd87 to your computer and use it in GitHub Desktop.
Save Stanback/574e6380e5a3722afd87 to your computer and use it in GitHub Desktop.
MongoDB Curator
//
// This script is useful for managing time series or log data
// that is stored in MongoDB.
//
// I'm currently using logstash to store system logs in MongoDB, where
// each database name denotes the type of log and each collection name
// reflects the date on which the log entries it contains occurred.
//
// This script expects collection names to be in YYYY.MM.DD format.
//
// To run from the command line, you can put the following in a shell script:
// mongo --quiet --nodb --eval "var database='$1', timeout='$2';" mongo_curator.js
//
// And invoke it like:
// ./mongo_curator.sh nginx-access-logs 30
//
// Inputs are injected as globals via `--eval` before this file is loaded;
// the self-referencing `var` declarations keep any value that was supplied.
var database = database || null;
var timeout = timeout || null;
var host = host || "localhost:27017";

/**
 * Parse a collection name of the exact form YYYY.MM.DD.
 *
 * Only this strict format is accepted so that collections whose names merely
 * happen to be parseable by the engine's Date parser are never treated as
 * dated log collections.
 *
 * @param {string} name - Collection name to inspect.
 * @returns {Date|null} Local-midnight Date for that day, or null when the
 *   name is not a valid YYYY.MM.DD calendar date.
 */
function parseCollectionDate(name) {
  var match = /^(\d{4})\.(\d{2})\.(\d{2})$/.exec(name);
  if (!match) {
    return null;
  }
  var year = parseInt(match[1], 10);
  var month = parseInt(match[2], 10);
  var day = parseInt(match[3], 10);
  var parsed = new Date(year, month - 1, day);
  // Date silently rolls out-of-range fields over (month 13 becomes January of
  // the next year), so confirm the fields survived the round trip.
  if (parsed.getFullYear() !== year ||
      parsed.getMonth() !== month - 1 ||
      parsed.getDate() !== day) {
    return null;
  }
  return parsed;
}

/**
 * Drop every YYYY.MM.DD-named collection whose date is before the cutoff.
 *
 * @param {Object} targetDb - Database handle exposing getCollectionNames()
 *   and getCollection(name).
 * @param {string} databaseName - Name used only for the log output.
 * @param {number} cutoff - Cutoff instant in milliseconds since the epoch.
 * @returns {string[]} Names of the collections that were dropped, in order.
 */
function dropExpiredCollections(targetDb, databaseName, cutoff) {
  var dropped = [];
  var names = targetDb.getCollectionNames();
  for (var i = 0; i < names.length; i++) {
    var name = names[i];
    var collDate = parseCollectionDate(name);
    if (collDate !== null && collDate.getTime() < cutoff) {
      print("Dropping: " + databaseName + "/" + name);
      targetDb.getCollection(name).drop();
      dropped.push(name);
    }
  }
  return dropped;
}

/**
 * Script entry point: validate the inputs, connect, and drop expired logs.
 * Exits with status 1 on any validation or connection failure so a wrapping
 * shell script can detect the error.
 */
function runCurator() {
  if (!database) {
    print("Please specify the database to clean out logs from");
    quit(1);
    return;
  }

  // `timeout` normally arrives as a string from --eval; convert it once and
  // reject anything that is not a positive number of days.
  var days = Number(timeout);
  if (!isFinite(days) || days <= 0) {
    print("Please specify the max time, in days, that logs should be kept for");
    quit(1);
    return;
  }

  print("Removing logs from " + database + " that are older than " + timeout + " days");

  var uri = host + '/' + database;
  var db;
  try {
    db = connect(uri);
  } catch (err) {
    // connect() reports failure by throwing, so surface the cause here.
    print("Unable to connect to the database: " + uri + " (" + err + ")");
    quit(1);
    return;
  }
  if (!db) {
    print("Unable to connect to the database: " + uri);
    quit(1);
    return;
  }

  // setDate() returns the adjusted timestamp in milliseconds.
  var now = new Date();
  var cutoff = now.setDate(now.getDate() - days);

  dropExpiredCollections(db, database, cutoff);
}

runCurator();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment