Skip to content

Instantly share code, notes, and snippets.

@nathanathan
Last active May 5, 2017 17:14
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save nathanathan/678e8515dcc1f8b8982e5935f9c7f471 to your computer and use it in GitHub Desktop.
Save nathanathan/678e8515dcc1f8b8982e5935f9c7f471 to your computer and use it in GitHub Desktop.
var chinaAirports = db.airports.find({countryName:"China"}).map(x=>x._id)
var USAirports = db.airports.find({countryName:"United States"}).map(x=>x._id)
var mapFunction = function() {
var monthPtr = new Date(this.effectiveDate);
monthPtr.setDate(1);
while(monthPtr <= this.discontinuedDate) {
var dayPtr = new Date(monthPtr);
var nextMontStart = new Date(monthPtr);
nextMontStart.setMonth(monthPtr.getMonth() + 1);
var activeDays = 0;
// Assuming day1 is monday
var dayBitArray = [this.day7, this.day1, this.day2, this.day3, this.day4, this.day5, this.day6];
while(dayPtr < nextMontStart){
if(dayPtr >= this.effectiveDate && dayPtr <= this.discontinuedDate) {
if(dayBitArray[dayPtr.getDay()]){
activeDays++;
}
}
dayPtr.setDate(dayPtr.getDate() + 1);
}
emit(monthPtr.getFullYear() + '-' + monthPtr.getMonth() + "," + this.departureAirport._id + "," + this.arrivalAirport._id, {
origin: this.departureAirport._id,
destination: this.arrivalAirport._id,
seats: this.totalSeats * activeDays,
monthStartDate: new Date(monthPtr)
});
monthPtr = nextMontStart;
}
}
// combine seats with the same origin and destination
var reduceFunction = function(yearMonthOriginDest, seatCounts) {
return seatCounts.reduce(function(sofar, cur){
if(sofar){
sofar.seats += cur.seats;
return sofar;
} else {
return Object.create(cur);
}
});
};
db.legs.mapReduce(
mapFunction,
reduceFunction,
{
// Ideally this would run on the full collection but the query is taking
// too long, so I'm limiting it to airports of interest.
query: {
"departureAirport._id": { $in: chinaAirports },
"arrivalAirport._id": { $in: USAirports }
},
out: { replace: "groupedRoutes" }
}
);
function run_aggs(query, airport=""){
print(JSON.stringify(query));
print("Seats by month");
print("date, airport, seats");
db.groupedRoutes.aggregate([
{
$match: query
}, {
$match: {
$and: [
{"value.monthStartDate": {$gte: new Date(2016, 0, 0)}},
{"value.monthStartDate": {$lte: new Date(2018, 0, 0)}}
]
}
}, {
$group: {
_id: {
$dateToString: {
format: "%Y-%m",
date: "$value.monthStartDate"
}
},
seats: { $sum: "$value.seats" }
}
}, {
$sort: { "_id": -1 }
}
]).forEach(x=>print([x._id, airport, x.seats].join(",")));
print("Seats by quarter");
print("date, airport, seats");
db.groupedRoutes.aggregate([
{
$match: query
}, {
$match: {
$and: [
{"value.monthStartDate": {$gte: new Date(2016, 0, 0)}},
{"value.monthStartDate": {$lte: new Date(2018, 0, 0)}}
]
}
}, {
$project: {
seats: "$value.seats",
year: {
$substr: [{$year: "$value.monthStartDate" }, 0, -1]
},
quarter: {
$cond: [{
$lte: [{
$month: "$value.monthStartDate"
}, 3]
},
"Q1", {
$cond: [{
$lte: [{
$month: "$value.monthStartDate"
}, 6]
},
"Q2", {
$cond: [{
$lte: [{
$month: "$value.monthStartDate"
}, 9]
},
"Q3",
"Q4"
]
}
]
}
]
}
}
}, {
$group: {
_id: {
$concat: ["$year", "$quarter"]
},
seats: { $sum: "$seats" }
}
}, {
$sort: { "_id": -1 }
}
]).forEach(x=>print([x._id, airport, x.seats].join(",")));
}
var chinaUSQuery = {
"value.origin": { $in: chinaAirports },
"value.destination": { $in: USAirports }
};
run_aggs(chinaUSQuery);
var topPOEs = db.groupedRoutes.aggregate([{
$group: {
_id: "$value.destination",
seats: {
$sum: "$value.seats"
}
}
}, {
$sort: {
"seats": -1
}
}, {
$limit: 3
}]);
topPOEs.forEach(function(poe){
run_aggs({
"value.origin": { $in: chinaAirports },
"value.destination": poe._id
}, poe._id);
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment