var chinaAirports = db.airports.find({countryName:"China"}).map(x=>x._id) | |
var USAirports = db.airports.find({countryName:"United States"}).map(x=>x._id) | |
var mapFunction = function() { | |
var monthPtr = new Date(this.effectiveDate); | |
monthPtr.setDate(1); | |
while(monthPtr <= this.discontinuedDate) { | |
var dayPtr = new Date(monthPtr); | |
var nextMontStart = new Date(monthPtr); | |
nextMontStart.setMonth(monthPtr.getMonth() + 1); | |
var activeDays = 0; | |
// Assuming day1 is monday | |
var dayBitArray = [this.day7, this.day1, this.day2, this.day3, this.day4, this.day5, this.day6]; | |
while(dayPtr < nextMontStart){ | |
if(dayPtr >= this.effectiveDate && dayPtr <= this.discontinuedDate) { | |
if(dayBitArray[dayPtr.getDay()]){ | |
activeDays++; | |
} | |
} | |
dayPtr.setDate(dayPtr.getDate() + 1); | |
} | |
emit(monthPtr.getFullYear() + '-' + monthPtr.getMonth() + "," + this.departureAirport._id + "," + this.arrivalAirport._id, { | |
origin: this.departureAirport._id, | |
destination: this.arrivalAirport._id, | |
seats: this.totalSeats * activeDays, | |
monthStartDate: new Date(monthPtr) | |
}); | |
monthPtr = nextMontStart; | |
} | |
} | |
// combine seats with the same origin and destination | |
var reduceFunction = function(yearMonthOriginDest, seatCounts) { | |
return seatCounts.reduce(function(sofar, cur){ | |
if(sofar){ | |
sofar.seats += cur.seats; | |
return sofar; | |
} else { | |
return Object.create(cur); | |
} | |
}); | |
}; | |
db.legs.mapReduce( | |
mapFunction, | |
reduceFunction, | |
{ | |
// Ideally this would run on the full collection but the query is taking | |
// too long, so I'm limiting it to airports of interest. | |
query: { | |
"departureAirport._id": { $in: chinaAirports }, | |
"arrivalAirport._id": { $in: USAirports } | |
}, | |
out: { replace: "groupedRoutes" } | |
} | |
); | |
function run_aggs(query, airport=""){ | |
print(JSON.stringify(query)); | |
print("Seats by month"); | |
print("date, airport, seats"); | |
db.groupedRoutes.aggregate([ | |
{ | |
$match: query | |
}, { | |
$match: { | |
$and: [ | |
{"value.monthStartDate": {$gte: new Date(2016, 0, 0)}}, | |
{"value.monthStartDate": {$lte: new Date(2018, 0, 0)}} | |
] | |
} | |
}, { | |
$group: { | |
_id: { | |
$dateToString: { | |
format: "%Y-%m", | |
date: "$value.monthStartDate" | |
} | |
}, | |
seats: { $sum: "$value.seats" } | |
} | |
}, { | |
$sort: { "_id": -1 } | |
} | |
]).forEach(x=>print([x._id, airport, x.seats].join(","))); | |
print("Seats by quarter"); | |
print("date, airport, seats"); | |
db.groupedRoutes.aggregate([ | |
{ | |
$match: query | |
}, { | |
$match: { | |
$and: [ | |
{"value.monthStartDate": {$gte: new Date(2016, 0, 0)}}, | |
{"value.monthStartDate": {$lte: new Date(2018, 0, 0)}} | |
] | |
} | |
}, { | |
$project: { | |
seats: "$value.seats", | |
year: { | |
$substr: [{$year: "$value.monthStartDate" }, 0, -1] | |
}, | |
quarter: { | |
$cond: [{ | |
$lte: [{ | |
$month: "$value.monthStartDate" | |
}, 3] | |
}, | |
"Q1", { | |
$cond: [{ | |
$lte: [{ | |
$month: "$value.monthStartDate" | |
}, 6] | |
}, | |
"Q2", { | |
$cond: [{ | |
$lte: [{ | |
$month: "$value.monthStartDate" | |
}, 9] | |
}, | |
"Q3", | |
"Q4" | |
] | |
} | |
] | |
} | |
] | |
} | |
} | |
}, { | |
$group: { | |
_id: { | |
$concat: ["$year", "$quarter"] | |
}, | |
seats: { $sum: "$seats" } | |
} | |
}, { | |
$sort: { "_id": -1 } | |
} | |
]).forEach(x=>print([x._id, airport, x.seats].join(","))); | |
} | |
var chinaUSQuery = { | |
"value.origin": { $in: chinaAirports }, | |
"value.destination": { $in: USAirports } | |
}; | |
run_aggs(chinaUSQuery); | |
var topPOEs = db.groupedRoutes.aggregate([{ | |
$group: { | |
_id: "$value.destination", | |
seats: { | |
$sum: "$value.seats" | |
} | |
} | |
}, { | |
$sort: { | |
"seats": -1 | |
} | |
}, { | |
$limit: 3 | |
}]); | |
topPOEs.forEach(function(poe){ | |
run_aggs({ | |
"value.origin": { $in: chinaAirports }, | |
"value.destination": poe._id | |
}, poe._id); | |
}); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment