BEWARE: You can't create a unique index on a time-series collection. If you write usage records that MUST be unique, deduplication has to be handled in the aggregation instead (or, as below, by writing to a normal collection that does have a unique index).
(See the Stack Overflow discussion of this MongoDB time-series issue.)
Which raises the question: what's the point of using a time-series DB for normal queries if duplicates can sneak in?
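A minimal sketch of both points, using a hypothetical time-series collection named usage_ts (the name is an assumption, not from these notes): the unique index is rejected at creation time, so duplicates can only be collapsed inside the aggregation itself.

// Hypothetical time-series collection for illustration.
db.createCollection("usage_ts", {
  timeseries: { timeField: "date", metaField: "meta", granularity: "hours" }
})

// This fails: unique indexes aren't supported on time-series collections.
db.getCollection("usage_ts").createIndex({ idempotency_key: 1 }, { unique: true })

// So duplicates have to be collapsed in the aggregation, e.g. keep one doc per key.
// Without a $sort, which doc $first keeps is arbitrary; fine when duplicates are identical.
db.getCollection("usage_ts").aggregate([
  { $group: { _id: "$idempotency_key", doc: { $first: "$$ROOT" } } },
  { $replaceRoot: { newRoot: "$doc" } }
])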
To capture all usage records, even ones that arrive with a timestamp in the deep past, we bucket data as it comes in.
A good bucket_id is the start of the hour, e.g. 2023-11-30T15:00:00+00:00.
Accounting for clock skew (5 minutes or so), we may get data that lands in one bucket on one server and in a different bucket on another. This is OK, because when we aggregate we only touch buckets 2 or more hours in the past, which confines the churn to the current 2 buckets.
An alternative is a fast cache like Redis that stores the current bucket for all the servers, but if you trust your clocks, just computing the bucket locally is more efficient.
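A minimal sketch of computing the bucket locally in shell JavaScript (the helper names hourBucketId and isClosedBucket are mine, not from these notes):

// Truncate a timestamp to the top of its hour to get the bucket_id.
function hourBucketId(date) {
  const d = new Date(date);
  d.setUTCMinutes(0, 0, 0); // zero out minutes, seconds, ms
  return d.toISOString().replace(".000Z", "+00:00"); // e.g. "2023-11-30T15:00:00+00:00"
}

// The current bucket and the previous one may still be receiving skewed writes;
// anything 2 or more hours old is safe to aggregate.
function isClosedBucket(bucketId, now = new Date()) {
  return bucketId <= hourBucketId(new Date(now.getTime() - 2 * 60 * 60 * 1000));
}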
Create Collection
[
  { "create": "usage_normal" }
]
Teardown:
[
  { "drop": "usage_normal" }
]
Indexes
The unique index on idempotency_key is exactly the constraint a time-series collection can't give us. Note that the strength-2 collation makes uniqueness case-insensitive ("RAT" and "rat" would collide), and that background is ignored since MongoDB 4.2, which uses an optimized build process for all index builds.
[
  {
    "createIndexes": "usage_normal",
    "indexes": [
      {
        "key": { "idempotency_key": 1 },
        "name": "unique_idempotency_key",
        "unique": true,
        "background": true,
        "collation": { "locale": "en_US", "strength": 2 }
      },
      {
        "key": { "meta.org_id": 1, "meta.bucket_id": 1, "date": 1 },
        "name": "meta.org_id_1_meta.bucket_id_1_date_1",
        "unique": false,
        "background": true,
        "collation": { "locale": "en_US", "strength": 2 }
      }
    ]
  }
]
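A quick sanity check that the collation behaves as described; the idempotency_key values here are hypothetical, just for the demo. The unique index enforces uniqueness under its own collation, so the second insert is rejected.

db.getCollection("usage_normal").insertOne({
  date: ISODate("2020-01-03T06:00:00.000Z"),
  count: 1,
  idempotency_key: "CaseTest", // hypothetical key, not from the sample data
  meta: { org_id: "ORG1234a", bucket_id: "ABCD" }
})
try {
  db.getCollection("usage_normal").insertOne({
    date: ISODate("2020-01-03T06:00:00.000Z"),
    count: 1,
    idempotency_key: "casetest", // differs only in case → duplicate under strength-2 collation
    meta: { org_id: "ORG1234a", bucket_id: "ABCD" }
  })
} catch (e) {
  print("rejected as duplicate:", e.message) // E11000 duplicate key error
}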
Teardown:
[
  {
    "dropIndexes": "usage_normal",
    "index": "meta.org_id_1_meta.bucket_id_1_date_1"
  },
  {
    "dropIndexes": "usage_normal",
    "index": "unique_idempotency_key"
  }
]
db.getCollection("usage_normal").insert([
{
date: ISODate("2020-01-03T05:00:00.000Z"),
count: 1,
idempotency_key :"RAT",
meta:{org_id:"ORG1234a",bucket_id:"ABCD"},
},
{
date: ISODate("2020-01-03T05:00:00.000Z"),
count: 5,
idempotency_key :"abcd",
meta:{org_id:"ORG1234a",bucket_id:"ABCD"},
},
{
date: ISODate("2020-01-03T05:00:01.000Z"),
count: 6,
idempotency_key :"1234",
meta:{org_id:"ORG1234a",bucket_id:"ABCD"},
}])
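Replaying the ingest can now be made idempotent: with ordered: false, insertMany keeps going past duplicate-key errors, so already-seen records are skipped while new ones land. A sketch (the "new-key-1" record is hypothetical):

try {
  db.getCollection("usage_normal").insertMany(
    [
      { date: ISODate("2020-01-03T05:00:00.000Z"), count: 1, idempotency_key: "RAT",
        meta: { org_id: "ORG1234a", bucket_id: "ABCD" } }, // duplicate → skipped
      { date: ISODate("2020-01-03T05:02:00.000Z"), count: 2, idempotency_key: "new-key-1",
        meta: { org_id: "ORG1234a", bucket_id: "ABCD" } }  // new → inserted
    ],
    { ordered: false } // don't stop at the first duplicate-key error
  )
} catch (e) {
  // E11000 duplicate-key errors are expected on replays; anything else should be investigated.
  print(e)
}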
List Buckets
const PAGE_SIZE = 1;
const PAGE_NUMBER = 1;
db.getCollection("usage_normal").aggregate([
  { $group: { _id: "$meta.bucket_id" } },
  { $sort: { _id: 1 } }, // $group output order isn't deterministic; sort before paging
  { $project: { _id: 0, bucket_id: "$_id" } },
  { $skip: (PAGE_NUMBER - 1) * PAGE_SIZE },
  { $limit: PAGE_SIZE }
])
All Orgs in a Bucket
db.getCollection("usage_normal").aggregate([
{ $match: { "meta.bucket_id": "ABCD" } },
{ $group: { _id: "$meta.org_id" } },
{ $project: { _id: 0, org_id: "$_id" } }
])
Pagination
const PAGE_SIZE = 1;
const PAGE_NUMBER = 2;
db.getCollection("usage_normal").aggregate([
  { $match: { "meta.bucket_id": "ABCD" } },
  { $group: { _id: "$meta.org_id" } },
  { $sort: { _id: 1 } }, // make page boundaries stable
  { $project: { _id: 0, org_id: "$_id" } },
  { $skip: (PAGE_NUMBER - 1) * PAGE_SIZE },
  { $limit: PAGE_SIZE }
])
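One caveat with $skip/$limit: if new orgs show up between pages, page boundaries shift. A keyset-style alternative (the lastSeenOrgId variable is mine, not from these notes) resumes from the last _id the previous page returned instead of counting rows:

const lastSeenOrgId = "ORG1234a"; // last org_id from the previous page (assumption)
db.getCollection("usage_normal").aggregate([
  { $match: { "meta.bucket_id": "ABCD" } },
  { $group: { _id: "$meta.org_id" } },
  { $sort: { _id: 1 } },
  { $match: { _id: { $gt: lastSeenOrgId } } }, // resume after the previous page
  { $limit: PAGE_SIZE },
  { $project: { _id: 0, org_id: "$_id" } }
])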
db.getCollection("usage").aggregate([
{
$match: {
"meta.org_id": "ORG1234a",
"meta.bucket_id": "ABCD"
}
},
{
$group:
{
_id: "$meta.org_id",
total: {
$sum: "$count"
}
}
}
])
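To honor the "only aggregate buckets 2 or more hours in the past" rule from above, the $match can exclude the two churning buckets. This sketch assumes bucket_id is the ISO hour string (so lexicographic order is chronological) and reuses the hourBucketId helper sketched earlier:

const cutoff = hourBucketId(new Date(Date.now() - 2 * 60 * 60 * 1000));
db.getCollection("usage_normal").aggregate([
  // Only closed buckets; the current 2 buckets may still be receiving skewed writes.
  { $match: { "meta.org_id": "ORG1234a", "meta.bucket_id": { $lte: cutoff } } },
  { $group: { _id: "$meta.org_id", total: { $sum: "$count" } } }
])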