
Mongo Normal Collection Usage w/ Bucket ID

BEWARE: You can't create a unique index on a time-series collection. If the usage records you write MUST be unique, deduplication has to be handled in the aggregation itself.

See the stackoverflow discussion of this mongodb timeseries issue.

Which raises the question: what is the point of using a time-series collection for normal queries if duplicates can slip in?
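
For contrast, here is roughly what dedupe-in-the-aggregation would look like if you stayed on a time-series collection. This is a sketch: the usage_timeseries collection name is hypothetical, and the field names mirror the documents used later in this gist.

db.getCollection("usage_timeseries").aggregate([
    { $match: { "meta.bucket_id": "ABCD" } },
    // collapse duplicate writes: keep one document per idempotency_key
    { $group: { _id: "$idempotency_key", doc: { $first: "$$ROOT" } } },
    { $replaceRoot: { newRoot: "$doc" } },
    // then aggregate as usual
    { $group: { _id: "$meta.org_id", total: { $sum: "$count" } } }
])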

To capture all usage records, even ones that arrive with a timestamp in the deep past, we bucket data as it comes in. A good bucket_id is an hour bucket, e.g. 2023-11-30T15:00:00+00:00.
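
A minimal mongosh/JavaScript sketch of deriving such a bucket_id (the helper name hourBucketId is ours; any fixed ISO-8601 rendering works as long as every writer uses the same one):

// truncate any timestamp to its UTC hour bucket
function hourBucketId(ts) {
    const d = new Date(ts);    // accepts a Date or an ISO string
    d.setUTCMinutes(0, 0, 0);  // zero minutes, seconds, and milliseconds
    return d.toISOString();
}

hourBucketId("2023-11-30T15:42:17Z")  // "2023-11-30T15:00:00.000Z"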

Accounting for clock skew (5 minutes or so), we may get data that goes into one bucket on one server and into a neighboring bucket on another server. This is OK, because when we aggregate data we only do it for buckets 2 or more hours in the past. That lets the churn happen in the current two buckets.
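
A sketch of restricting aggregation to those settled buckets, assuming bucket_id values were produced by the hourBucketId helper above so that lexicographic string comparison matches chronological order (the sample data later in this gist uses a placeholder "ABCD" instead):

const cutoff = new Date();
cutoff.setUTCHours(cutoff.getUTCHours() - 2);  // exclude the current two hour buckets
db.getCollection("usage_normal").aggregate([
    { $match: { "meta.bucket_id": { $lt: hourBucketId(cutoff) } } },
    { $group: { _id: "$meta.org_id", total: { $sum: "$count" } } }
])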

An alternative is a fast cache like Redis that stores the current bucket for all the servers. If you trust your clocks, then just computing the bucket locally is more efficient.
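
A minimal Node.js sketch of that variant, using ioredis (the key name usage:current_bucket and the hourly publisher job are our assumptions):

const Redis = require("ioredis");
const redis = new Redis();

// one scheduled job advances the shared bucket each hour...
async function publishCurrentBucket() {
    await redis.set("usage:current_bucket", hourBucketId(new Date()));
}

// ...and every server reads it instead of trusting its own clock
async function currentBucketId() {
    return redis.get("usage:current_bucket");
}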

golang-migrate

Create Collection

Up

[
  {
      "create": "usage_normal"
  }
]

Down

[
  {
      "drop": "usage_normal"
  }
]

Indexes

Up

[{
  "createIndexes": "usage_normal",
  "indexes": [
    {
      "key": {
        "idempotency_key": 1
      },
      "name": "unique_idempotency_key",
      "unique": true,
      "background": true,
      "collation": {
        "locale": "en_US",
        "strength": 2
      }
    },
    {
      "key": {
        "meta.org_id": 1,
        "meta.bucket_id": 1,
        "date": 1
      },
      "name": "meta.org_id_1_meta.bucket_id_1_date_1",
      "unique": false,
      "background": true,
      "collation": {
        "locale": "en_US",
        "strength": 2
      }
    }
  ]
}]

Down

[
  {
    "dropIndexes": "usage_normal",
    "index": "meta.org_id_1_meta.bucket_id_1_date_1"
  },
  {
    "dropIndexes": "usage_normal",
    "index": "idempotency_key"
  }
]
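
These JSON arrays are the format golang-migrate's mongodb driver runs as database commands. Assuming the files live under ./migrations with names like 000001_create_usage_normal.up.json (the path and connection string here are placeholders), applying them looks like:

migrate -source file://./migrations -database "mongodb://localhost:27017/usage_db" up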

Insert Data

db.getCollection("usage_normal").insert([
{
    date: ISODate("2020-01-03T05:00:00.000Z"),
    count: 1,
    idempotency_key :"RAT",
    meta:{org_id:"ORG1234a",bucket_id:"ABCD"},
},
{
    date: ISODate("2020-01-03T05:00:00.000Z"),
    count: 5,
    idempotency_key :"abcd",
    meta:{org_id:"ORG1234a",bucket_id:"ABCD"},
},
{
    date: ISODate("2020-01-03T05:00:01.000Z"),
    count: 6,
    idempotency_key :"1234",
    meta:{org_id:"ORG1234a",bucket_id:"ABCD"},
}])
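
Because unique_idempotency_key was created with a strength-2 (case-insensitive) collation, a retried write that differs only in case is still rejected. For example:

db.getCollection("usage_normal").insertOne({
    date: ISODate("2020-01-03T06:00:00.000Z"),
    count: 2,
    idempotency_key: "rat",  // collides with "RAT" under the case-insensitive unique index
    meta: { org_id: "ORG1234a", bucket_id: "ABCD" }
})
// fails with an E11000 duplicate key error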

Aggregate

Find all bucket_id(s)

const PAGE_SIZE = 1;
const PAGE_NUMBER = 1;

db.getCollection("usage_normal").aggregate([
    { $group: { _id: "$meta.bucket_id" } },
    { $project: { _id: 0, bucket_id: "$_id" } },
    { $skip: (PAGE_NUMBER - 1) * PAGE_SIZE },
    { $limit: PAGE_SIZE }
])
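
With the three sample documents above, everything shares one bucket, so the only page returned is { bucket_id: "ABCD" }.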

Find all the orgs for the given bucket_id

All

db.getCollection("usage_normal").aggregate([
    { $match: { "meta.bucket_id": "ABCD" } },
    { $group: { _id: "$meta.org_id" } },
    { $project: { _id: 0, org_id: "$_id" } }
])

Pagination

const PAGE_SIZE = 1;
const PAGE_NUMBER = 2;

db.getCollection("usage_normal").aggregate([
    { $match: { "meta.bucket_id": "ABCD" } },
    { $group: { _id: "$meta.org_id" } },
    { $project: { _id: 0, org_id: "$_id" } },
    { $skip: (PAGE_NUMBER - 1) * PAGE_SIZE },
    { $limit: PAGE_SIZE }
])
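
With only one org in the sample data, page 2 comes back empty.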

Find the sum of counts for a given org_id/bucket_id pair

db.getCollection("usage").aggregate([
{
    $match: {
        "meta.org_id": "ORG1234a",
        "meta.bucket_id": "ABCD"
    }
},
{
    $group: 
    {
        _id: "$meta.org_id",
        total: {
            $sum: "$count"
        }
    }
}
])
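
With the sample data above this returns { _id: "ORG1234a", total: 12 } (1 + 5 + 6).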