Skip to content

Instantly share code, notes, and snippets.

@eduardinni
Created January 10, 2020 00:58
Show Gist options
  • Save eduardinni/c0742a896fbfc34c7393d02c43ea351a to your computer and use it in GitHub Desktop.
Save eduardinni/c0742a896fbfc34c7393d02c43ea351a to your computer and use it in GitHub Desktop.
MapReduce function

Calculate most used hashtags (JavaScript)

AWS Lambda function that calculates the most-used hashtags for an Instagram account. The account's posts are read from a DynamoDB table (`IGMedia`), and the five most-used hashtags are written back to DynamoDB (`IGStatsGlobal`).

var doc = require('dynamodb-doc');
var dynamo = new doc.DynamoDB();
var mapreduce = require('mapred')(1);

// POST /igCalcPopularTags
// params:
// +ig_user_id:   instagram user id
// +user_id:      app devise user id
exports.igCalcPopularTags = function(event, context) {
  if(event.ig_user_id !== null) {
    var ig_user_id = parseInt(event.ig_user_id);

    // DynamoDB params
    var dynParamsQ1 = {};
    dynParamsQ1.TableName = "IGMedia";
    dynParamsQ1.IndexName = "IndexUserYear";
    dynParamsQ1.KeyConditions = [dynamo.Condition("ig_user_id", "EQ", ig_user_id)];

    dynamo.query(dynParamsQ1, function(err, data) {
      if(err) {
        console.log(err, err.stack);
        context.fail('DynamoDB query failed');
      }
      else {
        var tagsData = [];
        for (var i = 0, len = data.Count; i < len; i++) {
          if(data.Items[i].tags != undefined) {
            tagsData.push([data.Items[i].media_id, data.Items[i].tags]);
          }
        }

        mapreduce(tagsData,
          // map
          function(key, value) {
            var list = [], aux = {};
            value = value.split(',');
            value.forEach(function(w){
              aux[w] = (aux[w] || 0) + 1;
            });
            for(var k in aux){
              list.push([k, aux[k]]);
            }
            return list;
          },
          // reduce
          function(key, values) {
            var sum = 0;
            values.forEach(function(e){
              sum += e;
            });
            return sum;
          },
          function(MRresult) {
            // Sort results DESC
            var tagsSortable = Object.keys(MRresult).sort(function(a, b) { return -(MRresult[a] - MRresult[b]) });
            // Select only top 5
            tagsSortable = tagsSortable.slice(0,5);

            // DynamoDB params
            var dynParams = {};
            dynParams.TableName = "IGStatsGlobal";
            dynParams.Key = {'ig_user_id': ig_user_id};
            dynParams.UpdateExpression = "set popular_tags = :tags";
            dynParams.ExpressionAttributeValues = {":tags" : tagsSortable};

            // Save to DynamoDB
            dynamo.updateItem(dynParams, function(err, data) {
              if(err) {
                console.log(err, err.stack);
              } else {
                context.succeed({
                  result:        "ok",
                  user_id:       event.user_id,
                  popular_tags:  tagsSortable
                });
              }
            });
          }
        );
      }
    });
  }
  else {
    context.fail('No ig_user_id provided');
  }
};
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment