Skip to content

Instantly share code, notes, and snippets.

@kofrasa
Last active September 14, 2017 13:38
Show Gist options
  • Save kofrasa/07640aa6d9ec39824280132977f55f95 to your computer and use it in GitHub Desktop.
Save kofrasa/07640aa6d9ec39824280132977f55f95 to your computer and use it in GitHub Desktop.
function generateTestData(n) {
  var alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789!@#$%^&*()_+-=`[]\\;',./{}|:\"<>?"
  var len = alphabet.length

  var data = []

  function randString(size) {
    let s = []
    for (var j = 0; j < size; j++) {
      s.push(alphabet[Math.floor((Math.random() * len))])
    }
    return s.join('')
  }

  for (var i = 0; i < n; i++) {
    var p = {
      "_id": randString(20),
      "firstName": randString(20),
      "lastName": randString(20),
      "username": randString(20),
      "title": randString(20),
      "jobs": 6,
      "date": {
        "year": 2013,
        "month": 9,
        "day": 25
      },
      "languages": {
        "spoken": ["english", "french", "spanish"],
        "programming": ["C", "Erlang", "Scala", "Java", "Javascript", randString(20), "C#"]
      },
      "circles": {
        "school": ["Kobby", "Henry", "Kanba", "Nana", "Albert", "Yayra", "Linda", "Sophia", randString(20), "Eben"],
        "work": ["Kobby", "KT", "Evans", "Robert", "Ehi", "Ebo", "KO"],
        "family": ["Richard", randString(20), "Michael", "Rachel"]
      },
      "projects": {
        "C": ["word_grid", "student_record", "calendar"],
        "Java": ["Easy Programming Language", "SurveyMobile"],
        "Python": ["Kasade", "Code Jam", randString(20), "FlaskUtils"],
        "Scala": [randString(20)],
        "Javascript": [randString(20), randString(20)]
      },
      "grades": [
        {"grade": 92, "mean": 88, "std": 8},
        {"grade": 78, "mean": 90, "std": 5},
        {"grade": 88, "mean": 85, "std": 3}
      ],
      "today": "1970-01-01"
    }

    data.push(JSON.stringify(p))
  }
  return data
}

function testHash(name, fn, data, stats) {
  var result = []
  var start = new Date()
  for (let i = 0, len = data.length; i < len; i++) {
    result.push(fn(data[i]))
  }
  var end = new Date()
  var c = 0
  var h = result.reduce(function (h, val) {
    if (!h[val]) {
      h[val] = 1
    } else {
      c++
    }
    return h
  }, {})

  if (!stats[name]) {
    stats[name] = {time:[],collisions:0}
  }

  stats[name].time.push(end.getTime() - start.getTime())
  stats[name].collisions = c
}


function sdbm(s) {
  var i = s.length
  let hash = 0
  while (i) hash = s.charCodeAt(--i) + (hash << 6) + (hash << 16) - hash
  return hash >>> 0
}

function hashCode(s) {
  let hash = 0
  let i = s.length
  while (i) hash = ((hash << 5) - hash) + s.charCodeAt(--i)
  return hash >>> 0
}

function djb2(s) {
  let i = s.length
  let hash = 0
  while (i) hash = (hash * 33) ^ s.charCodeAt(--i)
  return hash >>> 0
}

function murmur3(s) {
  return murmurhash3_32_gc(s, 5381)
}

//https://rawgithub.com/garycourt/murmurhash-js/master/murmurhash3_gc.js
function murmurhash3_32_gc(key, seed) {
	var remainder, bytes, h1, h1b, c1, c1b, c2, c2b, k1, i;

	remainder = key.length & 3; // key.length % 4
	bytes = key.length - remainder;
	h1 = seed;
	c1 = 0xcc9e2d51;
	c2 = 0x1b873593;
	i = 0;

	while (i < bytes) {
	  	k1 =
	  	  ((key.charCodeAt(i) & 0xff)) |
	  	  ((key.charCodeAt(++i) & 0xff) << 8) |
	  	  ((key.charCodeAt(++i) & 0xff) << 16) |
	  	  ((key.charCodeAt(++i) & 0xff) << 24);
		++i;

		k1 = ((((k1 & 0xffff) * c1) + ((((k1 >>> 16) * c1) & 0xffff) << 16))) & 0xffffffff;
		k1 = (k1 << 15) | (k1 >>> 17);
		k1 = ((((k1 & 0xffff) * c2) + ((((k1 >>> 16) * c2) & 0xffff) << 16))) & 0xffffffff;

		h1 ^= k1;
        h1 = (h1 << 13) | (h1 >>> 19);
		h1b = ((((h1 & 0xffff) * 5) + ((((h1 >>> 16) * 5) & 0xffff) << 16))) & 0xffffffff;
		h1 = (((h1b & 0xffff) + 0x6b64) + ((((h1b >>> 16) + 0xe654) & 0xffff) << 16));
	}

	k1 = 0;

	switch (remainder) {
		case 3: k1 ^= (key.charCodeAt(i + 2) & 0xff) << 16;
		case 2: k1 ^= (key.charCodeAt(i + 1) & 0xff) << 8;
		case 1: k1 ^= (key.charCodeAt(i) & 0xff);

		k1 = (((k1 & 0xffff) * c1) + ((((k1 >>> 16) * c1) & 0xffff) << 16)) & 0xffffffff;
		k1 = (k1 << 15) | (k1 >>> 17);
		k1 = (((k1 & 0xffff) * c2) + ((((k1 >>> 16) * c2) & 0xffff) << 16)) & 0xffffffff;
		h1 ^= k1;
	}

	h1 ^= key.length;

	h1 ^= h1 >>> 16;
	h1 = (((h1 & 0xffff) * 0x85ebca6b) + ((((h1 >>> 16) * 0x85ebca6b) & 0xffff) << 16)) & 0xffffffff;
	h1 ^= h1 >>> 13;
	h1 = ((((h1 & 0xffff) * 0xc2b2ae35) + ((((h1 >>> 16) * 0xc2b2ae35) & 0xffff) << 16))) & 0xffffffff;
	h1 ^= h1 >>> 16;

	return h1 >>> 0;
}

function printStats(stats) {
  for (name in stats) {
    if (stats.hasOwnProperty(name)) {
      // compute average disregarding first result
      let d = stats[name]
      d.time = d.time.slice(1)
      d.avg = d.time.reduce(function(acc,n) { return acc+n; }, 0) / d.time.length
      console.log(name + ":\n" + JSON.stringify(d))
      console.log()
    }
  }
}

function runTest(data, stats) {
  testHash('hashcode', hashCode, data, stats)
  testHash('djb2', djb2, data, stats)
  testHash('murmurhash3', murmur3, data, stats)
  testHash('sdbm', sdbm, data, stats)
}

function main() {
  let data = generateTestData(Math.pow(10,6))
  console.log('data size: ' + data.length)
  console.log('====================')

  let i = 5
  let stats = {}
  while (i--) runTest(data, stats)
  printStats(stats)
}

// run benchmark
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment