Skip to content

Instantly share code, notes, and snippets.

@9b
Created March 22, 2012 20:16
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save 9b/2163242 to your computer and use it in GitHub Desktop.
Save 9b/2163242 to your computer and use it in GitHub Desktop.
Build a JSON structure representing how often a given string occurs and insert it into a MongoDB collection
/**
 * Map step: emit one record per string entry of the current document.
 *
 * Called with `this` bound to a document. It reads `this.hash` and
 * `this.static.strings`, where each entry supplies `name` and `count`.
 * For every entry it emits the string name as the key and
 * `{hash, count_sum, count}` as the value, with `count` fixed at 1 per emit.
 *
 * Documents without a `static.strings` list emit nothing.
 */
var map = function () {
  // Declared locally: an undeclared assignment would create a global and
  // throws a ReferenceError under strict mode.
  var fhash = this.hash;
  var strings = (this.static && this.static.strings) || [];
  strings.forEach(function (z) {
    emit(z.name, { hash: fhash, count_sum: z.count, count: 1 });
  });
};
/**
 * Reduce step: merge all values emitted for one string into a single summary.
 *
 * @param key    The string name the values were emitted under (unused here).
 * @param values Array of values for that key. Each is either a raw map
 *               emit `{hash, count_sum, count}` or a summary previously
 *               returned by this function `{hashes, count_sum, count}`.
 * @returns `{hashes, count_sum, count}` where `hashes` lists
 *          `{hash, hash_count}` entries, `count_sum` is the sum of the
 *          incoming `count_sum` fields and `count` the sum of the incoming
 *          `count` fields.
 */
var reduce = function (key, values) {
  var hashes = [];
  var total = 0;
  var stotal = 0;
  for (var i = 0; i < values.length; i++) {
    var value = values[i];
    stotal += value.count;
    total += value.count_sum;
    if (Array.isArray(value.hashes)) {
      // MongoDB may feed this function's own output back in (re-reduce);
      // carry the already collected entries over instead of losing them.
      for (var j = 0; j < value.hashes.length; j++) {
        hashes.push(value.hashes[j]);
      }
    } else {
      hashes.push({ "hash": value.hash, "hash_count": value.count_sum });
    }
  }
  return { hashes: hashes, count_sum: total, count: stotal };
};
// Run the map/reduce pair over the korean_malware collection, with the
// output target set to "string_counts".
db.korean_malware.mapReduce(
  map,
  reduce,
  { out: "string_counts" }
);
/**
 * Map step (inversion): for every hash listed on the current document, emit
 * that hash as the key and `{hashes: [<this document's _id>]}` as the value.
 *
 * Called with `this` bound to a document exposing `_id` and `value`.
 * NOTE(review): presumably run over the "string_counts" output, where `_id`
 * is a string name - confirm against the mapReduce call that uses it.
 *
 * MongoDB does not call reduce for a key that received a single emit, so
 * some documents carry the raw emit (`value.hash`) instead of a reduced
 * summary (`value.hashes`). Both shapes are handled; documents with neither
 * emit nothing.
 * NOTE(review): the single-hash fallback emits the bare `value.hash`, which
 * matches a reduce that stores plain hashes in `hashes` - confirm if the
 * reduce in use stores objects there instead.
 */
var map = function () {
  // Declared locally: an undeclared assignment would create a global and
  // throws a ReferenceError under strict mode.
  var sourceId = this._id;
  var value = this.value || {};
  var owners;
  if (Array.isArray(value.hashes)) {
    owners = value.hashes;
  } else if (value.hash !== undefined) {
    owners = [value.hash];
  } else {
    owners = [];
  }
  owners.forEach(function (z) {
    // The value key stays "hashes" because that is the shape emitted before.
    emit(z, { hashes: [sourceId] });
  });
};
/**
 * Reduce step: merge the `hashes` lists of all values for a key into one
 * list without duplicates, keeping the order of first appearance.
 *
 * @param key    The key the values were emitted under (unused here).
 * @param values Array of `{hashes: [...]}` objects.
 * @returns `{hashes: [...]}` holding each distinct entry once.
 */
var reduce = function (key, values) {
  var unique = [];

  // Linear membership scan; loose equality is kept on purpose so the
  // comparison matches the original behaviour.
  var alreadySeen = function (candidate) {
    for (var idx = 0; idx < unique.length; idx++) {
      if (unique[idx] == candidate) {
        return true;
      }
    }
    return false;
  };

  values.forEach(function (partial) {
    partial.hashes.forEach(function (entry) {
      if (!alreadySeen(entry)) {
        unique.push(entry);
      }
    });
  });

  return { hashes: unique };
};
# Module metadata: description, author, version and date of this script.
__description__ = 'Rip through Strings files, build a count tree and insert to mongo'
__author__ = 'Brandon Dixon'
__version__ = '1.0'
__date__ = '2012/03/22'
import sys
import os
import pymongo
import simplejson as json
from pymongo import Connection
#mongo
def connect_to_mongo(host, port, database, collection):
    """Return the handle for `collection` inside `database` on host:port.

    A Connection is created for the given host and port, then the database
    and the collection are looked up on it by name.
    """
    client = Connection(host, port)
    return client[database][collection]
# Ingest script: for every file in the directory given as the first
# command-line argument, count how often each stripped line occurs and insert
# one document per file into the exe.korean_malware collection.

# Fail with a usage hint instead of an IndexError when the argument is missing.
if len(sys.argv) < 2:
    sys.exit("usage: " + sys.argv[0] + " <strings_directory>")
strings_dir = sys.argv[1]

con = connect_to_mongo("localhost", 27017, "exe", "korean_malware")

# Sorted so files are processed in a stable, name-based order.
for fname in sorted(os.listdir(strings_dir)):
    # os.path.join works whether or not the argument ends with a separator.
    path = os.path.join(strings_dir, fname)
    if not os.path.isfile(path):
        # Sub-directories cannot be opened as strings dumps; skip them.
        continue

    # Map of stripped line -> number of occurrences within this file.
    tallies = {}
    # The with-block closes the file even if reading fails.
    with open(path, "r") as handle:
        for line in handle:
            line = line.strip()
            tallies[line] = tallies.get(line, 0) + 1

    string_dmp = [{'name': name, 'count': seen} for name, seen in tallies.items()]

    # Everything before the first "." of the file name is stored as the hash.
    sample_hash = fname.split(".")[0]
    obj = {'hash': sample_hash, 'static': {'strings': string_dmp}}
    # NOTE(review): the JSON round trip is kept from the original; presumably
    # it normalises values to JSON-compatible types before insert - confirm.
    con.insert(json.loads(json.dumps(obj)))
    print(sample_hash + " strings inserted")
/**
 * Map step: emit one record per string entry of the current document.
 *
 * Called with `this` bound to a document. It reads `this.hash` and
 * `this.static.strings`, where each entry supplies `name` and `count`.
 * For every entry it emits the string name as the key and
 * `{hash, count_sum, count}` as the value, with `count` fixed at 1 per emit.
 *
 * Documents without a `static.strings` list emit nothing.
 */
var map = function () {
  // Declared locally: an undeclared assignment would create a global and
  // throws a ReferenceError under strict mode.
  var fhash = this.hash;
  var strings = (this.static && this.static.strings) || [];
  strings.forEach(function (z) {
    emit(z.name, { hash: fhash, count_sum: z.count, count: 1 });
  });
};
/**
 * Reduce step: merge all values emitted for one string into a single summary.
 *
 * @param key    The string name the values were emitted under (unused here).
 * @param values Array of values for that key. Each is either a raw map
 *               emit `{hash, count_sum, count}` or a summary previously
 *               returned by this function `{hashes, count_sum, count}`.
 * @returns `{hashes, count_sum, count}` where `hashes` lists the hash of
 *          every contributing emit, `count_sum` is the sum of the incoming
 *          `count_sum` fields and `count` the sum of the incoming `count`
 *          fields.
 */
var reduce = function (key, values) {
  var hashes = [];
  var total = 0;
  var stotal = 0;
  for (var i = 0; i < values.length; i++) {
    var value = values[i];
    stotal += value.count;
    total += value.count_sum;
    if (Array.isArray(value.hashes)) {
      // MongoDB may feed this function's own output back in (re-reduce);
      // carry the already collected hashes over instead of pushing an
      // undefined `hash`.
      for (var j = 0; j < value.hashes.length; j++) {
        hashes.push(value.hashes[j]);
      }
    } else {
      hashes.push(value.hash);
    }
  }
  return { hashes: hashes, count_sum: total, count: stotal };
};
// Run the map/reduce pair over the korean_malware collection, with the
// output target set to "string_counts".
db.korean_malware.mapReduce(
  map,
  reduce,
  { out: "string_counts" }
);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment