Created
March 22, 2012 20:16
-
-
Save 9b/2163242 to your computer and use it in GitHub Desktop.
Build a JSON structure representing how often a given string occurs and insert it into a MongoDB collection
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Map step of the string-count job.
 *
 * Runs with `this` bound to one input document. For every entry in
 * `this.static.strings` it emits the string's name as the key, together with
 * the document's hash, the per-document occurrence count (`count_sum`) and a
 * `count` of 1 (one document containing that string).
 */
var map = function () {
    // Keep the hash in a local: inside the forEach callback `this` is no
    // longer the document, and an undeclared assignment would leak a global.
    var fhash = this.hash;
    this.static.strings.forEach(function (z) {
        emit(z.name, { hash: fhash, count_sum: z.count, count: 1 });
    });
};
/**
 * Reduce step of the string-count job.
 *
 * Accepts a mix of raw mapped values (`{hash, count_sum, count}`) and results
 * of an earlier reduce pass for the same key (`{hashes, count_sum, count}`),
 * so the outcome is the same no matter how the values are batched.
 *
 * @param {*} key - The string being counted (not used in the computation).
 * @param {Array<Object>} values - Mapped values and/or partial reduce results.
 * @returns {{hashes: Array<{hash: *, hash_count: number}>, count_sum: number, count: number}}
 *   `hashes` lists each contributing document hash with its own occurrence
 *   count, `count_sum` is the sum of all `count_sum` inputs and `count` is the
 *   sum of all `count` inputs.
 */
var reduce = function (key, values) {
    var hashes = [];
    var total = 0;
    var stotal = 0;
    for (var i = 0; i < values.length; i++) {
        var value = values[i];
        stotal += value.count;
        total += value.count_sum;
        if (value.hashes) {
            // Partial result from a previous reduce pass: carry its
            // per-hash entries over unchanged instead of reading `.hash`,
            // which does not exist on reduced values.
            for (var j = 0; j < value.hashes.length; j++) {
                hashes.push(value.hashes[j]);
            }
        } else {
            hashes.push({ "hash": value.hash, "hash_count": value.count_sum });
        }
    }
    return { hashes: hashes, count_sum: total, count: stotal };
};
// Run the map/reduce pair defined above over the korean_malware collection
// and send the output to the string_counts collection.
db.korean_malware.mapReduce(map, reduce, { out: "string_counts" });
/**
 * Map step that inverts a string-count document.
 *
 * Runs with `this` bound to one input document. For every element of
 * `this.value.hashes` it emits that element as the key and a one-element
 * list holding the document's `_id` as the value.
 */
var map = function () {
    // Declared locally: an undeclared assignment would create a global and
    // throws in strict mode.
    var hash = this._id;
    this.value.hashes.forEach(function (z) {
        emit(z, { hashes: [hash] });
    });
};
/**
 * Reduce step that merges the `hashes` lists of all values for a key into a
 * single list without duplicates, keeping first-seen order.
 *
 * The return value has the same shape as the input values (`{hashes: [...]}`),
 * so a result can be passed back in as an input value.
 *
 * @param {*} key - The key being reduced (not used in the computation).
 * @param {Array<{hashes: Array}>} values - Values to merge.
 * @returns {{hashes: Array}} The distinct entries of every `hashes` list.
 */
var reduce = function (key, values) {
    // Lookup table for strings already collected. Keys are prefixed so that
    // entries such as "__proto__" or "constructor" cannot collide with
    // properties inherited from Object.prototype.
    var seenStrings = {};
    var unique = [];
    values.forEach(function (value) {
        value.hashes.forEach(function (entry) {
            if (typeof entry === "string") {
                var marker = "s:" + entry;
                if (seenStrings[marker] === true) {
                    return;
                }
                seenStrings[marker] = true;
            } else if (unique.indexOf(entry) !== -1) {
                // Non-string entries fall back to a linear scan using
                // strict equality.
                return;
            }
            unique.push(entry);
        });
    });
    return { hashes: unique };
};
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
__description__ = 'Rip through Strings files, build a count tree and insert to mongo' | |
__author__ = 'Brandon Dixon' | |
__version__ = '1.0' | |
__date__ = '2012/03/22' | |
import sys | |
import os | |
import pymongo | |
import simplejson as json | |
from pymongo import Connection | |
#mongo | |
def connect_to_mongo(host, port, database, collection):
    """Connect to a MongoDB server and return a handle to one collection.

    Args:
        host: Server host passed to ``Connection``.
        port: Server port passed to ``Connection``.
        database: Name of the database to look up on the connection.
        collection: Name of the collection to look up in that database.

    Returns:
        The object obtained from ``connection[database][collection]``.
    """
    connection = Connection(host, port)
    # Index straight through instead of rebinding the `collection` parameter.
    return connection[database][collection]
# Usage: <script> <strings_dir>
#
# Every regular file in <strings_dir> is read line by line; each distinct
# (whitespace-stripped) line is counted, and one document per file is inserted
# into the collection:
#   {'hash': <file name up to the first '.'>,
#    'static': {'strings': [{'name': <line>, 'count': <occurrences>}, ...]}}
if len(sys.argv) < 2:
    sys.exit("usage: %s <strings_dir>" % sys.argv[0])

strings_dir = sys.argv[1]
con = connect_to_mongo("localhost", 27017, "exe", "korean_malware")

files = sorted(os.listdir(strings_dir))

for fname in files:
    # os.path.join works whether or not the directory argument ends with a
    # path separator.
    path = os.path.join(strings_dir, fname)
    if not os.path.isfile(path):
        # Sub-directories and other non-regular entries cannot be read as
        # strings files.
        continue

    counts = {}
    # The with-block closes the file even if reading fails part-way.
    with open(path, "r") as f:
        for line in f:
            line = line.strip()
            counts[line] = counts.get(line, 0) + 1

    string_dmp = [{'name': k, 'count': v} for k, v in counts.items()]
    sample_hash = fname.split(".")[0]
    obj = {'hash': sample_hash, 'static': {'strings': string_dmp}}
    # The JSON round trip is kept from the original script; presumably it
    # normalises the document to plain JSON types before insertion -- confirm
    # before removing.
    con.insert(json.loads(json.dumps(obj)))
    print(sample_hash + " strings inserted")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Map step of the string-count job.
 *
 * Runs with `this` bound to one input document. For every entry in
 * `this.static.strings` it emits the string's name as the key, together with
 * the document's hash, the per-document occurrence count (`count_sum`) and a
 * `count` of 1 (one document containing that string).
 */
var map = function () {
    // Keep the hash in a local: inside the forEach callback `this` is no
    // longer the document, and an undeclared assignment would leak a global.
    var fhash = this.hash;
    this.static.strings.forEach(function (z) {
        emit(z.name, { hash: fhash, count_sum: z.count, count: 1 });
    });
};
/**
 * Reduce step of the string-count job (plain hash list variant).
 *
 * Accepts a mix of raw mapped values (`{hash, count_sum, count}`) and results
 * of an earlier reduce pass for the same key (`{hashes, count_sum, count}`),
 * so the outcome is the same no matter how the values are batched.
 *
 * @param {*} key - The string being counted (not used in the computation).
 * @param {Array<Object>} values - Mapped values and/or partial reduce results.
 * @returns {{hashes: Array, count_sum: number, count: number}} `hashes` lists
 *   every contributing document hash, `count_sum` is the sum of all
 *   `count_sum` inputs and `count` is the sum of all `count` inputs.
 */
var reduce = function (key, values) {
    var hashes = [];
    var total = 0;
    var stotal = 0;
    for (var i = 0; i < values.length; i++) {
        var value = values[i];
        stotal += value.count;
        total += value.count_sum;
        if (value.hashes) {
            // Partial result from a previous reduce pass: append its hashes
            // instead of reading `.hash`, which reduced values do not have.
            for (var j = 0; j < value.hashes.length; j++) {
                hashes.push(value.hashes[j]);
            }
        } else {
            hashes.push(value.hash);
        }
    }
    return { hashes: hashes, count_sum: total, count: stotal };
};
// Run the map/reduce pair defined above over the korean_malware collection
// and send the output to the string_counts collection.
db.korean_malware.mapReduce(map, reduce, { out: "string_counts" });
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment