Skip to content

Instantly share code, notes, and snippets.

@reachtarunhere
Created June 12, 2015 14:19
Show Gist options
  • Save reachtarunhere/094c5b57ef39a5745d84 to your computer and use it in GitHub Desktop.
Save reachtarunhere/094c5b57ef39a5745d84 to your computer and use it in GitHub Desktop.
Clubs together different collections and inserts to new db
import pymongo
import simplejson
from bson import ObjectId
from urlparse import urlparse
# MongoClient() is called without arguments, so pymongo's default
# connection settings are used.
client = pymongo.MongoClient()

# Database the helper functions below read from.
db = client["thug"]

# Second database handle; presumably the destination for the combined
# documents (see the "inserting into new_db" note at the end of the script).
new_db = client["test_db"]
# check all places to put object id
def urlid_to_url(document):
    """Swap a document's ``url_id`` reference for the URL string itself.

    Looks up the entry of ``db.urls`` whose ``_id`` matches
    ``document["url_id"]``, stores that entry's ``"url"`` value under
    ``document["url"]`` and then removes the ``"url_id"`` key.

    The document is modified in place and also returned.
    """
    url_record = db.urls.find_one({"_id": ObjectId(document["url_id"])})
    document["url"] = url_record["url"]
    # Drop the reference only after the lookup above has succeeded.
    del document["url_id"]
    return document
def remove_analysis_id(document):
    """Remove the ``"analysis_id"`` key from ``document``.

    The document is modified in place and also returned. A ``KeyError`` is
    raised when the key is not present.
    """
    del document["analysis_id"]
    return document
def club_collections(analysis_id):
    """Combine everything stored for one analysis into a single document.

    The analysis is loaded from ``db.analyses`` and the matching documents
    of the related collections are attached to it as lists, keyed by the
    collection name:

    * ``exploits`` and ``certificates``: ``analysis_id`` removed and
      ``url_id`` replaced by the URL string.
    * ``codes``, ``behaviors`` and ``maec11``: ``analysis_id`` removed.
    * ``connections``: ``analysis_id`` removed and ``source_id`` /
      ``destination_id`` replaced by ``source_url`` / ``destination_url``.
    * ``locations``, ``virustotal``, ``honeyagent``, ``androguard`` and
      ``peepdf``: copied unchanged.

    ``url_map`` holds the full ``db.urls`` documents of the analysis URL and
    of every connection destination (without duplicates), so an id can still
    be mapped back to its URL afterwards.

    analysis_id -- ``_id`` of the analysis, in any form ``ObjectId`` accepts.

    Returns the combined analysis document (a dict).
    Raises ValueError when no analysis with that id exists.
    """
    oid = ObjectId(analysis_id)
    analysis = db.analyses.find_one({"_id": oid})
    if analysis is None:
        raise ValueError("no analysis found with _id %r" % (analysis_id,))

    # Filter shared by every per-analysis query below.
    by_analysis = {"analysis_id": oid}

    analysis["exploits"] = [
        remove_analysis_id(urlid_to_url(doc))
        for doc in db.exploits.find(by_analysis)
    ]
    analysis["codes"] = [
        remove_analysis_id(doc) for doc in db.codes.find(by_analysis)
    ]
    analysis["behaviors"] = [
        remove_analysis_id(doc) for doc in db.behaviors.find(by_analysis)
    ]
    analysis["certificates"] = [
        remove_analysis_id(urlid_to_url(doc))
        for doc in db.certificates.find(by_analysis)
    ]
    analysis["maec11"] = [
        remove_analysis_id(doc) for doc in db.maec11.find(by_analysis)
    ]

    # Keep the full url document of the analysis itself before url_id is
    # replaced by the plain URL string.
    url_map = [db.urls.find_one({"_id": ObjectId(analysis["url_id"])})]
    analysis["url_map"] = url_map
    analysis = urlid_to_url(analysis)

    # Connections: use URLs instead of url ids.
    seen_url_ids = set(entry["_id"] for entry in url_map)
    connections = list(db.connections.find(by_analysis))
    for conn in connections:
        conn.pop("analysis_id")
        source = db.urls.find_one({"_id": ObjectId(conn["source_id"])})
        conn["source_url"] = source["url"]
        destination = db.urls.find_one(
            {"_id": ObjectId(conn["destination_id"])}
        )
        # Record each destination url document once in url_map.
        if destination["_id"] not in seen_url_ids:
            seen_url_ids.add(destination["_id"])
            url_map.append(destination)
        conn["destination_url"] = destination["url"]
        conn.pop("source_id")
        conn.pop("destination_id")
    analysis["connections"] = connections

    # Collections the original author marked as grid_fs related are
    # preserved as they are (analysis_id and any ids left untouched).
    for name in ("locations", "virustotal", "honeyagent", "androguard",
                 "peepdf"):
        analysis[name] = list(db[name].find(by_analysis))

    return analysis
# inserting into new_db
# The combined document is written to the new database rather than back into
# the source database it was read from.
analysiscombo = new_db.analysiscombo
# Replace the placeholder string with the _id of the analysis to combine.
final_id = analysiscombo.insert_one(club_collections("INSERT ANALYSIS ID HERE")).inserted_id
# add final_id to django models (requires change in reference to db in backend and its api)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment