@BoredHackerBlog
Last active April 9, 2023 17:29
procsearch
# ingest data from hybrid analysis
import requests
import psycopg2

HA_API = ""
ha_header = {'api-key': HA_API, 'user-agent': 'Falcon Sandbox', 'accept': 'application/json'}

POSTGRES_HOST = "localhost"
POSTGRES_DB = "procsearch"
POSTGRES_SEARCH_USER = "postgres"
POSTGRES_SEARCH_PASSWORD = ""

conn = psycopg2.connect(database=POSTGRES_DB, user=POSTGRES_SEARCH_USER, password=POSTGRES_SEARCH_PASSWORD, host=POSTGRES_HOST, port='5432', options='-c statement_timeout=15s')
cur = conn.cursor()

# pull the latest public feed; for every sample with process data, store one reports row and one processes row per process
ha_response = requests.get("https://www.hybrid-analysis.com/api/v2/feed/latest", headers=ha_header)
if ha_response.json()['status'] == 'ok':
    sample_info = ha_response.json()['data']
    for sample in sample_info:
        if 'processes' in sample.keys():
            link = "https://www.hybrid-analysis.com" + str(sample['report_url'])
            cur.execute("INSERT INTO reports(link) values (%s) RETURNING reportid;", (link,))
            reportid = cur.fetchall()[0][0]
            conn.commit()
            for process in sample['processes']:
                pid = process.get("uid")
                ppid = process.get("parentuid")
                proc_name = process.get("name")
                proc_path = process.get("normalized_path")
                proc_commandline = process.get("command_line")
                cur.execute("INSERT INTO processes(reportid, pid, ppid, proc_name, proc_path, proc_commandline) values (%s, %s, %s, %s, %s, %s);", (reportid, pid, ppid, proc_name, proc_path, proc_commandline,))
            conn.commit()
init file:
CREATE DATABASE procsearch;
\c procsearch;
CREATE TABLE reports (reportid SERIAL PRIMARY KEY, link TEXT NOT NULL, timestamp timestamp default current_timestamp);
CREATE TABLE processes (reportid int NOT NULL, pid text, ppid text, proc_name text, proc_path text, proc_commandline text);
CREATE VIEW proc_searchable AS SELECT reports.link as report_link, reports.timestamp as timestamp, parent.proc_path as parent_proc_path, parent.proc_commandline as parent_proc_commandline, proc.proc_path as proc_path, proc.proc_commandline as proc_commandline from processes proc LEFT OUTER join processes parent on proc.ppid = parent.pid and proc.reportid=parent.reportid LEFT OUTER join reports on proc.reportid=reports.reportid;
CREATE USER searchapp WITH PASSWORD 'password';
GRANT SELECT ON proc_searchable TO searchapp;
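example query against the view (a minimal sketch, not part of the gist: it assumes the searchapp user created above and an arbitrary 'powershell' pattern):
# query the proc_searchable view for a substring match
import psycopg2
conn = psycopg2.connect(database="procsearch", user="searchapp", password="password", host="localhost", port="5432")
cur = conn.cursor()
# ILIKE with a wrapped pattern mirrors what the search app builds from "proc_commandline = powershell"
cur.execute("SELECT report_link, parent_proc_commandline, proc_commandline FROM proc_searchable WHERE proc_commandline ILIKE %s LIMIT 10;", ('%powershell%',))
for row in cur.fetchall():
    print(row)
cur.close()
conn.close()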
current stats:
procsearch=# \dt+
                          List of relations
 Schema |   Name    | Type  |  Owner   | Persistence |  Size  | Description
--------+-----------+-------+----------+-------------+--------+-------------
 public | processes | table | postgres | permanent   | 228 MB |
 public | reports   | table | postgres | permanent   | 12 MB  |
(2 rows)
procsearch=# select count(*) from reports;
count
--------
108111
(1 row)
procsearch=# select count(*) from processes;
count
---------
1014474
(1 row)
# ingest HA data daily
# ingest triage/abusech data hourly
# m h dom mon dow command
0 0 * * * python3 /home/pi/ha_ingest.py
0 * * * * python3 /home/pi/triage_ingest.py
# parse input query and convert to postgres
from flask import Flask, request, render_template
from pygrok import Grok
import psycopg2

# note: the connection and Flask app setup below are not shown in the original snippet;
# they mirror the ingest scripts and the searchapp user created in the init file
POSTGRES_HOST = "localhost"
POSTGRES_DB = "procsearch"
POSTGRES_SEARCH_USER = "searchapp"
POSTGRES_SEARCH_PASSWORD = "password"
conn = psycopg2.connect(database=POSTGRES_DB, user=POSTGRES_SEARCH_USER, password=POSTGRES_SEARCH_PASSWORD, host=POSTGRES_HOST, port='5432', options='-c statement_timeout=15s')
app = Flask(__name__)

# grok pattern: "field = value" or "field != value", where field is one of the view columns
pattern = "(?<field>parent_proc_path|parent_proc_commandline|proc_path|proc_commandline) (?<op>=|!=) %{GREEDYDATA:value}"
grok = Grok(pattern)

def parse_query(query_params):
    # split on " AND " and turn every "field =/!= value" clause into an ILIKE / NOT ILIKE condition
    query_split = query_params.split(" AND ")
    transformed_queries = []
    for subquery in query_split:
        extracted = grok.match(subquery)
        if extracted:
            field = extracted['field']
            if extracted['op'] == "=":
                op = "ILIKE"
            elif extracted['op'] == "!=":
                op = "NOT ILIKE"
            value = f"'%{extracted['value']}%'"
            transformed_queries.append(f"{field} {op} {value}")
        else:
            return False
    query_built = " AND ".join(transformed_queries)
    return query_built

def query_database(query_params):
    try:
        conditions = parse_query(query_params)
        if conditions == False:
            return False
        cur = conn.cursor()
        print("Query Built", conditions, flush=True)
        print("Query Executed", f"select * from proc_searchable where {conditions} limit 100;", flush=True)
        cur.execute(f"select * from proc_searchable where {conditions} limit 100;")
        results = cur.fetchall()
        cur.close()
        return results
    except:
        return False

@app.route('/search', methods=['GET'])
def search():
    query_params = request.args["query_params"]
    # reject queries without an "=" clause or with characters that could break out of the ILIKE pattern
    if " = " not in query_params:
        return "Query didn't work. Try a different query."
    if (";" in query_params) or ("%" in query_params) or ("'" in query_params) or ('"' in query_params):
        return "Query didn't work. Try a different query."
    else:
        results = query_database(query_params)
        if results == False:
            return "Query didn't work. Try a different query."
        else:
            return render_template("search.html", results=results)
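example request (a minimal sketch, not part of the gist: the host/port and search terms are assumed, since app.run() isn't shown):
# call the /search endpoint; parse_query turns this query into
# proc_commandline ILIKE '%powershell%' AND parent_proc_path NOT ILIKE '%explorer.exe%'
import requests
params = {"query_params": "proc_commandline = powershell AND parent_proc_path != explorer.exe"}
r = requests.get("http://localhost:5000/search", params=params)
print(r.status_code)
print(r.text[:500])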
# ingest data from abusech and triage
import requests
import psycopg2
from time import sleep

MALWARE_BAAZAR_API = ""
mw_b_header = {"API-KEY": MALWARE_BAAZAR_API}
TRIAGE_API = ""
triage_header = {"Authorization": "Bearer " + TRIAGE_API}

POSTGRES_HOST = "localhost"
POSTGRES_DB = "procsearch"
POSTGRES_SEARCH_USER = "postgres"
POSTGRES_SEARCH_PASSWORD = ""

conn = psycopg2.connect(database=POSTGRES_DB, user=POSTGRES_SEARCH_USER, password=POSTGRES_SEARCH_PASSWORD, host=POSTGRES_HOST, port='5432', options='-c statement_timeout=15s')
cur = conn.cursor()

# get the most recent MalwareBazaar samples, look up their Triage reports, and store each behavioral task's process list
mw_baazar = requests.post("https://mb-api.abuse.ch/api/v1/", headers=mw_b_header, data={"query": "get_recent", "selector": "time"})
if mw_baazar.json()['query_status'] == 'ok':
    sample_info = mw_baazar.json()['data']
    for sample in sample_info:
        sample_hash = sample['sha256_hash']
        sleep(1)
        sample_info = requests.post("https://mb-api.abuse.ch/api/v1/", headers={"API-KEY": MALWARE_BAAZAR_API}, data={"query": "get_info", "hash": sample_hash})
        if sample_info.json()['query_status'] == 'ok':
            if 'Triage' in sample_info.json()['data'][0]['vendor_intel'].keys():
                triage_link = sample_info.json()['data'][0]['vendor_intel']['Triage']['link']
                sample_id = triage_link.split("/")[-2]
                sleep(1)
                sample_summary = requests.get(f"https://tria.ge/api/v0/samples/{sample_id}/summary", headers=triage_header)
                if sample_summary.status_code == 200:
                    if len(sample_summary.json()['tasks']) > 0:
                        for task in sample_summary.json()['tasks']:
                            if 'behavioral' in task:
                                task_id = task.split('-')[-1]
                                sleep(1)
                                task_results = requests.get(f"https://tria.ge/api/v0/samples/{sample_id}/{task_id}/report_triage.json", headers=triage_header)
                                if task_results.status_code == 200:
                                    if 'processes' in task_results.json().keys():
                                        if len(task_results.json()['processes']) > 0:
                                            link = "https://tria.ge/" + str(sample_id) + "/" + str(task_id)
                                            cur.execute("INSERT INTO reports(link) values (%s) RETURNING reportid;", (link,))
                                            reportid = cur.fetchall()[0][0]
                                            conn.commit()
                                            for process in task_results.json()['processes']:
                                                pid = process.get("pid")
                                                ppid = process.get("ppid")
                                                proc_name = process.get("image")
                                                proc_path = process.get("image")
                                                proc_commandline = process.get("cmd")
                                                cur.execute("INSERT INTO processes(reportid, pid, ppid, proc_name, proc_path, proc_commandline) values (%s, %s, %s, %s, %s, %s);", (reportid, pid, ppid, proc_name, proc_path, proc_commandline,))
                                            conn.commit()