Last active
March 21, 2020 19:26
-
-
Save taco-shellcode/3e2ae51d98a55016e796a7614f50376e to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3.6 | |
''' | |
Requirements: | |
sudo apt-get install python3 | |
sudo apt-get install pip3 | |
pip3 install python-twitter | |
pip3 install beautifulsoup4 | |
Compile script into binary: | |
pyinstaller --onefile osint-collector.py | |
Execution Examples: | |
./osint-collector | |
./osint-collector --debug | |
./osint-collector --output_format csv --output_file iocs.csv | |
./osint-collector --output_format json --output_file iocs.json | |
config.json example - store this in the same location as the osint.py script or use the command line args to specify its location | |
{ | |
"twitter_access_token": "", | |
"twitter_access_token_secret": "", | |
"twitter_consumer_key": "", | |
"twitter_consumer_secret": "", | |
"whitelist": [ | |
"app.any.run", | |
"hybrid-analysis", | |
"pastebin" | |
] | |
} | |
''' | |
import os | |
import re | |
import sys | |
import bs4 | |
import json | |
import twitter | |
import requests | |
import argparse | |
class AppConfig:
    """Holds the Twitter API credentials and the IOC whitelist loaded from config.json."""

    def __init__(self):
        # Default to empty values so attribute access is always safe even
        # when loading fails and the caller gets an unpopulated instance.
        self.twitter_access_token = ""
        self.twitter_access_token_secret = ""
        self.twitter_consumer_key = ""
        self.twitter_consumer_secret = ""
        self.whitelist = ""

    @staticmethod
    def _debug_enabled():
        # 'debug' is set as a module-level global by the CLI entry point.
        # Look it up defensively so the class also works when this module is
        # imported (the original raised NameError in that case).
        return bool(globals().get("debug", False))

    @staticmethod
    def load_config(config_file):
        """Load configuration from *config_file* and return a populated AppConfig.

        If the file is missing or invalid the returned instance keeps its
        empty defaults (the original contract: an AppConfig is always returned).
        """
        app_config = AppConfig()
        if config_file == "config.json":
            # Bare default name: resolve it next to this script.
            config_file = os.path.join(os.path.dirname(os.path.abspath(__file__)), "config.json")
        config = None
        if os.path.isfile(config_file):
            config = app_config._read_from_disk(config_file)
        # Bug fix: the original passed None straight into _load_config_json
        # when the file was missing, crashing with a TypeError.
        if config is None or not app_config._load_config_json(config):
            if AppConfig._debug_enabled():
                print(f"\r\n[!] An error occurred while attempting to load the configuration from: {config_file}")
        if AppConfig._debug_enabled():
            print(app_config)
        return app_config

    def _read_from_disk(self, filename):
        """Read *filename* and return its parsed JSON, or None when it does not exist."""
        config_json = None
        if os.path.isfile(filename):
            with open(filename, "r") as input_config_file:
                config_json = json.loads(input_config_file.read())
        else:
            if AppConfig._debug_enabled():
                # Bug fix: the original printed the literal "(unknown)"
                # instead of the missing path.
                print(f"\r\n[!] Config file not found: {filename}")
        return config_json

    def _load_config_json(self, config_json):
        """Copy the required keys out of *config_json*; return True on success."""
        required_keys = [
            "twitter_access_token",
            "twitter_access_token_secret",
            "twitter_consumer_key",
            "twitter_consumer_secret",
            "whitelist",
        ]
        for key_name in required_keys:
            if key_name not in config_json:
                if AppConfig._debug_enabled():
                    print(f"\r\n[!] Invalid config file, missing attribute: {key_name}")
                return False
        for key_name in required_keys:
            setattr(self, key_name, config_json[key_name])
        return True
class TweetCollector:
    """Thin wrapper around the python-twitter search API."""

    def __init__(self, config):
        # tweet_mode="extended" makes the API return untruncated tweet
        # bodies in the "full_text" field (relied on downstream).
        self.twitter_api = twitter.Api(
            config.twitter_consumer_key,
            config.twitter_consumer_secret,
            config.twitter_access_token,
            config.twitter_access_token_secret,
            tweet_mode="extended"
        )
        if debug is True:
            print(self.twitter_api)
            print(config.twitter_consumer_key)
            print(config.twitter_consumer_secret)
            print(config.twitter_access_token)
            print(config.twitter_access_token_secret)
            print(self.twitter_api.VerifyCredentials())

    def search(self, search_string):
        """Run a Twitter search and return the results as a list of dicts.

        *search_string* may already be a raw query ("q=..."), or a bare query
        string that gets the "q=" prefix added. Returns [] when
        *search_string* is None (the original raised UnboundLocalError).
        """
        search_results = []
        # Bug fix: 'results' was unbound when search_string was None.
        results = []
        raw_query = None
        if search_string is not None:
            raw_query = search_string if search_string.startswith("q=") else f"q={search_string}"
        if raw_query is not None:
            try:
                results = self.twitter_api.GetSearch(raw_query=raw_query)
            except requests.exceptions.SSLError as e:
                # Fatal: without search results there is nothing to collect.
                if debug is True:
                    print(f"\r\n[!] An error occurred while making the Twitter GetSearch api call with search string: {search_string}")
                    print(f"\r\n[!] Exception Information:\r\n{e}\r\n")
                sys.exit(1)
        for result in results:
            search_results.append(json.loads(result.AsJsonString()))
        return search_results
class TweetFilter:
    """Filters raw Twitter search results down to tweets of interest."""

    def __init__(self):
        self.regex = RegexMatch()

    def contains_urls(self, search_results):
        """Return tweets containing at least one well-formed expanded URL."""
        tweets_with_urls = []
        for result in search_results:
            for url in result["urls"]:
                if url["expanded_url"] is not None and self.regex.url(url["expanded_url"]):
                    tweets_with_urls.append(result)
                    # Bug fix: stop after the first hit so a tweet with
                    # several URLs is not appended multiple times.
                    break
        return tweets_with_urls

    def contains_pastebin_urls(self, search_results):
        """Return tweets containing at least one valid pastebin.com URL."""
        tweets_with_pastebin_urls = []
        for result in search_results:
            for url in result["urls"]:
                expanded = url["expanded_url"]
                if expanded is not None and "pastebin.com/" in expanded and self.regex.url(expanded):
                    tweets_with_pastebin_urls.append(result)
                    # Bug fix: avoid duplicate entries for multi-URL tweets.
                    break
        return tweets_with_pastebin_urls

    def get_original_tweet(self, search_results):
        """Collapse retweets to the original tweet they embed."""
        original_tweets = []
        for result in search_results:
            if "retweeted_status" not in result:
                original_tweets.append(result)
            else:
                # Bug fix: the original condition ('"retweet_count" and ...')
                # evaluated the string literal as always-truthy; the intent is
                # simply "this is a retweet, keep the embedded original".
                original_tweets.append(result["retweeted_status"])
        return original_tweets

    def get_tweet_properties(self, search_results):
        """Project each tweet onto the small dict used for IOC reporting.

        The "ioc" field is intentionally left empty here; the collection loop
        fills it in later, one event per extracted indicator.
        """
        filtered_tweets = []
        for result in search_results:
            filtered_tweets.append({
                "created_at": result["created_at"],
                "full_text": result["full_text"],
                "ioc": "",
                "url": result["urls"][0]["expanded_url"],
                "screen_name": result["user"]["screen_name"],
            })
        return filtered_tweets
class ThreatCollector:
    """Scrapes IOC values (IPs/URLs) from pastebin pastes and the Pulsedive API."""

    # TODO Add support for file extensions and hashes
    def __init__(self, config):
        self.regex = RegexMatch()
        # Bug fix: _prepare_pulsedive_url reads self.pulsedive_api_key but the
        # original never set it, so every Pulsedive call raised AttributeError.
        # NOTE(review): config.json has no such key yet — TODO add it there.
        self.pulsedive_api_key = getattr(config, "pulsedive_api_key", "")

    def get_pastebin_iocs(self, pastebin_urls, recursive_search=False):
        """Fetch each pastebin URL in *pastebin_urls* and extract IOC tokens.

        When *recursive_search* is True, pastebin links found inside a paste
        are followed one level as well.
        """
        ioc_list = []
        for url in pastebin_urls:
            if url is None or "pastebin.com/" not in url or not self.regex.url(url):
                continue
            # The scraper reads the <textarea> of the HTML page, so normalize
            # raw links back to the page form.
            if "pastebin.com/raw" in url:
                url = url.replace("/raw", "")
            try:
                parsed_lines = bs4.BeautifulSoup(requests.get(url).text, "html.parser").find("textarea").string.splitlines()
            except requests.exceptions.SSLError as e:
                if debug is True:
                    print(f"\r\n[!] An error occurred while requesting the URL: {url}")
                    print(f"\r\n[!] Exception Information:\r\n{e}\r\n")
                break
            except AttributeError as e:
                # Page had no <textarea> (removed paste, captcha page, ...).
                if debug is True:
                    print(f"\r\n[!] Exception Information:\r\n{e}\r\n")
                break
            for line in parsed_lines:
                for token in line.split():
                    if recursive_search is True and "pastebin.com/" in token.lower() and self.regex.url(token) and url != token:
                        # Bug fix: the recursive call expects a list of URLs;
                        # the original passed the bare string, which iterated
                        # per-character and never matched anything. (A second,
                        # effectively-dead branch that could only ever extend
                        # an empty list was removed.)
                        ioc_list.extend(self.get_pastebin_iocs([token]))
                    elif self.regex.ipv4(token) or self.regex.url(token) and url != token:
                        ioc_list.append(token)
        return ioc_list

    def get_pulsedive_iocs(self, threat):
        """Query Pulsedive for *threat* (name or numeric tid) and return its indicators."""
        iocs = []
        if threat is not None:
            url = self._prepare_pulsedive_url(threat)
            if url is not None:
                response_json = json.loads(requests.get(url).text)
                iocs = self._handle_pulsedive_response(response_json)
        return iocs

    def _prepare_pulsedive_url(self, threat):
        """Build the Pulsedive info.php URL for a threat id (int) or name (str)."""
        query = None
        if isinstance(threat, int):
            query = f"&get=links&tid={threat}"
        elif isinstance(threat, str):
            query = f"&tname={threat}"
        if query is None:
            return None
        return f"https://pulsedive.com/api/info.php?{query}&key={self.pulsedive_api_key}"

    def _handle_pulsedive_response(self, pulsedive_response):
        """Resolve a Pulsedive response: follow a tid lookup, or return its results."""
        handled_response = None
        if pulsedive_response and "tid" in pulsedive_response:
            handled_response = self.get_pulsedive_iocs(pulsedive_response["tid"])
        # Bug fix: the original used "is not 0", an identity comparison that
        # is a SyntaxWarning on modern Python.
        elif pulsedive_response and "results" in pulsedive_response and len(pulsedive_response["results"]) != 0:
            handled_response = pulsedive_response["results"]
        return handled_response
class ThreatFilter:
    """Filters IOC lists against a whitelist and by Pulsedive metadata."""

    # Fallback used when the config supplies an empty whitelist.
    DEFAULT_WHITELIST = ["app.any.run", "hybrid_analysis", "pastebin"]

    # TODO - add functions to manage whitelist
    def __init__(self, config):
        # Bug fix: the original used "len(...) is 0", an identity comparison
        # that is a SyntaxWarning on modern Python.
        if len(config.whitelist) == 0:
            self.whitelist = list(self.DEFAULT_WHITELIST)
        else:
            self.whitelist = config.whitelist

    def filter_whitelisted_iocs(self, ioc_list):
        """Return the unique IOCs that contain no whitelisted substring.

        Note: returns a set (deduplicated), matching the original behavior.
        """
        filtered_iocs = set()
        for indicator in ioc_list:
            if not any(known_good in indicator for known_good in self.whitelist):
                filtered_iocs.add(indicator)
        return filtered_iocs

    def filter_pulsedive_iocs_by_type(self, pulsedive_indicators, filter_types):
        """Keep only indicators whose "type" is one of *filter_types*."""
        return [
            indicator
            for indicator in pulsedive_indicators
            for filter_type in filter_types
            if "type" in indicator and indicator["type"] == filter_type
        ]

    def filter_pulsedive_iocs_by_risk(self, pulsedive_indicators, filter_risks):
        """Keep only indicators whose "risk" is one of *filter_risks*."""
        return [
            indicator
            for indicator in pulsedive_indicators
            for filter_risk in filter_risks
            if "risk" in indicator and indicator["risk"] == filter_risk
        ]
class RegexMatch:
    """Regex helpers for recognizing URL and IPv4 IOC tokens."""
    # TODO Add support for hashes and files with extensions

    def __init__(self):
        # This regex will not match urls with no uri content such as www.google.com. This was by design as many security researchers include the basedomain
        # Did not want to generate false positives with hits such as drive.google.com & drive.google.com/evilcontent appearing in pastebin data
        # In order to get the regex to match urls with no uri content replace (\/.*|:[0-9]{1,5}) with (\/.*|:[0-9]{1,5})?
        self.url_regex = r"\b(http:\/\/www\.|https:\/\/www\.|http:\/\/|https:\/\/)?[a-z0-9]+([\-\.]{1}[a-z0-9]+)*\.(aaa|aarp|abarth|abb|abbott|abbvie|abc|able|abogado|abudhabi|ac|academy|accenture|accountant|accountants|aco|active|actor|ad|adac|ads|adult|ae|aeg|aero|aetna|af|afamilycompany|afl|africa|ag|agakhan|agency|ai|aig|aigo|airbus|airforce|airtel|akdn|al|alfaromeo|alibaba|alipay|allfinanz|allstate|ally|alsace|alstom|am|americanexpress|americanfamily|amex|amfam|amica|amsterdam|analytics|android|anquan|anz|ao|aol|apartments|app|apple|aq|aquarelle|ar|arab|aramco|archi|army|arpa|art|arte|as|asda|asia|associates|at|athleta|attorney|au|auction|audi|audible|audio|auspost|author|auto|autos|avianca|aw|aws|ax|axa|az|azure|ba|baby|baidu|banamex|bananarepublic|band|bank|bar|barcelona|barclaycard|barclays|barefoot|bargains|baseball|basketball|bauhaus|bayern|bb|bbc|bbt|bbva|bcg|bcn|bd|be|beats|beauty|beer|bentley|berlin|best|bestbuy|bet|bf|bg|bh|bharti|bi|bible|bid|bike|bing|bingo|bio|biz|bj|black|blackfriday|blanco|blockbuster|blog|bloomberg|blue|bm|bms|bmw|bn|bnl|bnpparibas|bo|boats|boehringer|bofa|bom|bond|boo|book|booking|bosch|bostik|boston|bot|boutique|box|br|bradesco|bridgestone|broadway|broker|brother|brussels|bs|bt|budapest|bugatti|build|builders|business|buy|buzz|bv|bw|by|bz|bzh|ca|cab|cafe|cal|call|calvinklein|cam|camera|camp|cancerresearch|canon|capetown|capital|capitalone|car|caravan|cards|care|career|careers|cars|cartier|casa|case|caseih|cash|casino|cat|catering|catholic|cba|cbn|cbre|cbs|cc|cd|ceb|center|ceo|cern|cf|cfa|cfd|cg|ch|chanel|channel|charity|chase|chat|cheap|chintai|christmas|chrome|chrysler|church|ci|cipriani|circle|cisco|citadel|citi|citic|city|cityeats|ck|cl|claims|cleaning|click|clinic|clinique|clothing|cloud|club|clubmed|cm|cn|co|coach|codes|coffee|college|cologne|com|comcast|commbank|community|company|compare|computer|comsec|condos|construction|consulting|contact|contractors|cooking|cookingchannel|cool|coop|corsica|country|coupon|coupons|courses|cr|credit|creditcard|creditunion|cricket|crown|crs|cruise|cruises|csc|cu|cuisinella|cv|cw|cx|cy|cymru|cyou|cz|dabur|dad|dance|data|date|dating|datsun|day|dclk|dds|de|deal|dealer|deals|degree|delivery|dell|deloitte|delta|democrat|dental|dentist|desi|design|dev|dhl|diamonds|diet|digital|direct|directory|discount|discover|dish|diy|dj|dk|dm|dnp|do|docs|doctor|dodge|dog|doha|domains|dot|download|drive|dtv|dubai|duck|dunlop|duns|dupont|durban|dvag|dvr|dz|earth|eat|ec|eco|edeka|edu|education|ee|eg|email|emerck|energy|engineer|engineering|enterprises|epost|epson|equipment|er|ericsson|erni|es|esq|estate|esurance|et|etisalat|eu|eurovision|eus|events|everbank|exchange|expert|exposed|express|extraspace|fage|fail|fairwinds|faith|family|fan|fans|farm|farmers|fashion|fast|fedex|feedback|ferrari|ferrero|fi|fiat|fidelity|fido|film|final|finance|financial|fire|firestone|firmdale|fish|fishing|fit|fitness|fj|fk|flickr|flights|flir|florist|flowers|fly|fm|fo|foo|food|foodnetwork|football|ford|forex|forsale|forum|foundation|fox|fr|free|fresenius|frl|frogans|frontdoor|frontier|ftr|fujitsu|fujixerox|fun|fund|furniture|futbol|fyi|ga|gal|gallery|gallo|gallup|game|games|gap|garden|gb|gbiz|gd|gdn|ge|gea|gent|genting|george|gf|gg|ggee|gh|gi|gift|gifts|gives|giving|gl|glade|glass|gle|global|globo|gm|gmail|gmbh|gmo|gmx|gn|godaddy|gold|goldpoint|golf|goo|goodyear|goog|google|gop|got|gov|gp|gq|gr|grainger|graphics|gratis|green|gripe|grocery|group|gs|gt|gu|guardian|gucci|guge|guide|guitars|guru|gw|gy|hair|hamburg|hangout|haus|hbo|hdfc|hdfcbank|health|healthcare|help|helsinki|here|hermes|hgtv|hiphop|hisamitsu|hitachi|hiv|hk|hkt|hm|hn|hockey|holdings|holiday|homedepot|homegoods|homes|homesense|honda|honeywell|horse|hospital|host|hosting|hot|hoteles|hotels|hotmail|house|how|hr|hsbc|ht|hu|hughes|hyatt|hyundai|ibm|icbc|ice|icu|id|ie|ieee|ifm|ikano|il|im|imamat|imdb|immo|immobilien|in|inc|industries|infiniti|info|ing|ink|institute|insurance|insure|int|intel|international|intuit|investments|io|ipiranga|iq|ir|irish|is|iselect|ismaili|ist|istanbul|it|itau|itv|iveco|jaguar|java|jcb|jcp|je|jeep|jetzt|jewelry|jio|jll|jm|jmp|jnj|jo|jobs|joburg|jot|joy|jp|jpmorgan|jprs|juegos|juniper|kaufen|kddi|ke|kerryhotels|kerrylogistics|kerryproperties|kfh|kg|kh|ki|kia|kim|kinder|kindle|kitchen|kiwi|km|kn|koeln|komatsu|kosher|kp|kpmg|kpn|kr|krd|kred|kuokgroup|kw|ky|kyoto|kz|la|lacaixa|ladbrokes|lamborghini|lamer|lancaster|lancia|lancome|land|landrover|lanxess|lasalle|lat|latino|latrobe|law|lawyer|lb|lc|lds|lease|leclerc|lefrak|legal|lego|lexus|lgbt|li|liaison|lidl|life|lifeinsurance|lifestyle|lighting|like|lilly|limited|limo|lincoln|linde|link|lipsy|live|living|lixil|lk|llc|loan|loans|locker|locus|loft|lol|london|lotte|lotto|love|lpl|lplfinancial|lr|ls|lt|ltd|ltda|lu|lundbeck|lupin|luxe|luxury|lv|ly|ma|macys|madrid|maif|maison|makeup|man|management|mango|map|market|marketing|markets|marriott|marshalls|maserati|mattel|mba|mc|mckinsey|md|me|med|media|meet|melbourne|meme|memorial|men|menu|merckmsd|metlife|mg|mh|miami|microsoft|mil|mini|mint|mit|mitsubishi|mk|ml|mlb|mls|mm|mma|mn|mo|mobi|mobile|mobily|moda|moe|moi|mom|monash|money|monster|mopar|mormon|mortgage|moscow|moto|motorcycles|mov|movie|movistar|mp|mq|mr|ms|msd|mt|mtn|mtr|mu|museum|mutual|mv|mw|mx|my|mz|na|nab|nadex|nagoya|name|nationwide|natura|navy|nba|nc|ne|nec|net|netbank|netflix|network|neustar|new|newholland|news|next|nextdirect|nexus|nf|nfl|ng|ngo|nhk|ni|nico|nike|nikon|ninja|nissan|nissay|nl|no|nokia|northwesternmutual|norton|now|nowruz|nowtv|np|nr|nra|nrw|ntt|nu|nyc|nz|obi|observer|off|office|okinawa|olayan|olayangroup|oldnavy|ollo|om|omega|one|ong|onl|online|onyourside|ooo|open|oracle|orange|org|organic|origins|osaka|otsuka|ott|ovh|pa|page|panasonic|paris|pars|partners|parts|party|passagens|pay|pccw|pe|pet|pf|pfizer|pg|ph|pharmacy|phd|philips|phone|photo|photography|photos|physio|piaget|pics|pictet|pictures|pid|pin|ping|pink|pioneer|pizza|pk|pl|place|play|playstation|plumbing|plus|pm|pn|pnc|pohl|poker|politie|porn|post|pr|pramerica|praxi|press|prime|pro|prod|productions|prof|progressive|promo|properties|property|protection|pru|prudential|ps|pt|pub|pw|pwc|py|qa|qpon|quebec|quest|qvc|racing|radio|raid|re|read|realestate|realtor|realty|recipes|red|redstone|redumbrella|rehab|reise|reisen|reit|reliance|ren|rent|rentals|repair|report|republican|rest|restaurant|review|reviews|rexroth|rich|richardli|ricoh|rightathome|ril|rio|rip|rmit|ro|rocher|rocks|rodeo|rogers|room|rs|rsvp|ru|rugby|ruhr|run|rw|rwe|ryukyu|sa|saarland|safe|safety|sakura|sale|salon|samsclub|samsung|sandvik|sandvikcoromant|sanofi|sap|sarl|sas|save|saxo|sb|sbi|sbs|sc|sca|scb|schaeffler|schmidt|scholarships|school|schule|schwarz|science|scjohnson|scor|scot|sd|se|search|seat|secure|security|seek|select|sener|services|ses|seven|sew|sex|sexy|sfr|sg|sh|shangrila|sharp|shaw|shell|shia|shiksha|shoes|shop|shopping|shouji|show|showtime|shriram|si|silk|sina|singles|site|sj|sk|ski|skin|sky|skype|sl|sling|sm|smart|smile|sn|sncf|so|soccer|social|softbank|software|sohu|solar|solutions|song|sony|soy|space|spiegel|sport|spot|spreadbetting|sr|srl|srt|st|stada|staples|star|starhub|statebank|statefarm|stc|stcgroup|stockholm|storage|store|stream|studio|study|style|su|sucks|supplies|supply|support|surf|surgery|suzuki|sv|swatch|swiftcover|swiss|sx|sy|sydney|symantec|systems|sz|tab|taipei|talk|taobao|target|tatamotors|tatar|tattoo|tax|taxi|tc|tci|td|tdk|team|tech|technology|tel|telefonica|temasek|tennis|teva|tf|tg|th|thd|theater|theatre|tiaa|tickets|tienda|tiffany|tips|tires|tirol|tj|tjmaxx|tjx|tk|tkmaxx|tl|tm|tmall|tn|to|today|tokyo|tools|top|toray|toshiba|total|tours|town|toyota|toys|tr|trade|trading|training|travel|travelchannel|travelers|travelersinsurance|trust|trv|tt|tube|tui|tunes|tushu|tv|tvs|tw|tz|ua|ubank|ubs|uconnect|ug|uk|unicom|university|uno|uol|ups|us|uy|uz|va|vacations|vana|vanguard|vc|ve|vegas|ventures|verisign|versicherung|vet|vg|vi|viajes|video|vig|viking|villas|vin|vip|virgin|visa|vision|vistaprint|viva|vivo|vlaanderen|vn|vodka|volkswagen|volvo|vote|voting|voto|voyage|vu|vuelos|wales|walmart|walter|wang|wanggou|warman|watch|watches|weather|weatherchannel|webcam|weber|website|wed|wedding|weibo|weir|wf|whoswho|wien|wiki|williamhill|win|windows|wine|winners|wme|wolterskluwer|woodside|work|works|world|wow|ws|wtc|wtf|xbox|xerox|xfinity|xihuan|xin|xn|xxx|xyz|yachts|yahoo|yamaxun|yandex|ye|yodobashi|yoga|yokohama|you|youtube|yt|yun|za|zappos|zara|zero|zip|zippo|zm|zone|zuerich|zw)(\/.*|:[0-9]{1,5})\b|(http:\/\/www\.|https:\/\/www\.|http:\/\/|https:\/\/)?(?:(?:1\d\d|2[0-5][0-5]|2[0-4]\d|0?[1-9]\d|0?0?\d)\.){3}(?:1\d\d|2[0-5][0-5]|2[0-4]\d|0?[1-9]\d|0?0?\d)(\/|:)\S*"
        self.ipv4_regex = r"\b(?:(?:1\d\d|2[0-5][0-5]|2[0-4]\d|0?[1-9]\d|0?0?\d)\.){3}(?:1\d\d|2[0-5][0-5]|2[0-4]\d|0?[1-9]\d|0?0?\d)\b"
        # Performance fix: these patterns are applied to every token of every
        # paste line, and the URL pattern is very large — compile them once
        # here instead of re-matching the raw strings on each call.
        self._url_pattern = re.compile(self.url_regex)
        self._ipv4_pattern = re.compile(self.ipv4_regex)

    def url(self, object_to_filter):
        """Return True when *object_to_filter* starts with a URL (domain + path/port, or IP)."""
        return self._url_pattern.match(object_to_filter) is not None

    def ipv4(self, object_to_filter):
        """Return True when *object_to_filter* starts with a dotted-quad IPv4 address."""
        return self._ipv4_pattern.match(object_to_filter) is not None
class OutputHandler:
    """Writes collected IOC events to stdout; file output is still a stub."""

    # TODO - ADD LOGIC TO WRITE TO FILE
    def to_file(self, output_file, output_format, threat_name):
        """Resolve the output path for *output_file*; writing is not implemented yet.

        Always returns None (stub).
        """
        if output_file == "output":
            # Default name: add the extension and place the file next to this script.
            output_file = os.path.join(os.path.dirname(os.path.abspath(__file__)), f"{output_file}.{output_format}")
        # Bug fix: the original appended the format with no dot ("iocscsv");
        # also skip appending when the caller already supplied the extension.
        elif not output_file.endswith(f".{output_format}"):
            output_file = f"{output_file}.{output_format}"
        return None

    def to_stdout(self, object_to_output):
        """Write *object_to_output* verbatim to stdout (no trailing newline)."""
        sys.stdout.write(object_to_output)
        return None
if __name__ == "__main__":
    argument_parser = argparse.ArgumentParser(description="Twitter Threat Collection Bot")
    argument_parser.add_argument("-c", "--config_file", dest="config", default="config.json", help="Path to configuration file.")
    argument_parser.add_argument("-o", "--output_file", dest="output_file", default="output", help="Name of file to output results to.")
    argument_parser.add_argument("-f", "--output_format", dest="output_format", default="json", help="Format in which to write the output file.")
    argument_parser.add_argument("-d", "--debug", dest="debug", action="store_true", help="Enable debugging mode to print troubleshooting statements.")
    arguments = argument_parser.parse_args()
    # 'debug' is read as a module-level global by the collector/filter classes.
    debug = arguments.debug
    output_format = arguments.output_format
    output_file = arguments.output_file

    # Attempting to load the application configuration
    config = AppConfig.load_config(arguments.config)
    if debug is True:
        print(arguments.config)
        print(config)
        print(output_format)
        print(output_file)
    if config is None:
        sys.exit(1)

    # Getting Twitter search results for '#emotet filter:links pastebin'
    tweet_collector = TweetCollector(config)
    twitter_search_results = tweet_collector.search("%23emotet%20filter%3Alinks%20pastebin%20-filter%3Aretweets%20-filter%3Areplies")

    # Filtering Tweets for pastebin URLs
    tweet_filter = TweetFilter()
    pastebin_tweets = tweet_filter.contains_pastebin_urls(twitter_search_results)
    pastebin_tweet_info = tweet_filter.get_tweet_properties(pastebin_tweets)

    # Deduplicate tweets by serializing each to a hashable (sorted-key) JSON
    # string, set-reducing, then parsing back.
    unique_pastebin_tweets = [
        json.loads(tweet_string)
        for tweet_string in {json.dumps(tweet, sort_keys=True) for tweet in pastebin_tweet_info}
    ]

    # Scraping the returned pastebin sites for Emotet Indicators of Compromise (IOCs)
    threat_collector = ThreatCollector(config)
    threat_filter = ThreatFilter(config)
    # Hoisted out of the loop: no need to rebuild the (large) regex per tweet.
    regex = RegexMatch()
    emotet_iocs = []
    for tweet in unique_pastebin_tweets:
        # Rename "url" -> "source_url" so the per-IOC events carry the paste origin.
        tweet["source_url"] = tweet.pop("url", "")
        iocs = threat_collector.get_pastebin_iocs([tweet["source_url"]], True)
        for ioc in threat_filter.filter_whitelisted_iocs(iocs):
            new_ioc_event = tweet.copy()
            new_ioc_event["ioc"] = ioc
            if regex.ipv4(ioc):
                new_ioc_event["ioc_type"] = "ip"
            if regex.url(ioc):
                new_ioc_event["ioc_type"] = "url"
            emotet_iocs.append(new_ioc_event)

    # Output of indicators and relevant Twitter information to standard output for Splunk
    # Bug fix: the original used 'output_format is "json"', an identity
    # comparison that is not guaranteed to hold for CLI-supplied strings.
    if output_format == "json":
        output_handler = OutputHandler()
        for ioc in emotet_iocs:
            output_handler.to_stdout(json.dumps(ioc, sort_keys=True))
    if "csv" in output_format:
        if not os.path.isfile(output_file):
            output_file = os.path.join(os.path.dirname(os.path.abspath(__file__)), output_file)
        with open(output_file, "w") as csvfile:
            for row_number, ioc in enumerate(emotet_iocs):
                # Crude CSV escaping: strip the delimiters from the free text.
                ioc["full_text"] = ioc["full_text"].replace("\n", " ").replace(",", " ")
                if row_number == 0:
                    csvfile.write(",".join(ioc.keys()))
                csvfile.write("\n" + ",".join(str(value) for value in ioc.values()))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment