#!/usr/bin/env python3.6
'''
Requirements:
    sudo apt-get install python3
    sudo apt-get install python3-pip
    pip3 install python-twitter
    pip3 install beautifulsoup4

Compile the script into a binary:
    pyinstaller --onefile osint-collector.py

Execution examples:
    ./osint-collector
    ./osint-collector --debug
    ./osint-collector --output_format csv --output_file iocs.csv
    ./osint-collector --output_format json --output_file iocs.json

config.json example - store this in the same location as the osint-collector.py script,
or use the command line args to specify its location:
    {
        "twitter_access_token": "",
        "twitter_access_token_secret": "",
        "twitter_consumer_key": "",
        "twitter_consumer_secret": "",
        "whitelist": [
            "app.any.run",
            "hybrid-analysis",
            "pastebin"
        ]
    }
'''
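# Overall flow: search Twitter for tweets linking to pastebin, deduplicate them,
# scrape each paste for URL/IPv4 indicators, drop whitelisted entries, and emit
# the remaining IOCs (with their tweet context) as JSON to stdout or as a CSV file.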
import os
import re
import sys
import bs4
import json
import twitter
import requests
import argparse

# Module-level default so the global debug flag exists even when the argparse
# block below has not run (e.g. if this file is imported rather than executed).
debug = False

class AppConfig:
    def __init__(self):
        self.twitter_access_token = ""
        self.twitter_access_token_secret = ""
        self.twitter_consumer_key = ""
        self.twitter_consumer_secret = ""
        self.whitelist = []

    @staticmethod
    def load_config(config_file):
        app_config = AppConfig()
        config = None
        if config_file == "config.json":
            config_file = os.path.join(os.path.dirname(os.path.abspath(__file__)), "config.json")
        if os.path.isfile(config_file):
            config = app_config._read_from_disk(config_file)
        if config is None or not app_config._load_config_json(config):
            if debug is True:
                print(f"\r\n[!] An error occurred while attempting to load the configuration from: {config_file}")
            # Return None so the caller can detect the failure and exit.
            return None
        if debug is True:
            print(app_config)
        return app_config
    def _read_from_disk(self, filename):
        config_json = None
        if os.path.isfile(filename):
            with open(filename, "r") as input_config_file:
                config_json = json.load(input_config_file)
        else:
            if debug is True:
                print(f"\r\n[!] Config file not found: {filename}")
        return config_json

    def _load_config_json(self, config_json):
        required_keys = [
            "twitter_access_token",
            "twitter_access_token_secret",
            "twitter_consumer_key",
            "twitter_consumer_secret",
            "whitelist"
        ]
        for key_name in required_keys:
            if key_name not in config_json:
                if debug is True:
                    print(f"\r\n[!] Invalid config file, missing attribute: {key_name}")
                return False
        self.twitter_access_token = config_json["twitter_access_token"]
        self.twitter_access_token_secret = config_json["twitter_access_token_secret"]
        self.twitter_consumer_key = config_json["twitter_consumer_key"]
        self.twitter_consumer_secret = config_json["twitter_consumer_secret"]
        self.whitelist = config_json["whitelist"]
        return True

class TweetCollector:
    def __init__(self, config):
        self.twitter_api = twitter.Api(
            consumer_key=config.twitter_consumer_key,
            consumer_secret=config.twitter_consumer_secret,
            access_token_key=config.twitter_access_token,
            access_token_secret=config.twitter_access_token_secret,
            tweet_mode="extended"
        )
        if debug is True:
            print(self.twitter_api)
            print(config.twitter_consumer_key)
            print(config.twitter_consumer_secret)
            print(config.twitter_access_token)
            print(config.twitter_access_token_secret)
            print(self.twitter_api.VerifyCredentials())

    def search(self, search_string):
        search_results = []
        if search_string is None:
            return search_results
        # GetSearch expects a raw query beginning with "q="; prepend it if missing.
        if not search_string.startswith("q="):
            search_string = f"q={search_string}"
        try:
            results = self.twitter_api.GetSearch(raw_query=search_string)
        except requests.exceptions.SSLError as e:
            if debug is True:
                print(f"\r\n[!] An error occurred while making the Twitter GetSearch api call with search string: {search_string}")
                print(f"\r\n[!] Exception Information:\r\n{e}\r\n")
            sys.exit(1)
        for result in results:
            search_results.append(json.loads(result.AsJsonString()))
        return search_results

class TweetFilter:
    def __init__(self):
        self.regex = RegexMatch()

    def contains_urls(self, search_results):
        tweets_with_urls = []
        for result in search_results:
            for url in result["urls"]:
                if url["expanded_url"] is not None and self.regex.url(url["expanded_url"]):
                    tweets_with_urls.append(result)
        return tweets_with_urls

    def contains_pastebin_urls(self, search_results):
        tweets_with_pastebin_urls = []
        for result in search_results:
            for url in result["urls"]:
                if url["expanded_url"] is not None and "pastebin.com/" in url["expanded_url"] and self.regex.url(url["expanded_url"]):
                    tweets_with_pastebin_urls.append(result)
        return tweets_with_pastebin_urls

    def get_original_tweet(self, search_results):
        original_tweets = []
        for result in search_results:
            if "retweeted_status" not in result:
                original_tweets.append(result)
            else:
                # Retweets carry the original tweet in "retweeted_status".
                original_tweets.append(result["retweeted_status"])
        return original_tweets
    def get_tweet_properties(self, search_results):
        filtered_tweets = []
        for result in search_results:
            new_tweet = {
                "created_at": result["created_at"],
                "full_text": result["full_text"],
                "ioc": "",
                "url": result["urls"][0]["expanded_url"],
                "screen_name": result["user"]["screen_name"]
            }
            filtered_tweets.append(new_tweet)
        return filtered_tweets

class ThreatCollector:
    def __init__(self, config):
        self.regex = RegexMatch()
        # Assumption: the Pulsedive key comes from the config; the original code
        # referenced self.pulsedive_api_key without ever setting it.
        self.pulsedive_api_key = getattr(config, "pulsedive_api_key", "")

    # TODO Add support for file extensions and hashes
    def get_pastebin_iocs(self, pastebin_urls, recursive_search=False):
        ioc_list = []
        for url in pastebin_urls:
            if url is not None and "pastebin.com/" in url and self.regex.url(url):
                # Normalize raw links to the regular paste page, which has the textarea.
                if "pastebin.com/raw" in url:
                    url = url.replace("/raw", "")
                try:
                    parsed_lines = bs4.BeautifulSoup(requests.get(url).text, "html.parser").find("textarea").string.splitlines()
                except requests.exceptions.SSLError as e:
                    if debug is True:
                        print(f"\r\n[!] An error occurred while requesting the URL: {url}")
                        print(f"\r\n[!] Exception Information:\r\n{e}\r\n")
                    break
                except AttributeError as e:
                    if debug is True:
                        print(f"\r\n[!] Exception Information:\r\n{e}\r\n")
                    break
                for line in parsed_lines:
                    for split in line.split():
                        if recursive_search is True and "pastebin.com/" in split.lower() and self.regex.url(split) and url != split:
                            # get_pastebin_iocs expects a list of URLs, so wrap the single URL.
                            ioc_list.extend(self.get_pastebin_iocs([split]))
                        elif recursive_search is True and "pastebin.com/" in split.lower() and url != split:
                            for x in split.split(" "):
                                if self.regex.url(x):
                                    ioc_list.extend(self.get_pastebin_iocs([x]))
                        elif (self.regex.ipv4(split) or self.regex.url(split)) and url != split:
                            ioc_list.append(split)
        return ioc_list
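
    # Pulsedive flow (as implemented below): querying by threat name returns a
    # "tid"; a follow-up query with get=links&tid=<id> returns a "results" list
    # of indicator dicts whose "type" and "risk" fields ThreatFilter filters on.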
    def get_pulsedive_iocs(self, threat):
        iocs = []
        if threat is not None:
            url = self._prepare_pulsedive_url(threat)
            if url is not None:
                response = requests.get(url).text
                response_json = json.loads(response)
                iocs = self._handle_pulsedive_response(response_json)
        return iocs

    def _prepare_pulsedive_url(self, threat):
        prepared_url = None
        query = None
        if isinstance(threat, int):
            query = f"&get=links&tid={threat}"
        elif isinstance(threat, str):
            query = f"&tname={threat}"
        if query is not None:
            prepared_url = f"https://pulsedive.com/api/info.php?{query}&key={self.pulsedive_api_key}"
        return prepared_url

    def _handle_pulsedive_response(self, pulsedive_response):
        handled_response = None
        if pulsedive_response and "tid" in pulsedive_response.keys():
            tid = pulsedive_response["tid"]
            handled_response = self.get_pulsedive_iocs(tid)
        elif pulsedive_response and "results" in pulsedive_response.keys() and len(pulsedive_response["results"]) != 0:
            handled_response = pulsedive_response["results"]
        return handled_response

class ThreatFilter:
    def __init__(self, config):
        if len(config.whitelist) == 0:
            self.whitelist = [
                "app.any.run",
                "hybrid-analysis",
                "pastebin"
            ]
        else:
            self.whitelist = config.whitelist

    # TODO - add functions to manage whitelist
    def filter_whitelisted_iocs(self, ioc_list):
        filtered_iocs = []
        for indicator in ioc_list:
            whitelisted_ioc = False
            for known_good in self.whitelist:
                if known_good in indicator:
                    whitelisted_ioc = True
                    break
            if whitelisted_ioc is False:
                filtered_iocs.append(indicator)
        filtered_iocs = set(filtered_iocs)
        return filtered_iocs
    def filter_pulsedive_iocs_by_type(self, pulsedive_indicators, filter_types):
        filtered_iocs = []
        for indicator in pulsedive_indicators:
            for filter_type in filter_types:
                if "type" in indicator.keys() and indicator["type"] == filter_type:
                    filtered_iocs.append(indicator)
        return filtered_iocs

    def filter_pulsedive_iocs_by_risk(self, pulsedive_indicators, filter_risks):
        filtered_iocs = []
        for indicator in pulsedive_indicators:
            for filter_risk in filter_risks:
                if "risk" in indicator.keys() and indicator["risk"] == filter_risk:
                    filtered_iocs.append(indicator)
        return filtered_iocs

class RegexMatch:
    # TODO Add support for hashes and files with extensions
    def __init__(self):
        # This regex will not match URLs with no URI content, such as www.google.com. This is by design,
        # since many security researchers include the base domain in their posts, and matching it would
        # generate false positives such as drive.google.com alongside drive.google.com/evilcontent in pastebin data.
        # To make the regex match URLs with no URI content, replace (\/.*|:[0-9]{1,5}) with (\/.*|:[0-9]{1,5})?
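        # Illustrative examples (hypothetical values, not from the original):
        # "http://badsite.com/payload.exe" and "198.51.100.7:8080/gate.php" match;
        # a bare "www.google.com" intentionally does not.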
        self.url_regex = r"\b(http:\/\/www\.|https:\/\/www\.|http:\/\/|https:\/\/)?[a-z0-9]+([\-\.]{1}[a-z0-9]+)*\.(aaa|aarp|abarth|abb|abbott|abbvie|abc|able|abogado|abudhabi|ac|academy|accenture|accountant|accountants|aco|active|actor|ad|adac|ads|adult|ae|aeg|aero|aetna|af|afamilycompany|afl|africa|ag|agakhan|agency|ai|aig|aigo|airbus|airforce|airtel|akdn|al|alfaromeo|alibaba|alipay|allfinanz|allstate|ally|alsace|alstom|am|americanexpress|americanfamily|amex|amfam|amica|amsterdam|analytics|android|anquan|anz|ao|aol|apartments|app|apple|aq|aquarelle|ar|arab|aramco|archi|army|arpa|art|arte|as|asda|asia|associates|at|athleta|attorney|au|auction|audi|audible|audio|auspost|author|auto|autos|avianca|aw|aws|ax|axa|az|azure|ba|baby|baidu|banamex|bananarepublic|band|bank|bar|barcelona|barclaycard|barclays|barefoot|bargains|baseball|basketball|bauhaus|bayern|bb|bbc|bbt|bbva|bcg|bcn|bd|be|beats|beauty|beer|bentley|berlin|best|bestbuy|bet|bf|bg|bh|bharti|bi|bible|bid|bike|bing|bingo|bio|biz|bj|black|blackfriday|blanco|blockbuster|blog|bloomberg|blue|bm|bms|bmw|bn|bnl|bnpparibas|bo|boats|boehringer|bofa|bom|bond|boo|book|booking|bosch|bostik|boston|bot|boutique|box|br|bradesco|bridgestone|broadway|broker|brother|brussels|bs|bt|budapest|bugatti|build|builders|business|buy|buzz|bv|bw|by|bz|bzh|ca|cab|cafe|cal|call|calvinklein|cam|camera|camp|cancerresearch|canon|capetown|capital|capitalone|car|caravan|cards|care|career|careers|cars|cartier|casa|case|caseih|cash|casino|cat|catering|catholic|cba|cbn|cbre|cbs|cc|cd|ceb|center|ceo|cern|cf|cfa|cfd|cg|ch|chanel|channel|charity|chase|chat|cheap|chintai|christmas|chrome|chrysler|church|ci|cipriani|circle|cisco|citadel|citi|citic|city|cityeats|ck|cl|claims|cleaning|click|clinic|clinique|clothing|cloud|club|clubmed|cm|cn|co|coach|codes|coffee|college|cologne|com|comcast|commbank|community|company|compare|computer|comsec|condos|construction|consulting|contact|contractors|cooking|cookingchannel|cool|coop|corsica|country|coupon|coupons|courses|cr|credit|creditcard|creditunion|cricket|crown|crs|cruise|cruises|csc|cu|cuisinella|cv|cw|cx|cy|cymru|cyou|cz|dabur|dad|dance|data|date|dating|datsun|day|dclk|dds|de|deal|dealer|deals|degree|delivery|dell|deloitte|delta|democrat|dental|dentist|desi|design|dev|dhl|diamonds|diet|digital|direct|directory|discount|discover|dish|diy|dj|dk|dm|dnp|do|docs|doctor|dodge|dog|doha|domains|dot|download|drive|dtv|dubai|duck|dunlop|duns|dupont|durban|dvag|dvr|dz|earth|eat|ec|eco|edeka|edu|education|ee|eg|email|emerck|energy|engineer|engineering|enterprises|epost|epson|equipment|er|ericsson|erni|es|esq|estate|esurance|et|etisalat|eu|eurovision|eus|events|everbank|exchange|expert|exposed|express|extraspace|fage|fail|fairwinds|faith|family|fan|fans|farm|farmers|fashion|fast|fedex|feedback|ferrari|ferrero|fi|fiat|fidelity|fido|film|final|finance|financial|fire|firestone|firmdale|fish|fishing|fit|fitness|fj|fk|flickr|flights|flir|florist|flowers|fly|fm|fo|foo|food|foodnetwork|football|ford|forex|forsale|forum|foundation|fox|fr|free|fresenius|frl|frogans|frontdoor|frontier|ftr|fujitsu|fujixerox|fun|fund|furniture|futbol|fyi|ga|gal|gallery|gallo|gallup|game|games|gap|garden|gb|gbiz|gd|gdn|ge|gea|gent|genting|george|gf|gg|ggee|gh|gi|gift|gifts|gives|giving|gl|glade|glass|gle|global|globo|gm|gmail|gmbh|gmo|gmx|gn|godaddy|gold|goldpoint|golf|goo|goodyear|goog|google|gop|got|gov|gp|gq|gr|grainger|graphics|gratis|green|gripe|grocery|group|gs|gt|gu|guardian|gucci|guge|guide|guitars|guru|gw|gy|hair|hamburg|hangout|haus|hbo|hdfc|hdfcbank|health|healthcare|help|helsinki|here|hermes|hgtv|hiphop|hisamitsu|hitachi|hiv|hk|hkt|hm|hn|hockey|holdings|holiday|homedepot|homegoods|homes|homesense|honda|honeywell|horse|hospital|host|hosting|hot|hoteles|hotels|hotmail|house|how|hr|hsbc|ht|hu|hughes|hyatt|hyundai|ibm|icbc|ice|icu|id|ie|ieee|ifm|ikano|il|im|imamat|imdb|immo|immobilien|in|inc|industries|infiniti|info|ing|ink|institute|insurance|insure|int|intel|international|intuit|investments|io|ipiranga|iq|ir|irish|is|iselect|ismaili|ist|istanbul|it|itau|itv|iveco|jaguar|java|jcb|jcp|je|jeep|jetzt|jewelry|jio|jll|jm|jmp|jnj|jo|jobs|joburg|jot|joy|jp|jpmorgan|jprs|juegos|juniper|kaufen|kddi|ke|kerryhotels|kerrylogistics|kerryproperties|kfh|kg|kh|ki|kia|kim|kinder|kindle|kitchen|kiwi|km|kn|koeln|komatsu|kosher|kp|kpmg|kpn|kr|krd|kred|kuokgroup|kw|ky|kyoto|kz|la|lacaixa|ladbrokes|lamborghini|lamer|lancaster|lancia|lancome|land|landrover|lanxess|lasalle|lat|latino|latrobe|law|lawyer|lb|lc|lds|lease|leclerc|lefrak|legal|lego|lexus|lgbt|li|liaison|lidl|life|lifeinsurance|lifestyle|lighting|like|lilly|limited|limo|lincoln|linde|link|lipsy|live|living|lixil|lk|llc|loan|loans|locker|locus|loft|lol|london|lotte|lotto|love|lpl|lplfinancial|lr|ls|lt|ltd|ltda|lu|lundbeck|lupin|luxe|luxury|lv|ly|ma|macys|madrid|maif|maison|makeup|man|management|mango|map|market|marketing|markets|marriott|marshalls|maserati|mattel|mba|mc|mckinsey|md|me|med|media|meet|melbourne|meme|memorial|men|menu|merckmsd|metlife|mg|mh|miami|microsoft|mil|mini|mint|mit|mitsubishi|mk|ml|mlb|mls|mm|mma|mn|mo|mobi|mobile|mobily|moda|moe|moi|mom|monash|money|monster|mopar|mormon|mortgage|moscow|moto|motorcycles|mov|movie|movistar|mp|mq|mr|ms|msd|mt|mtn|mtr|mu|museum|mutual|mv|mw|mx|my|mz|na|nab|nadex|nagoya|name|nationwide|natura|navy|nba|nc|ne|nec|net|netbank|netflix|network|neustar|new|newholland|news|next|nextdirect|nexus|nf|nfl|ng|ngo|nhk|ni|nico|nike|nikon|ninja|nissan|nissay|nl|no|nokia|northwesternmutual|norton|now|nowruz|nowtv|np|nr|nra|nrw|ntt|nu|nyc|nz|obi|observer|off|office|okinawa|olayan|olayangroup|oldnavy|ollo|om|omega|one|ong|onl|online|onyourside|ooo|open|oracle|orange|org|organic|origins|osaka|otsuka|ott|ovh|pa|page|panasonic|paris|pars|partners|parts|party|passagens|pay|pccw|pe|pet|pf|pfizer|pg|ph|pharmacy|phd|philips|phone|photo|photography|photos|physio|piaget|pics|pictet|pictures|pid|pin|ping|pink|pioneer|pizza|pk|pl|place|play|playstation|plumbing|plus|pm|pn|pnc|pohl|poker|politie|porn|post|pr|pramerica|praxi|press|prime|pro|prod|productions|prof|progressive|promo|properties|property|protection|pru|prudential|ps|pt|pub|pw|pwc|py|qa|qpon|quebec|quest|qvc|racing|radio|raid|re|read|realestate|realtor|realty|recipes|red|redstone|redumbrella|rehab|reise|reisen|reit|reliance|ren|rent|rentals|repair|report|republican|rest|restaurant|review|reviews|rexroth|rich|richardli|ricoh|rightathome|ril|rio|rip|rmit|ro|rocher|rocks|rodeo|rogers|room|rs|rsvp|ru|rugby|ruhr|run|rw|rwe|ryukyu|sa|saarland|safe|safety|sakura|sale|salon|samsclub|samsung|sandvik|sandvikcoromant|sanofi|sap|sarl|sas|save|saxo|sb|sbi|sbs|sc|sca|scb|schaeffler|schmidt|scholarships|school|schule|schwarz|science|scjohnson|scor|scot|sd|se|search|seat|secure|security|seek|select|sener|services|ses|seven|sew|sex|sexy|sfr|sg|sh|shangrila|sharp|shaw|shell|shia|shiksha|shoes|shop|shopping|shouji|show|showtime|shriram|si|silk|sina|singles|site|sj|sk|ski|skin|sky|skype|sl|sling|sm|smart|smile|sn|sncf|so|soccer|social|softbank|software|sohu|solar|solutions|song|sony|soy|space|spiegel|sport|spot|spreadbetting|sr|srl|srt|st|stada|staples|star|starhub|statebank|statefarm|stc|stcgroup|stockholm|storage|store|stream|studio|study|style|su|sucks|supplies|supply|support|surf|surgery|suzuki|sv|swatch|swiftcover|swiss|sx|sy|sydney|symantec|systems|sz|tab|taipei|talk|taobao|target|tatamotors|tatar|tattoo|tax|taxi|tc|tci|td|tdk|team|tech|technology|tel|telefonica|temasek|tennis|teva|tf|tg|th|thd|theater|theatre|tiaa|tickets|tienda|tiffany|tips|tires|tirol|tj|tjmaxx|tjx|tk|tkmaxx|tl|tm|tmall|tn|to|today|tokyo|tools|top|toray|toshiba|total|tours|town|toyota|toys|tr|trade|trading|training|travel|travelchannel|travelers|travelersinsurance|trust|trv|tt|tube|tui|tunes|tushu|tv|tvs|tw|tz|ua|ubank|ubs|uconnect|ug|uk|unicom|university|uno|uol|ups|us|uy|uz|va|vacations|vana|vanguard|vc|ve|vegas|ventures|verisign|versicherung|vet|vg|vi|viajes|video|vig|viking|villas|vin|vip|virgin|visa|vision|vistaprint|viva|vivo|vlaanderen|vn|vodka|volkswagen|volvo|vote|voting|voto|voyage|vu|vuelos|wales|walmart|walter|wang|wanggou|warman|watch|watches|weather|weatherchannel|webcam|weber|website|wed|wedding|weibo|weir|wf|whoswho|wien|wiki|williamhill|win|windows|wine|winners|wme|wolterskluwer|woodside|work|works|world|wow|ws|wtc|wtf|xbox|xerox|xfinity|xihuan|xin|xn|xxx|xyz|yachts|yahoo|yamaxun|yandex|ye|yodobashi|yoga|yokohama|you|youtube|yt|yun|za|zappos|zara|zero|zip|zippo|zm|zone|zuerich|zw)(\/.*|:[0-9]{1,5})\b|(http:\/\/www\.|https:\/\/www\.|http:\/\/|https:\/\/)?(?:(?:1\d\d|2[0-5][0-5]|2[0-4]\d|0?[1-9]\d|0?0?\d)\.){3}(?:1\d\d|2[0-5][0-5]|2[0-4]\d|0?[1-9]\d|0?0?\d)(\/|:)\S*"
        self.ipv4_regex = r"\b(?:(?:1\d\d|2[0-5][0-5]|2[0-4]\d|0?[1-9]\d|0?0?\d)\.){3}(?:1\d\d|2[0-5][0-5]|2[0-4]\d|0?[1-9]\d|0?0?\d)\b"

    def url(self, object_to_filter):
        return bool(re.match(self.url_regex, object_to_filter))

    def ipv4(self, object_to_filter):
        return bool(re.match(self.ipv4_regex, object_to_filter))

class OutputHandler:
    # TODO - ADD LOGIC TO WRITE TO FILE
    def to_file(self, output_file, output_format, threat_name):
        if output_file == "output":
            output_file = f"{output_file}.{output_format}"
            output_file = os.path.join(os.path.dirname(os.path.abspath(__file__)), output_file)
        else:
            output_file = f"{output_file}.{output_format}"
        return None

    def to_stdout(self, object_to_output):
        # Newline-delimit events so each JSON object lands on its own line.
        sys.stdout.write(object_to_output + "\n")
        return None
if __name__ == "__main__":
argument_parser = argparse.ArgumentParser(description="Twitter Threat Collection Bot")
argument_parser.add_argument("-c", "--config_file", dest="config", default="config.json", help="Path to configuration file.")
argument_parser.add_argument("-o", "--output_file", dest="output_file", default="output", help="Name of file to output results to.")
argument_parser.add_argument("-f", "--output_format", dest="output_format", default="json", help="Format in which to write the output file.")
argument_parser.add_argument("-d", "--debug", dest="debug", action="store_true", help="Enable debugging mode to print troubleshooting statements.")
arguments = argument_parser.parse_args()
debug = arguments.debug
output_format = arguments.output_format
output_file = arguments.output_file
# Attempting to load the application configuration
config = AppConfig.load_config(arguments.config)
if debug is True:
print(arguments.config)
print(config)
print(output_format)
print(output_file)
if config is None:
sys.exit(1)
    # Getting Twitter search results for '#emotet filter:links pastebin'
    tweet_collector = TweetCollector(config)
    twitter_search_results = tweet_collector.search("%23emotet%20filter%3Alinks%20pastebin%20-filter%3Aretweets%20-filter%3Areplies")

    # Filtering Tweets for pastebin URLs
    tweet_filter = TweetFilter()
    pastebin_tweets = tweet_filter.contains_pastebin_urls(twitter_search_results)
    pastebin_tweet_info = tweet_filter.get_tweet_properties(pastebin_tweets)

    # Getting unique tweet objects by converting them to the hashable data type of string
    pastebin_tweet_strings = []
    for tweet in pastebin_tweet_info:
        pastebin_tweet_strings.append(json.dumps(tweet, sort_keys=True))
    unique_pastebin_tweet_strings = set(pastebin_tweet_strings)

    # Converting the unique strings back to JSON
    unique_pastebin_tweets = []
    for tweet in unique_pastebin_tweet_strings:
        unique_pastebin_tweets.append(json.loads(tweet))
    # Scraping the returned pastebin sites for Emotet Indicators of Compromise (IOCs)
    threat_collector = ThreatCollector(config)
    threat_filter = ThreatFilter(config)
    regex = RegexMatch()
    emotet_iocs = []
    for tweet in unique_pastebin_tweets:
        tweet.update({"source_url": tweet["url"]})
        tweet_url = [tweet["source_url"]]
        if "url" in tweet.keys():
            del tweet["url"]
        iocs = threat_collector.get_pastebin_iocs(tweet_url, True)
        filtered_iocs = threat_filter.filter_whitelisted_iocs(iocs)
        for ioc in filtered_iocs:
            new_ioc_event = tweet.copy()
            new_ioc_event["ioc"] = ioc
            if regex.ipv4(ioc):
                new_ioc_event.update({"ioc_type": "ip"})
            if regex.url(ioc):
                new_ioc_event.update({"ioc_type": "url"})
            emotet_iocs.append(new_ioc_event)
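
    # Each emitted event pairs the tweet context with one indicator, e.g. (values
    # are illustrative, not real output): {"created_at": "...", "full_text": "...",
    # "ioc": "198.51.100.7/gate.php", "ioc_type": "url", "screen_name": "...",
    # "source_url": "https://pastebin.com/..."}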
    # Output of indicators and relevant Twitter information to standard output for Splunk
    if output_format == "json":
        output_handler = OutputHandler()
        for ioc in emotet_iocs:
            output_handler.to_stdout(json.dumps(ioc, sort_keys=True))
    if "csv" in output_format:
        if not os.path.isfile(output_file):
            output_file = os.path.join(os.path.dirname(os.path.abspath(__file__)), output_file)
        with open(output_file, 'w') as csvfile:
            count = 0
            for ioc in emotet_iocs:
                ioc["full_text"] = ioc["full_text"].replace("\n", " ").replace(",", " ")
                if count == 0:
                    csvfile.write(",".join(ioc.keys()))
                    count += 1
                csvfile.write("\n" + ",".join(ioc.values()))