Last active
March 21, 2020 19:26
-
-
Save taco-shellcode/3e2ae51d98a55016e796a7614f50376e to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3.6 | |
''' | |
Requirements: | |
sudo apt-get install python3 | |
sudo apt-get install pip3 | |
pip3 install python-twitter | |
pip3 install beautifulsoup4 | |
Compile script into binary: | |
pyinstaller --onefile osint-collector.py | |
Execution Examples: | |
./osint-collector | |
./osint-collector --debug | |
./osint-collector --output_format csv --output_file iocs.csv | |
./osint-collector --output_format json --output_file iocs.json | |
config.json example - store this in the same location as the osint.py script or use the command line args to specify its location | |
{ | |
"twitter_access_token": "", | |
"twitter_access_token_secret": "", | |
"twitter_consumer_key": "", | |
"twitter_consumer_secret": "", | |
"whitelist": [ | |
"app.any.run", | |
"hybrid-analysis", | |
"pastebin" | |
] | |
} | |
''' | |
import os | |
import re | |
import sys | |
import bs4 | |
import json | |
import twitter | |
import requests | |
import argparse | |
class AppConfig:
    """Holds the Twitter API credentials and the IOC whitelist loaded from config.json."""

    def __init__(self):
        # Default to empty values so attribute access is always safe even
        # when loading fails and the caller gets an unpopulated instance.
        self.twitter_access_token = ""
        self.twitter_access_token_secret = ""
        self.twitter_consumer_key = ""
        self.twitter_consumer_secret = ""
        self.whitelist = ""

    @staticmethod
    def _debug_enabled():
        # 'debug' is set as a module-level global by the CLI entry point.
        # Look it up defensively so the class also works when this module is
        # imported (the original raised NameError in that case).
        return bool(globals().get("debug", False))

    @staticmethod
    def load_config(config_file):
        """Load configuration from *config_file* and return a populated AppConfig.

        If the file is missing or invalid the returned instance keeps its
        empty defaults (the original contract: an AppConfig is always returned).
        """
        app_config = AppConfig()
        if config_file == "config.json":
            # Bare default name: resolve it next to this script.
            config_file = os.path.join(os.path.dirname(os.path.abspath(__file__)), "config.json")
        config = None
        if os.path.isfile(config_file):
            config = app_config._read_from_disk(config_file)
        # Bug fix: the original passed None straight into _load_config_json
        # when the file was missing, crashing with a TypeError.
        if config is None or not app_config._load_config_json(config):
            if AppConfig._debug_enabled():
                print(f"\r\n[!] An error occurred while attempting to load the configuration from: {config_file}")
        if AppConfig._debug_enabled():
            print(app_config)
        return app_config

    def _read_from_disk(self, filename):
        """Read *filename* and return its parsed JSON, or None when it does not exist."""
        config_json = None
        if os.path.isfile(filename):
            with open(filename, "r") as input_config_file:
                config_json = json.loads(input_config_file.read())
        else:
            if AppConfig._debug_enabled():
                # Bug fix: the original printed the literal "(unknown)"
                # instead of the missing path.
                print(f"\r\n[!] Config file not found: {filename}")
        return config_json

    def _load_config_json(self, config_json):
        """Copy the required keys out of *config_json*; return True on success."""
        required_keys = [
            "twitter_access_token",
            "twitter_access_token_secret",
            "twitter_consumer_key",
            "twitter_consumer_secret",
            "whitelist",
        ]
        for key_name in required_keys:
            if key_name not in config_json:
                if AppConfig._debug_enabled():
                    print(f"\r\n[!] Invalid config file, missing attribute: {key_name}")
                return False
        for key_name in required_keys:
            setattr(self, key_name, config_json[key_name])
        return True
class TweetCollector:
    """Thin wrapper around the python-twitter search API."""

    def __init__(self, config):
        # tweet_mode="extended" makes the API return untruncated tweet
        # bodies in the "full_text" field (relied on downstream).
        self.twitter_api = twitter.Api(
            config.twitter_consumer_key,
            config.twitter_consumer_secret,
            config.twitter_access_token,
            config.twitter_access_token_secret,
            tweet_mode="extended"
        )
        if debug is True:
            print(self.twitter_api)
            print(config.twitter_consumer_key)
            print(config.twitter_consumer_secret)
            print(config.twitter_access_token)
            print(config.twitter_access_token_secret)
            print(self.twitter_api.VerifyCredentials())

    def search(self, search_string):
        """Run a Twitter search and return the results as a list of dicts.

        *search_string* may already be a raw query ("q=..."), or a bare query
        string that gets the "q=" prefix added. Returns [] when
        *search_string* is None (the original raised UnboundLocalError).
        """
        search_results = []
        # Bug fix: 'results' was unbound when search_string was None.
        results = []
        raw_query = None
        if search_string is not None:
            raw_query = search_string if search_string.startswith("q=") else f"q={search_string}"
        if raw_query is not None:
            try:
                results = self.twitter_api.GetSearch(raw_query=raw_query)
            except requests.exceptions.SSLError as e:
                # Fatal: without search results there is nothing to collect.
                if debug is True:
                    print(f"\r\n[!] An error occurred while making the Twitter GetSearch api call with search string: {search_string}")
                    print(f"\r\n[!] Exception Information:\r\n{e}\r\n")
                sys.exit(1)
        for result in results:
            search_results.append(json.loads(result.AsJsonString()))
        return search_results
class TweetFilter:
    """Filters raw Twitter search results down to tweets of interest."""

    def __init__(self):
        self.regex = RegexMatch()

    def contains_urls(self, search_results):
        """Return tweets containing at least one well-formed expanded URL."""
        tweets_with_urls = []
        for result in search_results:
            for url in result["urls"]:
                if url["expanded_url"] is not None and self.regex.url(url["expanded_url"]):
                    tweets_with_urls.append(result)
                    # Bug fix: stop after the first hit so a tweet with
                    # several URLs is not appended multiple times.
                    break
        return tweets_with_urls

    def contains_pastebin_urls(self, search_results):
        """Return tweets containing at least one valid pastebin.com URL."""
        tweets_with_pastebin_urls = []
        for result in search_results:
            for url in result["urls"]:
                expanded = url["expanded_url"]
                if expanded is not None and "pastebin.com/" in expanded and self.regex.url(expanded):
                    tweets_with_pastebin_urls.append(result)
                    # Bug fix: avoid duplicate entries for multi-URL tweets.
                    break
        return tweets_with_pastebin_urls

    def get_original_tweet(self, search_results):
        """Collapse retweets to the original tweet they embed."""
        original_tweets = []
        for result in search_results:
            if "retweeted_status" not in result:
                original_tweets.append(result)
            else:
                # Bug fix: the original condition ('"retweet_count" and ...')
                # evaluated the string literal as always-truthy; the intent is
                # simply "this is a retweet, keep the embedded original".
                original_tweets.append(result["retweeted_status"])
        return original_tweets

    def get_tweet_properties(self, search_results):
        """Project each tweet onto the small dict used for IOC reporting.

        The "ioc" field is intentionally left empty here; the collection loop
        fills it in later, one event per extracted indicator.
        """
        filtered_tweets = []
        for result in search_results:
            filtered_tweets.append({
                "created_at": result["created_at"],
                "full_text": result["full_text"],
                "ioc": "",
                "url": result["urls"][0]["expanded_url"],
                "screen_name": result["user"]["screen_name"],
            })
        return filtered_tweets
class ThreatCollector:
    """Scrapes IOC values (IPs/URLs) from pastebin pastes and the Pulsedive API."""

    # TODO Add support for file extensions and hashes
    def __init__(self, config):
        self.regex = RegexMatch()
        # Bug fix: _prepare_pulsedive_url reads self.pulsedive_api_key but the
        # original never set it, so every Pulsedive call raised AttributeError.
        # NOTE(review): config.json has no such key yet — TODO add it there.
        self.pulsedive_api_key = getattr(config, "pulsedive_api_key", "")

    def get_pastebin_iocs(self, pastebin_urls, recursive_search=False):
        """Fetch each pastebin URL in *pastebin_urls* and extract IOC tokens.

        When *recursive_search* is True, pastebin links found inside a paste
        are followed one level as well.
        """
        ioc_list = []
        for url in pastebin_urls:
            if url is None or "pastebin.com/" not in url or not self.regex.url(url):
                continue
            # The scraper reads the <textarea> of the HTML page, so normalize
            # raw links back to the page form.
            if "pastebin.com/raw" in url:
                url = url.replace("/raw", "")
            try:
                parsed_lines = bs4.BeautifulSoup(requests.get(url).text, "html.parser").find("textarea").string.splitlines()
            except requests.exceptions.SSLError as e:
                if debug is True:
                    print(f"\r\n[!] An error occurred while requesting the URL: {url}")
                    print(f"\r\n[!] Exception Information:\r\n{e}\r\n")
                break
            except AttributeError as e:
                # Page had no <textarea> (removed paste, captcha page, ...).
                if debug is True:
                    print(f"\r\n[!] Exception Information:\r\n{e}\r\n")
                break
            for line in parsed_lines:
                for token in line.split():
                    if recursive_search is True and "pastebin.com/" in token.lower() and self.regex.url(token) and url != token:
                        # Bug fix: the recursive call expects a list of URLs;
                        # the original passed the bare string, which iterated
                        # per-character and never matched anything. (A second,
                        # effectively-dead branch that could only ever extend
                        # an empty list was removed.)
                        ioc_list.extend(self.get_pastebin_iocs([token]))
                    elif self.regex.ipv4(token) or self.regex.url(token) and url != token:
                        ioc_list.append(token)
        return ioc_list

    def get_pulsedive_iocs(self, threat):
        """Query Pulsedive for *threat* (name or numeric tid) and return its indicators."""
        iocs = []
        if threat is not None:
            url = self._prepare_pulsedive_url(threat)
            if url is not None:
                response_json = json.loads(requests.get(url).text)
                iocs = self._handle_pulsedive_response(response_json)
        return iocs

    def _prepare_pulsedive_url(self, threat):
        """Build the Pulsedive info.php URL for a threat id (int) or name (str)."""
        query = None
        if isinstance(threat, int):
            query = f"&get=links&tid={threat}"
        elif isinstance(threat, str):
            query = f"&tname={threat}"
        if query is None:
            return None
        return f"https://pulsedive.com/api/info.php?{query}&key={self.pulsedive_api_key}"

    def _handle_pulsedive_response(self, pulsedive_response):
        """Resolve a Pulsedive response: follow a tid lookup, or return its results."""
        handled_response = None
        if pulsedive_response and "tid" in pulsedive_response:
            handled_response = self.get_pulsedive_iocs(pulsedive_response["tid"])
        # Bug fix: the original used "is not 0", an identity comparison that
        # is a SyntaxWarning on modern Python.
        elif pulsedive_response and "results" in pulsedive_response and len(pulsedive_response["results"]) != 0:
            handled_response = pulsedive_response["results"]
        return handled_response
class ThreatFilter:
    """Filters IOC lists against a whitelist and by Pulsedive metadata."""

    # Fallback used when the config supplies an empty whitelist.
    DEFAULT_WHITELIST = ["app.any.run", "hybrid_analysis", "pastebin"]

    # TODO - add functions to manage whitelist
    def __init__(self, config):
        # Bug fix: the original used "len(...) is 0", an identity comparison
        # that is a SyntaxWarning on modern Python.
        if len(config.whitelist) == 0:
            self.whitelist = list(self.DEFAULT_WHITELIST)
        else:
            self.whitelist = config.whitelist

    def filter_whitelisted_iocs(self, ioc_list):
        """Return the unique IOCs that contain no whitelisted substring.

        Note: returns a set (deduplicated), matching the original behavior.
        """
        filtered_iocs = set()
        for indicator in ioc_list:
            if not any(known_good in indicator for known_good in self.whitelist):
                filtered_iocs.add(indicator)
        return filtered_iocs

    def filter_pulsedive_iocs_by_type(self, pulsedive_indicators, filter_types):
        """Keep only indicators whose "type" is one of *filter_types*."""
        return [
            indicator
            for indicator in pulsedive_indicators
            for filter_type in filter_types
            if "type" in indicator and indicator["type"] == filter_type
        ]

    def filter_pulsedive_iocs_by_risk(self, pulsedive_indicators, filter_risks):
        """Keep only indicators whose "risk" is one of *filter_risks*."""
        return [
            indicator
            for indicator in pulsedive_indicators
            for filter_risk in filter_risks
            if "risk" in indicator and indicator["risk"] == filter_risk
        ]
class RegexMatch:
    """Regex helpers for recognizing URL and IPv4 IOC tokens."""
    # TODO Add support for hashes and files with extensions

    def __init__(self):
        # This regex will not match urls with no uri content such as www.google.com. This was by design as many security researchers include the basedomain
        # Did not want to generate false positives with hits such as drive.google.com & drive.google.com/evilcontent appearing in pastebin data
        # In order to get the regex to match urls with no uri content replace (\/.*|:[0-9]{1,5}) with (\/.*|:[0-9]{1,5})?
        self.url_regex = r"\b(http:\/\/www\.|https:\/\/www\.|http:\/\/|https:\/\/)?[a-z0-9]+([\-\.]{1}[a-z0-9]+)*\.(aaa|aarp|abarth|abb|abbott|abbvie|abc|able|abogado|abudhabi|ac|academy|accenture|accountant|accountants|aco|active|actor|ad|adac|ads|adult|ae|aeg|aero|aetna|af|afamilycompany|afl|africa|ag|agakhan|agency|ai|aig|aigo|airbus|airforce|airtel|akdn|al|alfaromeo|alibaba|alipay|allfinanz|allstate|ally|alsace|alstom|am|americanexpress|americanfamily|amex|amfam|amica|amsterdam|analytics|android|anquan|anz|ao|aol|apartments|app|apple|aq|aquarelle|ar|arab|aramco|archi|army|arpa|art|arte|as|asda|asia|associates|at|athleta|attorney|au|auction|audi|audible|audio|auspost|author|auto|autos|avianca|aw|aws|ax|axa|az|azure|ba|baby|baidu|banamex|bananarepublic|band|bank|bar|barcelona|barclaycard|barclays|barefoot|bargains|baseball|basketball|bauhaus|bayern|bb|bbc|bbt|bbva|bcg|bcn|bd|be|beats|beauty|beer|bentley|berlin|best|bestbuy|bet|bf|bg|bh|bharti|bi|bible|bid|bike|bing|bingo|bio|biz|bj|black|blackfriday|blanco|blockbuster|blog|bloomberg|blue|bm|bms|bmw|bn|bnl|bnpparibas|bo|boats|boehringer|bofa|bom|bond|boo|book|booking|bosch|bostik|boston|bot|boutique|box|br|bradesco|bridgestone|broadway|broker|brother|brussels|bs|bt|budapest|bugatti|build|builders|business|buy|buzz|bv|bw|by|bz|bzh|ca|cab|cafe|cal|call|calvinklein|cam|camera|camp|cancerresearch|canon|capetown|capital|capitalone|car|caravan|cards|care|career|careers|cars|cartier|casa|case|caseih|cash|casino|cat|catering|catholic|cba|cbn|cbre|cbs|cc|cd|ceb|center|ceo|cern|cf|cfa|cfd|cg|ch|chanel|channel|charity|chase|chat|cheap|chintai|christmas|chrome|chrysler|church|ci|cipriani|circle|cisco|citadel|citi|citic|city|cityeats|ck|cl|claims|cleaning|click|clinic|clinique|clothing|cloud|club|clubmed|cm|cn|co|coach|codes|coffee|college|cologne|com|comcast|commbank|community|company|compare|computer|comsec|condos|construction|consulting|contact|contractors|cooking|cookingchannel|cool|coop|corsica|country|coupon|coupons|courses|cr|credit|creditcard|creditunion|cricket|crown|crs|cruise|cruises|csc|cu|cuisinella|cv|cw|cx|cy|cymru|cyou|cz|dabur|dad|dance|data|date|dating|datsun|day|dclk|dds|de|deal|dealer|deals|degree|delivery|dell|deloitte|delta|democrat|dental|dentist|desi|design|dev|dhl|diamonds|diet|digital|direct|directory|discount|discover|dish|diy|dj|dk|dm|dnp|do|docs|doctor|dodge|dog|doha|domains|dot|download|drive|dtv|dubai|duck|dunlop|duns|dupont|durban|dvag|dvr|dz|earth|eat|ec|eco|edeka|edu|education|ee|eg|email|emerck|energy|engineer|engineering|enterprises|epost|epson|equipment|er|ericsson|erni|es|esq|estate|esurance|et|etisalat|eu|eurovision|eus|events|everbank|exchange|expert|exposed|express|extraspace|fage|fail|fairwinds|faith|family|fan|fans|farm|farmers|fashion|fast|fedex|feedback|ferrari|ferrero|fi|fiat|fidelity|fido|film|final|finance|financial|fire|firestone|firmdale|fish|fishing|fit|fitness|fj|fk|flickr|flights|flir|florist|flowers|fly|fm|fo|foo|food|foodnetwork|football|ford|forex|forsale|forum|foundation|fox|fr|free|fresenius|frl|frogans|frontdoor|frontier|ftr|fujitsu|fujixerox|fun|fund|furniture|futbol|fyi|ga|gal|gallery|gallo|gallup|game|games|gap|garden|gb|gbiz|gd|gdn|ge|gea|gent|genting|george|gf|gg|ggee|gh|gi|gift|gifts|gives|giving|gl|glade|glass|gle|global|globo|gm|gmail|gmbh|gmo|gmx|gn|godaddy|gold|goldpoint|golf|goo|goodyear|goog|google|gop|got|gov|gp|gq|gr|grainger|graphics|gratis|green|gripe|grocery|group|gs|gt|gu|guardian|gucci|guge|guide|guitars|guru|gw|gy|hair|hamburg|hangout|haus|hbo|hdfc|hdfcbank|health|healthcare|help|helsinki|here|hermes|hgtv|hiphop|hisamitsu|hitachi|hiv|hk|hkt|hm|hn|hockey|holdings|holiday|homedepot|homegoods|homes|homesense|honda|honeywell|horse|hospital|host|hosting|hot|hoteles|hotels|hotmail|house|how|hr|hsbc|ht|hu|hughes|hyatt|hyundai|ibm|icbc|ice|icu|id|ie|ieee|ifm|ikano|il|im|imamat|imdb|immo|immobilien|in|inc|industries|infiniti|info|ing|ink|institute|insurance|insure|int|intel|international|intuit|investments|io|ipiranga|iq|ir|irish|is|iselect|ismaili|ist|istanbul|it|itau|itv|iveco|jaguar|java|jcb|jcp|je|jeep|jetzt|jewelry|jio|jll|jm|jmp|jnj|jo|jobs|joburg|jot|joy|jp|jpmorgan|jprs|juegos|juniper|kaufen|kddi|ke|kerryhotels|kerrylogistics|kerryproperties|kfh|kg|kh|ki|kia|kim|kinder|kindle|kitchen|kiwi|km|kn|koeln|komatsu|kosher|kp|kpmg|kpn|kr|krd|kred|kuokgroup|kw|ky|kyoto|kz|la|lacaixa|ladbrokes|lamborghini|lamer|lancaster|lancia|lancome|land|landrover|lanxess|lasalle|lat|latino|latrobe|law|lawyer|lb|lc|lds|lease|leclerc|lefrak|legal|lego|lexus|lgbt|li|liaison|lidl|life|lifeinsurance|lifestyle|lighting|like|lilly|limited|limo|lincoln|linde|link|lipsy|live|living|lixil|lk|llc|loan|loans|locker|locus|loft|lol|london|lotte|lotto|love|lpl|lplfinancial|lr|ls|lt|ltd|ltda|lu|lundbeck|lupin|luxe|luxury|lv|ly|ma|macys|madrid|maif|maison|makeup|man|management|mango|map|market|marketing|markets|marriott|marshalls|maserati|mattel|mba|mc|mckinsey|md|me|med|media|meet|melbourne|meme|memorial|men|menu|merckmsd|metlife|mg|mh|miami|microsoft|mil|mini|mint|mit|mitsubishi|mk|ml|mlb|mls|mm|mma|mn|mo|mobi|mobile|mobily|moda|moe|moi|mom|monash|money|monster|mopar|mormon|mortgage|moscow|moto|motorcycles|mov|movie|movistar|mp|mq|mr|ms|msd|mt|mtn|mtr|mu|museum|mutual|mv|mw|mx|my|mz|na|nab|nadex|nagoya|name|nationwide|natura|navy|nba|nc|ne|nec|net|netbank|netflix|network|neustar|new|newholland|news|next|nextdirect|nexus|nf|nfl|ng|ngo|nhk|ni|nico|nike|nikon|ninja|nissan|nissay|nl|no|nokia|northwesternmutual|norton|now|nowruz|nowtv|np|nr|nra|nrw|ntt|nu|nyc|nz|obi|observer|off|office|okinawa|olayan|olayangroup|oldnavy|ollo|om|omega|one|ong|onl|online|onyourside|ooo|open|oracle|orange|org|organic|origins|osaka|otsuka|ott|ovh|pa|page|panasonic|paris|pars|partners|parts|party|passagens|pay|pccw|pe|pet|pf|pfizer|pg|ph|pharmacy|phd|philips|phone|photo|photography|photos|physio|piaget|pics|pictet|pictures|pid|pin|ping|pink|pioneer|pizza|pk|pl|place|play|playstation|plumbing|plus|pm|pn|pnc|pohl|poker|politie|porn|post|pr|pramerica|praxi|press|prime|pro|prod|productions|prof|progressive|promo|properties|property|protection|pru|prudential|ps|pt|pub|pw|pwc|py|qa|qpon|quebec|quest|qvc|racing|radio|raid|re|read|realestate|realtor|realty|recipes|red|redstone|redumbrella|rehab|reise|reisen|reit|reliance|ren|rent|rentals|repair|report|republican|rest|restaurant|review|reviews|rexroth|rich|richardli|ricoh|rightathome|ril|rio|rip|rmit|ro|rocher|rocks|rodeo|rogers|room|rs|rsvp|ru|rugby|ruhr|run|rw|rwe|ryukyu|sa|saarland|safe|safety|sakura|sale|salon|samsclub|samsung|sandvik|sandvikcoromant|sanofi|sap|sarl|sas|save|saxo|sb|sbi|sbs|sc|sca|scb|schaeffler|schmidt|scholarships|school|schule|schwarz|science|scjohnson|scor|scot|sd|se|search|seat|secure|security|seek|select|sener|services|ses|seven|sew|sex|sexy|sfr|sg|sh|shangrila|sharp|shaw|shell|shia|shiksha|shoes|shop|shopping|shouji|show|showtime|shriram|si|silk|sina|singles|site|sj|sk|ski|skin|sky|skype|sl|sling|sm|smart|smile|sn|sncf|so|soccer|social|softbank|software|sohu|solar|solutions|song|sony|soy|space|spiegel|sport|spot|spreadbetting|sr|srl|srt|st|stada|staples|star|starhub|statebank|statefarm|stc|stcgroup|stockholm|storage|store|stream|studio|study|style|su|sucks|supplies|supply|support|surf|surgery|suzuki|sv|swatch|swiftcover|swiss|sx|sy|sydney|symantec|systems|sz|tab|taipei|talk|taobao|target|tatamotors|tatar|tattoo|tax|taxi|tc|tci|td|tdk|team|tech|technology|tel|telefonica|temasek|tennis|teva|tf|tg|th|thd|theater|theatre|tiaa|tickets|tienda|tiffany|tips|tires|tirol|tj|tjmaxx|tjx|tk|tkmaxx|tl|tm|tmall|tn|to|today|tokyo|tools|top|toray|toshiba|total|tours|town|toyota|toys|tr|trade|trading|training|travel|travelchannel|travelers|travelersinsurance|trust|trv|tt|tube|tui|tunes|tushu|tv|tvs|tw|tz|ua|ubank|ubs|uconnect|ug|uk|unicom|university|uno|uol|ups|us|uy|uz|va|vacations|vana|vanguard|vc|ve|vegas|ventures|verisign|versicherung|vet|vg|vi|viajes|video|vig|viking|villas|vin|vip|virgin|visa|vision|vistaprint|viva|vivo|vlaanderen|vn|vodka|volkswagen|volvo|vote|voting|voto|voyage|vu|vuelos|wales|walmart|walter|wang|wanggou|warman|watch|watches|weather|weatherchannel|webcam|weber|website|wed|wedding|weibo|weir|wf|whoswho|wien|wiki|williamhill|win|windows|wine|winners|wme|wolterskluwer|woodside|work|works|world|wow|ws|wtc|wtf|xbox|xerox|xfinity|xihuan|xin|xn|xxx|xyz|yachts|yahoo|yamaxun|yandex|ye|yodobashi|yoga|yokohama|you|youtube|yt|yun|za|zappos|zara|zero|zip|zippo|zm|zone|zuerich|zw)(\/.*|:[0-9]{1,5})\b|(http:\/\/www\.|https:\/\/www\.|http:\/\/|https:\/\/)?(?:(?:1\d\d|2[0-5][0-5]|2[0-4]\d|0?[1-9]\d|0?0?\d)\.){3}(?:1\d\d|2[0-5][0-5]|2[0-4]\d|0?[1-9]\d|0?0?\d)(\/|:)\S*"
        self.ipv4_regex = r"\b(?:(?:1\d\d|2[0-5][0-5]|2[0-4]\d|0?[1-9]\d|0?0?\d)\.){3}(?:1\d\d|2[0-5][0-5]|2[0-4]\d|0?[1-9]\d|0?0?\d)\b"
        # Performance fix: these patterns are applied to every token of every
        # paste line, and the URL pattern is very large — compile them once
        # here instead of re-matching the raw strings on each call.
        self._url_pattern = re.compile(self.url_regex)
        self._ipv4_pattern = re.compile(self.ipv4_regex)

    def url(self, object_to_filter):
        """Return True when *object_to_filter* starts with a URL (domain + path/port, or IP)."""
        return self._url_pattern.match(object_to_filter) is not None

    def ipv4(self, object_to_filter):
        """Return True when *object_to_filter* starts with a dotted-quad IPv4 address."""
        return self._ipv4_pattern.match(object_to_filter) is not None
class OutputHandler:
    """Writes collected IOC events to stdout; file output is still a stub."""

    # TODO - ADD LOGIC TO WRITE TO FILE
    def to_file(self, output_file, output_format, threat_name):
        """Resolve the output path for *output_file*; writing is not implemented yet.

        Always returns None (stub).
        """
        if output_file == "output":
            # Default name: add the extension and place the file next to this script.
            output_file = os.path.join(os.path.dirname(os.path.abspath(__file__)), f"{output_file}.{output_format}")
        # Bug fix: the original appended the format with no dot ("iocscsv");
        # also skip appending when the caller already supplied the extension.
        elif not output_file.endswith(f".{output_format}"):
            output_file = f"{output_file}.{output_format}"
        return None

    def to_stdout(self, object_to_output):
        """Write *object_to_output* verbatim to stdout (no trailing newline)."""
        sys.stdout.write(object_to_output)
        return None
if __name__ == "__main__":
    argument_parser = argparse.ArgumentParser(description="Twitter Threat Collection Bot")
    argument_parser.add_argument("-c", "--config_file", dest="config", default="config.json", help="Path to configuration file.")
    argument_parser.add_argument("-o", "--output_file", dest="output_file", default="output", help="Name of file to output results to.")
    argument_parser.add_argument("-f", "--output_format", dest="output_format", default="json", help="Format in which to write the output file.")
    argument_parser.add_argument("-d", "--debug", dest="debug", action="store_true", help="Enable debugging mode to print troubleshooting statements.")
    arguments = argument_parser.parse_args()
    # 'debug' is read as a module-level global by the collector/filter classes.
    debug = arguments.debug
    output_format = arguments.output_format
    output_file = arguments.output_file

    # Attempting to load the application configuration
    config = AppConfig.load_config(arguments.config)
    if debug is True:
        print(arguments.config)
        print(config)
        print(output_format)
        print(output_file)
    if config is None:
        sys.exit(1)

    # Getting Twitter search results for '#emotet filter:links pastebin'
    tweet_collector = TweetCollector(config)
    twitter_search_results = tweet_collector.search("%23emotet%20filter%3Alinks%20pastebin%20-filter%3Aretweets%20-filter%3Areplies")

    # Filtering Tweets for pastebin URLs
    tweet_filter = TweetFilter()
    pastebin_tweets = tweet_filter.contains_pastebin_urls(twitter_search_results)
    pastebin_tweet_info = tweet_filter.get_tweet_properties(pastebin_tweets)

    # Deduplicate tweets by serializing each to a hashable (sorted-key) JSON
    # string, set-reducing, then parsing back.
    unique_pastebin_tweets = [
        json.loads(tweet_string)
        for tweet_string in {json.dumps(tweet, sort_keys=True) for tweet in pastebin_tweet_info}
    ]

    # Scraping the returned pastebin sites for Emotet Indicators of Compromise (IOCs)
    threat_collector = ThreatCollector(config)
    threat_filter = ThreatFilter(config)
    # Hoisted out of the loop: no need to rebuild the (large) regex per tweet.
    regex = RegexMatch()
    emotet_iocs = []
    for tweet in unique_pastebin_tweets:
        # Rename "url" -> "source_url" so the per-IOC events carry the paste origin.
        tweet["source_url"] = tweet.pop("url", "")
        iocs = threat_collector.get_pastebin_iocs([tweet["source_url"]], True)
        for ioc in threat_filter.filter_whitelisted_iocs(iocs):
            new_ioc_event = tweet.copy()
            new_ioc_event["ioc"] = ioc
            if regex.ipv4(ioc):
                new_ioc_event["ioc_type"] = "ip"
            if regex.url(ioc):
                new_ioc_event["ioc_type"] = "url"
            emotet_iocs.append(new_ioc_event)

    # Output of indicators and relevant Twitter information to standard output for Splunk
    # Bug fix: the original used 'output_format is "json"', an identity
    # comparison that is not guaranteed to hold for CLI-supplied strings.
    if output_format == "json":
        output_handler = OutputHandler()
        for ioc in emotet_iocs:
            output_handler.to_stdout(json.dumps(ioc, sort_keys=True))
    if "csv" in output_format:
        if not os.path.isfile(output_file):
            output_file = os.path.join(os.path.dirname(os.path.abspath(__file__)), output_file)
        with open(output_file, "w") as csvfile:
            for row_number, ioc in enumerate(emotet_iocs):
                # Crude CSV escaping: strip the delimiters from the free text.
                ioc["full_text"] = ioc["full_text"].replace("\n", " ").replace(",", " ")
                if row_number == 0:
                    csvfile.write(",".join(ioc.keys()))
                csvfile.write("\n" + ",".join(str(value) for value in ioc.values()))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment