Created
October 31, 2019 08:24
-
-
Save schniggie/c2be493d690b530d5ec1361c6b84c099 to your computer and use it in GitHub Desktop.
Get the domain + TLD from a list of URLs, with or without a protocol handler
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
import sys | |
import os | |
import tld | |
from tld import get_fld | |
def main():
    """Print the first-level domain (domain + TLD) for each URL in a file.

    The path of the input file is taken from the first command-line
    argument. The file is read line by line; each line is stripped of
    surrounding whitespace and passed to ``get_fld``. Lines for which
    ``get_fld`` returns ``None`` are skipped, every other result is
    printed to stdout on its own line.

    Exits with status 2 when no path argument was supplied and with
    status 1 when the path does not refer to an existing file. Both
    diagnostics go to stderr so they never mix with the domain output.
    """
    if len(sys.argv) < 2:
        prog = sys.argv[0] if sys.argv else "script"
        print("Usage: {} <file-with-urls>".format(prog), file=sys.stderr)
        sys.exit(2)

    filepath = sys.argv[1]
    if not os.path.isfile(filepath):
        print(
            "File path {} does not exist. Exiting...".format(filepath),
            file=sys.stderr,
        )
        sys.exit(1)

    with open(filepath) as fp:
        for line in fp:
            # fail_silently=True: get_fld yields None instead of raising, so
            # unparsable lines are simply dropped by the check below.
            # fix_protocol=True: presumably lets entries without a scheme be
            # parsed as well -- confirm against the tld documentation.
            domain = get_fld(line.strip(), fail_silently=True, fix_protocol=True)
            if domain is not None:
                print(domain)
# Run main() only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment