Created February 13, 2017 11:27
-
-
Save VGostyuzhov/49679fdd3fb4719dee5f1de8fb239301 to your computer and use it in GitHub Desktop.
Parse hostnames from URLs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python
import sys
from argparse import ArgumentParser

try:
    from urlparse import urlparse  # Python 2
except ImportError:
    from urllib.parse import urlparse  # Python 3
def main():
    """Extract the unique network locations from a file of URLs.

    Command-line options:
        -i FILE  input file containing one URL per line (required)
        -o FILE  output file that receives one hostname per line (required)

    When the script is run without any arguments the usage help is printed
    and the process exits with status 1. If only one of the two options is
    given, argparse reports the missing one and exits.

    Each input line is parsed with ``urlparse`` and its ``netloc`` component
    (host, plus any port or credentials present in the URL) is collected.
    Values are written in order of first appearance, each exactly once.
    Lines that yield an empty ``netloc`` -- blank lines or URLs without a
    ``scheme://`` prefix -- are skipped instead of producing empty output
    lines.
    """
    parser = ArgumentParser(description='Provide file with URLs and file to save extracted hostnames')
    parser.add_argument('-i', dest='input_filename', required=True,
                        help='Enter name of input file, containing URLs', metavar='FILE')
    parser.add_argument('-o', dest='output_filename', required=True,
                        help='Enter name of output file to save extracted hostnames', metavar='FILE')
    # Show the full help (not just a terse error) when invoked with no arguments.
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)
    args = parser.parse_args()

    # The set gives O(1) duplicate checks; the list preserves first-seen order.
    seen = set()
    hostnames = []
    with open(args.input_filename, 'r') as input_file:
        for line in input_file:
            # strip() also removes '\r' left over from CRLF line endings.
            hostname = urlparse(line.strip()).netloc
            if hostname and hostname not in seen:
                seen.add(hostname)
                hostnames.append(hostname)

    # The output is opened only after the input has been read completely, so
    # a failure while reading never leaves a truncated output file behind.
    # Text mode is used because the values written are text strings.
    with open(args.output_filename, 'w') as output_file:
        output_file.writelines(hostname + '\n' for hostname in hostnames)
# Run the command-line tool only when executed as a script, not on import.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub.
Already have an account? Sign in to comment.