Skip to content

Instantly share code, notes, and snippets.

@VGostyuzhov
Created February 13, 2017 11:27
Show Gist options
  • Save VGostyuzhov/49679fdd3fb4719dee5f1de8fb239301 to your computer and use it in GitHub Desktop.
Save VGostyuzhov/49679fdd3fb4719dee5f1de8fb239301 to your computer and use it in GitHub Desktop.
Parse hostnames from URLs
#!/usr/bin/python
import sys
from argparse import ArgumentParser

try:
    from urllib.parse import urlparse  # Python 3
except ImportError:
    from urlparse import urlparse  # Python 2 fallback
def main(argv=None):
    """Extract the unique network locations from a file of URLs.

    Reads one URL per line from the ``-i`` file, takes the ``netloc``
    component reported by ``urlparse`` for each line (this keeps any
    ``user@`` prefix and ``:port`` suffix), and writes every distinct,
    non-empty value once to the ``-o`` file, one per line, in order of
    first appearance.

    Args:
        argv: Command-line arguments without the program name. Defaults to
            ``sys.argv[1:]``, so existing ``main()`` callers are unaffected.

    Raises:
        SystemExit: With status 1 after printing help when no arguments are
            given, or with argparse's usage error when ``-i`` or ``-o`` is
            missing.
        IOError/OSError: If the input or output file cannot be opened.
    """
    parser = ArgumentParser(description='Provide file with URLs and file to save extracted hostnames')
    # Both files are mandatory; without ``required`` a missing option would
    # reach ``open(None)`` and fail with an unhelpful TypeError.
    parser.add_argument('-i', dest='input_filename', required=True,
                        help='Enter name of input file, containing URLs', metavar='FILE')
    parser.add_argument('-o', dest='output_filename', required=True,
                        help='Enter name of output file to save extracted hostnames', metavar='FILE')

    if argv is None:
        argv = sys.argv[1:]
    if not argv:
        # No arguments at all: show the full help rather than a terse error.
        parser.print_help()
        sys.exit(1)
    args = parser.parse_args(argv)

    hostnames = []  # preserves first-seen order for the output
    seen = set()    # O(1) duplicate check alongside the ordered list
    with open(args.input_filename, 'r') as input_file:
        for line in input_file:
            # strip() also removes '\r' from CRLF files and stray spaces.
            url = line.strip()
            try:
                hostname = urlparse(url).netloc
            except ValueError:
                # urlparse rejects some malformed URLs (e.g. a broken IPv6
                # literal); skip that line instead of aborting the run.
                sys.stderr.write('Skipping malformed URL: %s\n' % url)
                continue
            # Blank lines and lines without a scheme yield an empty netloc.
            if not hostname or hostname in seen:
                continue
            seen.add(hostname)
            hostnames.append(hostname)

    # The output is opened only after the input has been fully read, so using
    # the same path for both does not truncate the data before it is parsed.
    # Text mode is used because the values written are str, not bytes.
    with open(args.output_filename, 'w') as output_file:
        output_file.writelines(hostname + '\n' for hostname in hostnames)
# Script entry point: run the CLI only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment