Last active
February 10, 2018 11:49
-
-
Save seospace/2c5a89666b4d841fad17936068010f14 to your computer and use it in GitHub Desktop.
Verisign FTP: download, unzip, and parse .com zone files.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from ftplib import FTP | |
import gzip | |
import re | |
import os | |
class ComParser:
    """Fetch the Verisign ``com`` zone file over FTP and list its domains.

    Each step consumes the file written by the previous one:
    ``download`` writes ``_file_in``, ``unzip`` expands it into
    ``_file_out``, and ``parse`` turns that into ``_file_parsed``.
    ``rename`` moves ``_file_parsed`` to ``_file_parsed_old``.
    """

    _host = 'rz.verisign-grs.com'
    # Working files; the names are relative, so they resolve against the
    # current working directory.
    _file_in = 'com.zone.gz'
    _file_out = 'com.zone.txt'
    _file_parsed = 'com.zone.parsed'
    _file_parsed_old = 'com.zone.parsed.old'
    # Not referenced by any method of this class.
    _file_new_domains = 'com.zone.parsed.newdomains'
    # FTP credentials. ftplib treats an empty user as an anonymous login.
    _login = ''
    _passwd = ''
    # An upper-case label followed by " NS" at the start of a line.
    # Compiled once because parse() applies it to every line of the zone.
    _ns_record = re.compile(r'^([A-Z0-9-]+) NS')

    def __init__(self):
        """Open the FTP control connection to ``_host``."""
        self.ftp = FTP(self._host)

    def login(self):
        """Authenticate on the FTP connection with ``_login``/``_passwd``."""
        self.ftp.login(user=self._login, passwd=self._passwd)

    def download(self):
        """Retrieve ``_file_in`` from the server into a local file of the same name."""
        # The context manager closes the local file even if the transfer fails.
        with open(self._file_in, 'wb') as archive:
            self.ftp.retrbinary('RETR %s' % self._file_in, archive.write)

    def unzip(self):
        """Decompress ``_file_in`` into ``_file_out``."""
        # Closing both handles guarantees the output is flushed to disk
        # before parse() reads it.
        with gzip.open(self._file_in) as compressed:
            with open(self._file_out, 'wb') as expanded:
                for chunk in compressed:
                    expanded.write(chunk)

    def parse(self):
        """Write one ``http://<domain>.com`` line per NS-record owner.

        Reads ``_file_out`` line by line. Only consecutive repeats of a
        domain are collapsed; a domain that reappears after a different
        one is written again.
        """
        previous = None
        with open(self._file_out, 'r') as zone:
            with open(self._file_parsed, 'w') as parsed:
                for line in zone:
                    match = self._ns_record.match(line)
                    if not match:
                        continue
                    domain = match.group(1).lower()
                    if domain != previous:
                        parsed.write('http://%s.com\n' % domain)
                        previous = domain

    def rename(self):
        """Move ``_file_parsed`` to ``_file_parsed_old``."""
        os.rename(self._file_parsed, self._file_parsed_old)
if __name__ == '__main__':
    # Run the pipeline: fetch the archive, expand it, extract the domains.
    parser = ComParser()
    try:
        parser.login()
        parser.download()
    finally:
        # Only login and download use the network; release the FTP
        # connection before the local unzip/parse steps, even on failure.
        parser.ftp.close()
    parser.unzip()
    parser.parse()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment