Download RPMs from graylog repo
#!/usr/bin/env python3
# Simple downloader for RPMs from Graylog repository
from html.parser import HTMLParser
from urllib.request import urlopen, urlretrieve
from os.path import isfile, isdir
from re import compile
import argparse
from sys import exit
from subprocess import call
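# CLI flags: --force re-downloads RPMs that already exist locally, --directory picks the
# target directory, --createrepo runs "createrepo" after the downloads finish.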
parser = argparse.ArgumentParser(description='Download RPMs over HTTP from the Graylog repository')
parser.add_argument('--force', default=False, action='store_true', help='force download even if the file already exists. False by default.')
parser.add_argument('--directory', default='.', help='save downloaded files into this directory. Current directory by default.')
parser.add_argument('--createrepo', default=False, action='store_true', help='create a yum repository. False by default.')
args = parser.parse_args()
force = args.force
directory = args.directory
createrepo = args.createrepo
if not isdir(directory):
print("Directory {0} should exist".format(directory))
exit(1)
def bytes2str(data):
    """Decode a bytes object, or a list of bytes objects, into str."""
    if type(data) is list:
        res = []
        for entry in data:
            res.append(entry.decode('utf-8'))
        return res
    elif type(data) is bytes:
        return data.decode('utf-8')
    else:
        raise TypeError
class MyHTMLParser(HTMLParser):
    """Collect the href target of every <a> tag found in the fed HTML."""
    def __init__(self):
        HTMLParser.__init__(self)
        self.data = []

    def handle_starttag(self, tag, attrs):
        if tag == 'a':
            for attr in attrs:
                if attr[0] == 'href':
                    self.data.append(attr[1])
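
# Repository root and the index page that lists the available stableServer versions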
base_url = 'https://packages.graylog2.org'
stableserver_url = 'https://packages.graylog2.org/el/stableServer'
html = urlopen(stableserver_url)
parser = MyHTMLParser()
pattern_repo = compile(r'^/el/stableServer/(\d\.\d)$')
pattern_server_rpm = compile(r'.*((graylog-server|graylog-integrations-plugins)-\d{1,2}\.\d{1,2}\.\d{1,2}-\d{1,2}\.noarch\.rpm)')
parser.feed(''.join(bytes2str(html.readlines())))
refs_from_stableserverurl = parser.data
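# Walk each linked version directory, fetch its x86_64 index, and download matching RPMs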
for ref in refs_from_stableserverurl:
    match_repo = pattern_repo.match(ref)
    if match_repo:
        # match_repo[0] is the whole matched path, i.e. /el/stableServer/<major>.<minor>
        repo_version = match_repo[0]
        repo_url = base_url + repo_version + '/x86_64'
        html = urlopen(repo_url)
        parser = MyHTMLParser()
        parser.feed(''.join(bytes2str(html.readlines())))
        rpms_from_repourl = parser.data
        for rpm in rpms_from_repourl:
            match_rpm = pattern_server_rpm.match(rpm)
            if match_rpm:
                print(match_rpm[1])
                saveto = directory + '/' + match_rpm[1]
                # Skip files already on disk unless --force was given
                if not isfile(saveto) or force:
                    urlretrieve(base_url + rpm, saveto)
if createrepo:
    # Note: createrepo metadata is built in the current working directory, not in --directory
    r = call(['createrepo', '.'])
    if r != 0:
        print("Repository for graylog could not be created!")