Skip to content

Instantly share code, notes, and snippets.

@tsujimitsu
Last active December 12, 2015 15:28
Show Gist options
  • Save tsujimitsu/e955efc2a02ef981ed3e to your computer and use it in GitHub Desktop.
Save tsujimitsu/e955efc2a02ef981ed3e to your computer and use it in GitHub Desktop.
file download from web page
# -*- coding: utf-8 -*-
from __future__ import absolute_import, unicode_literals
import re
import urllib
import urllib2 as request
from bs4 import BeautifulSoup
def get_rpm_list(url):
    """Return the hrefs of repository files linked from a web page.

    Fetches ``url``, parses the response body as HTML and collects the
    ``href`` of every ``<a>`` element whose target ends in ``.rpm``,
    ``.gz``, ``.bz2`` or ``.xml``.

    Args:
        url: Address of the page to scan.

    Returns:
        A list of href strings exactly as they are written in the page, in
        document order. They are not resolved against ``url``.

    Raises:
        Whatever ``request.urlopen`` raises when the page cannot be fetched.
    """
    # The dots are escaped and the pattern is anchored at the end so that
    # only genuine file extensions match; the unescaped, unanchored form
    # also accepted any href that merely contained "rpm", "gz", etc.
    pattern = re.compile(r".+\.(?:rpm|gz|bz2|xml)$")

    response = request.urlopen(url)
    try:
        body = response.read()
    finally:
        # Close explicitly so the connection is released even if read() fails.
        response.close()

    soup = BeautifulSoup(body, "lxml")
    hrefs = []
    for link in soup.find_all('a'):
        href = link.get('href')
        # Anchors without an href yield None, which the regex cannot match.
        if href and pattern.match(href):
            hrefs.append(href)
    return hrefs
# Download every file listed by get_rpm_list() from the remote repodata
# index. The href is appended verbatim to both the base URL and the local
# destination directory.
url = 'http://buildlogs.centos.org/centos/7/cloud/openstack-liberty/repodata/'
for item in get_rpm_list(url):
    source = url + item
    target = "E:/SoftWare/openstack/liberty/rpm/openstack-liberty/repodata/" + item
    urllib.urlretrieve(source, target)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment