Skip to content

Instantly share code, notes, and snippets.

@mkouhei

mkouhei/retrieve_jdk.py

Last active Aug 29, 2015
Embed
What would you like to do?
retrieving JDK
# -*- coding: utf-8 -*-
"""
Copyright (C) 2014 Kouhei Maeda <mkouhei@palmtb.net>
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
---
Requires:
    requests
    pyquery
    the wget command

Usage:
    python retrieve_jdk.py -j <7|8>
"""
import argparse
import json
import os.path
import subprocess
import sys
try:
    from urlparse import urlparse
    from urlparse import urljoin
except ImportError:  # Python 3 moved these helpers into urllib.parse
    from urllib.parse import urlparse
    from urllib.parse import urljoin

import requests
from pyquery import PyQuery as pq
VERSIONS = [7, 8]
URL = 'http://www.oracle.com/technetwork/java/javase/downloads/index.html'
def retrieve_download_page(url, timeout=30):
    """Fetch a page and return its parsed document plus the site's base URL.

    Args:
        url: absolute URL of the page to download.
        timeout: seconds to wait for the server before giving up; handed
            straight to ``requests.get``.

    Returns:
        A ``(document, base_url)`` tuple. ``document`` is the PyQuery object
        built from the response body decoded as UTF-8, and ``base_url`` is
        ``<scheme>://<netloc>`` taken from ``url``.

    Raises:
        requests.exceptions.RequestException: on connection failures,
            timeouts, or a 4xx/5xx response.
    """
    parts = urlparse(url)
    base_url = '%s://%s' % (parts.scheme, parts.netloc)
    # Without a timeout requests waits indefinitely on a silent server.
    res = requests.get(url, timeout=timeout)
    # Stop here instead of scraping the markup of an error page later on.
    res.raise_for_status()
    html = res.content.decode('utf-8')
    return pq(html), base_url
def arg_parse(argv=None):
    """Parse the command line and return the requested JDK major version.

    Args:
        argv: list of argument strings to parse. ``None`` (the default)
            lets argparse read ``sys.argv[1:]``, which is what the script
            entry point relies on.

    Returns:
        The value of ``-j/--jdk`` as an int; argparse only accepts the
        entries of ``VERSIONS``.

    Raises:
        SystemExit: raised by argparse when the option is missing or is not
            one of the allowed choices.
    """
    parser = argparse.ArgumentParser(description='usage')
    parser.add_argument('-j', '--jdk', action='store',
                        required=True, type=int, choices=VERSIONS,
                        help='specify JDK version')
    args = parser.parse_args(argv)
    return args.jdk
def parse_href(h3_element, base_url):
    """Return the absolute URL of the link that follows a heading element.

    Args:
        h3_element: element whose next sibling (``getnext()``) holds the
            link as its first child (``getchildren()[0]``).
        base_url: ``<scheme>://<netloc>`` prefix used to resolve relative
            hrefs.

    Returns:
        The link's ``href`` resolved against ``base_url``. An href that is
        already absolute is returned as is; a missing href yields
        ``base_url`` itself.

    Raises:
        AttributeError: if the heading has no next sibling.
        IndexError: if that sibling has no children.
    """
    # The <a> element is the first child of the heading's next sibling.
    a_element = h3_element.getnext().getchildren()[0]
    # Fall back to '' so a missing href cannot produce '<base_url>None'.
    path = a_element.attrib.get('href') or ''
    # urljoin leaves absolute hrefs alone and inserts the '/' that plain
    # concatenation would omit for a path without a leading slash.
    return urljoin(base_url, path)
def retrieve_url(jdk_page_url, timeout=30):
    """Find the Linux x64 tarball URL on a JDK download page.

    The page's ``<script>`` elements are scanned for the first line that
    mentions ``-linux-x64.tar.gz``. The text after the first ``' = '`` on
    that line is parsed as a JSON object and its ``filepath`` entry is
    returned.

    Args:
        jdk_page_url: absolute URL of the version-specific download page.
        timeout: seconds to wait for the server before giving up; handed
            straight to ``requests.get``.

    Returns:
        The value stored under ``filepath``, or ``None`` when the JSON
        object has no such key.

    Raises:
        requests.exceptions.RequestException: on connection failures,
            timeouts, or a 4xx/5xx response.
        IndexError: if no script line mentions the tarball, or the matching
            line contains no ``' = '`` separator.
        ValueError: if the text after the separator is not valid JSON.
    """
    marker = '-linux-x64.tar.gz'
    # Without a timeout requests waits indefinitely on a silent server.
    res = requests.get(jdk_page_url, timeout=timeout)
    res.raise_for_status()
    document = pq(res.content.decode('utf-8'))
    for script in document.find('script'):
        # Script elements without inline text report None.
        for line in (script.text or '').split('\n'):
            if marker not in line:
                continue
            # Split only once so a ' = ' inside the JSON cannot truncate it,
            # and strip whitespace (e.g. a trailing '\r') before dropping
            # the statement-terminating ';'.
            payload = line.split(' = ', 1)[1].strip().rstrip(';')
            return json.loads(payload).get('filepath')
    # Same exception type the bare "[0]" lookup used to raise, now with a
    # message that says what was being looked for.
    raise IndexError('no script line mentioning %s found in %s'
                     % (marker, jdk_page_url))
def check_file(filename):
    """Tell whether ``filename`` names an existing regular file.

    Args:
        filename: path to test, relative to the current directory or
            absolute.

    Returns:
        ``True`` if ``os.path.isfile`` accepts the path, ``False`` otherwise
        (including when the path is a directory or does not exist).
    """
    is_regular_file = os.path.isfile(filename)
    return is_regular_file
def main():
    """Work out which JDK tarball to download and return its URL.

    Reads the requested version from the command line, looks on the page at
    ``URL`` for the ``JDK`` heading whose link mentions ``jdk<version>``,
    then asks that page for the Linux x64 tarball URL.

    Returns:
        The tarball URL, for the caller to download.

    Raises:
        SystemExit: with status 0 when a file with the tarball's name
            already exists in the current directory; with an error message
            when no matching download page or no tarball URL is found.
    """
    version = arg_parse()
    query, base_url = retrieve_download_page(URL)
    wanted = 'jdk%s' % version
    jdk_page_url = None
    for heading in query.find('h3'):
        if heading.text != 'JDK':
            continue
        # Resolve the link once per heading and reuse it for both the
        # version check and the result.
        href = parse_href(heading, base_url)
        if wanted in href:
            jdk_page_url = href
            break
    if jdk_page_url is None:
        sys.exit('No JDK %s download page found at %s' % (version, URL))
    jdk_url = retrieve_url(jdk_page_url)
    if not jdk_url:
        # retrieve_url returns None when the page data has no 'filepath'.
        sys.exit('No download URL found on %s' % jdk_page_url)
    jdk_file = os.path.basename(jdk_url)
    if check_file(jdk_file):
        print('%s is already downloaded.' % jdk_file)
        sys.exit(0)
    return jdk_url
if __name__ == '__main__':
    # main() exits by itself when the archive is already present, so a
    # returned URL always means there is something to fetch.
    download_url = main()
    # NOTE(review): --no-check-certificate disables TLS verification for the
    # download; confirm it is still required before keeping it.
    # The Cookie header presumably signals acceptance of Oracle's licence
    # prompt -- verify against the download site.
    wget_command = [
        'wget',
        '--no-check-certificate',
        '--no-cookies',
        '--header',
        'Cookie: oraclelicense=accept-securebackup-cookie',
        download_url,
    ]
    # check_call raises CalledProcessError when wget exits non-zero.
    subprocess.check_call(wget_command)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment