mkouhei/retrieve_jdk.py

## retrieve_jdk.py
# -*- coding: utf-8 -*-
"""
    Copyright (C) 2014 Kouhei Maeda <mkouhei@palmtb.net>

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.

---
requires:
    requests
    pyquery

    and

    wget command

usage:
    python retrieve_jdk.py -j <7|8>

"""
import sys
import requests
import json
from urlparse import urlparse
from pyquery import PyQuery as pq
import argparse
import subprocess
import os.path

VERSIONS = [7, 8]
URL = 'http://www.oracle.com/technetwork/java/javase/downloads/index.html'


def retrieve_download_page(url, timeout=30):
    """Download a page and return it parsed, plus the site's base URL.

    :param url: absolute URL of the page to fetch.
    :param timeout: seconds to wait for the server before giving up;
        handed unchanged to ``requests.get``.
    :returns: a ``(document, base_url)`` tuple; ``document`` is the PyQuery
        object built from the UTF-8 decoded body and ``base_url`` is the
        ``scheme://netloc`` prefix of ``url``.
    :raises requests.exceptions.RequestException: on connection failure,
        timeout, or a 4xx/5xx response.
    """
    parts = urlparse(url)
    base_url = '%s://%s' % (parts.scheme, parts.netloc)
    res = requests.get(url, timeout=timeout)
    # Stop on an HTTP error instead of scraping the error page later on.
    res.raise_for_status()
    html = res.content.decode('utf-8')
    return pq(html), base_url


def arg_parse(argv=None):
    """Parse the command line and return the requested JDK major version.

    :param argv: list of argument strings to parse.  ``None`` (the default)
        lets argparse read ``sys.argv[1:]``, which is what a call without
        arguments has always done.
    :returns: the integer passed to ``-j``/``--jdk``; argparse restricts it
        to the values in ``VERSIONS``.
    :raises SystemExit: raised by argparse when the option is missing or
        its value is not an allowed version.
    """
    parser = argparse.ArgumentParser(description='usage')
    parser.add_argument('-j', '--jdk', action='store',
                        required=True, type=int, choices=VERSIONS,
                        help='specify JDK version')
    args = parser.parse_args(argv)
    return args.jdk


def parse_href(h3_element, base_url):
    """Build the link that belongs to a heading element.

    The link is read from the first child of the element that directly
    follows ``h3_element``; its ``href`` value is appended to ``base_url``
    with no separator in between.

    :param h3_element: element providing ``getnext()``.
    :param base_url: prefix placed in front of the href value.
    :returns: ``base_url`` immediately followed by the href value.
    """
    sibling = h3_element.getnext()
    # The sibling's first child is expected to be the <a> element.
    anchor = sibling.getchildren()[0]
    href = anchor.attrib.get('href')
    return '%s%s' % (base_url, href)


def retrieve_url(jdk_page_url, timeout=30):
    """Return the Linux x64 tarball URL published on a JDK download page.

    The first ``<script>`` element whose text mentions ``-linux-x64.tar.gz``
    is searched line by line.  The first line containing that marker is
    expected to look like ``<lhs> = <JSON object>;``; the ``filepath``
    member of that JSON object is returned.

    :param jdk_page_url: URL of the version-specific JDK download page.
    :param timeout: seconds to wait for the server; handed unchanged to
        ``requests.get``.
    :returns: value of the ``filepath`` key, or ``None`` when the JSON
        object has no such key.
    :raises requests.exceptions.RequestException: on connection failure,
        timeout, or a 4xx/5xx response.
    :raises IndexError: if no script mentions the tarball, or the matching
        line contains no ``' = '``.
    :raises ValueError: if the right-hand side is not valid JSON.
    """
    marker = '-linux-x64.tar.gz'
    res = requests.get(jdk_page_url, timeout=timeout)
    res.raise_for_status()
    document = pq(res.content.decode('utf-8'))
    for script in document.find('script'):
        text = script.text
        # ``in`` also accepts a match at offset 0, unlike ``find(...) > 0``.
        if text is None or marker not in text:
            continue
        for line in text.split('\n'):
            if marker not in line:
                continue
            # Split once only: the JSON payload may itself contain ' = '.
            payload = line.split(' = ', 1)[1]
            # Remove trailing whitespace (e.g. '\r') so the ';' can go too.
            payload = payload.strip().rstrip(';')
            return json.loads(payload).get('filepath')
    # IndexError is what the previous bare ``[0]`` lookup raised here.
    raise IndexError('no "%s" download entry found on %s'
                     % (marker, jdk_page_url))


def check_file(filename):
    """Tell whether ``filename`` names an existing regular file.

    :param filename: path to test, as accepted by ``os.path.isfile``.
    :returns: ``True`` when the path exists and is a regular file,
        ``False`` otherwise.
    """
    exists_as_file = os.path.isfile(filename)
    return exists_as_file


def main():
    """Work out the download URL of the JDK requested on the command line.

    The version is read from the command line, the download index page is
    scanned for ``<h3>`` headings whose text is ``JDK`` and whose link
    contains ``jdk<version>``, and the page behind the first such link is
    asked for the tarball URL.

    If a file with the tarball's base name already exists in the current
    directory, a notice is printed and the process exits with status 0.

    :returns: URL of the JDK tarball to download.
    :raises IndexError: if the index page has no link for the version.
    """
    version = arg_parse()
    query, base_url = retrieve_download_page(URL)
    wanted = 'jdk%s' % version
    jdk_page_url = None
    for heading in query.find('h3'):
        if heading.text != 'JDK':
            continue
        # Resolve the link once per heading rather than once for the
        # filter and a second time for the result.
        href = parse_href(heading, base_url)
        if wanted in href:
            jdk_page_url = href
            break
    if jdk_page_url is None:
        # IndexError is what the previous bare ``[0]`` lookup raised here.
        raise IndexError('no JDK %s link found on %s' % (version, URL))
    jdk_url = retrieve_url(jdk_page_url)
    jdk_file = os.path.basename(jdk_url)
    if check_file(jdk_file):
        print('%s is already downloaded.' % jdk_file)
        sys.exit(0)
    return jdk_url


if __name__ == '__main__':
    # main() runs while the argument list is being built, i.e. before wget
    # is started; it may exit the process when the archive already exists.
    wget_command = [
        'wget',
        # NOTE(review): this switches off TLS certificate verification.
        '--no-check-certificate',
        # wget's own cookie handling is off; the licence-acceptance cookie
        # is sent as an explicit request header instead.
        '--no-cookies',
        '--header',
        'Cookie: oraclelicense=accept-securebackup-cookie',
        main(),
    ]
    # check_call raises CalledProcessError when wget exits non-zero.
    subprocess.check_call(wget_command)
	# -*- coding: utf-8 -*-
	"""
	Copyright (C) 2014 Kouhei Maeda <mkouhei@palmtb.net>

	This program is free software: you can redistribute it and/or modify
	it under the terms of the GNU General Public License as published by
	the Free Software Foundation, either version 3 of the License, or
	(at your option) any later version.

	This program is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	GNU General Public License for more details.

	You should have received a copy of the GNU General Public License
	along with this program. If not, see <http://www.gnu.org/licenses/>.

	---
	requires:
	requests
	pyquery

	and

	wget command

	usage:
	python retrieve_jdk.py -j <7|8>

	"""
	import sys
	import requests
	import json
	from urlparse import urlparse
	from pyquery import PyQuery as pq
	import argparse
	import subprocess
	import os.path

	VERSIONS = [7, 8]
	URL = 'http://www.oracle.com/technetwork/java/javase/downloads/index.html'


	def retrieve_download_page(url, timeout=30):
	    """Download a page and return it parsed, plus the site's base URL.

	    :param url: absolute URL of the page to fetch.
	    :param timeout: seconds to wait for the server before giving up;
	        handed unchanged to ``requests.get``.
	    :returns: a ``(document, base_url)`` tuple; ``document`` is the
	        PyQuery object built from the UTF-8 decoded body and
	        ``base_url`` is the ``scheme://netloc`` prefix of ``url``.
	    :raises requests.exceptions.RequestException: on connection failure,
	        timeout, or a 4xx/5xx response.
	    """
	    parts = urlparse(url)
	    base_url = '%s://%s' % (parts.scheme, parts.netloc)
	    res = requests.get(url, timeout=timeout)
	    # Stop on an HTTP error instead of scraping the error page later on.
	    res.raise_for_status()
	    html = res.content.decode('utf-8')
	    return pq(html), base_url


	def arg_parse(argv=None):
	    """Parse the command line and return the requested JDK major version.

	    :param argv: list of argument strings to parse.  ``None`` (the
	        default) lets argparse read ``sys.argv[1:]``, which is what a
	        call without arguments has always done.
	    :returns: the integer passed to ``-j``/``--jdk``; argparse restricts
	        it to the values in ``VERSIONS``.
	    :raises SystemExit: raised by argparse when the option is missing or
	        its value is not an allowed version.
	    """
	    parser = argparse.ArgumentParser(description='usage')
	    parser.add_argument('-j', '--jdk', action='store',
	                        required=True, type=int, choices=VERSIONS,
	                        help='specify JDK version')
	    args = parser.parse_args(argv)
	    return args.jdk


	def parse_href(h3_element, base_url):
	    """Build the link that belongs to a heading element.

	    The link is read from the first child of the element that directly
	    follows ``h3_element``; its ``href`` value is appended to
	    ``base_url`` with no separator in between.

	    :param h3_element: element providing ``getnext()``.
	    :param base_url: prefix placed in front of the href value.
	    :returns: ``base_url`` immediately followed by the href value.
	    """
	    sibling = h3_element.getnext()
	    # The sibling's first child is expected to be the <a> element.
	    anchor = sibling.getchildren()[0]
	    href = anchor.attrib.get('href')
	    return '%s%s' % (base_url, href)


	def retrieve_url(jdk_page_url, timeout=30):
	    """Return the Linux x64 tarball URL published on a JDK download page.

	    The first ``<script>`` element whose text mentions
	    ``-linux-x64.tar.gz`` is searched line by line.  The first line
	    containing that marker is expected to look like
	    ``<lhs> = <JSON object>;``; the ``filepath`` member of that JSON
	    object is returned.

	    :param jdk_page_url: URL of the version-specific JDK download page.
	    :param timeout: seconds to wait for the server; handed unchanged to
	        ``requests.get``.
	    :returns: value of the ``filepath`` key, or ``None`` when the JSON
	        object has no such key.
	    :raises requests.exceptions.RequestException: on connection failure,
	        timeout, or a 4xx/5xx response.
	    :raises IndexError: if no script mentions the tarball, or the
	        matching line contains no ``' = '``.
	    :raises ValueError: if the right-hand side is not valid JSON.
	    """
	    marker = '-linux-x64.tar.gz'
	    res = requests.get(jdk_page_url, timeout=timeout)
	    res.raise_for_status()
	    document = pq(res.content.decode('utf-8'))
	    for script in document.find('script'):
	        text = script.text
	        # ``in`` also accepts a match at offset 0.
	        if text is None or marker not in text:
	            continue
	        for line in text.split('\n'):
	            if marker not in line:
	                continue
	            # Split once only: the JSON payload may itself contain ' = '.
	            payload = line.split(' = ', 1)[1]
	            # Remove trailing whitespace (e.g. '\r') so the ';' can go too.
	            payload = payload.strip().rstrip(';')
	            return json.loads(payload).get('filepath')
	    raise IndexError('no "%s" download entry found on %s'
	                     % (marker, jdk_page_url))


	def check_file(filename):
	    """Tell whether ``filename`` names an existing regular file.

	    :param filename: path to test, as accepted by ``os.path.isfile``.
	    :returns: ``True`` when the path exists and is a regular file,
	        ``False`` otherwise.
	    """
	    return os.path.isfile(filename)


	def main():
	    """Work out the download URL of the JDK requested on the command line.

	    The version is read from the command line, the download index page
	    is scanned for ``<h3>`` headings whose text is ``JDK`` and whose link
	    contains ``jdk<version>``, and the page behind the first such link
	    is asked for the tarball URL.

	    If a file with the tarball's base name already exists in the current
	    directory, a notice is printed and the process exits with status 0.

	    :returns: URL of the JDK tarball to download.
	    :raises IndexError: if the index page has no link for the version.
	    """
	    version = arg_parse()
	    query, base_url = retrieve_download_page(URL)
	    wanted = 'jdk%s' % version
	    jdk_page_url = None
	    for heading in query.find('h3'):
	        if heading.text != 'JDK':
	            continue
	        # Resolve the link once per heading rather than once for the
	        # filter and a second time for the result.
	        href = parse_href(heading, base_url)
	        if wanted in href:
	            jdk_page_url = href
	            break
	    if jdk_page_url is None:
	        raise IndexError('no JDK %s link found on %s' % (version, URL))
	    jdk_url = retrieve_url(jdk_page_url)
	    jdk_file = os.path.basename(jdk_url)
	    if check_file(jdk_file):
	        print('%s is already downloaded.' % jdk_file)
	        sys.exit(0)
	    return jdk_url


	if __name__ == '__main__':
	    # main() runs while the argument list is being built, i.e. before
	    # wget is started; it may exit the process when the archive already
	    # exists.
	    # NOTE(review): --no-check-certificate switches off TLS certificate
	    # verification.
	    subprocess.check_call(['wget',
	                           '--no-check-certificate',
	                           '--no-cookies',
	                           '--header',
	                           'Cookie: oraclelicense=accept-securebackup-cookie',
	                           main()])