comdotlinux/download_linux_journal.py

## download_linux_journal.py
#!/usr/bin/python3
import requests
import bs4
import re
import os

# Index page; every link on it whose href contains "pdf" is followed.
INDEX_URL = 'https://secure2.linuxjournal.com/pdf/dljdownload.php'
# Host that the per-issue download hrefs are appended to.
DOWNLOAD_HOST = 'http://download.linuxjournal.com'
# Matches any href containing "pdf".
PDF_HREF_PATTERN = re.compile("pdf")
# Timeout (seconds) passed to every HTTP request so a stalled server
# cannot hang the whole run.
REQUEST_TIMEOUT = 60
# Number of bytes written per chunk while streaming a PDF to disk.
CHUNK_SIZE = 65536


def filename_from_content_disposition(header: str) -> str:
    """Return the bare file name carried by a Content-Disposition header.

    Handles quoted and unquoted ``filename=`` parameters, with or without
    a space after the ``;`` separator, and file names containing spaces.
    Directory components are stripped because the value comes from the
    server and is used as a local path.

    Raises:
        ValueError: if the header is missing or has no usable file name.
    """
    if not header:
        raise ValueError('missing content-disposition header')
    match = re.search(r'filename\s*=\s*("[^"]*"|[^;\s]+)', header, re.IGNORECASE)
    if match is None:
        raise ValueError('no filename in content-disposition header: {0!r}'.format(header))
    name = os.path.basename(match.group(1).strip('"'))
    if name in ('', '.', '..'):
        raise ValueError('unusable filename in content-disposition header: {0!r}'.format(header))
    return name


def needs_download(filename: str, content_length: int) -> bool:
    """Return True when *filename* is absent or its size differs from *content_length*."""
    if not os.path.isfile(filename):
        return True
    print('Checking filename {0}'.format(filename))
    # Ask the filesystem for the size instead of reading the whole PDF into memory.
    existing_file_content_length = os.path.getsize(filename)
    print('Existing file length {0} | download content length {1}'.format(existing_file_content_length, content_length))
    return existing_file_content_length != content_length


def download_pdf(url: str, filename: str) -> None:
    """Stream *url* to *filename*.

    The body is written to ``<filename>.part`` and renamed only once the
    transfer has finished, so a failed request never truncates or replaces
    an existing file. A leftover ``.part`` file is overwritten on the next
    attempt.

    Raises:
        requests.RequestException: on connection problems or an HTTP error status.
        OSError: if the file cannot be written or renamed.
    """
    partial_name = filename + '.part'
    with requests.get(url, stream=True, timeout=REQUEST_TIMEOUT) as response:
        # Fail here rather than saving an HTTP error page as a PDF.
        response.raise_for_status()
        with open(partial_name, 'wb') as partial_file:
            for chunk in response.iter_content(chunk_size=CHUNK_SIZE):
                partial_file.write(chunk)
    os.replace(partial_name, filename)


def fetch_issue(page_url: str) -> None:
    """Resolve one link from the index page and download its PDF if needed.

    Fetches *page_url*, takes the first href on it that contains "pdf",
    issues a HEAD request against the download host to learn the file name
    and size, and downloads the file unless a local file of the same name
    and size already exists.

    Raises:
        ValueError: if the page has no pdf link or a required header is missing.
        requests.RequestException: on connection problems or an HTTP error status.
    """
    page_response = requests.get(page_url, timeout=REQUEST_TIMEOUT)
    page_response.raise_for_status()
    page_html = bs4.BeautifulSoup(page_response.text, 'html.parser')
    pdf_links = page_html.find_all(href=PDF_HREF_PATTERN)
    if not pdf_links:
        raise ValueError('no pdf link found on {0}'.format(page_url))
    # Build the download URL once; it is used for both the HEAD and the GET.
    pdf_url = '{0}{1}'.format(DOWNLOAD_HOST, pdf_links[0].attrs.get('href'))

    head_response = requests.head(pdf_url, timeout=REQUEST_TIMEOUT)
    head_response.raise_for_status()
    length_header = head_response.headers.get('Content-Length')
    if length_header is None:
        raise ValueError('no Content-Length header for {0}'.format(pdf_url))
    content_length = int(length_header)
    content_disposition_header = head_response.headers.get('content-disposition')
    print('content disposition header : {0}'.format(content_disposition_header))
    filename = filename_from_content_disposition(content_disposition_header)

    print('Current file -> {0}'.format(filename))
    if needs_download(filename, content_length):
        download_pdf(pdf_url, filename)
        print('Wrote {0}'.format(filename))
    else:
        print('File {0} exists.. skipping'.format(filename))


def main() -> None:
    """Download every PDF linked from the index page into the current directory.

    A failure on one issue is reported and skipped so the remaining issues
    are still attempted; a failure to fetch the index page itself raises.
    """
    index_response = requests.get(INDEX_URL, timeout=REQUEST_TIMEOUT)
    index_response.raise_for_status()

    index_html = bs4.BeautifulSoup(index_response.text, 'html.parser')
    all_tags = index_html.find_all(href=PDF_HREF_PATTERN)

    print('Number of download links found :: {0}'.format(len(all_tags)))

    for tag in all_tags:
        print('--')
        try:
            fetch_issue(tag.attrs.get('href'))
        except Exception as error:
            # Deliberately best-effort per issue, but say what went wrong.
            print('Exception occurred, skipping: {0!r}'.format(error))

    print('Done -- PDFs Downloaded!')


if __name__ == '__main__':
    main()
	#!/usr/bin/python3
	import requests
	import bs4
	import re
	import os

	# Index page; every link on it whose href contains "pdf" is followed.
	INDEX_URL = 'https://secure2.linuxjournal.com/pdf/dljdownload.php'
	# Host that the per-issue download hrefs are appended to.
	DOWNLOAD_HOST = 'http://download.linuxjournal.com'
	# Matches any href containing "pdf".
	PDF_HREF_PATTERN = re.compile("pdf")
	# Timeout (seconds) passed to every HTTP request so a stalled server
	# cannot hang the whole run.
	REQUEST_TIMEOUT = 60
	# Number of bytes written per chunk while streaming a PDF to disk.
	CHUNK_SIZE = 65536


	def filename_from_content_disposition(header: str) -> str:
	    """Return the bare file name carried by a Content-Disposition header.

	    Handles quoted and unquoted ``filename=`` parameters, with or without
	    a space after the ``;`` separator, and file names containing spaces.
	    Directory components are stripped because the value comes from the
	    server and is used as a local path.

	    Raises:
	        ValueError: if the header is missing or has no usable file name.
	    """
	    if not header:
	        raise ValueError('missing content-disposition header')
	    match = re.search(r'filename\s*=\s*("[^"]*"|[^;\s]+)', header, re.IGNORECASE)
	    if match is None:
	        raise ValueError('no filename in content-disposition header: {0!r}'.format(header))
	    name = os.path.basename(match.group(1).strip('"'))
	    if name in ('', '.', '..'):
	        raise ValueError('unusable filename in content-disposition header: {0!r}'.format(header))
	    return name


	def needs_download(filename: str, content_length: int) -> bool:
	    """Return True when *filename* is absent or its size differs from *content_length*."""
	    if not os.path.isfile(filename):
	        return True
	    print('Checking filename {0}'.format(filename))
	    # Ask the filesystem for the size instead of reading the whole PDF into memory.
	    existing_file_content_length = os.path.getsize(filename)
	    print('Existing file length {0} | download content length {1}'.format(existing_file_content_length, content_length))
	    return existing_file_content_length != content_length


	def download_pdf(url: str, filename: str) -> None:
	    """Stream *url* to *filename*.

	    The body is written to ``<filename>.part`` and renamed only once the
	    transfer has finished, so a failed request never truncates or replaces
	    an existing file. A leftover ``.part`` file is overwritten on the next
	    attempt.

	    Raises:
	        requests.RequestException: on connection problems or an HTTP error status.
	        OSError: if the file cannot be written or renamed.
	    """
	    partial_name = filename + '.part'
	    with requests.get(url, stream=True, timeout=REQUEST_TIMEOUT) as response:
	        # Fail here rather than saving an HTTP error page as a PDF.
	        response.raise_for_status()
	        with open(partial_name, 'wb') as partial_file:
	            for chunk in response.iter_content(chunk_size=CHUNK_SIZE):
	                partial_file.write(chunk)
	    os.replace(partial_name, filename)


	def fetch_issue(page_url: str) -> None:
	    """Resolve one link from the index page and download its PDF if needed.

	    Fetches *page_url*, takes the first href on it that contains "pdf",
	    issues a HEAD request against the download host to learn the file name
	    and size, and downloads the file unless a local file of the same name
	    and size already exists.

	    Raises:
	        ValueError: if the page has no pdf link or a required header is missing.
	        requests.RequestException: on connection problems or an HTTP error status.
	    """
	    page_response = requests.get(page_url, timeout=REQUEST_TIMEOUT)
	    page_response.raise_for_status()
	    page_html = bs4.BeautifulSoup(page_response.text, 'html.parser')
	    pdf_links = page_html.find_all(href=PDF_HREF_PATTERN)
	    if not pdf_links:
	        raise ValueError('no pdf link found on {0}'.format(page_url))
	    # Build the download URL once; it is used for both the HEAD and the GET.
	    pdf_url = '{0}{1}'.format(DOWNLOAD_HOST, pdf_links[0].attrs.get('href'))

	    head_response = requests.head(pdf_url, timeout=REQUEST_TIMEOUT)
	    head_response.raise_for_status()
	    length_header = head_response.headers.get('Content-Length')
	    if length_header is None:
	        raise ValueError('no Content-Length header for {0}'.format(pdf_url))
	    content_length = int(length_header)
	    content_disposition_header = head_response.headers.get('content-disposition')
	    print('content disposition header : {0}'.format(content_disposition_header))
	    filename = filename_from_content_disposition(content_disposition_header)

	    print('Current file -> {0}'.format(filename))
	    if needs_download(filename, content_length):
	        download_pdf(pdf_url, filename)
	        print('Wrote {0}'.format(filename))
	    else:
	        print('File {0} exists.. skipping'.format(filename))


	def main() -> None:
	    """Download every PDF linked from the index page into the current directory.

	    A failure on one issue is reported and skipped so the remaining issues
	    are still attempted; a failure to fetch the index page itself raises.
	    """
	    index_response = requests.get(INDEX_URL, timeout=REQUEST_TIMEOUT)
	    index_response.raise_for_status()

	    index_html = bs4.BeautifulSoup(index_response.text, 'html.parser')
	    all_tags = index_html.find_all(href=PDF_HREF_PATTERN)

	    print('Number of download links found :: {0}'.format(len(all_tags)))

	    for tag in all_tags:
	        print('--')
	        try:
	            fetch_issue(tag.attrs.get('href'))
	        except Exception as error:
	            # Deliberately best-effort per issue, but say what went wrong.
	            print('Exception occurred, skipping: {0!r}'.format(error))

	    print('Done -- PDFs Downloaded!')


	if __name__ == '__main__':
	    main()