Skip to content

Instantly share code, notes, and snippets.

@BerkeKaragoz
Created March 20, 2022 20:32
Show Gist options
  • Save BerkeKaragoz/6c856a9f9e6351e5835ae7aafb2ce1d3 to your computer and use it in GitHub Desktop.
Save BerkeKaragoz/6c856a9f9e6351e5835ae7aafb2ce1d3 to your computer and use it in GitHub Desktop.
Simple File Set Downloader
"""
Simple File Set Downloader, berkekaragoz.com
Example
Let's say that we want to download all files starting from:
https://example.com/assets/file-01.zip
to
https://example.com/assets/file-30.zip
URL: https://example.com/assets/file-01.zip
Filename Start: file-
Filename End: .zip
zFill: 2 # file-1.zip is zFill 1, file-001.zip is zFill 3
Range Start: 1 End: 30
Sleep: 0
The files will be downloaded to the ./file-/ directory:
file-/
|- _source_url.txt
|- file-01.zip
|- file-02.zip
|- ...
|- file-29.zip
|- file-30.zip
"""
import urllib.request
import pathlib
import os
import time
from pathlib import Path
from tkinter import Tk, Label, Button, Entry, Frame
from concurrent.futures import ThreadPoolExecutor
from concurrent.futures.process import _MAX_WINDOWS_WORKERS
# Upper bound on the number of download threads; 24 is used when the
# imported limit is falsy.
MAX_THREADS = _MAX_WINDOWS_WORKERS if _MAX_WINDOWS_WORKERS else 24

# HTTP headers attached to every download request.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/85.0.4183.83 Safari/537.36/8mqQhSuL-09"
    ),
    "Accept-Language": "en-US,en;q=0.5",
    "Cache-Control": "max-age=0",
    "Referer": "https://duckduckgo.com/",  # Or any search engine
}
def print_operation(base_url, range_start, range_to, zFill, req_file_start, req_file_end, sleep):
    """Print the first and last URL of the set, the sleep value and the range.

    Output is four lines: the URL built from ``range_start``, the URL built
    from ``range_to``, ``Sleep: <sleep>`` and ``<range_start> -> <range_to>``.
    """
    url_prefix = base_url + req_file_start

    def numbered_url(index):
        # Zero-pad the index to zFill digits and wrap it in prefix/suffix.
        return url_prefix + str(index).zfill(zFill) + req_file_end

    print(numbered_url(range_start))
    summary_lines = (
        numbered_url(range_to),
        "Sleep: " + str(sleep),
        str(range_start) + " -> " + str(range_to),
    )
    print("\n".join(summary_lines))
def download_request(request, download_dir):
    """Fetch one request and store the response body inside ``download_dir``.

    The target file is named after the last path segment of
    ``request.full_url``. The first chunk (up to 1024 bytes) is inspected
    BEFORE the target file is created, so a rejected response no longer
    leaves an empty file behind. A response is rejected when:

    * ``html>`` appears at the very start of the first chunk (a shallow
      check; an HTML page here is most likely an error page), or
    * the first chunk is too short to pass the size check (this includes an
      empty response).

    Args:
        request: ``urllib.request.Request`` describing the file to fetch.
        download_dir: Existing directory the file is written to.

    Returns:
        None. Progress, rejections and ``IOError`` failures are reported
        with ``print``; ``IOError`` is not propagated to the caller.
    """
    try:
        with urllib.request.urlopen(request) as response:
            filename = os.path.join(
                download_dir, os.path.basename(request.full_url))

            first_chunk = response.read(1024)
            # The acceptance rule is defined on the textual form of the
            # bytes (e.g. "b'<html>...'"): look for "html>" within its first
            # 20 characters and require more than 75 characters overall.
            preview = repr(first_chunk)
            if "html>" in preview[:20] or len(preview) <= 75:
                print("This file won't be written: " + filename)
                return

            print(request.full_url)
            with open(filename, 'wb') as f:
                chunk = first_chunk
                while chunk:
                    f.write(chunk)
                    chunk = response.read(65536)
    except IOError:
        print("Error: ", request.full_url)
def download_files(base_url, range_start, range_to, zFill, req_file_start, req_file_end, sleep, download_dir):
    """Download every file of the numbered set into ``download_dir``.

    Steps:
      1. Create ``download_dir`` (including parents) if it does not exist.
      2. Write the URL of the first file of the set to
         ``_source_url.txt`` inside that directory.
      3. Build one request per index in ``range_start..range_to``
         (inclusive), each index zero-padded to ``zFill`` digits.
      4. Submit the requests to a thread pool, pausing ``sleep`` seconds
         after each submission when ``sleep`` is positive.

    Args:
        base_url: URL prefix up to and including the last ``/``.
        range_start: First index of the set.
        range_to: Last index of the set (inclusive).
        zFill: Minimum number of digits of the index in the file name.
        req_file_start: File-name part placed before the index.
        req_file_end: File-name part placed after the index.
        sleep: Seconds to wait between submissions; ``0`` disables waiting.
        download_dir: Directory that receives the files.
    """
    # Create download directory
    Path(download_dir).mkdir(parents=True, exist_ok=True)

    def build_url(index):
        return base_url + req_file_start + str(index).zfill(zFill) + req_file_end

    # Write Source Details (the with-block makes sure the handle is closed)
    with open(os.path.join(download_dir, "_source_url.txt"), 'w') as source_file:
        source_file.write(build_url(range_start))

    # Create Requests
    request_list = [
        urllib.request.Request(
            build_url(i), headers=headers, unverifiable=True)
        for i in range(range_start, range_to + 1)
    ]

    # Create the request threads and start executing them
    with ThreadPoolExecutor(max_workers=MAX_THREADS) as executor:
        futures = []
        for request in request_list:
            futures.append(executor.submit(
                download_request, request, download_dir))
            if sleep > 0:
                time.sleep(sleep)

        # download_request reports IOError itself; anything else raised in a
        # worker would otherwise vanish inside its future, so surface it.
        for request, future in zip(request_list, futures):
            error = future.exception()
            if error is not None:
                print("Error: ", request.full_url, error)
if __name__ == '__main__':
    def submit():
        """Read the form, echo the planned operation and start downloading.

        Invalid input (a URL without ``/`` or a non-numeric zFill, range or
        sleep value) is reported on stdout and the download is not started.
        """
        # Process GUI Input
        _url = url_entry.get()
        req_file_start = reqFileStart_entry.get()
        req_file_end = reqFileEnd_entry.get()
        try:
            # Everything up to and including the last '/' is the base URL.
            base_url = _url[:(_url.rindex('/') + 1)]
            zFill = int(zFill_entry.get())
            range_start = int(rangeStart_entry.get())
            range_to = int(rangeTo_entry.get())
            sleep = float(sleep_entry.get())
        except ValueError as error:
            print("Error: invalid input:", error)
            return

        print_operation(base_url, range_start, range_to, zFill,
                        req_file_start, req_file_end, sleep)

        cwd = pathlib.Path(__file__).parent.absolute()

        # Determine download directory: a folder named after the file-name
        # prefix, next to this script. An empty prefix joins to the script
        # directory itself, i.e. no sub-folder is used.
        # If a Folder Name entry is added in the future, read it here.
        download_dir = os.path.join(cwd, req_file_start)

        # Start downloading
        download_files(base_url, range_start, range_to, zFill,
                       req_file_start, req_file_end, sleep, download_dir)

    # GUI
    root = Tk()
    root.title("Simple Set Downloader")
    root.grid_columnconfigure(0, weight=1)

    # Row 0: full URL of one file of the set
    url_label = Label(root, text="URL:")
    url_label.grid(row=0, column=0)
    url_entry = Entry(root, width=96)
    url_entry.grid(row=0, column=1)

    # Row 1: file-name part before the number
    req_fileStart_label = Label(root, text="URL Filename Start:")
    req_fileStart_label.grid(row=1, column=0)
    reqFileStart_entry = Entry(root)
    reqFileStart_entry.grid(row=1, column=1, sticky='we')

    # Row 2: file-name part after the number
    req_fileEnd_label = Label(root, text="URL Filename End:")
    req_fileEnd_label.grid(row=2, column=0)
    reqFileEnd_entry = Entry(root)
    reqFileEnd_entry.grid(row=2, column=1, sticky='we')
    reqFileEnd_entry.insert(0, '.zip')

    # Row 3: zero-padding width of the number
    zFill_label = Label(root, text="zFill:")
    zFill_label.grid(row=3, column=0)
    zFill_entry = Entry(root)
    zFill_entry.grid(row=3, column=1, sticky='we')
    zFill_entry.insert(0, '1')

    # Row 4: first and last number of the set, laid out in their own frame
    range_frame_label = Label(root, text="Range ")
    range_frame_label.grid(row=4, column=0)
    range_frame = Frame(root)
    range_frame.grid(row=4, column=1, sticky='we')
    rangeStart_label = Label(range_frame, text="Start:")
    rangeStart_label.grid(row=0, column=0)
    rangeStart_entry = Entry(range_frame)
    rangeStart_entry.grid(row=0, column=1)
    rangeStart_entry.insert(0, '1')
    rangeTo_label = Label(range_frame, text="To:")
    rangeTo_label.grid(row=0, column=2)
    rangeTo_entry = Entry(range_frame)
    rangeTo_entry.grid(row=0, column=3)

    # Row 5: pause between request submissions
    sleep_label = Label(root, text="Sleep:")
    sleep_label.grid(row=5, column=0)
    sleep_entry = Entry(root)
    sleep_entry.grid(row=5, column=1, sticky='we')
    sleep_entry.insert(0, '0')

    submit_button = Button(root, text='Download', command=submit)
    submit_button.grid(columnspan=2, sticky='we')

    root.mainloop()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment