Created
March 20, 2022 20:32
-
-
Save BerkeKaragoz/6c856a9f9e6351e5835ae7aafb2ce1d3 to your computer and use it in GitHub Desktop.
Simple File Set Downloader
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Simple File Set Downloader, berkekaragoz.com

Example
Let's say that we want to download all files starting from:
    https://example.com/assets/file-01.zip
to
    https://example.com/assets/file-30.zip

URL: https://example.com/assets/file-01.zip
Filename Start: file-
Filename End: .zip
zFill: 2  # file-1.zip is zFill 1, file-001.zip is zFill 3
Range Start: 1 End: 30
Sleep: 0

The files will be downloaded to the ./file-/ directory:
file-/
|- _source_url.txt
|- file-01.zip
|- file-02.zip
|- ...
|- file-29.zip
|- file-30.zip
""" | |
import urllib.request | |
import pathlib | |
import os | |
import time | |
from pathlib import Path | |
from tkinter import Tk, Label, Button, Entry, Frame | |
from concurrent.futures import ThreadPoolExecutor | |
# Upper bound on the number of download worker threads.
# `_MAX_WINDOWS_WORKERS` is an underscore-private name of
# `concurrent.futures.process`; guard the import so that the `or 24`
# fallback is actually reachable when the name is unavailable.
try:
    from concurrent.futures.process import _MAX_WINDOWS_WORKERS
except ImportError:
    _MAX_WINDOWS_WORKERS = None
MAX_THREADS = _MAX_WINDOWS_WORKERS or 24

# HTTP headers attached to every download request (see download_files).
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.83 Safari/537.36/8mqQhSuL-09',
    'Accept-Language': 'en-US,en;q=0.5',
    'Cache-Control': 'max-age=0',
    'Referer': 'https://duckduckgo.com/'  # Or any search engine
}
def print_operation(base_url, range_start, range_to, zFill, req_file_start, req_file_end, sleep):
    """Echo the interpreted input back to the console.

    Prints the first URL of the range, then the last URL of the range
    followed by the sleep value and the "start -> end" range summary.
    """
    first_number = str(range_start).zfill(zFill)
    last_number = str(range_to).zfill(zFill)

    first_url = "".join((base_url, req_file_start, first_number, req_file_end))
    last_url = "".join((base_url, req_file_start, last_number, req_file_end))

    summary_lines = (
        last_url,
        "Sleep: " + str(sleep),
        str(range_start) + " -> " + str(range_to),
    )

    print(first_url)
    print("\n".join(summary_lines))
def _looks_like_file_payload(first_chunk):
    """Shallowly decide whether the first chunk belongs to a real file.

    The check works on ``str(first_chunk)``, i.e. the ``b'...'`` repr of the
    bytes, exactly as the original heuristic did: the chunk is rejected when
    "html>" appears within the first 20 characters of that repr (an HTML
    page is probably an error page) or when the repr is 75 characters or
    shorter.
    """
    chunk_repr = str(first_chunk)
    return "html>" not in chunk_repr[:20] and len(chunk_repr) > 75


def download_request(request, download_dir):
    """Send the download request and write the response body to disk.

    The target path is ``download_dir`` + '/' + the basename of the request
    URL. The first 1024-byte chunk is inspected *before* the target file is
    opened, so a rejected response (HTML page, very short or empty body) no
    longer leaves an empty file behind.

    Args:
        request: The ``urllib.request.Request`` to fetch.
        download_dir: Directory (as a string) the file is written into.

    Any ``IOError`` raised while fetching or writing is reported on stdout
    and not re-raised.
    """
    try:
        with urllib.request.urlopen(request) as response:
            filename = download_dir + '/' + os.path.basename(request.full_url)

            # Check if it's the right file type before touching the disk.
            # There are better options for downloading HTML pages.
            first_chunk = response.read(1024)
            if not _looks_like_file_payload(first_chunk):
                print("This file won't be written: " + filename)
                return

            print(request.full_url)
            with open(filename, 'wb') as f:
                chunk = first_chunk
                while chunk:
                    f.write(chunk)
                    chunk = response.read(1024)
    except IOError:
        print("Error: ", request.full_url)
def download_files(base_url, range_start, range_to, zFill, req_file_start, req_file_end, sleep, download_dir):
    """Initiate the downloading process for the whole file range.

    Creates ``download_dir``, records the first URL of the range in
    ``_source_url.txt`` inside it, then submits one ``download_request``
    task per number in ``range_start..range_to`` (inclusive) to a thread
    pool.

    Args:
        base_url: URL prefix up to and including the last '/'.
        range_start: First file number (inclusive).
        range_to: Last file number (inclusive).
        zFill: Width the file number is zero-padded to.
        req_file_start: Filename part before the number.
        req_file_end: Filename part after the number.
        sleep: Seconds to wait after submitting each request; values
            <= 0 disable the wait.
        download_dir: Target directory (string path).
    """
    # Create download directory
    Path(download_dir).mkdir(parents=True, exist_ok=True)

    # Write Source Details (context manager so the handle is closed promptly)
    first_url = base_url + req_file_start + \
        str(range_start).zfill(zFill) + req_file_end
    with open(download_dir + "/_source_url.txt", 'w') as source_file:
        source_file.write(first_url)

    # Create Requests
    request_list = [
        urllib.request.Request(
            base_url + req_file_start + str(i).zfill(zFill) + req_file_end,
            headers=headers, unverifiable=True)
        for i in range(range_start, range_to + 1)
    ]

    # Create the request threads and start executing them
    futures = []
    with ThreadPoolExecutor(max_workers=MAX_THREADS) as executor:
        for request in request_list:
            futures.append(executor.submit(
                download_request, request, download_dir))
            if sleep > 0:
                time.sleep(sleep)

    # Leaving the executor block waits for every task, so all futures are
    # done here. Surface exceptions that download_request did not handle
    # itself instead of dropping them silently.
    for request, future in zip(request_list, futures):
        error = future.exception()
        if error is not None:
            print("Error: ", request.full_url, error)
if __name__ == '__main__':
    def submit():
        """Read the GUI fields, echo the operation and start downloading.

        Invalid input (a URL without '/', or non-numeric zFill / range /
        sleep values) is reported on stdout and the download is not
        started.
        """
        # Process GUI Input
        _url = url_entry.get()
        req_file_start = reqFileStart_entry.get()
        req_file_end = reqFileEnd_entry.get()
        try:
            # Everything up to and including the last '/' is the base URL.
            base_url = _url[:(_url.rindex('/') + 1)]
            zFill = int(zFill_entry.get())
            range_start = int(rangeStart_entry.get())
            range_to = int(rangeTo_entry.get())
            sleep = float(sleep_entry.get())
        except ValueError as error:
            print("Invalid input: " + str(error))
            return

        print_operation(base_url, range_start, range_to, zFill,
                        req_file_start, req_file_end, sleep)

        cwd = pathlib.Path(__file__).parent.absolute()

        # Determine download directory: a folder named after the filename
        # start, next to this script. Entry.get() returns a string, so an
        # empty field simply resolves to the script's own directory.
        # If a Folder Name entry is added in the future, read it here.
        download_dir = os.path.join(cwd, req_file_start)

        # Start downloading
        download_files(base_url, range_start, range_to, zFill,
                       req_file_start, req_file_end, sleep, download_dir)

    # GUI
    root = Tk()
    root.title("Simple Set Downloader")
    root.grid_columnconfigure(0, weight=1)

    url_label = Label(root, text="URL:")
    url_label.grid(row=0, column=0)
    url_entry = Entry(root, width=96)
    url_entry.grid(row=0, column=1)

    req_fileStart_label = Label(root, text="URL Filename Start:")
    req_fileStart_label.grid(row=1, column=0)
    reqFileStart_entry = Entry(root)
    reqFileStart_entry.grid(row=1, column=1, sticky='we')

    req_fileEnd_label = Label(root, text="URL Filename End:")
    req_fileEnd_label.grid(row=2, column=0)
    reqFileEnd_entry = Entry(root)
    reqFileEnd_entry.grid(row=2, column=1, sticky='we')
    reqFileEnd_entry.insert(0, '.zip')

    zFill_label = Label(root, text="zFill:")
    zFill_label.grid(row=3, column=0)
    zFill_entry = Entry(root)
    zFill_entry.grid(row=3, column=1, sticky='we')
    zFill_entry.insert(0, '1')

    # The range start/end fields share one row inside their own frame.
    range_frame_label = Label(root, text="Range ")
    range_frame_label.grid(row=4, column=0)
    range_frame = Frame(root)
    range_frame.grid(row=4, column=1, sticky='we')

    rangeStart_label = Label(range_frame, text="Start:")
    rangeStart_label.grid(row=0, column=0)
    rangeStart_entry = Entry(range_frame)
    rangeStart_entry.grid(row=0, column=1)
    rangeStart_entry.insert(0, '1')

    rangeTo_label = Label(range_frame, text="To:")
    rangeTo_label.grid(row=0, column=2)
    rangeTo_entry = Entry(range_frame)
    rangeTo_entry.grid(row=0, column=3)

    sleep_label = Label(root, text="Sleep:")
    sleep_label.grid(row=5, column=0)
    sleep_entry = Entry(root)
    sleep_entry.grid(row=5, column=1, sticky='we')
    sleep_entry.insert(0, '0')

    submit_button = Button(root, text='Download', command=submit)
    submit_button.grid(columnspan=2, sticky='we')

    root.mainloop()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment