Skip to content

Instantly share code, notes, and snippets.

@Tuhin-thinks
Created June 10, 2023 06:45
Show Gist options
  • Save Tuhin-thinks/5e0921ed5b425603591020e9e2566517 to your computer and use it in GitHub Desktop.
Save Tuhin-thinks/5e0921ed5b425603591020e9e2566517 to your computer and use it in GitHub Desktop.
Utility Python script that selectively saves files from a server.
import os
import sys
from pathlib import Path
from urllib.parse import unquote, urljoin, urlparse

import requests
from bs4 import BeautifulSoup
# Downloads are mirrored into a "save" folder located next to this script.
SAVE_DIR = Path(os.path.abspath(__file__)).parent / 'save'

# Directory names that the crawler is allowed to descend into and download from.
TO_SAVE_DIR = [
    'Movies',
    'TV Shows',
    'Music',
    'Books',
    'Games',
    'Software',
    'Pictures',
]
def get_all_links(response_content: bytes):
    """Split the anchors of an HTML page into directory and file links.

    Args:
        response_content: Raw HTML of the page, as found in the ``content``
            attribute of a ``requests`` response.

    Returns:
        A ``(dirs, files)`` tuple of lists with the ``href`` values in
        document order. Hrefs ending in ``/`` are placed in ``dirs``; every
        other non-empty href is placed in ``files``.
    """
    soup = BeautifulSoup(response_content, 'html.parser')
    dirs = []
    files = []
    # href=True keeps only anchors that actually carry an href attribute;
    # without it ``link.get('href')`` can be None and ``.endswith`` crashes.
    for link in soup.find_all('a', href=True):
        href = link['href']
        if not href:
            # An empty href points back at the page itself, not at an entry.
            continue
        if href.endswith('/'):
            dirs.append(href)
        else:
            files.append(href)
    return dirs, files
def get_local_path_from_url(link: str):
    """Map a URL onto the matching location below ``SAVE_DIR``.

    Only the path component of *link* is used; its leading slashes are
    removed so that the result stays relative to ``SAVE_DIR``.

    Args:
        link: URL (or bare path) of the remote resource.

    Returns:
        The ``SAVE_DIR``-based path corresponding to the URL's path.
    """
    relative_part = urlparse(link).path.lstrip('/')
    return SAVE_DIR / relative_part
def save_file_from_url(link: str):
    """Download *link* and write it to its mirrored path below ``SAVE_DIR``.

    The destination is computed by ``get_local_path_from_url``; missing
    parent directories are created first.

    Args:
        link: Full URL of the file to download.

    Raises:
        requests.HTTPError: If the server answers with an error status, so
            that an error page is never written to disk as if it were the
            requested file.
    """
    path = get_local_path_from_url(link)
    # exist_ok avoids a failure if the directory appears between a separate
    # existence check and the mkdir call.
    path.parent.mkdir(parents=True, exist_ok=True)
    with requests.session() as req_session:
        # stream=True defers downloading the body, so iter_content really
        # writes the file chunk by chunk instead of from one in-memory blob.
        with req_session.get(link, stream=True) as resp:
            resp.raise_for_status()
            with open(path, 'wb') as f:
                for chunk in resp.iter_content(chunk_size=64 * 1024):
                    f.write(chunk)
def get_dir_name(link: str):
    """Return the name of the directory that *link* refers to.

    For a directory URL (path ending in ``/``) this is the directory's own
    name; for a file URL it is the name of the directory containing the
    file. Percent-escapes are decoded, so ``TV%20Shows`` is returned as
    ``TV Shows`` and can be compared with plain names.

    Args:
        link: URL whose path component is inspected.

    Returns:
        The decoded directory name, or ``''`` when the path has no directory
        component (for example the server root or a file directly below it).
    """
    segments = urlparse(link).path.lstrip('/').split('/')
    # The last segment is '' for a directory URL or the file name for a file
    # URL; in both cases the segment before it is the directory.
    if len(segments) < 2:
        return ''
    # Decode after splitting so an escaped slash cannot change the segments.
    return unquote(segments[-2])
def main_parser(parse_address: str, parent_dir_name: str = None):
    """Crawl the listing at *parse_address* and save files selectively.

    Files linked from the page are downloaded only when *parent_dir_name*
    is listed in ``TO_SAVE_DIR``. Linked directories are crawled recursively
    only when their own name is listed in ``TO_SAVE_DIR``.

    Args:
        parse_address: URL of the directory listing to fetch.
        parent_dir_name: Name of the directory that *parse_address*
            represents; ``None`` for the starting address.

    Raises:
        requests.HTTPError: If the listing request returns an error status,
            so an error page is not parsed as if it were a listing.
    """
    # A trailing slash makes urljoin resolve relative hrefs beneath this
    # address instead of beside it, and avoids "//" in the built URLs.
    base_url = parse_address if parse_address.endswith('/') else f"{parse_address}/"

    with requests.session() as req_session:
        resp = req_session.get(parse_address)
        resp.raise_for_status()
    dirs, files = get_all_links(resp.content)

    if parent_dir_name in TO_SAVE_DIR:
        for file in files:
            file_url = urljoin(base_url, file)
            # Print the path derived from the real download URL, i.e. the
            # location the file is actually written to.
            print(get_local_path_from_url(file_url))
            save_file_from_url(file_url)

    for dir_link in dirs:
        next_addr = urljoin(base_url, dir_link)
        # Only descend: links such as "./", "../" or absolute links back to
        # this page or an ancestor would otherwise recurse without end.
        if next_addr == base_url or not next_addr.startswith(base_url):
            continue
        dir_parent_name = get_dir_name(next_addr)
        if dir_parent_name in TO_SAVE_DIR:
            main_parser(next_addr, dir_parent_name)
if __name__ == '__main__':
    # Crawl the server given as the first command-line argument; without an
    # argument, fall back to the address this script was originally written for.
    address = sys.argv[1] if len(sys.argv) > 1 else "http://192.168.1.34:8080"
    main_parser(address)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment