@KokoseiJ
Last active May 2, 2021 18:05
Clone OneIndex recursively
#
# Clone a OneIndex listing recursively.
# Requires the requests and bs4 modules, plus the wget binary on your system.
# The first argument is the URL of the target OneIndex; it is required.
# The second argument is the name of the folder where the contents will be
# stored; it defaults to 'oneindex_download' if not provided.
# Any further arguments are ignored.
#
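# Example invocation (a minimal sketch; the script name and URL below are
# placeholders, not a real index):
#   python clone_oneindex.py https://example.com/oneindex/ my_download
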
import os
import sys
import time
import requests
from urllib.parse import urljoin
from bs4 import BeautifulSoup as bs

if len(sys.argv) < 2:
    print("Error: Site URL not provided")
    sys.exit(1)

grab_url = sys.argv[1]
name = sys.argv[2] if len(sys.argv) >= 3 else "oneindex_download"


def request(url, method, stream=False):
    # Retry on any non-200 response, waiting a second between attempts.
    r = requests.request(method, url, stream=stream)
    if r.status_code != 200:
        print(f"Error: {r.status_code} while trying {url}. Retrying...")
        time.sleep(1)
        return request(url, method, stream)
    return r


def get_lists(html):
    # Parse the directory listing and return the href of every listed item.
    soup = bs(html, features='lxml')
    items = soup.find("div", {'class': 'items'}).find_all("a", {'class': 'item'})
    return [item['href'] for item in items]


def download_dir(url, name):
    # Create the folder, descend into it, and download its contents,
    # recursing into any entry whose href ends with a slash.
    print(f"Downloading folder '{name}'...")
    try:
        os.mkdir(name)
    except FileExistsError:
        pass
    os.chdir(name)
    url = url[:-1] if url.endswith("/") else url
    r = request(url, "GET")
    filelist = get_lists(r.text)
    for file in filelist:
        if file.endswith("/"):
            download_dir(urljoin(url, file), file.replace("/", ""))
        else:
            download_file(urljoin(url, file))
    os.chdir("..")


def download_file(url):
    # Let wget handle the actual transfer into the current directory.
    print(f"Downloading file '{url.rsplit('/')[-1]}'...")
    os.system(f"wget '{url}'")
    print("Finished!")


download_dir(grab_url, name)