Skip to content

Instantly share code, notes, and snippets.

@Atari2
Created January 19, 2023 21:16
Show Gist options
  • Save Atari2/a81696f68319e5a8bcede2e21c4396d1 to your computer and use it in GitHub Desktop.
Save Atari2/a81696f68319e5a8bcede2e21c4396d1 to your computer and use it in GitHub Desktop.
downloadSMWCSection
import requests
import os
from bs4 import BeautifulSoup
import time
import re
import logging
import shutil
from urllib.parse import unquote_plus
# Dedicated error logger for the downloader. It is obtained through
# logging.getLogger() rather than by instantiating logging.Logger directly,
# so that it is registered with the logging manager.
logger = logging.getLogger('Errors')
logger.setLevel(logging.ERROR)
# Keep records out of the root logger: errors are only meant to end up in
# download.log, not in whatever handlers the root logger may have.
logger.propagate = False
# mode='w' truncates download.log every time the module is loaded, so the file
# only ever describes the most recent run.
handler = logging.FileHandler(filename='download.log', encoding='utf-8', mode='w')
handler.setFormatter(logging.Formatter('%(asctime)s:%(levelname)s:%(name)s: %(message)s'))
logger.addHandler(handler)
def download():
    """Interactively download every file listed in an SMW Central section.

    Prompts for the number of listing pages and for the section name, then
    walks pages 1 through ``pages`` (inclusive) of that section. Each page
    gets its own ``Page<i>`` directory in the current working directory
    (deleted and recreated if it already exists), and every link on the page
    whose href matches ``dl.smwcentral.net`` is saved into it.

    Failures while fetching a page or a single file are written to the
    module-level ``logger`` and the run continues with the next item.

    Returns:
        None. Returns early, after printing a message, when either prompt
        receives an invalid answer.
    """
    try:
        pages = int(input('How many pages does the section have?'))
    except ValueError:
        print('Invalid pages number was passed')
        return

    # Maps the name the user types to the section id used in the site's URL.
    valid_sections = {
        'graphics': 'smwgraphics',
        'music': 'smwmusic',
        'blocks': 'smwblocks',
        'sprites': 'smwsprites',
        'patches': 'smwpatches',
        'uberasm': 'uberasm',
        'hacks': 'smwhacks'
    }
    section = input(f'Which section do you want to download the files from? Valid values are '
                    f'{list(valid_sections.keys())}').lower()
    if section not in valid_sections:
        print('Invalid section was passed')
        return
    section = valid_sections[section]

    with requests.Session() as sess:
        # range() excludes its stop value, so use pages + 1 to include the
        # last page (and to download anything at all when pages == 1).
        for i in range(1, pages + 1):
            print(f'Started page {i}')
            pagename = f'Page{i}'
            try:
                os.mkdir(pagename)
            except FileExistsError as e:
                # Leftover directory from a previous run: start it from scratch.
                logger.error('Ignoring exception: %s on page %s', e, i)
                shutil.rmtree(pagename)
                os.mkdir(pagename)

            try:
                page = sess.get(f'https://www.smwcentral.net/?p=section&s={section}&u=0&g=0&n={i}&o=date&d=desc')
                # Do not scrape an HTTP error page as if it were the listing.
                page.raise_for_status()
            except requests.RequestException as e:
                # One unreachable page should not abort the remaining pages.
                logger.error('Ignoring exception: %s while fetching page %s', e, i)
                continue

            soup = BeautifulSoup(page.text, 'html.parser')
            links = soup.find_all('a', href=re.compile('dl.smwcentral.net'))
            for link in links:
                try:
                    # NOTE(review): prepending 'https:' assumes the hrefs are
                    # protocol-relative ('//dl.smwcentral.net/...') - confirm.
                    with sess.get('https:' + link['href']) as response:
                        # Without this check a 404/500 body would be written
                        # to disk under the name of the requested file.
                        response.raise_for_status()
                        # The file name is the last path segment, URL-decoded.
                        filename = unquote_plus(link['href']).split('/')[-1]
                        with open(f'{pagename}/{filename}', 'wb') as f:
                            f.write(response.content)
                        print(f'\tSaved file {filename}')
                except Exception as e:
                    # Best-effort boundary: record the failure, keep going.
                    logger.error('Ignoring exception: %s on link %s', e, link)
                # Pause between downloads to avoid hammering the server.
                time.sleep(0.5)
            print(f'Finished page {i}')
    print('Eventual download errors will be reported in download.log')
# Run the interactive downloader only when this file is executed as a script,
# so that importing the module does not trigger the prompts and downloads.
if __name__ == '__main__':
    download()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment