Skip to content

Instantly share code, notes, and snippets.

@rebane2001
Created September 30, 2020 13:07
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save rebane2001/9683e476ad361c2b949f239c15a10928 to your computer and use it in GitHub Desktop.
Save rebane2001/9683e476ad361c2b949f239c15a10928 to your computer and use it in GitHub Desktop.
Laeb alla fotoalbum.ee albumeid
import re
import urllib.parse
import urllib.request
# Site root; the photo page paths scraped from the album are appended to it.
base_url = "http://fotoalbum.ee"

# Ask for the album URL and keep only the part before any query string, so
# that a "?page=N" suffix can be appended cleanly later on.
album = input("Sisesta fotoalbum.ee albumi link:").partition("?")[0]

# Album URLs are expected to contain "/sets/"; anything else is still
# attempted, but the user is warned that it may not work.
if "/sets/" not in album:
    print("Hoiatus: Link pole album ning võib seetõttu valesti toimida")

# Photo page paths collected from the album pages.
pildid = list()
# Accented Latin letters and their plain-ASCII replacements. Only consulted
# by sanitize_filename() in restricted mode. Besides the Latin-1 letters it
# includes the Estonian š/ž, since this script targets an Estonian site.
ACCENT_CHARS = {
    accented: plain
    for group, plain in (
        ('ÂÃÄÀÁÅ', 'A'), ('Æ', 'AE'), ('Ç', 'C'), ('ÈÉÊË', 'E'),
        ('ÌÍÎÏ', 'I'), ('Ð', 'D'), ('Ñ', 'N'), ('ÒÓÔÕÖŐØ', 'O'),
        ('Œ', 'OE'), ('Š', 'S'), ('ÙÚÛÜŰ', 'U'), ('Ý', 'Y'), ('Ž', 'Z'),
        ('Þ', 'TH'), ('ß', 'ss'),
        ('àáâãäå', 'a'), ('æ', 'ae'), ('ç', 'c'), ('èéêë', 'e'),
        ('ìíîï', 'i'), ('ð', 'd'), ('ñ', 'n'), ('òóôõöőø', 'o'),
        ('œ', 'oe'), ('š', 's'), ('ùúûüű', 'u'), ('ýÿ', 'y'), ('ž', 'z'),
        ('þ', 'th'),
    )
    for accented in group
}


def sanitize_filename(s, restricted=False, is_id=False):
    """Sanitize a string so it can be used as part of a filename.

    Args:
        s: The string to sanitize.
        restricted: If true, use a stricter subset of allowed characters:
            accented letters listed in ACCENT_CHARS are replaced by their
            ASCII counterparts, and whitespace, shell-special punctuation
            and any remaining non-ASCII character become "_".
        is_id: Set this if ``s`` is not an arbitrary string but an ID that
            should be kept as intact as possible; the clean-up of
            underscores, leading dashes and leading dots is then skipped.

    Returns:
        The sanitized string. Unless ``is_id`` is set, the result is never
        empty: a string that sanitizes to nothing yields "_".
    """

    def replace_insane(char):
        # Return the replacement text for a single character.
        if restricted and char in ACCENT_CHARS:
            return ACCENT_CHARS[char]
        # Question marks and ASCII control characters are dropped entirely.
        if char == '?' or ord(char) < 32 or ord(char) == 127:
            return ''
        elif char == '"':
            return '' if restricted else '\''
        elif char == ':':
            return '_-' if restricted else ' -'
        elif char in '\\/|*<>':
            return '_'
        if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()):
            return '_'
        if restricted and ord(char) > 127:
            return '_'
        return char

    # Handle timestamps: "12:34:56" becomes "12_34_56" before the generic
    # ":" replacement below can turn each colon into " -".
    s = re.sub(
        r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
    result = ''.join(map(replace_insane, s))
    if not is_id:
        # Collapse runs of underscores and trim them from both ends.
        while '__' in result:
            result = result.replace('__', '_')
        result = result.strip('_')
        # Common case of "Foreign band name - English song title"
        if restricted and result.startswith('-_'):
            result = result[2:]
        # Replace a leading dash with an underscore.
        if result.startswith('-'):
            result = '_' + result[len('-'):]
        # Strip leading dots from the name.
        result = result.lstrip('.')
        if not result:
            result = '_'
    return result
# Matches links to individual photo pages inside an album page.
photo_link_re = re.compile(r"/photos/[^\/]+/[0-9]+")
# Captures the image URL (group 1) and its alt text (group 2) on a photo page.
photo_img_re = re.compile(
    r'<img src="([^"]*)" border="[0-9]*" alt="([^"]*)" vspace="[0-9]*">'
)

# Walk the album page by page, collecting each photo link once. The loop ends
# as soon as a page contributes no links that were not already seen, which
# covers both an empty page and a site that repeats links past the last page.
seen_links = set(pildid)
i = 1
while True:
    print(f"Laeb albumi lehte {i}")
    with urllib.request.urlopen(f"{album}?page={i}") as response:
        r = response.read().decode("utf-8")
    # dict.fromkeys() drops duplicates within the page but keeps page order.
    lingid = [
        link for link in dict.fromkeys(photo_link_re.findall(r))
        if link not in seen_links
    ]
    if not lingid:
        break
    pildid += lingid
    seen_links.update(lingid)
    i += 1
print(f"Leitud {len(pildid)} pilti")

# Visit every photo page, locate the image and save it under its alt text.
used_filenames = set()
for pilt in pildid:
    page_url = f"{base_url}{pilt}"
    with urllib.request.urlopen(page_url) as response:
        r = response.read().decode("utf-8")
    results = photo_img_re.search(r)
    if results is None:
        # Page layout did not match; skip this photo instead of crashing.
        print(f"Hoiatus: lehelt {page_url} ei leitud pilti, jätan vahele")
        continue
    # urljoin resolves protocol-relative ("//host/x"), root-relative and
    # absolute image URLs against the site root.
    dlurl = urllib.parse.urljoin(base_url, results.group(1))
    title = sanitize_filename(results.group(2))
    print(title)
    # Take the extension from the last URL path segment only, so that query
    # strings and dots in the host name cannot leak into the filename.
    basename = urllib.parse.urlsplit(dlurl).path.rsplit("/", 1)[-1]
    suffix = ""
    if "." in basename:
        ext = basename.rsplit(".", 1)[-1]
        suffix = f".{ext}"
    # Avoid doubling the extension when the title already ends with it.
    stem = title[:-len(suffix)] if suffix and title.endswith(suffix) else title
    filename = f"{stem}{suffix}"
    if filename in used_filenames:
        # Another photo in this run already used this name; add the numeric
        # photo id from the link so the earlier file is not overwritten.
        photo_id = pilt.rsplit("/", 1)[-1]
        filename = f"{stem}_{photo_id}{suffix}"
    used_filenames.add(filename)
    urllib.request.urlretrieve(dlurl, filename)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment