# rebane2001/fotoalbum-dl.py

## fotoalbum-dl.py
import urllib.request
import re

# Site root; the photo page paths scraped from the album are appended to it.
base_url = "http://fotoalbum.ee"

# Prompt ("Enter the fotoalbum.ee album link:") and keep only the part of the
# answer before the first "?", because the paging loop adds its own query.
album = input("Sisesta fotoalbum.ee albumi link:").partition("?")[0]
if "/sets/" not in album:
    # "Warning: the link is not an album and may therefore work incorrectly"
    print("Hoiatus: Link pole album ning võib seetõttu valesti toimida")

# Photo page paths collected from every album page.
pildid = []

# Restricted-mode transliterations: accented Latin letters mapped to plain
# ASCII stand-ins (a few expand to two characters, e.g. 'Æ' -> 'AE').
_ACCENT_CHARS = {
    **dict.fromkeys('ÂÃÄÀÁÅ', 'A'), 'Æ': 'AE', 'Ç': 'C',
    **dict.fromkeys('ÈÉÊË', 'E'), **dict.fromkeys('ÌÍÎÏ', 'I'),
    'Ð': 'D', 'Ñ': 'N', **dict.fromkeys('ÒÓÔÕÖŐØ', 'O'), 'Œ': 'OE',
    **dict.fromkeys('ÙÚÛÜŰ', 'U'), 'Ý': 'Y', 'Þ': 'TH', 'ß': 'ss',
    **dict.fromkeys('àáâãäå', 'a'), 'æ': 'ae', 'ç': 'c',
    **dict.fromkeys('èéêë', 'e'), **dict.fromkeys('ìíîï', 'i'),
    'ð': 'o', 'ñ': 'n', **dict.fromkeys('òóôõöőø', 'o'), 'œ': 'oe',
    **dict.fromkeys('ùúûüű', 'u'), 'ý': 'y', 'þ': 'th', 'ÿ': 'y',
}


def sanitize_filename(s, restricted=False, is_id=False):
    """Sanitizes a string so it could be used as part of a filename.

    Characters that are unsafe in filenames are dropped or substituted, and
    clock-style digit groups such as "12:34:56" keep their digits with the
    colons turned into underscores.

    Args:
        s: The string to sanitize.
        restricted: If set, use a stricter subset of allowed characters:
            letters found in the transliteration table become ASCII, while
            whitespace, the punctuation in ``!&'()[]{}$;`^,#`` and every other
            non-ASCII character become '_'.
        is_id: Set if this is not an arbitrary string, but an ID that should
            be kept if possible; the final underscore/dash/dot cleanup is
            skipped.

    Returns:
        The sanitized string. Unless is_id is set, the result is never empty
        ('_' is returned when nothing else is left).
    """
    def replace_insane(char):
        # Transliterate first so known accented letters survive restricted
        # mode instead of falling through to the generic '_' replacement.
        if restricted and char in _ACCENT_CHARS:
            return _ACCENT_CHARS[char]
        # '?' and control characters (including DEL) are removed outright.
        if char == '?' or ord(char) < 32 or ord(char) == 127:
            return ''
        elif char == '"':
            return '' if restricted else '\''
        elif char == ':':
            return '_-' if restricted else ' -'
        elif char in '\\/|*<>':
            return '_'
        if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()):
            return '_'
        if restricted and ord(char) > 127:
            return '_'
        return char

    # Handle timestamps: rewrite "1:23:45" as "1_23_45" before the per-character
    # pass would turn each ':' into a dash.
    s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
    result = ''.join(map(replace_insane, s))
    if not is_id:
        # Collapse runs of underscores produced by adjacent replacements.
        while '__' in result:
            result = result.replace('__', '_')
        result = result.strip('_')
        # Common case of "Foreign band name - English song title"
        if restricted and result.startswith('-_'):
            result = result[2:]
        # Replace a leading '-' with '_'.
        if result.startswith('-'):
            result = '_' + result[len('-'):]
        # Strip leading dots.
        result = result.lstrip('.')
        if not result:
            result = '_'
    return result

# Walk the album page by page (?page=1, 2, 3, ...) collecting photo page
# paths, and stop at the first page that contains no photo links.
i = 1
while True:
    print(f"Laeb albumi lehte {i}")  # "Loading album page {i}"
    # Use the response as a context manager so the connection is closed as
    # soon as the body has been read.
    with urllib.request.urlopen(f"{album}?page={i}") as response:
        r = response.read().decode("utf-8")
    lingid = re.findall(r"/photos/[^\/]+/[0-9]+", r)
    if not lingid:
        break
    pildid += lingid
    i += 1

# A photo path may be matched more than once; keep only the first occurrence
# of each (order preserved) so no picture is fetched twice.
pildid = list(dict.fromkeys(pildid))

print(f"Leitud {len(pildid)} pilti")  # "Found {n} pictures"

# Open every photo page, locate the full-size <img> tag and save the image
# under a filename derived from the tag's alt text.
for pilt in pildid:
    page_url = f"{base_url}{pilt}"
    with urllib.request.urlopen(page_url) as response:
        r = response.read().decode("utf-8")
    results = re.search(r'<img src="([^"]*)" border="[0-9]*" alt="([^"]*)" vspace="[0-9]*">', r)
    if results is None:
        # The page did not contain the expected tag; skip it instead of
        # crashing on results.group(). ("Warning: no picture found on page
        # ..., skipping")
        print(f"Hoiatus: lehelt {page_url} ei leitud pilti, jätan vahele")
        continue
    # The scheme is prepended here, so src is presumably protocol-relative
    # ("//host/path") - verify against the site's markup.
    dlurl = f"http:{results.group(1)}"
    filename = sanitize_filename(results.group(2))
    print(filename)
    # Make sure the saved file carries the same extension as the image URL.
    ext = dlurl.split(".")[-1]
    if not filename.endswith(f".{ext}"):
        filename += f".{ext}"
    urllib.request.urlretrieve(dlurl, filename)
	import urllib.request
	import re

	# Site root; the photo page paths scraped from the album are appended to it.
	base_url = "http://fotoalbum.ee"

	# Prompt ("Enter the fotoalbum.ee album link:") and drop everything from the
	# first "?" on, because the paging loop adds its own query string.
	album = input("Sisesta fotoalbum.ee albumi link:")
	album = album.split("?")[0]
	if "/sets/" not in album:
		# The warning belongs inside the if-body; its nesting had been lost.
		# "Warning: the link is not an album and may therefore work incorrectly"
		print("Hoiatus: Link pole album ning võib seetõttu valesti toimida")
	# Photo page paths collected from every album page.
	pildid = []

	# Restricted-mode transliterations: accented Latin letters mapped to plain
	# ASCII stand-ins (a few expand to two characters, e.g. 'Æ' -> 'AE').
	_ACCENT_CHARS = {
		**dict.fromkeys('ÂÃÄÀÁÅ', 'A'), 'Æ': 'AE', 'Ç': 'C',
		**dict.fromkeys('ÈÉÊË', 'E'), **dict.fromkeys('ÌÍÎÏ', 'I'),
		'Ð': 'D', 'Ñ': 'N', **dict.fromkeys('ÒÓÔÕÖŐØ', 'O'), 'Œ': 'OE',
		**dict.fromkeys('ÙÚÛÜŰ', 'U'), 'Ý': 'Y', 'Þ': 'TH', 'ß': 'ss',
		**dict.fromkeys('àáâãäå', 'a'), 'æ': 'ae', 'ç': 'c',
		**dict.fromkeys('èéêë', 'e'), **dict.fromkeys('ìíîï', 'i'),
		'ð': 'o', 'ñ': 'n', **dict.fromkeys('òóôõöőø', 'o'), 'œ': 'oe',
		**dict.fromkeys('ùúûüű', 'u'), 'ý': 'y', 'þ': 'th', 'ÿ': 'y',
	}


	def sanitize_filename(s, restricted=False, is_id=False):
		"""Sanitizes a string so it could be used as part of a filename.

		Characters that are unsafe in filenames are dropped or substituted, and
		clock-style digit groups such as "12:34:56" keep their digits with the
		colons turned into underscores.

		Args:
			s: The string to sanitize.
			restricted: If set, use a stricter subset of allowed characters:
				letters found in the transliteration table become ASCII, while
				whitespace, the punctuation in ``!&'()[]{}$;`^,#`` and every
				other non-ASCII character become '_'.
			is_id: Set if this is not an arbitrary string, but an ID that
				should be kept if possible; the final underscore/dash/dot
				cleanup is skipped.

		Returns:
			The sanitized string. Unless is_id is set, the result is never
			empty ('_' is returned when nothing else is left).
		"""
		def replace_insane(char):
			# Transliterate first so known accented letters survive restricted
			# mode instead of falling through to the generic '_' replacement.
			if restricted and char in _ACCENT_CHARS:
				return _ACCENT_CHARS[char]
			# '?' and control characters (including DEL) are removed outright.
			if char == '?' or ord(char) < 32 or ord(char) == 127:
				return ''
			elif char == '"':
				return '' if restricted else '\''
			elif char == ':':
				return '_-' if restricted else ' -'
			# Only the backslash needs escaping in this literal; '\|' is not a
			# valid string escape.
			elif char in '\\/|*<>':
				return '_'
			if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()):
				return '_'
			if restricted and ord(char) > 127:
				return '_'
			return char

		# Handle timestamps: rewrite "1:23:45" as "1_23_45" before the
		# per-character pass would turn each ':' into a dash.
		s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
		result = ''.join(map(replace_insane, s))
		if not is_id:
			# Collapse runs of underscores produced by adjacent replacements.
			while '__' in result:
				result = result.replace('__', '_')
			result = result.strip('_')
			# Common case of "Foreign band name - English song title"
			if restricted and result.startswith('-_'):
				result = result[2:]
			# Replace a leading '-' with '_'.
			if result.startswith('-'):
				result = '_' + result[len('-'):]
			# Strip leading dots.
			result = result.lstrip('.')
			if not result:
				result = '_'
		return result

	# Walk the album page by page (?page=1, 2, 3, ...) collecting photo page
	# paths, and stop at the first page that contains no photo links.
	i = 1
	while True:
		print(f"Laeb albumi lehte {i}")  # "Loading album page {i}"
		# Use the response as a context manager so the connection is closed
		# as soon as the body has been read.
		with urllib.request.urlopen(f"{album}?page={i}") as response:
			r = response.read().decode("utf-8")
		lingid = re.findall(r"/photos/[^\/]+/[0-9]+", r)
		pildid += lingid
		if not lingid:
			break
		i += 1

	print(f"Leitud {len(pildid)} pilti")  # "Found {n} pictures"

	# Open every photo page, locate the full-size <img> tag and save the image
	# under a filename derived from the tag's alt text.
	for pilt in pildid:
		page_url = f"{base_url}{pilt}"
		with urllib.request.urlopen(page_url) as response:
			r = response.read().decode("utf-8")
		# The '*' quantifiers are required: without them each group could
		# match only a single character and the tag would never be found.
		results = re.search(r'<img src="([^"]*)" border="[0-9]*" alt="([^"]*)" vspace="[0-9]*">', r)
		if results is None:
			# The page did not contain the expected tag; skip it instead of
			# crashing on results.group(). ("Warning: no picture found on
			# page ..., skipping")
			print(f"Hoiatus: lehelt {page_url} ei leitud pilti, jätan vahele")
			continue
		# The scheme is prepended here, so src is presumably protocol-relative
		# ("//host/path") - verify against the site's markup.
		dlurl = f"http:{results.group(1)}"
		filename = sanitize_filename(results.group(2))
		print(filename)
		# Make sure the saved file carries the same extension as the image URL.
		ext = dlurl.split(".")[-1]
		if not filename.endswith(f".{ext}"):
			filename += f".{ext}"
		urllib.request.urlretrieve(dlurl, filename)