Last active
October 19, 2019 10:46
-
-
Save madr/bb080c4c1a33b85c19e0a5cd7db7b2d4 to your computer and use it in GitHub Desktop.
Podcast backup script
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Podcast backup script | |
Parses an RSS feed from SOURCE_FILE and download all items to | |
DESTINATION_PATH. Downloads are done in parallel, for | |
PARALLEL_COUNT downloads at a time.
How to use | |
---------- | |
1. Set DESTINATION_PATH. Make sure the folder exists on your | |
file system. | |
2. Save the source file (RSS or Atom) on your computer and | |
update SOURCE_FILE if needed. | |
3. Alter PARALLEL_COUNT for your needs. Higher number will | |
decrease total time for this script to be done, but will | |
increase net traffic. | |
4. Run the script in a Python interpreter; 3.7 is recommended.
This script was written by Anders Ytterström in October 2019. | |
If you find it useful, buy him a 🍺. | |
""" | |
import queue | |
import threading | |
import xml.etree.ElementTree as ET | |
from urllib.request import urlretrieve | |
# Raw strings: Windows paths contain backslash sequences (\A, \p, \i, \K, ...)
# that are invalid escape sequences in regular string literals — a
# DeprecationWarning today and a SyntaxError in future Python versions.
# The raw-string values are byte-identical to the originals.
DESTINATION_PATH = r"D:\Asmodean\podcasts\inbox"  # must already exist on disk
SOURCE_FILE = r"D:\Kod\gists\src.xml"             # saved RSS/Atom feed to parse
PARALLEL_COUNT = 3                                # number of concurrent downloads
def download_file(url, target):
    """Download *url* into DESTINATION_PATH as ``<target>.mp3``.

    Parameters
    ----------
    url : str
        Direct URL of the audio file (the feed item's enclosure).
    target : str
        Slugified episode title used as the file name (no extension).
    """
    print(f"Downloading {target} <- {url}")
    # \\ instead of the original \{ — the lone backslash before a replacement
    # field is an invalid escape sequence (warning now, error in future
    # Pythons); the resulting path string is byte-identical.
    urlretrieve(url, f"{DESTINATION_PATH}\\{target}.mp3")
def get_urls(source_file=None):
    """Parse an RSS feed and return (url, filename) pairs for its items.

    Parameters
    ----------
    source_file : str | None
        Path to the feed XML. Defaults to SOURCE_FILE when None (the
        default is resolved at call time, so the module constant may be
        changed after import).

    Returns
    -------
    map
        Lazy iterator of ``(enclosure_url, slugified_title)`` tuples,
        one per ``<item>`` element under ``<channel>``.
    """
    if source_file is None:
        source_file = SOURCE_FILE
    tree = ET.parse(source_file)
    root = tree.getroot()

    def to_pair(item):
        # enclosure/@url points at the audio file; the title becomes the
        # destination file name (slugified for the file system).
        url = item.find("enclosure").attrib["url"]
        filename = slugify(item.find("title").text)
        return (url, filename)

    return map(to_pair, root.findall("./channel/item"))
def slugify(text):
    """Turn an episode title into a file-system-friendly name.

    Lowercases the text, turns spaces into hyphens, drops ``:`` and
    ``?``, and rewrites ``/`` as ``-av-``.
    """
    slug = text.lower()
    # Same substitutions as chained .replace() calls, applied in order.
    for old, new in ((" ", "-"), (":", ""), ("/", "-av-"), ("?", "")):
        slug = slug.replace(old, new)
    return slug
def do_work(item):
    """Handle one queue entry: unpack the (url, filename) pair and download it."""
    url, target = item
    download_file(url, target)
if __name__ == "__main__":
    # Worker loop: pull (url, filename) tuples off the shared queue until a
    # None sentinel arrives, downloading each one. task_done() is called per
    # real item only — sentinels are enqueued after q.join(), so the
    # done-count bookkeeping stays balanced.
    def worker():
        while True:
            item = q.get()
            if item is None:
                # Sentinel: this worker is told to shut down.
                break
            do_work(item)
            q.task_done()
    q = queue.Queue()
    threads = []
    # Start PARALLEL_COUNT worker threads before filling the queue.
    for i in range(PARALLEL_COUNT):
        t = threading.Thread(target=worker)
        t.start()
        threads.append(t)
    source = get_urls()
    for item in source:
        q.put(item)
    # block until all tasks are done
    q.join()
    # stop workers
    # One sentinel per worker; each worker consumes exactly one and exits.
    for i in range(PARALLEL_COUNT):
        q.put(None)
    for t in threads:
        t.join()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment