Skip to content

Instantly share code, notes, and snippets.

@erikvanzijst
Last active March 13, 2023 08:21
Show Gist options
  • Save erikvanzijst/71fc21fb10b8a112b741b7c152cb6248 to your computer and use it in GitHub Desktop.
Save erikvanzijst/71fc21fb10b8a112b741b7c152cb6248 to your computer and use it in GitHub Desktop.
Parallel web crawler
#!/usr/bin/env python3
# Simple multi-processing web crawler, following all a.href's that end in a '/'.
import os
import traceback
from itertools import chain
from multiprocessing import Pool
from time import time
from urllib.parse import urljoin
from bs4 import BeautifulSoup
import requests
# Module-level HTTP session; crawl() issues every request through it.
session = requests.Session()
# URL prefix that bounds the crawl: it seeds the work set and crawl() only
# returns links that start with it.
root = 'http://be.archive.ubuntu.com/ubuntu/dists/bionic/'
# Number of worker processes handed to multiprocessing.Pool.
CONCURRENCY = 10
def crawl(url: str, *, retries: int = 3, timeout: float = 10.0) -> set[str]:
    """Fetch *url* and return the directory links found on the page.

    Every ``<a>`` element's ``href`` is resolved against *url*; only the
    resulting URLs that start with the module-level ``root`` and end in
    ``'/'`` are returned.

    Args:
        url: Absolute URL of the page to download.
        retries: Maximum number of download attempts before giving up.
        timeout: Timeout in seconds passed to ``session.get`` so a stalled
            connection cannot block the worker forever.

    Returns:
        The set of matching absolute URLs, or an empty set when every
        attempt failed with an ``IOError``.
    """
    for _ in range(retries):
        print(f'{os.getpid()} processing {url}')
        try:
            html = session.get(url, timeout=timeout).content
        except IOError:
            # Log the failure and retry, but only a bounded number of times:
            # a permanently failing URL must not spin this worker forever.
            traceback.print_exc()
            continue
        soup = BeautifulSoup(html, 'html.parser')
        urls = {urljoin(url, a.get('href')) for a in soup.find_all('a')}
        return {u for u in urls if u.startswith(root) and u.endswith('/')}
    print(f'{os.getpid()} giving up on {url} after {retries} attempts')
    return set()
if __name__ == '__main__':
    # Level-by-level crawl: each round maps crawl() over the current frontier
    # in the process pool, then keeps only the links not seen before as the
    # next frontier. The loop ends when a round discovers nothing new.
    seen: set[str] = {root}
    todo: set[str] = {root}
    start = time()
    with Pool(processes=CONCURRENCY) as pool:
        while todo:
            discovered: set[str] = set()
            for links in pool.map(crawl, todo):
                discovered |= links
            todo = discovered - seen
            seen |= todo
    print(f'{len(seen)} urls crawled in {time() - start:.2f} seconds ({len(seen) / (time() - start):.2f} urls/second with {CONCURRENCY} processes)')
#!/usr/bin/env python3
# Simple multiprocessing web crawler, following all a.href's that end in a '/'.
import os
import traceback
from concurrent.futures import ProcessPoolExecutor
from multiprocessing import Barrier
from urllib.parse import urljoin
from time import time
from bs4 import BeautifulSoup
import requests
# URL prefix that bounds the crawl: it is the first URL scheduled and crawl()
# only returns links that start with it.
root: str = 'http://be.archive.ubuntu.com/'
# Worker count handed to ProcessPoolExecutor.
CONCURRENCY = 48
# Module-level HTTP session; crawl() issues every request through it.
session = requests.Session()
def crawl(url: str, *, retries: int = 3, timeout: float = 10.0) -> set[str]:
    """Fetch *url* and return the directory links found on the page.

    Every ``<a>`` element's ``href`` is resolved against *url*; only the
    resulting URLs that start with the module-level ``root`` and end in
    ``'/'`` are returned.

    Args:
        url: Absolute URL of the page to download.
        retries: Maximum number of download attempts before giving up.
        timeout: Timeout in seconds passed to ``session.get`` so a stalled
            connection cannot block the worker forever.

    Returns:
        The set of matching absolute URLs, or an empty set when every
        attempt failed with an ``IOError``.
    """
    for _ in range(retries):
        print(f'{os.getpid()} processing {url}')
        try:
            html = session.get(url, timeout=timeout).content
        except IOError:
            # Log the failure and retry, but only a bounded number of times:
            # a permanently failing URL must not spin this worker forever.
            traceback.print_exc()
            continue
        soup = BeautifulSoup(html, 'html.parser')
        urls = {urljoin(url, a.get('href')) for a in soup.find_all('a')}
        return {u for u in urls if u.startswith(root) and u.endswith('/')}
    print(f'{os.getpid()} giving up on {url} after {retries} attempts')
    return set()
if __name__ == '__main__':
    # Callback-driven crawl: every finished page immediately schedules the
    # unseen links it produced, and `inflight` counts the pages that have been
    # scheduled but whose callback has not finished yet. When the counter
    # drops to zero the callback meets the main thread at the barrier.
    #
    # `seen` is pre-seeded with one URL so that URL is never scheduled.
    seen: set[str] = {'http://be.archive.ubuntu.com/ubuntu/ubuntu/'}
    barrier = Barrier(2)
    inflight = 1  # accounts for the initial schedule(root) below
    start = time()
    with ProcessPoolExecutor(CONCURRENCY) as executor:
        def schedule(url: str) -> None:
            """Mark *url* as seen and submit it to the process pool."""
            def cb(future) -> None:
                global inflight
                try:
                    urls = future.result()
                except Exception:
                    # A failed page must still be counted as finished;
                    # otherwise `inflight` never reaches zero and the main
                    # thread waits on the barrier forever.
                    traceback.print_exc()
                    urls = set()
                for u in urls - seen:
                    inflight += 1
                    schedule(u)
                inflight -= 1
                if not inflight:
                    barrier.wait()
            seen.add(url)
            executor.submit(crawl, url).add_done_callback(cb)
        schedule(root)
        # Must stay inside the `with` block: leaving it shuts the executor
        # down, after which the callbacks could no longer submit work.
        barrier.wait()
    elapsed = time() - start
    print(f'{len(seen)} urls crawled in {elapsed:.2f} seconds ({len(seen) / elapsed:.2f} urls/second with {CONCURRENCY} processes)')
#!/usr/bin/env python3
# Simple multithreaded web crawler, following all a.href's that end in a '/'.
from concurrent.futures import ThreadPoolExecutor
from urllib.parse import urljoin
from time import time
from threading import current_thread, Barrier, Lock
from bs4 import BeautifulSoup
import requests
# URL prefix that bounds the crawl: it is the first URL crawled and only links
# that start with it are followed.
root: str = 'http://be.archive.ubuntu.com/ubuntu/dists/bionic/'
# Worker count handed to ThreadPoolExecutor.
CONCURRENCY = 10
if __name__ == '__main__':
    # Local imports keep this block self-contained; the file header does not
    # import these names.
    import traceback
    from threading import Event

    # Thread-pool crawl: each crawl() call fetches one page and submits every
    # unseen directory link it finds. `inflight` counts pages that have been
    # submitted but not finished; the worker that brings it to zero sets
    # `done`, which releases the main thread.
    #
    # `seen` starts with root so a link back to root is not crawled again.
    seen: set[str] = {root}
    done = Event()
    lock = Lock()  # guards `seen` and `inflight`
    inflight = 1  # accounts for the initial crawl(root) below
    start = time()
    with ThreadPoolExecutor(CONCURRENCY) as executor, requests.Session() as session:
        def crawl(url: str) -> None:
            """Fetch *url* and submit every unseen directory link under root."""
            global inflight
            print(f'{current_thread().ident} processing {url}')
            try:
                # The timeout keeps a stalled connection from pinning a worker.
                html = session.get(url, timeout=10).content
                soup = BeautifulSoup(html, 'html.parser')
                paths = {urljoin(url, a.get('href')) for a in soup.find_all('a')}
            except Exception:
                # A failed page must still be counted as finished, otherwise
                # `inflight` never reaches zero and the main thread hangs.
                traceback.print_exc()
                paths = set()
            with lock:
                for u in filter(lambda p: p.startswith(root) and p.endswith('/'), paths - seen):
                    seen.add(u)
                    executor.submit(crawl, u)
                    inflight += 1
                inflight -= 1
                # Decide under the lock so exactly one caller observes zero.
                finished = not inflight
            if finished:
                # An Event, unlike a 2-party barrier, can be set from the main
                # thread itself (root has no links) without deadlocking.
                done.set()
        crawl(root)
        # Must stay inside the `with` block so the executor is still accepting
        # submissions while pages are in flight.
        done.wait()
    elapsed = time() - start
    print(f'{len(seen)} urls crawled in {elapsed:.2f} seconds ({len(seen) / elapsed:.2f} urls/second with {CONCURRENCY} threads)')
$ python mp.py
45127 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/
45129 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/restricted/
45128 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/
45132 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/universe/
45130 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/by-hash/
45131 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/multiverse/
45134 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/dep11/
45136 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/restricted/binary-i386/
45135 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/universe/dep11/
45133 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/i18n/
45127 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/
45132 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/universe/source/
45130 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/multiverse/i18n/
45129 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/binary-i386/
45131 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/restricted/i18n/
45128 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/debian-installer/
45129 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/by-hash/SHA256/
45132 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/restricted/binary-amd64/
45127 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/
45128 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/dist-upgrader-all/
45127 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/universe/debian-installer/
45132 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/signed/
45129 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/universe/i18n/
45130 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-amd64/
45128 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/multiverse/binary-i386/
45136 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/multiverse/dep11/
45135 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/restricted/source/
45134 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/binary-amd64/
45131 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/multiverse/binary-amd64/
45127 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/multiverse/source/
45132 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/source/
45130 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/universe/binary-i386/
45128 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/universe/binary-amd64/
45136 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/restricted/debian-installer/
45135 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/multiverse/debian-installer/
45134 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/signed/linux-amd64/
45131 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/dist-upgrader-all/18.04.17/
45133 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/20101020ubuntu543/
45127 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/grub-efi-amd64-amd64/
45132 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/linux-amd64/
45130 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/multiverse/binary-amd64/by-hash/
45128 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/fwupdate-i386/
45135 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/grub2-amd64/
45136 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/fwupdate-amd64/
45129 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/restricted/debian-installer/binary-i386/
45132 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/restricted/binary-i386/by-hash/
45135 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/source/by-hash/
45128 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/binary-i386/by-hash/
45133 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/universe/i18n/by-hash/
45130 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/multiverse/dep11/by-hash/
45136 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/binary-amd64/by-hash/
45134 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/multiverse/binary-i386/by-hash/
45129 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/universe/binary-amd64/by-hash/
45127 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-amd64/current/
45131 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/signed/linux-azure-amd64/
45132 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/restricted/binary-amd64/by-hash/
45128 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/i18n/by-hash/
45133 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/dist-upgrader-all/current/
45135 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/multiverse/debian-installer/binary-i386/
45130 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/signed/linux-oem-amd64/
45127 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/universe/source/by-hash/
45134 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/linux-oem-amd64/
45136 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/dist-upgrader-all/18.04.15/
45129 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/linux-azure-amd64/
45131 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/dep11/by-hash/
45132 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/universe/dep11/by-hash/
45128 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/current/
45135 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/restricted/debian-installer/binary-amd64/
45131 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/debian-installer/binary-amd64/
45127 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/debian-installer/binary-i386/
45133 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/restricted/i18n/by-hash/
45129 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/dist-upgrader-all/18.04.16/
45134 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/universe/binary-i386/by-hash/
45130 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/restricted/source/by-hash/
45136 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/multiverse/source/by-hash/
45132 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/universe/debian-installer/binary-amd64/
45128 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/multiverse/i18n/by-hash/
45135 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/universe/debian-installer/binary-i386/
45128 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-amd64/20101020ubuntu543/
45132 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/multiverse/debian-installer/binary-amd64/
45130 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/signed/linux-azure-amd64/4.15.0-1009.9/
45133 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/grub2-amd64/current/
45134 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/debian-installer/binary-amd64/by-hash/
45127 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/linux-azure-amd64/current/
45131 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/i18n/by-hash/SHA256/
45136 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/multiverse/binary-i386/by-hash/SHA256/
45129 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/linux-oem-amd64/current/
45135 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/multiverse/debian-installer/binary-i386/by-hash/
45128 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/grub-efi-amd64-amd64/2.00-5ubuntu2/
45132 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/restricted/binary-amd64/by-hash/SHA256/
45135 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/fwupdate-amd64/10-3/
45134 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/multiverse/binary-amd64/by-hash/SHA256/
45130 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/multiverse/i18n/by-hash/SHA256/
45129 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/signed/linux-azure-amd64/current/
45136 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/fwupdate-i386/current/
45127 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/universe/debian-installer/binary-i386/by-hash/
45133 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-amd64/current/images/
45135 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/restricted/binary-i386/by-hash/SHA256/
45129 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/linux-azure-amd64/4.15.0-1004.4/
45127 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/fwupdate-amd64/current/
45134 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/universe/dep11/by-hash/SHA256/
45136 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/debian-installer/binary-i386/by-hash/
45132 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/universe/i18n/by-hash/SHA256/
45128 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/fwupdate-i386/10-3/
45133 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/current/images/
45135 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/universe/binary-i386/by-hash/SHA256/
45131 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/restricted/source/by-hash/SHA256/
45136 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/universe/binary-amd64/by-hash/SHA256/
45129 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/linux-amd64/4.15.0-15.16/
45134 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/multiverse/source/by-hash/SHA256/
45128 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/multiverse/dep11/by-hash/SHA256/
45133 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/dep11/by-hash/SHA256/
45130 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/source/by-hash/SHA256/
45136 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/grub-efi-amd64-amd64/current/
45131 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/restricted/i18n/by-hash/SHA256/
45134 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/universe/source/by-hash/SHA256/
45133 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/signed/linux-amd64/current/
45128 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/signed/linux-amd64/4.15.0-20.21/
45130 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-amd64/20101020ubuntu543/images/
45127 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/restricted/debian-installer/binary-i386/by-hash/
45134 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/multiverse/debian-installer/binary-amd64/by-hash/
45132 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/universe/debian-installer/binary-amd64/by-hash/
45127 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/linux-amd64/current/
45133 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/restricted/debian-installer/binary-amd64/by-hash/
45128 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/signed/linux-oem-amd64/4.15.0-1004.5/
45135 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/binary-i386/by-hash/SHA256/
45132 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/signed/linux-oem-amd64/current/
45127 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/linux-oem-amd64/4.15.0-1002.3/
45136 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/binary-amd64/by-hash/SHA256/
45128 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/grub2-amd64/2.02-2ubuntu8/
45131 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/20101020ubuntu543/images/
45129 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/current/images/cdrom/
45133 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/20101020ubuntu543/images/netboot/
45135 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/debian-installer/binary-i386/by-hash/SHA256/
45130 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/grub-efi-amd64-amd64/current/control/
45134 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/20101020ubuntu543/images/hd-media/
45132 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/20101020ubuntu543/images/cdrom-xen/
45136 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/20101020ubuntu543/images/cdrom/
45127 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/universe/debian-installer/binary-i386/by-hash/SHA256/
45131 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/current/images/netboot/
45128 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-amd64/20101020ubuntu543/images/netboot/
45130 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-amd64/current/images/netboot/
45135 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/multiverse/debian-installer/binary-i386/by-hash/SHA256/
45127 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-amd64/current/images/hd-media/
45132 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/grub2-amd64/2.02-2ubuntu8/control/
45134 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-amd64/20101020ubuntu543/images/cdrom/
45129 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/linux-azure-amd64/current/control/
45136 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-amd64/20101020ubuntu543/images/hd-media/
45128 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/fwupdate-i386/10-3/control/
45130 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/grub2-amd64/current/control/
45129 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/restricted/debian-installer/binary-i386/by-hash/SHA256/
45132 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/multiverse/debian-installer/binary-amd64/by-hash/SHA256/
45128 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/linux-oem-amd64/current/control/
45135 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/current/images/hd-media/
45127 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/fwupdate-amd64/10-3/control/
45136 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/universe/debian-installer/binary-amd64/by-hash/SHA256/
45134 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/linux-oem-amd64/4.15.0-1002.3/control/
45133 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/fwupdate-amd64/current/control/
45131 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/linux-amd64/4.15.0-15.16/control/
45130 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/grub-efi-amd64-amd64/2.00-5ubuntu2/control/
45133 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/current/images/cdrom-xen/
45134 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-amd64/current/images/cdrom/
45127 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/linux-amd64/current/control/
45128 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/fwupdate-i386/current/control/
45129 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/linux-azure-amd64/4.15.0-1004.4/control/
45132 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/debian-installer/binary-amd64/by-hash/SHA256/
45136 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/restricted/debian-installer/binary-amd64/by-hash/SHA256/
45135 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/20101020ubuntu543/images/cdrom-xen/cdrom/
45131 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-amd64/current/images/netboot/pxelinux.cfg/
45130 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/current/images/netboot/xen/
45129 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-amd64/current/images/netboot/xen/
45133 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-amd64/20101020ubuntu543/images/netboot/ubuntu-installer/
45127 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/20101020ubuntu543/images/netboot/pxelinux.cfg/
45128 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/20101020ubuntu543/images/netboot/xen/
45132 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/current/images/cdrom-xen/cdrom/
45136 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/current/images/netboot/pxelinux.cfg/
45134 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-amd64/20101020ubuntu543/images/netboot/pxelinux.cfg/
45135 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/20101020ubuntu543/images/netboot/ubuntu-installer/
45134 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-amd64/current/images/netboot/ubuntu-installer/
45132 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-amd64/20101020ubuntu543/images/netboot/xen/
45127 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/current/images/netboot/ubuntu-installer/
45133 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-amd64/20101020ubuntu543/images/cdrom/xen/
45131 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-amd64/current/images/cdrom/xen/
45136 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-amd64/20101020ubuntu543/images/netboot/ubuntu-installer/amd64/
45128 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/current/images/cdrom-xen/cdrom/xen/
45130 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-amd64/current/images/netboot/ubuntu-installer/amd64/
45129 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/20101020ubuntu543/images/cdrom-xen/cdrom/xen/
45135 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/20101020ubuntu543/images/netboot/ubuntu-installer/i386/
45134 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/current/images/netboot/ubuntu-installer/i386/
45127 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/current/images/netboot/ubuntu-installer/i386/boot-screens/
45131 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-amd64/20101020ubuntu543/images/netboot/ubuntu-installer/amd64/boot-screens/
45133 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/20101020ubuntu543/images/netboot/ubuntu-installer/i386/boot-screens/
45132 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-amd64/current/images/netboot/ubuntu-installer/amd64/pxelinux.cfg/
45129 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-amd64/current/images/netboot/ubuntu-installer/amd64/boot-screens/
45136 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-amd64/20101020ubuntu543/images/netboot/ubuntu-installer/amd64/pxelinux.cfg/
45134 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/20101020ubuntu543/images/netboot/ubuntu-installer/i386/pxelinux.cfg/
45135 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/current/images/netboot/ubuntu-installer/i386/pxelinux.cfg/
197 urls crawled in 1.37 seconds (144.05 urls/second with 10 processes)
$ python mt.py
4554606080 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/
123145517010944 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/universe/
123145533800448 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/
123145550589952 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/restricted/
123145567379456 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/by-hash/
123145584168960 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/multiverse/
123145600958464 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/binary-amd64/
123145617747968 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/i18n/
123145634537472 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/source/
123145651326976 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/dist-upgrader-all/
123145668116480 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/
123145533800448 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/
123145567379456 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/
123145584168960 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-amd64/
123145550589952 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/signed/
123145517010944 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/debian-installer/
123145600958464 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/dep11/
123145567379456 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/binary-i386/
123145634537472 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/by-hash/SHA256/
123145533800448 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/multiverse/debian-installer/
123145651326976 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/multiverse/source/
123145584168960 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/multiverse/binary-amd64/
123145668116480 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/multiverse/i18n/
123145617747968 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/multiverse/binary-i386/
123145550589952 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/multiverse/dep11/
123145517010944 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/restricted/i18n/
123145567379456 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/restricted/binary-i386/
123145651326976 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/restricted/binary-amd64/
123145600958464 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/restricted/debian-installer/
123145584168960 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/restricted/source/
123145617747968 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/universe/binary-amd64/
123145634537472 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/universe/debian-installer/
123145550589952 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/universe/dep11/
123145533800448 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/universe/binary-i386/
123145651326976 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/universe/i18n/
123145617747968 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/universe/source/
123145584168960 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/binary-amd64/by-hash/
123145600958464 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/source/by-hash/
123145668116480 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/20101020ubuntu543/
123145567379456 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/current/
123145517010944 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/dist-upgrader-all/current/
123145634537472 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/dist-upgrader-all/18.04.16/
123145533800448 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/dist-upgrader-all/18.04.15/
123145550589952 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/dist-upgrader-all/18.04.17/
123145617747968 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-amd64/current/
123145584168960 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-amd64/20101020ubuntu543/
123145567379456 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/linux-azure-amd64/
123145600958464 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/grub2-amd64/
123145517010944 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/linux-oem-amd64/
123145634537472 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/grub-efi-amd64-amd64/
123145668116480 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/linux-amd64/
123145550589952 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/fwupdate-amd64/
123145533800448 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/fwupdate-i386/
123145617747968 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/i18n/by-hash/
123145600958464 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/signed/linux-oem-amd64/
123145634537472 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/signed/linux-azure-amd64/
123145567379456 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/signed/linux-amd64/
123145668116480 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/debian-installer/binary-amd64/
123145584168960 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/debian-installer/binary-i386/
123145550589952 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/binary-i386/by-hash/
123145517010944 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/multiverse/source/by-hash/
123145617747968 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/dep11/by-hash/
123145533800448 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/multiverse/binary-amd64/by-hash/
123145651326976 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/multiverse/binary-i386/by-hash/
123145600958464 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/multiverse/dep11/by-hash/
123145617747968 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/multiverse/debian-installer/binary-i386/
123145567379456 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/multiverse/debian-installer/binary-amd64/
123145668116480 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/restricted/binary-amd64/by-hash/
123145634537472 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/universe/binary-amd64/by-hash/
123145550589952 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/restricted/source/by-hash/
123145651326976 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/restricted/debian-installer/binary-i386/
123145517010944 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/multiverse/i18n/by-hash/
123145533800448 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/restricted/debian-installer/binary-amd64/
123145584168960 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/restricted/binary-i386/by-hash/
123145600958464 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/universe/debian-installer/binary-amd64/
123145651326976 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/universe/debian-installer/binary-i386/
123145634537472 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/universe/dep11/by-hash/
123145517010944 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/restricted/i18n/by-hash/
123145567379456 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/universe/source/by-hash/
123145584168960 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/universe/binary-i386/by-hash/
123145550589952 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/source/by-hash/SHA256/
123145617747968 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/binary-amd64/by-hash/SHA256/
123145668116480 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/20101020ubuntu543/images/
123145533800448 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/current/images/
123145600958464 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-amd64/current/images/
123145634537472 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/grub2-amd64/2.02-2ubuntu8/
123145567379456 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/grub2-amd64/current/
123145517010944 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/grub-efi-amd64-amd64/2.00-5ubuntu2/
123145550589952 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/grub-efi-amd64-amd64/current/
123145584168960 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/linux-azure-amd64/4.15.0-1004.4/
123145651326976 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/linux-azure-amd64/current/
123145668116480 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/linux-amd64/current/
123145617747968 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/linux-amd64/4.15.0-15.16/
123145600958464 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-amd64/20101020ubuntu543/images/
123145533800448 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/fwupdate-amd64/current/
123145634537472 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/fwupdate-amd64/10-3/
123145550589952 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/linux-oem-amd64/4.15.0-1002.3/
123145651326976 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/linux-oem-amd64/current/
123145567379456 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/i18n/by-hash/SHA256/
123145634537472 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/fwupdate-i386/current/
123145550589952 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/fwupdate-i386/10-3/
123145517010944 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/universe/i18n/by-hash/
123145651326976 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/signed/linux-oem-amd64/4.15.0-1004.5/
123145668116480 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/signed/linux-oem-amd64/current/
123145600958464 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/dep11/by-hash/SHA256/
123145617747968 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/signed/linux-amd64/current/
123145533800448 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/signed/linux-amd64/4.15.0-20.21/
123145668116480 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/debian-installer/binary-amd64/by-hash/
123145550589952 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/signed/linux-azure-amd64/4.15.0-1009.9/
123145584168960 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/signed/linux-azure-amd64/current/
123145517010944 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/binary-i386/by-hash/SHA256/
123145634537472 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/multiverse/binary-i386/by-hash/SHA256/
123145617747968 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/multiverse/binary-amd64/by-hash/SHA256/
123145651326976 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/multiverse/source/by-hash/SHA256/
123145600958464 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/debian-installer/binary-i386/by-hash/
123145533800448 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/multiverse/dep11/by-hash/SHA256/
123145584168960 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/restricted/debian-installer/binary-i386/by-hash/
123145517010944 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/universe/binary-amd64/by-hash/SHA256/
123145567379456 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/multiverse/i18n/by-hash/SHA256/
123145668116480 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/restricted/binary-i386/by-hash/SHA256/
123145600958464 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/multiverse/debian-installer/binary-amd64/by-hash/
123145651326976 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/multiverse/debian-installer/binary-i386/by-hash/
123145634537472 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/restricted/debian-installer/binary-amd64/by-hash/
123145584168960 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/restricted/source/by-hash/SHA256/
123145550589952 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/universe/debian-installer/binary-amd64/by-hash/
123145600958464 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/restricted/binary-amd64/by-hash/SHA256/
123145533800448 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/universe/dep11/by-hash/SHA256/
123145517010944 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/universe/source/by-hash/SHA256/
123145668116480 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/universe/binary-i386/by-hash/SHA256/
123145617747968 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/restricted/i18n/by-hash/SHA256/
123145567379456 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/universe/debian-installer/binary-i386/by-hash/
123145634537472 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/20101020ubuntu543/images/cdrom-xen/
123145584168960 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/20101020ubuntu543/images/netboot/
123145600958464 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/20101020ubuntu543/images/cdrom/
123145550589952 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/20101020ubuntu543/images/hd-media/
123145517010944 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-amd64/current/images/hd-media/
123145634537472 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-amd64/current/images/cdrom/
123145567379456 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-amd64/current/images/netboot/
123145651326976 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/current/images/hd-media/
123145668116480 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/current/images/netboot/
123145550589952 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/current/images/cdrom-xen/
123145584168960 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/current/images/cdrom/
123145533800448 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/grub2-amd64/2.02-2ubuntu8/control/
123145634537472 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/grub-efi-amd64-amd64/current/control/
123145617747968 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/linux-azure-amd64/current/control/
123145600958464 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/grub2-amd64/current/control/
123145517010944 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/fwupdate-amd64/10-3/control/
123145567379456 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/linux-oem-amd64/4.15.0-1002.3/control/
123145668116480 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/grub-efi-amd64-amd64/2.00-5ubuntu2/control/
123145584168960 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/linux-oem-amd64/current/control/
123145533800448 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/linux-amd64/current/control/
123145651326976 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-amd64/20101020ubuntu543/images/cdrom/
123145550589952 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-amd64/20101020ubuntu543/images/netboot/
123145634537472 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-amd64/20101020ubuntu543/images/hd-media/
123145567379456 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/fwupdate-amd64/current/control/
123145517010944 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/linux-amd64/4.15.0-15.16/control/
123145668116480 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/fwupdate-i386/10-3/control/
123145617747968 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/linux-azure-amd64/4.15.0-1004.4/control/
123145600958464 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/universe/i18n/by-hash/SHA256/
123145584168960 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/fwupdate-i386/current/control/
123145651326976 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/debian-installer/binary-amd64/by-hash/SHA256/
123145634537472 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/debian-installer/binary-i386/by-hash/SHA256/
123145617747968 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/restricted/debian-installer/binary-i386/by-hash/SHA256/
123145567379456 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/multiverse/debian-installer/binary-amd64/by-hash/SHA256/
123145517010944 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/universe/debian-installer/binary-amd64/by-hash/SHA256/
123145550589952 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/universe/debian-installer/binary-i386/by-hash/SHA256/
123145668116480 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/20101020ubuntu543/images/cdrom-xen/cdrom/
123145533800448 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/restricted/debian-installer/binary-amd64/by-hash/SHA256/
123145584168960 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/multiverse/debian-installer/binary-i386/by-hash/SHA256/
123145651326976 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/20101020ubuntu543/images/netboot/pxelinux.cfg/
123145600958464 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/20101020ubuntu543/images/netboot/ubuntu-installer/
123145567379456 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/20101020ubuntu543/images/netboot/xen/
123145617747968 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-amd64/current/images/cdrom/xen/
123145517010944 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-amd64/current/images/netboot/ubuntu-installer/
123145550589952 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-amd64/current/images/netboot/pxelinux.cfg/
123145634537472 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-amd64/current/images/netboot/xen/
123145600958464 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/current/images/netboot/ubuntu-installer/
123145584168960 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/current/images/netboot/pxelinux.cfg/
123145617747968 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/current/images/netboot/xen/
123145651326976 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/current/images/cdrom-xen/cdrom/
123145668116480 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-amd64/20101020ubuntu543/images/cdrom/xen/
123145517010944 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-amd64/20101020ubuntu543/images/netboot/xen/
123145533800448 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-amd64/20101020ubuntu543/images/netboot/pxelinux.cfg/
123145567379456 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-amd64/20101020ubuntu543/images/netboot/ubuntu-installer/
123145550589952 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/20101020ubuntu543/images/netboot/ubuntu-installer/i386/
123145584168960 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/20101020ubuntu543/images/cdrom-xen/cdrom/xen/
123145651326976 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-amd64/current/images/netboot/ubuntu-installer/amd64/
123145634537472 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/current/images/cdrom-xen/cdrom/xen/
123145600958464 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/current/images/netboot/ubuntu-installer/i386/
123145550589952 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/20101020ubuntu543/images/netboot/ubuntu-installer/i386/pxelinux.cfg/
123145533800448 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/20101020ubuntu543/images/netboot/ubuntu-installer/i386/boot-screens/
123145567379456 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-amd64/20101020ubuntu543/images/netboot/ubuntu-installer/amd64/
123145600958464 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/current/images/netboot/ubuntu-installer/i386/boot-screens/
123145567379456 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/current/images/netboot/ubuntu-installer/i386/pxelinux.cfg/
123145550589952 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-amd64/20101020ubuntu543/images/netboot/ubuntu-installer/amd64/boot-screens/
123145617747968 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-amd64/20101020ubuntu543/images/netboot/ubuntu-installer/amd64/pxelinux.cfg/
123145651326976 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-amd64/current/images/netboot/ubuntu-installer/amd64/pxelinux.cfg/
123145533800448 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-amd64/current/images/netboot/ubuntu-installer/amd64/boot-screens/
197 urls crawled in 1.72 seconds (114.49 urls/second with 10 threads)
beautifulsoup4==4.11.2
certifi==2022.12.7
charset-normalizer==3.1.0
idna==3.4
requests==2.28.2
soupsieve==2.4
urllib3==1.26.14
#!/usr/bin/env python3
# Simple single-threaded web crawler, following all a.href's under `root` that end in a '/'.
from urllib.parse import urljoin
from time import time
from bs4 import BeautifulSoup
import requests
root: str = 'http://be.archive.ubuntu.com/ubuntu/dists/bionic/'
if __name__ == '__main__':
seen: set[str] = set()
start = time()
with requests.Session() as session:
def crawl(url: str) -> None:
print(f'processing {url}')
html = session.get(url).content
soup = BeautifulSoup(html, 'html.parser')
paths = {urljoin(url, a.get('href')) for a in soup.find_all('a')}
for u in filter(lambda p: p.startswith(root) and p.endswith('/'), paths - seen):
seen.add(u)
crawl(u)
crawl(root)
print(f'{len(seen)} urls crawled in {time() - start:.2f} seconds ({len(seen) / (time() - start):.2f} urls/second)')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment