Skip to content

Instantly share code, notes, and snippets.

What would you like to do?
Script to find official Debian mirrors that support HTTPS
#!/usr/bin/env python3
"""Find Debian HTTPS archives.
Script based on
I made it asynchronous and parallel, so overall I measured it to be 6 times faster or more.
Requires Python 3.7+
Additional resources not exactly related to this script but could be helpful for
those than are looking for mirrors:
* <-- this one works through https
import asyncio
import concurrent.futures
import http.client
import re
import ssl
import sys
import typing
import urllib.error
import urllib.request
assert sys.version_info >= (3, 7), 'You need Python 3.7+ to run this script'
# Increasing this value has potential to make it faster, but there's also a
# chance for it to be slower. There's no way to know but trying, and
# nevertheless you might be using this script only once per machine.
# NOTE(review): the constant's original value was lost in extraction; 10 is a
# conservative thread-pool size — confirm against the original gist.
PARALLEL_COUNT = 10


def try_url(url: str) -> typing.Optional[http.client.HTTPResponse]:
    """Open *url* and return the response, or ``None`` on any failure.

    A 1-second timeout keeps dead mirrors from stalling the scan.  TLS
    errors are reported separately so mirrors with broken certificates can
    be told apart from plain connection failures.
    """
    try:
        return urllib.request.urlopen(url, timeout=1)
    except (ssl.SSLError, ssl.SSLCertVerificationError) as err:
        print('Bad TLS!', url, err)
    except urllib.error.URLError as err:
        print('Failure!', url, err)
    except Exception as err:  # best-effort probe: never propagate
        print('Unexpected failure!', url, err)
    return None
async def search_mirror_list(mirror_list_url: str,
                             url_pattern: typing.AnyStr) -> typing.Set[str]:
    """Scrape *mirror_list_url* and probe each matched mirror over HTTPS.

    Each line of the fetched page is matched against *url_pattern*, whose
    first capture group is everything after the literal ``http`` in the
    mirror's URL; prefixing ``https`` rewrites it to the secure scheme.
    All candidates are probed concurrently in a thread pool.

    Returns the set of mirror URLs that answered over HTTPS (empty if the
    list itself could not be fetched).
    """
    def inner(url_: str) -> typing.Optional[str]:
        # Runs in a worker thread: probe one candidate mirror.
        print('Trying:', url_, '...')
        if try_url(url_):
            print('Success!', url_)
            return url_

    mirrors = try_url(mirror_list_url)
    if not mirrors:
        return set()
    urls = set()
    for line in mirrors.readlines():
        url_match = re.match(url_pattern, line.decode())
        if url_match:
            # NOTE(review): this line was lost in extraction; rewriting the
            # captured "http..." tail to "https..." matches the capture
            # groups used by every caller — confirm against the original.
            urls.add('https' + url_match.groups()[0])
    https = set()
    with concurrent.futures.ThreadPoolExecutor(max_workers=PARALLEL_COUNT) as executor:
        loop = asyncio.get_running_loop()
        futures = []
        for url in urls:
            # NOTE(review): fan-out line lost in extraction; run_in_executor
            # is the standard way to await blocking calls from asyncio.
            futures.append(loop.run_in_executor(executor, inner, url))
        for potential_url in await asyncio.gather(*futures):
            if potential_url:
                https.add(potential_url)
    return https
async def search_generic_mirrors() -> typing.Set[str]:
    """Find the generic package mirrors that answer over HTTPS."""
    # NOTE(review): the list URL was lost in extraction; this is the official
    # Debian mirror list page — confirm it matches the original gist.
    https = await search_mirror_list(
        'https://www.debian.org/mirror/list',
        r'.*<td valign="top"><a rel="nofollow" href="http(.*)">.*',
    )
    # NOTE(review): the exact URL added here was lost in extraction;
    # deb.debian.org is the CDN entry point that the comment implies.
    https.add('https://deb.debian.org/debian/')  # It doesn't show up in the mirror list
    return https
async def search_security_mirrors() -> typing.Set[str]:
    """Find the security-update mirrors that answer over HTTPS."""
    # NOTE(review): the URL and the regex tail were lost in extraction; the
    # "Security updates over HTTP" text appears on the full mirror list —
    # confirm both against the original gist.
    return await search_mirror_list(
        'https://www.debian.org/mirror/list-full',
        r'.*</tt><br>Security updates over HTTP: <tt><a rel="nofollow" '
        r'href="http(.*)">.*',
    )
async def search_backports_mirrors() -> typing.Set[str]:
    """Find the backports mirrors that answer over HTTPS."""
    # NOTE(review): the list URL was lost in extraction; the backports
    # mirrors page is the page whose table this pattern matches — confirm
    # against the original gist.
    return await search_mirror_list(
        'https://backports-master.debian.org/Mirrors/',
        r'.*<td><a href="http(.*)">.*/debian-backports/</a>.*',
    )
async def search_cd_mirrors() -> typing.Set[str]:
    """Find the CD/DVD image mirrors that answer over HTTPS."""
    # NOTE(review): the list URL was lost in extraction; the CD image
    # mirrors page is where this HTTP-link pattern appears — confirm
    # against the original gist.
    return await search_mirror_list(
        'https://www.debian.org/CD/http-ftp/',
        r'.*<a rel="nofollow" href="http(:.*)">HTTP</a></li>.*',
    )
def save_to_file(mirrors: typing.Dict[str, typing.Set[str]], filename: str) -> None:
    """Write every mirror category and its URLs to *filename*.

    Output format: a ``<Category> repos`` heading followed by one URL per
    line, with a blank line separating categories.
    """
    with open(filename, 'w') as f:
        for category, urls in mirrors.items():
            f.write(f'{category.title()} repos\n')
            for url in urls:
                # NOTE(review): the per-URL write was lost in extraction;
                # one URL per line is the minimal faithful reconstruction.
                f.write(f'{url}\n')
            f.write('\n')
async def main() -> None:
    """Run every mirror search and save the results to a text file."""
    print('Searching HTTPS mirrors...')
    # Each search awaits its own internal thread-pool fan-out; the four
    # categories themselves run one after another.
    apt_mirrors = await search_generic_mirrors()
    security_mirrors = await search_security_mirrors()
    backports_mirrors = await search_backports_mirrors()
    cd_mirrors = await search_cd_mirrors()
    mirrors = {
        'apt': apt_mirrors,
        'security': security_mirrors,
        'backports': backports_mirrors,
        'cd': cd_mirrors,
    }
    filename = 'https-debian-archives.txt'
    print('Saving to', filename, '...')
    save_to_file(mirrors, filename)
if __name__ == '__main__':
    # NOTE(review): the entry-point call was lost in extraction;
    # asyncio.run() is the standard Python 3.7+ way to drive an async main.
    asyncio.run(main())
Copy link

hellresistor commented Jan 18, 2020

Not too familiar with Python.
I can't find any .onion repos — am I doing something wrong?

Very nice adapted script! Thank you

Copy link

HacKanCuBa commented Jan 19, 2020

Not too familiar with Python.
I can't find any .onion repos — am I doing something wrong?

Very nice adapted script! Thank you

Hey there! Thanks :)
This scripts reads the official debian mirror list to find mirrors, and parses it. I've just checked and there's 1 onion mirror, mentioned in a comment (which is skipped by the script): lxpizzamm6twgep2.onion. Check

Copy link

hellresistor commented Feb 27, 2020

Thank you! I found it! 💯

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment