Skip to content

Instantly share code, notes, and snippets.

@chamoda
Created March 6, 2024 12:33
Show Gist options
  • Save chamoda/8a70d2e7b01117bd1df8084d94b50a12 to your computer and use it in GitHub Desktop.
Save chamoda/8a70d2e7b01117bd1df8084d94b50a12 to your computer and use it in GitHub Desktop.
Retrieve all gazettes from http://documents.gov.lk/en/gazette.php
from datetime import date, timedelta
from aiohttp import ClientSession
from aiofiles import open
import asyncio
# Site root that every gazette download URL in this script is built from.
BASE_URL = "http://documents.gov.lk"
# Calendar year whose Fridays are enumerated when building the download list.
YEAR = 2022
def get_fridays(year: int) -> list[date]:
    """Return every Friday of ``year`` in chronological order.

    Args:
        year: Calendar year to enumerate.

    Returns:
        A list of ``date`` objects, one per Friday, earliest first.
    """
    new_year = date(year, 1, 1)
    # date.weekday() numbers Monday as 0, so Friday is 4. The modulo yields
    # the number of days from 1 January forward to the first Friday (0 when
    # 1 January is itself a Friday).
    d = new_year + timedelta(days=(4 - new_year.weekday()) % 7)

    fridays = []
    # Step a whole week at a time instead of testing every day of the year.
    while d.year == year:
        fridays.append(d)
        d += timedelta(weeks=1)
    return fridays
async def download_file(url, filename):
    """Fetch ``url`` and save the response body as ``files/<filename>``.

    Progress and failures are printed rather than raised, so a single
    missing or failing file does not abort the rest of a batch. Only an
    HTTP 200 response is written to disk; any other status is reported
    and nothing is saved.

    Args:
        url: Address of the file to download.
        filename: Name of the file to create inside the ``files`` directory.

    Returns:
        None.
    """
    import os  # local import: only needed to make sure the target directory exists

    print("Downloading: ", url)
    try:
        async with ClientSession() as session:
            async with session.get(url) as response:
                if response.status != 200:
                    print("Error: ", response.status)
                    return
                payload = await response.read()

        # Create the output directory on demand so a fresh checkout does not
        # fail every download with "No such file or directory".
        os.makedirs("files", exist_ok=True)

        # ``open`` is aiofiles.open (imported at the top of the file), not the
        # builtin. Using it as an async context manager guarantees the handle
        # is flushed and closed even if the write fails.
        async with open(f"files/{filename}", "wb") as f:
            await f.write(payload)
        print("File downloaded: ", filename)
    except Exception as e:
        # Best-effort boundary: report and move on to the next file.
        print("Error: ", e)
async def safe_download_file(url, filename):
    """Run ``download_file`` while holding one slot of the module-level ``sem``.

    The semaphore caps how many downloads are in flight at once; the slot is
    released whether the download returns normally or raises.

    Args:
        url: Address of the file to download.
        filename: Name passed through to ``download_file``.

    Returns:
        Whatever ``download_file`` returns.
    """
    await sem.acquire()
    try:
        outcome = await download_file(url, filename)
    finally:
        sem.release()
    return outcome
async def main():
    """Schedule a download for every (Friday, suffix) pair of ``YEAR`` and wait for all.

    For each Friday returned by ``get_fridays(YEAR)`` and each suffix below,
    the URL is ``{BASE_URL}/files/gz/<year>/<month>/<YYYY-MM-DD><suffix>.pdf``
    and the local filename is ``<YYYY-MM-DD>-<suffix>.pdf``. Every download
    goes through ``safe_download_file`` so concurrency stays bounded.
    """
    # File-name suffixes tried for every date.
    # NOTE(review): presumably these identify gazette parts/sections with "E"
    # for the English edition — confirm against the site.
    suffixes = (
        "(I-I)E",
        "(I-IIA)E",
        "(I-IIB)E",
        "(I-III)E",
        "(II-0)E",
        "(III-0)E",
        "(IV-A)E",
        "(IV-B)E",
        "(V-0)E",
        "(VI-0)E",
    )

    pending = []
    for day in get_fridays(YEAR):
        stamp = day.strftime('%Y-%m-%d')
        # The month path segment is the plain integer (no zero padding).
        base = f"{BASE_URL}/files/gz/{day.year}/{day.month}/{stamp}"
        for suffix in suffixes:
            job = safe_download_file(f"{base}{suffix}.pdf", f"{stamp}-{suffix}.pdf")
            pending.append(asyncio.ensure_future(job))

    await asyncio.gather(*pending)
if __name__ == "__main__":
    # Create and install the loop explicitly: asyncio.get_event_loop() with no
    # running loop is deprecated and raises RuntimeError on the newest Python
    # releases.
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    # Limits concurrent downloads to 4. Built after the loop is installed so
    # that, on Python versions where asyncio primitives bind to the current
    # loop at construction time, it is bound to the loop that runs main().
    sem = asyncio.Semaphore(4)
    try:
        loop.run_until_complete(main())
    finally:
        # Finalize any pending async generators before closing the loop.
        loop.run_until_complete(loop.shutdown_asyncgens())
        loop.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment