Created
March 6, 2024 12:33
-
-
Save chamoda/8a70d2e7b01117bd1df8084d94b50a12 to your computer and use it in GitHub Desktop.
Retrieve all gazettes from http://documents.gov.lk/en/gazette.php
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from datetime import date, timedelta | |
from aiohttp import ClientSession | |
from aiofiles import open | |
import asyncio | |
# Scheme and host that every gazette download URL is built on.
BASE_URL = "http://documents.gov.lk"

# Calendar year whose Fridays are enumerated when building download URLs.
YEAR = 2022
def get_fridays(year: int) -> list[date]:
    """Return every Friday that falls in ``year``, in chronological order.

    Args:
        year: Calendar year to enumerate.

    Returns:
        A list of ``date`` objects, one per Friday, starting with the first
        Friday on or after January 1 and ending with the last one in ``year``.
    """
    new_years_day = date(year, 1, 1)
    # ``weekday()`` numbers Monday as 0, so Friday is 4. The modulo gives the
    # number of days to move forward to reach the first Friday (0 if Jan 1
    # already is one).
    days_until_friday = (4 - new_years_day.weekday()) % 7

    fridays = []
    current = new_years_day + timedelta(days=days_until_friday)
    one_week = timedelta(weeks=1)
    # Step a whole week at a time instead of testing every day of the year.
    while current.year == year:
        fridays.append(current)
        current += one_week
    return fridays
async def download_file(url, filename):
    """Fetch ``url`` and save the response body to ``files/<filename>``.

    This is a best-effort download: a non-200 status or any exception is
    printed and swallowed, so one failed file never aborts the whole batch.

    Args:
        url: Address of the document to fetch.
        filename: Name of the file to create inside the ``files`` directory.

    Returns:
        None.
    """
    # Function-scope import so the block does not depend on a new
    # file-level import.
    import os

    print("Downloading: ", url)
    try:
        async with ClientSession() as session:
            async with session.get(url) as response:
                if response.status != 200:
                    print("Error: ", response.status)
                    return
                payload = await response.read()

        # Create the target directory on demand; without it every write
        # would fail and only surface as a printed error.
        os.makedirs("files", exist_ok=True)

        # NOTE: ``open`` is ``aiofiles.open`` (imported at file level), not
        # the builtin. Using it as an async context manager guarantees the
        # handle is closed even if the write raises.
        async with open(f"files/{filename}", "wb") as f:
            await f.write(payload)
        print("File downloaded: ", filename)
    except Exception as e:
        # Deliberately broad: report and carry on with the other downloads.
        print("Error: ", e)
async def safe_download_file(url, filename):
    """Run ``download_file`` while holding a slot of the global semaphore.

    The module-level ``sem`` caps how many downloads are in flight at once;
    this wrapper waits for a free slot, performs the download, and always
    gives the slot back.

    Args:
        url: Address of the document to fetch.
        filename: Name of the file to create for the downloaded document.

    Returns:
        Whatever ``download_file`` returns.
    """
    await sem.acquire()
    try:
        outcome = await download_file(url, filename)
        return outcome
    finally:
        # Release on success, failure, or cancellation alike.
        sem.release()
async def main():
    """Schedule a download for every gazette part of every Friday in ``YEAR``.

    For each Friday returned by ``get_fridays(YEAR)`` and each part suffix,
    a task wrapping ``safe_download_file`` is created; the coroutine then
    waits until all of them have finished.
    """
    # Suffixes appended to the dated URL prefix, one per gazette part.
    part_suffixes = (
        "(I-I)E",
        "(I-IIA)E",
        "(I-IIB)E",
        "(I-III)E",
        "(II-0)E",
        "(III-0)E",
        "(IV-A)E",
        "(IV-B)E",
        "(V-0)E",
        "(VI-0)E",
    )

    pending = []
    for friday in get_fridays(YEAR):
        stamp = friday.strftime("%Y-%m-%d")
        # Note: the month path segment is not zero-padded, the date stamp is.
        remote_prefix = f"{BASE_URL}/files/gz/{friday.year}/{friday.month}/{stamp}"
        pending.extend(
            asyncio.ensure_future(
                safe_download_file(
                    f"{remote_prefix}{suffix}.pdf",
                    f"{stamp}-{suffix}.pdf",
                )
            )
            for suffix in part_suffixes
        )

    await asyncio.gather(*pending)
if __name__ == "__main__":
    # Create and install the event loop explicitly. Calling
    # asyncio.get_event_loop() with no current loop is deprecated and raises
    # RuntimeError on recent Python versions.
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)

    # Global concurrency limit read by safe_download_file: at most four
    # downloads run at once. It is created after the loop is installed so
    # that interpreters which bind a semaphore to the current loop at
    # construction time attach it to the loop that actually runs main().
    sem = asyncio.Semaphore(4)

    try:
        loop.run_until_complete(main())
    finally:
        # Finalize any outstanding async generators before closing the loop.
        loop.run_until_complete(loop.shutdown_asyncgens())
        loop.close()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment