# adiamaan92/delta_extract.py

## delta_extract.py
# Delta extraction: request listing pages one at a time, starting at page 1,
# and pass each page to get_article_data() until a page contains the title
# stored in latest_speech_title (the latest title from the previous run).
# Uses requests, etree, headers, latest_speech_title and get_article_data,
# which must already be in scope when this section runs.
i = 1  # page number to request next

while True:
    # This GET API request returns the ith page. The pages are sorted in
    # descending order based on the datetime each speech is published.
    # The timeout keeps a stalled connection from blocking the run forever.
    r = requests.get(
        f"https://www.narendramodi.in/speech/loadspeeche?page={i}&language=en",
        headers=headers,
        timeout=30,
    )

    # Exit out of the loop in case we run out of pages to acquire
    if r.status_code != 200:
        break

    # A blank body cannot hold any speech entries, so stop here as well
    if not r.text.strip():
        break

    tree = etree.fromstring(r.text, parser=etree.HTMLParser())
    speech_items = tree.xpath("//div[contains(@class, 'speechesItemLink')]")

    # A page without speech entries means there is nothing further to scan.
    # Stopping here also guarantees `title` is never read before it has been
    # assigned, and that a title left over from an earlier page is never
    # compared again (which would keep the loop running indefinitely).
    if not speech_items:
        break

    # Loop through each element, matching its title against the latest title
    # from the previous run
    reached_latest = False
    for element in speech_items:
        title = element.xpath(".//a//text()")[0]

        if title == latest_speech_title:
            reached_latest = True
            break

    # NOTE(review): the page that contains the previous run's title is skipped
    # as a whole, so any newer speeches listed on that same page are never
    # passed to get_article_data() -- confirm this is the intended behaviour.
    if reached_latest:
        break

    # Get the article data if it is a new speech
    get_article_data(tree)
    i += 1