Last active
January 16, 2022 15:42
-
-
Save adiamaan92/c1d43003ddc3c92b49830ed84cdfd31c to your computer and use it in GitHub Desktop.
Fetch Delta using Direct API Calls
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Fetch only the speeches published since the previous run ("delta" fetch).
#
# Names expected to be in scope from elsewhere in the file: `requests`, `etree`,
# `headers`, `latest_speech_title` and `get_article_data`.

# Upper bound (seconds) for a single page request, so a stalled connection
# cannot hang the whole run.
REQUEST_TIMEOUT_SECONDS = 30

i = 1
while True:
    # This GET API request returns the ith page. The pages are sorted in descending order
    # based on the datetime it is published
    r = requests.get(
        f"https://www.narendramodi.in/speech/loadspeeche?page={i}&language=en",
        headers=headers,
        timeout=REQUEST_TIMEOUT_SECONDS,
    )
    # Exit out of the loop in case we run out of pages to acquire. An empty
    # body cannot contain any speeches, so it is treated the same way.
    if r.status_code != 200 or not r.text.strip():
        break
    tree = etree.fromstring(r.text, parser=etree.HTMLParser())
    speech_items = tree.xpath("//div[contains(@class, 'speechesItemLink')]")
    # A page without any speech entries means there is nothing further to
    # compare or fetch; stopping here also keeps the loop from running forever
    # when the server keeps answering 200 for out-of-range page numbers.
    if not speech_items:
        break
    # Loop through each element and match its title against the latest title
    # from the previous run. An explicit flag is used so the decision never
    # depends on a `title` variable left over from an earlier iteration.
    reached_latest = False
    for element in speech_items:
        link_texts = element.xpath(".//a//text()")
        # Entries without any link text cannot match; skip them instead of
        # failing on an empty result list.
        if link_texts and link_texts[0] == latest_speech_title:
            reached_latest = True
            break
    # NOTE(review): the page that contains the previously seen title is not
    # passed to get_article_data, so newer speeches sharing that page are
    # skipped unless they are handled elsewhere -- confirm this is intended.
    if reached_latest:
        break
    # Get the article data if it is a new speech
    get_article_data(tree)
    i += 1
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment