Created
April 2, 2026 10:07
-
-
Save l3utterfly/bf9f703c09932fd87dbf68f2118e5ab4 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import requests | |
| import re | |
| import html | |
# Search text used as the default query. The literal "{{input}}" looks like a
# template placeholder that is presumably substituted before the script runs
# -- TODO confirm with whatever renders this file.
QUERY = """{{input}}"""

# Default maximum number of articles returned by get_duckduckgo_news().
LIMIT = 5


def get_duckduckgo_news(query=QUERY, limit=LIMIT):
    """Fetch recent news articles for *query* from DuckDuckGo.

    The function first POSTs the query to the DuckDuckGo home page to obtain
    a ``vqd`` validation token (from the ``x-vqd-4`` response header, or from
    the page body as a fallback), then calls the ``news.js`` endpoint with
    that token and converts the JSON results into plain dictionaries.

    Args:
        query: Search text sent to DuckDuckGo.
        limit: Maximum number of articles to return.

    Returns:
        A list of dicts with the keys ``'title'``, ``'summary'``, ``'url'``
        and ``'source'``. Titles and summaries have HTML tags removed and
        HTML entities decoded. An empty list is returned when any request
        fails, no token can be found, or the response cannot be parsed; in
        those cases a short error message is printed.
    """
    # Set normal browser headers so DuckDuckGo doesn't block us
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36',
        'Accept-Language': 'en-US,en;q=0.9',
        'Referer': 'https://duckduckgo.com/'
    }
    main_url = "https://duckduckgo.com/"
    api_url = "https://duckduckgo.com/news.js"

    # Compiled once; used to strip HTML tags such as <b> from every article.
    tag_pattern = re.compile(r'<[^>]+>')

    def _clean(raw):
        # Strip tags and decode entities like &#x27;. A missing/null field
        # becomes an empty string instead of raising TypeError in re.
        text = '' if raw is None else str(raw)
        return html.unescape(tag_pattern.sub('', text))

    # The session carries cookies between the two requests; the context
    # manager guarantees its connection pool is released on every exit path.
    with requests.Session() as session:
        # 1. Fetch the validation token (VQD)
        try:
            # We use a POST request here. It is currently much more reliable
            # for bypassing DDG's bot detection than a standard GET request.
            response = session.post(main_url, data={'q': query}, headers=headers, timeout=10)
            response.raise_for_status()
        except requests.RequestException as e:
            print(f"Error connecting to DuckDuckGo: {e}")
            return []

        # Look for the token in the headers first (DuckDuckGo's new standard)
        vqd_token = response.headers.get('x-vqd-4')

        # Fallback: search the raw HTML if the header is missing
        if not vqd_token:
            # We use a broader regex just in case DDG changes the token format
            vqd_match = re.search(r'vqd=[\'"]?([^\'"\s&]+)[\'"]?', response.text)
            if vqd_match:
                vqd_token = vqd_match.group(1)

        if not vqd_token:
            print("Error: Could not find VQD token. DuckDuckGo may have blocked the IP or changed layouts.")
            return []

        # 2. Hit the hidden News API directly
        api_params = {
            'l': 'en-US',      # Language/Region parameter (Required)
            'o': 'json',       # Forces a pure JSON response instead of JS/JSONP
            'noamp': '1',      # Disables Google AMP links
            'q': query,        # Your search query
            'vqd': vqd_token,  # The authorization token we just scraped
            'p': '1',          # Page/Section identifier
            'df': 'w'          # Date filter: 'w' = past week
        }

        # Add AJAX headers to convince the API we are a legitimate browser script
        api_headers = headers.copy()
        api_headers['Accept'] = 'application/json, text/javascript, */*; q=0.01'
        api_headers['X-Requested-With'] = 'XMLHttpRequest'

        # Only the network call lives in this try block, so the JSON handler
        # below can never run while news_response is still unbound.
        try:
            news_response = session.get(api_url, params=api_params, headers=api_headers, timeout=10)
            news_response.raise_for_status()
        except requests.RequestException as e:
            print(f"Error fetching news JSON: {e}")
            return []

    # Double check if DDG still returned a JS error instead of JSON
    if "Spice.failed" in news_response.text:
        print(f"Error: API rejected the token. Raw response: {news_response.text[:100]}")
        return []

    try:
        news_data = news_response.json()
    except ValueError:
        print(f"Error: Failed to parse JSON response. Raw API Response: {news_response.text[:200]}")
        return []

    # The payload is expected to be an object with a 'results' array; anything
    # else (a bare list, null results, ...) is treated as "no articles".
    if not isinstance(news_data, dict):
        print("Error: Unexpected JSON structure in API response.")
        return []
    results = news_data.get('results')
    if not isinstance(results, list):
        results = []

    # 3. Extract the title and summary, honour the limit, and clean up the text
    news_list = []
    for article in results:
        # Stop processing once we reach the requested limit
        if len(news_list) >= limit:
            break
        # Skip malformed entries rather than crashing on .get()
        if not isinstance(article, dict):
            continue
        news_list.append({
            'title': _clean(article.get('title', '')),
            'summary': _clean(article.get('excerpt', '')),
            'url': article.get('url', ''),
            'source': article.get('source', '')
        })
    return news_list
| # ========================================== | |
| # Example usage: | |
| # ========================================== | |
if __name__ == "__main__":
    # Demo run: search with the module-level defaults and print a numbered
    # report of whatever articles come back.
    topic = QUERY
    print(f"Fetching top {LIMIT} news articles for: {topic}\n")

    # limit=LIMIT is passed explicitly even though it is also the default.
    articles = get_duckduckgo_news(topic, limit=LIMIT)

    if articles:
        for position, entry in enumerate(articles, start=1):
            report_lines = (
                f"{position}. {entry['title']}",
                f" Source: {entry['source']}",
                f" Summary: {entry['summary']}",
                f" URL: {entry['url']}\n",
            )
            for line in report_lines:
                print(line)
    else:
        print("No news found.")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment