Created
January 11, 2022 08:35
-
-
Save miohtama/89dde043f1c5461e0a24bae21398151a to your computer and use it in GitHub Desktop.
Bing IndexNow URL submitter for Python
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Use IndexNow API to send the latest updates to Bing. | |
Ensures Bing picks up the pages that should be fresh and interesting. | |
Do not wait them picked up by the natural crawler process. | |
To submit the pages you need to have a Bing known file at the root of your website. | |
https://www.indexnow.org/faq | |
IndexNow will also submit the Yandex. | |
For more information see https://tradingstrategy.ai | |
""" | |
import logging | |
from typing import Set | |
import requests | |
__license__ = "MIT" | |
logger = logging.getLogger(__name__) | |
class IndexNowSubmitter:
    """Submit fresh URLs to search engines through the IndexNow API.

    The instance can be long-lived: URLs that were already submitted are
    remembered and filtered out of later calls. The memory is a plain
    in-process set, so it does not survive restarts and is not shared
    between processes.
    """

    # IndexNow endpoint the URL list is POSTed to. Kept as a class attribute
    # so it can be overridden on a subclass or an instance.
    endpoint = "https://www.bing.com/indexnow"

    def __init__(self, domain: str, api_key: str, timeout: float = 30.0):
        """Create a submitter for one website.

        :param domain: Host name of the website, without scheme (used as ``host``
            and to build the ``keyLocation`` URL).
        :param api_key: The API key of the domain. This key is not secret,
            because it is verified against the key file at the domain root.
        :param timeout: Seconds to wait for the HTTP request before giving up.
            Without a timeout ``requests`` may block indefinitely.
        """
        # A shared session gives us HTTP 1.1 keep-alive between submissions
        self.session = requests.Session()
        self.domain = domain
        self.api_key = api_key
        self.timeout = timeout
        # A simple in-memory record of every URL successfully submitted so far
        self.already_submitted = set()

    def index(self, urls: Set[str]) -> int:
        """Submit the URLs that have not been submitted before.

        :param urls: Set of absolute URLs to index.
        :return: Number of URLs submitted by this call
            (0 if every URL was already submitted earlier).
        :raise requests.HTTPError: If the endpoint answers with an error status.
            In that case the URLs are not marked as submitted.
        """
        not_submitted_yet = urls - self.already_submitted

        if not not_submitted_yet:
            # Nothing new to report: skip the HTTP round trip instead of
            # POSTing an empty urlList.
            return 0

        payload = {
            "host": self.domain,
            "key": self.api_key,
            "keyLocation": f"https://{self.domain}/{self.api_key}.txt",
            "urlList": list(not_submitted_yet),
        }

        resp = self.session.post(self.endpoint, json=payload, timeout=self.timeout)

        # Handle any error responses
        resp.raise_for_status()
        logger.info("IndexNow replied %d: %s", resp.status_code, resp.text)

        # Remember the URLs only after the request succeeded,
        # so that a failed submission can be retried.
        self.already_submitted |= not_submitted_yet
        return len(not_submitted_yet)
if __name__ == "__main__":
    # Manual smoke test.
    #
    # IndexNow offers no staging or sandbox environment, so an automated
    # test suite is not possible. Instead we run one real submission
    # using hardcoded credentials.
    logging.basicConfig(level=logging.INFO, handlers=[logging.StreamHandler()])

    index_now = IndexNowSubmitter("tradingstrategy.ai", "b8e3e4232bd34aea86a92cbfce3dc767")
    urls = {"https://tradingstrategy.ai/trading-view/top-list/daily-up"}

    # The first call must submit our single URL
    submitted_count = index_now.index(urls)
    assert submitted_count == 1

    # The same URL again must be filtered out, so nothing is submitted twice
    submitted_count = index_now.index(urls)
    assert submitted_count == 0

    print("All ok")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment