Skip to content

Instantly share code, notes, and snippets.

@capjamesg
Created March 1, 2023 19:57
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save capjamesg/41a0461c40148c8e45b59c0583ce3bb8 to your computer and use it in GitHub Desktop.
Save capjamesg/41a0461c40148c8e45b59c0583ce3bb8 to your computer and use it in GitHub Desktop.
import requests
from bs4 import BeautifulSoup
from bs4 import Comment
from urllib.parse import urlparse
from indieweb_utils import canonicalize_url
# from ..utils.urls import canonicalize_url
class TrackbackError(Exception):
    """Root of the exception hierarchy used by the trackback helpers.

    Catch this type to handle every trackback-specific failure at once.
    """
class ConnectionError(TrackbackError):
    """Signal that the remote server could not be reached.

    NOTE: inside this module the name shadows Python's built-in
    ``ConnectionError``; this class derives from ``TrackbackError`` instead.
    """
class InvalidStatusCodeError(TrackbackError):
    """Signal that the server answered with an unexpected HTTP status code."""
def discover_trackback_url(url: str, *, timeout: float = 10.0) -> str:
    """Discover the trackback URL advertised by a web page.

    The page is fetched and each HTML comment in it is parsed on its own.
    The first comment that contains an ``rdf:Description`` element decides
    the outcome: its ``trackback:ping`` attribute is returned, or an error is
    raised when that attribute is missing.

    :param url: The URL to discover the trackback URL from.
    :param timeout: Seconds to wait for the server, passed to
        ``requests.get``. Keyword-only.
    :returns: The trackback URL after it has been passed through
        ``canonicalize_url`` together with the page's domain, or an empty
        string when no comment contains an ``rdf:Description`` element.
    :raises InvalidStatusCodeError: If the server does not answer with
        HTTP 200.
    :raises TrackbackError: If an ``rdf:Description`` element is found but it
        has no ``trackback:ping`` attribute.
    """
    # Without an explicit timeout ``requests`` may wait forever on a host
    # that accepts the connection but never answers.
    response = requests.get(url, timeout=timeout)

    if response.status_code != 200:
        raise InvalidStatusCodeError(
            "The server returned a status code of {}.".format(response.status_code)
        )

    page = BeautifulSoup(response.text, "html.parser")

    # The RDF block is looked for inside HTML comments, so every comment is
    # re-parsed as markup of its own.
    comments = page.find_all(string=lambda text: isinstance(text, Comment))

    for comment in comments:
        fragment = BeautifulSoup(comment, "html.parser")

        # html.parser lower-cases tag and attribute names, hence the
        # lower-case lookups below.
        rdf = fragment.find("rdf:description")
        if not rdf:
            continue

        trackback_url = rdf.get("trackback:ping")
        if not trackback_url:
            raise TrackbackError("No trackback URL found in RDF.")

        domain = urlparse(url).netloc
        return canonicalize_url(trackback_url, domain)

    return ""
def send_trackback(
    url,
    title: str = None,
    excerpt: str = None,
    blog_name: str = None,
    *,
    source_url: str = None,
    timeout: float = 10.0,
):
    """Send a trackback to a URL.

    The ping is sent as an HTTP POST with form-encoded fields, as the
    TrackBack specification requires. Arguments left as ``None`` are omitted
    from the request.

    :param url: The URL to send the trackback to.
    :param title: The title of the post.
    :param excerpt: The excerpt of the post.
    :param blog_name: The name of the blog.
    :param source_url: The permalink of the post sending the ping; submitted
        as the ``url`` form field. Keyword-only.
    :param timeout: Seconds to wait for the server, passed to
        ``requests.post``. Keyword-only.
    :returns: A ``(status_code, message)`` tuple holding the HTTP status code
        and the text of the response's ``<message>`` element (an empty string
        when the element is absent).
    :raises ConnectionError: This module's ``ConnectionError``, if the server
        cannot be reached or does not answer within ``timeout``.
    :raises InvalidStatusCodeError: If the server does not answer with
        HTTP 200.
    :raises TrackbackError: If the reply has no ``<response>`` element or
        reports a non-zero ``<error>`` value.
    """
    fields = {
        "title": title,
        "url": source_url,
        "excerpt": excerpt,
        "blog_name": blog_name,
    }
    # Only submit the fields the caller actually provided.
    payload = {name: value for name, value in fields.items() if value is not None}

    try:
        response = requests.post(url, data=payload, timeout=timeout)
    except requests.exceptions.Timeout as err:
        # Checked first: a connect timeout is also a requests ConnectionError.
        raise ConnectionError("The request to the server timed out.") from err
    except requests.exceptions.ConnectionError as err:
        raise ConnectionError("Could not connect to the server.") from err

    if response.status_code != 200:
        raise InvalidStatusCodeError(
            "The server returned a status code of {}.".format(response.status_code)
        )

    # Item access on a BeautifulSoup object looks up tag *attributes*, so
    # ``soup["response"]`` always raised KeyError; the element has to be
    # located with find() instead.
    reply = BeautifulSoup(response.text, "html.parser").find("response")
    if reply is None:
        raise TrackbackError("The server did not return a trackback response.")

    # validate response
    error = reply.find("error")
    message = reply.find("message")
    message_text = message.text if message is not None else ""

    if error is not None and error.text.strip() != "0":
        raise TrackbackError(
            "The server returned an error: {}".format(message_text)
        )

    return response.status_code, message_text
if __name__ == "__main__":
    # Demo run: look up the trackback URL advertised by an example page and
    # print it. Guarded so that importing this module does not trigger a
    # network request or write to stdout.
    new_trackback_url = discover_trackback_url("https://arxiv.org/abs/1706.03762")
    print(new_trackback_url)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment