-
-
Save capjamesg/41a0461c40148c8e45b59c0583ce3bb8 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from typing import Optional, Tuple
from urllib.parse import urlparse

import requests
from bs4 import BeautifulSoup
from bs4 import Comment
from indieweb_utils import canonicalize_url

# from ..utils.urls import canonicalize_url
class TrackbackError(Exception):
    """Common base class for every trackback-related error.

    It adds nothing to :class:`Exception`; it exists so that callers can
    catch trackback failures with a single ``except`` clause.
    """
# NOTE: within this module the name below shadows Python's built-in
# ``ConnectionError``.
class ConnectionError(TrackbackError):
    """Signals that a connection to the remote server could not be made."""
class InvalidStatusCodeError(TrackbackError):
    """Signals that the server answered with an unexpected HTTP status code."""
def discover_trackback_url(url: str, timeout: float = 10.0) -> str:
    """Discover the trackback URL advertised by a web page.

    The page is fetched and each HTML comment in it is parsed in search of
    an ``rdf:Description`` element; the element's ``trackback:ping``
    attribute holds the trackback URL.

    :param url: The URL to discover the trackback URL from.
    :param timeout: Seconds to wait for the server before giving up.
    :returns: The canonicalized trackback URL, or an empty string when no
        comment on the page contains an RDF description.
    :raises InvalidStatusCodeError: If the server does not answer with
        HTTP 200.
    :raises TrackbackError: If an RDF description is found but carries no
        ``trackback:ping`` attribute.
    :raises requests.exceptions.Timeout: If the server does not respond
        within ``timeout`` seconds.
    """
    # Without an explicit timeout, requests waits indefinitely on an
    # unresponsive server.
    response = requests.get(url, timeout=timeout)

    if response.status_code != 200:
        raise InvalidStatusCodeError(
            "The server returned a status code of {}.".format(response.status_code)
        )

    page = BeautifulSoup(response.text, "html.parser")

    # The RDF block is looked for inside HTML comments, so collect those.
    comments = page.find_all(string=lambda text: isinstance(text, Comment))

    for comment in comments:
        # Each comment is parsed as its own document; html.parser
        # lower-cases tag and attribute names, hence the lower-case lookups.
        rdf = BeautifulSoup(comment, "html.parser").find("rdf:description")

        if rdf is None:
            continue

        trackback_url = rdf.get("trackback:ping")

        if not trackback_url:
            raise TrackbackError("No trackback URL found in RDF.")

        # The page's own host is handed to canonicalize_url along with the
        # discovered URL.
        domain = urlparse(url).netloc

        return canonicalize_url(trackback_url, domain)

    return ""
def send_trackback(
    url,
    title: Optional[str] = None,
    excerpt: Optional[str] = None,
    blog_name: Optional[str] = None,
    source_url: Optional[str] = None,
    timeout: float = 10.0,
) -> Tuple[int, str]:
    """Send a trackback ping to a trackback URL.

    The ping is an HTTP POST carrying form-encoded fields, as described by
    the TrackBack specification. The server is expected to reply with an
    XML ``<response>`` document whose ``<error>`` element is ``0`` on
    success.

    :param url: The URL to send the trackback to.
    :param title: The title of the post.
    :param excerpt: The excerpt of the post.
    :param blog_name: The name of the blog.
    :param source_url: The permalink of the post that links to the target;
        sent as the ``url`` form field. The specification marks this field
        as required, so servers may reject pings that omit it.
    :param timeout: Seconds to wait for the server before giving up.
    :returns: A ``(status_code, message)`` tuple, where ``message`` is the
        text of the response's ``<message>`` element or an empty string.
    :raises ConnectionError: If the server cannot be reached.
    :raises InvalidStatusCodeError: If the server does not answer with
        HTTP 200.
    :raises TrackbackError: If the reply has no ``<response>`` element or
        reports a non-zero error code.
    :raises requests.exceptions.Timeout: If the server accepts the
        connection but does not respond within ``timeout`` seconds.
    """
    fields = {
        "url": source_url,
        "title": title,
        "excerpt": excerpt,
        "blog_name": blog_name,
    }
    # Only transmit the fields the caller actually supplied.
    payload = {key: value for key, value in fields.items() if value is not None}

    try:
        response = requests.post(url, data=payload, timeout=timeout)
    except requests.exceptions.ConnectionError as err:
        raise ConnectionError("Could not connect to the server.") from err

    if response.status_code != 200:
        raise InvalidStatusCodeError(
            "The server returned a status code of {}.".format(response.status_code)
        )

    # Indexing a BeautifulSoup document with ["response"] is an attribute
    # lookup and raises KeyError; the element has to be searched for.
    envelope = BeautifulSoup(response.text, "html.parser").find("response")

    if envelope is None:
        raise TrackbackError("The server did not return a trackback response.")

    error = envelope.find("error")
    message = envelope.find("message")
    message_text = message.text.strip() if message is not None else ""

    # validate response
    if error is not None and error.text.strip() != "0":
        raise TrackbackError(
            "The server returned an error: {}".format(message_text)
        )

    return response.status_code, message_text
if __name__ == "__main__":
    # Demo lookup; guarded so that importing this module does not trigger a
    # network request or print anything.
    new_trackback_url = discover_trackback_url("https://arxiv.org/abs/1706.03762")
    print(new_trackback_url)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment