Skip to content

Instantly share code, notes, and snippets.

@archatas
Created May 8, 2023 02:24
Show Gist options
  • Save archatas/3c3848662c3b26aeead6952465243a22 to your computer and use it in GitHub Desktop.
Save archatas/3c3848662c3b26aeead6952465243a22 to your computer and use it in GitHub Desktop.
Open Graph Parser Example
from html.parser import HTMLParser
import requests
from pprint import pprint
class OpenGraphParser(HTMLParser):
    """
    Collect Open Graph ``<meta>`` tags from an HTML document.

    Feed markup with ``feed()`` and read the result with ``get_data()``.
    Only the properties listed in ``OG_PROPERTIES`` are collected; each one
    is stored under its name without the ``og:`` prefix. Tags whose
    ``property`` or ``content`` attribute is missing or empty are skipped,
    and when a property occurs more than once the last occurrence wins.

    For example, feeding

    <meta property="og:type" content="website" />
    <meta property="og:url" content="https://www.1st-things-1st.com/tips/4ysX4JhY4s8v/" />
    <meta property="og:title" content="Daily Tips on Prioritization, Productivity, & Growth" />
    <meta property="og:image" content="https://www.1st-things-1st.com/media/tips/2023/4ysX4JhY4s8v/tip.png" />
    <meta property="og:description" content="" />
    <meta property="og:site_name" content="1st things 1st" />
    <meta property="og:locale" content="en_US" />

    produces a dictionary with the keys ``type``, ``url``, ``title``,
    ``image``, ``site_name`` and ``locale`` (``description`` is skipped
    because its content is empty).
    """

    # Whitelist of Open Graph properties that are collected.
    OG_PROPERTIES = [
        "og:title",
        "og:type",
        "og:image",
        "og:url",
        "og:description",
        "og:locale",
        "og:site_name",
    ]

    # Prefix removed from a property name before it is used as a result key.
    _OG_PREFIX = "og:"

    def __init__(self, *args, **kwargs):
        """Create the parser; all arguments are forwarded to ``HTMLParser``."""
        super().__init__(*args, **kwargs)
        self.og_data = {}

    def reset(self):
        """
        Reset the parser and discard any previously collected data.

        Without this override a parser reused for a second document via
        ``reset()`` would still report values found in the first one.
        """
        super().reset()
        self.og_data = {}

    def handle_starttag(self, tag, attrs):
        """
        Record the content of a whitelisted Open Graph ``<meta>`` tag.

        ``tag`` is the element name and ``attrs`` the list of
        ``(name, value)`` pairs supplied by ``HTMLParser``.
        """
        if tag.lower() != "meta":
            return

        attributes = dict(attrs)
        prop = attributes.get("property")
        content = attributes.get("content")
        # Missing or empty values are skipped on purpose (truthiness test).
        if not prop or not content or prop not in self.OG_PROPERTIES:
            return

        # Every whitelisted name starts with the prefix, so slice it off
        # instead of replacing the substring wherever it might occur.
        self.og_data[prop[len(self._OG_PREFIX):]] = content

    def get_data(self):
        """Return the dictionary of collected values keyed by short name."""
        return self.og_data
if __name__ == "__main__":
    # Demo: fetch a page and pretty-print the Open Graph data found in it.
    # A timeout is set because requests otherwise waits indefinitely for an
    # unresponsive server.
    response = requests.get(
        "https://www.1st-things-1st.com/tips/4ysX4JhY4s8v/",
        timeout=10,
    )
    # Fail loudly on HTTP errors instead of silently parsing an error page.
    response.raise_for_status()

    og_parser = OpenGraphParser()
    og_parser.feed(response.text)
    pprint(og_parser.get_data())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment