Skip to content

Instantly share code, notes, and snippets.

@Helw150
Last active September 15, 2017 17:28
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Helw150/e2f60b6cf3dac51906f59473f04f7bbb to your computer and use it in GitHub Desktop.
Save Helw150/e2f60b6cf3dac51906f59473f04f7bbb to your computer and use it in GitHub Desktop.
Functions to work with Open Graph efficiently
def returnArticles(og_array):
    """Return the ``og:url`` of every page whose ``og:type`` is ``article``.

    Args:
        og_array: Iterable with one entry per page. Each entry is an iterable
            of Open Graph properties; a property is any object exposing a
            dict-style ``get`` method (a plain dict, or presumably the tags
            collected by ``extractOpenGraph``) with ``"property"`` and
            ``"content"`` keys. Items without ``get`` are ignored.

    Returns:
        A list of article URLs in input order. Pages that are not articles,
        or that are articles but declare no ``og:url``, contribute nothing.
    """
    article_urls = []
    for og in og_array:
        is_article = False
        # Reset per page: otherwise an article lacking og:url would reuse the
        # URL left over from an earlier page (or hit an unbound local).
        article_url = None
        for prop in og:
            lookup = getattr(prop, "get", None)
            if lookup is None:
                continue
            name = lookup("property")
            if name == "og:type":
                if lookup("content") == "article":
                    is_article = True
            elif name == "og:url":
                # If a page repeats og:url, the last occurrence wins.
                article_url = lookup("content")
        if is_article and article_url is not None:
            article_urls.append(article_url)
    return article_urls
def extractOpenGraph(urls):
    """Fetch each URL and collect the Open Graph tags from its ``<head>``.

    The response body is streamed line by line and reading stops at the first
    line containing ``</head>``, so the rest of the page is not downloaded.

    Args:
        urls: Iterable of URLs to fetch.

    Returns:
        A list with one entry per URL, in input order. Each entry holds the
        tags whose ``property`` attribute starts with ``og``; it is empty
        when the fetched markup has no ``<head>`` element.

    Exceptions raised by ``requests.get`` are not caught here.
    """
    og_pattern = re.compile(r'^og')
    heads = []
    for url in urls:
        r = requests.get(url, stream=True)
        lines = []
        try:
            for line in r.iter_lines():
                # Open Graph extraction is best-effort: undecodable bytes are
                # replaced rather than aborting the whole batch.
                line = line.decode("utf8", errors="replace")
                lines.append(line)
                if "</head>" in line.lower():
                    break
        finally:
            # With stream=True the connection stays checked out until the
            # body is consumed or the response is closed; we stop early.
            r.close()
        # iter_lines() strips line endings; rejoin with newlines so markup
        # split across lines is not fused together.
        # Name the parser explicitly so results do not depend on which
        # optional parsers happen to be installed.
        doc = BeautifulSoup("\n".join(lines), "html.parser")
        head = doc.head
        ogs = head.find_all(property=og_pattern) if head is not None else []
        heads.append(ogs)
    return heads
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment