Skip to content

Instantly share code, notes, and snippets.

@jonadem
Last active February 1, 2018 21:25
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jonadem/b5c72a919cb33801dd69f2d69d418fb9 to your computer and use it in GitHub Desktop.
Save jonadem/b5c72a919cb33801dd69f2d69d418fb9 to your computer and use it in GitHub Desktop.
# encoding: utf-8
from __future__ import print_function
import re
import requests
from bs4 import BeautifulSoup
from HTMLParser import HTMLParser
# Scrape the Makilab events list and print every event (title, start, end,
# all-day flag, URL) to stdout, following the "next" pager link until the
# last page has been processed.

# urljoin lives in a different module on Python 2 and Python 3.
try:
    from urllib.parse import urljoin
except ImportError:
    from urlparse import urljoin

# Not used by the script itself; kept so the module-level name still exists.
html_parser = HTMLParser()

BASE_URL = "https://makilab.org"

page_url = BASE_URL + "/events-list"
soup = BeautifulSoup(requests.get(page_url).content, 'html.parser')
while soup is not None:
    # Only direct <tr> children of the table body are events. Selecting them
    # explicitly avoids iterating over whitespace text nodes, which previously
    # had to be filtered out by catching TypeError.
    events_view = soup.find("div", {"class": "view-events-list"})
    tbody = events_view.find("tbody") if events_view is not None else None
    rows = tbody.find_all("tr", recursive=False) if tbody is not None else []

    for event in rows:
        title_cell = event.find("td", {"class": "views-field-title"})
        datetime_cell = event.find("td", {"class": "views-field-field-event-datetime"})
        if title_cell is None or title_cell.a is None:
            continue  # malformed row: no title link
        if datetime_cell is None or datetime_cell.span is None:
            continue  # malformed row: no date information

        title = title_cell.a.text
        datetime_tag = datetime_cell.span
        if datetime_tag.div is not None:
            # A nested <div> carries separate start/end spans: timed event.
            start_tag = datetime_tag.div.find("span", {"class": "date-display-start"})
            end_tag = datetime_tag.div.find("span", {"class": "date-display-end"})
            if start_tag is None or end_tag is None:
                continue  # malformed row: incomplete date range
            start = start_tag.get("content")
            end = end_tag.get("content")
            all_day = False
        else:
            # A bare <span> carries a single date: all-day event.
            start = datetime_tag.get("content")
            end = None
            all_day = True
        if start is None:
            continue  # malformed row: date tag without a "content" attribute

        # Resolve the detail link against the current page; fall back to the
        # site root when the row has no link at all.
        url_cell = event.find("td", {"class": "views-field-view-node"})
        link = url_cell.a if url_cell is not None else None
        link_href = link.get("href") if link is not None else None
        url = urljoin(page_url, link_href) if link_href else BASE_URL

        print("title: {}".format(title.encode('utf-8')))
        print("start: {}".format(start))
        print("end: {}".format(end))
        print("all_day: {}".format(all_day))
        print("url: {}".format(url))
        print("")

    # Follow the pager's "next" link, if any; otherwise stop.
    next_page_links = soup('li', 'pager-next')
    next_link = next_page_links[0].find('a') if next_page_links else None
    href = next_link.get('href') if next_link is not None else None
    if href:
        page_url = urljoin(page_url, href)
        # Use the same explicit parser as for the first page so every page is
        # parsed consistently (and bs4 does not warn about guessing a parser).
        soup = BeautifulSoup(requests.get(page_url).content, 'html.parser')
    else:
        soup = None
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment