# jonadem/makilabEventsFetcher.py

## makilabEventsFetcher.py
# encoding: utf-8
from __future__ import print_function

import re
import requests

from bs4 import BeautifulSoup
from HTMLParser import HTMLParser

# Scrape the paginated event listing at https://makilab.org/events-list and
# print, for every event row, its title, start, end, all-day flag and URL.

BASE_URL = "https://makilab.org"

# Not referenced anywhere in the visible script; kept so the module-level
# name defined by the original code is still available.
html_parser = HTMLParser()


def _absolute_url(href):
    """Return *href* as an absolute URL, prefixing BASE_URL when it is relative."""
    if href.startswith("http"):
        return href
    # Strip leading slashes so "/path" and "path" both join with one slash.
    return BASE_URL + "/" + href.lstrip("/")


def _fetch_page(url):
    """Download *url* and parse it, always with the same 'html.parser' backend."""
    return BeautifulSoup(requests.get(url).content, 'html.parser')


def _event_rows(soup):
    """Return the <tr> rows of the events table, or an empty list if absent."""
    container = soup.find("div", {"class": "view-events-list"})
    tbody = container.find("tbody") if container is not None else None
    if tbody is None:
        return []
    # Only direct <tr> children: skips the whitespace text nodes that sit
    # between rows without relying on an exception to filter them out.
    return tbody.find_all("tr", recursive=False)


def _parse_event(row):
    """Extract (title, start, end, all_day, url) from a table row.

    Returns None when the row lacks the title link or the date information,
    so the caller can skip it.
    """
    title_cell = row.find("td", {"class": "views-field-title"})
    date_cell = row.find("td", {"class": "views-field-field-event-datetime"})
    if title_cell is None or title_cell.a is None:
        return None
    if date_cell is None or date_cell.span is None:
        return None

    title = title_cell.a.text
    datetime_tag = date_cell.span

    if datetime_tag.div is not None:
        # A nested <div> carries separate start/end spans: a timed event.
        start_tag = datetime_tag.div.find("span", {"class": "date-display-start"})
        end_tag = datetime_tag.div.find("span", {"class": "date-display-end"})
        if start_tag is None or end_tag is None:
            return None
        start = start_tag.get("content")
        end = end_tag.get("content")
        all_day = False
    else:
        # A bare span carries a single date: treated as an all-day event.
        start = datetime_tag.get("content")
        end = None
        all_day = True

    if start is None:
        return None

    # Fall back to the site root when the row has no usable link.
    url_cell = row.find("td", {"class": "views-field-view-node"})
    link = url_cell.a if url_cell is not None else None
    href = link.get("href") if link is not None else None
    url = _absolute_url(href) if href else BASE_URL

    return title, start, end, all_day, url


def _next_page_url(soup):
    """Return the absolute URL of the next listing page, or None on the last one."""
    next_item = soup.find("li", "pager-next")
    if next_item is None or not next_item.text:
        return None
    link = next_item.a
    href = link.get("href") if link is not None else None
    return _absolute_url(href) if href else None


soup = _fetch_page(BASE_URL + "/events-list")

while soup is not None:
    for row in _event_rows(soup):
        event = _parse_event(row)
        if event is None:
            continue
        title, start, end, all_day, url = event
        print("title: {}".format(title.encode('utf-8')))
        print("start: {}".format(start))
        print("end: {}".format(end))
        print("all_day: {}".format(all_day))
        print("url: {}".format(url))
        print("")

    next_url = _next_page_url(soup)
    soup = _fetch_page(next_url) if next_url else None