Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save bigwestern/e56a308a34410e762e1cefaff8a2df44 to your computer and use it in GitHub Desktop.
Save bigwestern/e56a308a34410e762e1cefaff8a2df44 to your computer and use it in GitHub Desktop.
Scrape the conference panels from a nomadit.co.uk-hosted event. The output is HTML.
#
# c:\Python36\python junkcode\nomadit.py > abstracts.html
#
# or: c:\Python36\python junkcode\nomadit.py --help
#
import requests
import argparse
from bs4 import BeautifulSoup
from jinja2 import Template
# Jinja2 template for the generated report. It is rendered with a single
# variable, `items`, which the loop unpacks as (title, convenors, abstract)
# triples and emits inside one <div> per triple. The stylesheet styles the
# `panel-title` class, which is the class of the title element looked up on
# each scraped panel page.
HTML = """
<html>
<head>
<title>Abstracts</title>
<style>
.panel-title {
font-size: 2em;
margin-top: 0.67em;
margin-bottom: 0.67em;
margin-left: 0;
margin-right: 0;
font-weight: bold;
}
</style>
</head>
<body>
{% for title, convenors, abstract in items %}
<div>
{{ title }}
{{ convenors }}
{{ abstract }}
</div>
{% endfor %}
</body>
</html>
"""
def join_url(a, b):
    """Join a base URL and a path with exactly one slash between them.

    Args:
        a: Base URL; any trailing slashes are ignored.
        b: Path or URL to append; any leading slashes are ignored.

    Returns:
        ``a`` and ``b`` separated by a single ``/``. If ``b`` is already an
        absolute URL (it has both a scheme and a host) it is returned
        unchanged, because gluing it onto ``a`` would give an invalid address.
    """
    # Local import keeps this function self-contained.
    from urllib.parse import urlsplit

    target = urlsplit(b)
    if target.scheme and target.netloc:
        return b
    return "/".join([a.rstrip("/"), b.lstrip("/")])
def get_panel_anchors(url, timeout=30):
    """Download the panel listing page and return its panel link elements.

    Args:
        url: Address of the page that lists all panels.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests`` can block indefinitely on a stalled server.

    Returns:
        The BeautifulSoup result list of every ``<a>`` element whose class
        is ``panel_list_entry``.

    Raises:
        requests.HTTPError: If the server responds with an error status.
        requests.RequestException: On connection failure or timeout.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly: an error page would otherwise parse to "no panels".
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')
    return soup.find_all("a", {"class": 'panel_list_entry'})
def get_panel_abstracts(base_url, anchors, timeout=30):
    """Fetch every linked panel page and extract its title, convenors and abstract.

    Args:
        base_url: Conference base URL that each anchor's ``href`` is joined to.
        anchors: Iterable of anchor elements supporting ``.get("href")``.
        timeout: Seconds to wait for each panel page before giving up.

    Returns:
        A list of ``(title, convenors, abstract)`` tuples in anchor order.
        Each member is the first ``<div>`` with class ``panel-title``,
        ``panel-convenors`` or ``panel-longabstract`` respectively, or
        ``None`` when the page has no such element.

    Raises:
        requests.HTTPError: If a panel page responds with an error status.
        requests.RequestException: On connection failure or timeout.
    """
    panels = []
    # One session for all pages so the HTTP connection is reused.
    with requests.Session() as session:
        for anchor in anchors:
            href = anchor.get("href")
            if not href:
                # An anchor without a target has nothing to fetch.
                continue
            response = session.get(join_url(base_url, href), timeout=timeout)
            response.raise_for_status()
            page = BeautifulSoup(response.text, 'html.parser')
            title = page.find("div", {"class": "panel-title"})
            convenors = page.find("div", {"class": "panel-convenors"})
            abstract = page.find("div", {"class": "panel-longabstract"})
            panels.append((title, convenors, abstract))
    return panels
def render(panels):
    """Render the panels through the HTML template and print the result.

    Args:
        panels: Sequence of (title, convenors, abstract) triples, passed to
            the template as ``items``.
    """
    template = Template(HTML)
    document = template.render(items=panels)
    print(document)
def main(args):
    """Scrape every panel of the conference and print the HTML report.

    Args:
        args: Parsed command-line namespace; only ``args.base_url`` is read.
    """
    base = args.base_url
    # The listing page links to each individual panel page.
    index_url = join_url(base, '/conferencesuite.php/panels/Views/allpanels')
    panel_links = get_panel_anchors(index_url)
    render(get_panel_abstracts(base, panel_links))
if __name__ == '__main__':
    # Command-line entry point: the only option is the conference base URL,
    # which defaults to the "shiftingstates" conference.
    cli = argparse.ArgumentParser(
        description='Scrape the panels from an nomadit.co.uk hosted conference.',
    )
    cli.add_argument(
        '--base-url', '-b',
        dest='base_url',
        default='http://nomadit.co.uk/shiftingstates',
        help='Set the base conference url',
    )
    main(cli.parse_args())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment