Skip to content

Instantly share code, notes, and snippets.

@niyaton
Last active December 2, 2022 06:41
Show Gist options
  • Save niyaton/0c6ea8e1b2c3be419c613c4e6461dbf5 to your computer and use it in GitHub Desktop.
Save niyaton/0c6ea8e1b2c3be419c613c4e6461dbf5 to your computer and use it in GitHub Desktop.
Using these scripts, you can generate a Slack message for each session from researchr.org. The scripts are customized for APSEC 2022.
from bs4 import BeautifulSoup
# Maps a track's full name (the text of a paper row's "prog-track" element)
# to the short label printed next to the paper title.
track2track = {
    'EDU - Software Engineering Education': 'EDU',
    'ERA - Early Research Achievements': 'ERA',
    'SEIP - Software Engineering in Practice': 'SEIP',
    'Technical Track': 'Technical Track',
}
# Input: the program page with the modal data of every paper appended to it.
# To create this file, you need to use "show_click_all_papers_command.py".
# The encoding is passed explicitly so that non-ASCII text is decoded the same
# way on every platform instead of with the locale's default codec.
# NOTE(review): assumes the saved page is UTF-8 -- confirm for your browser.
with open('apsec-program-expanded.html', encoding='utf-8') as f:
    program_text = f.read()
soup = BeautifulSoup(program_text, 'html.parser')
# Collect every paper session found on the program page.
# Each entry of `sessions` is a dict with the keys 'date', 'title', 'room' and
# 'papers'; each paper is a dict with the keys 'modal_id', 'start_time',
# 'end_time', 'title', 'authors' and 'track'.
sessions = []
for table in soup.find_all('table'):
    # Ignore non-target tables: only tables with a date facet are sessions.
    if not table.has_attr('data-facet-date'):
        continue
    # Keep only sessions whose event type is exactly 'Paper'.
    event_type = table.find('div', attrs={'class': 'event-type'})
    if not event_type or event_type.text != 'Paper':
        continue

    papers = []
    anchor_seen = False
    for tr in table.find_all('tr'):
        # <tr class="firefox-fix"> is used as the anchor: the paper rows are
        # the <tr> elements placed after it.  A <tr> with no (or an empty)
        # class attribute is treated as an ordinary row instead of raising.
        row_classes = tr.get('class') or []
        if row_classes and row_classes[0] == 'firefox-fix':
            anchor_seen = True
            continue
        if not anchor_seen:
            continue

        start_time = tr.find('div', attrs={'class': 'start-time'}).text
        title_link = tr.find('a', {'data-event-modal': True})
        # The modal id is the unique ID of the paper.
        modal_id = title_link.attrs['data-event-modal']
        # Keep only the author names; affiliations and the links to the
        # author pages are ignored.
        performers = tr.find('div', attrs={'class': 'performers'})
        authors = [author.text for author in performers.find_all('a')]
        # Convert the track's full name to its abbreviation.  A track that is
        # not listed in track2track keeps its full name instead of raising
        # KeyError.
        track_name = tr.find('div', attrs={'class': 'prog-track'}).text
        track = track2track.get(track_name, track_name)

        # A presentation ends when the next one in the same session starts.
        if papers:
            papers[-1]['end_time'] = start_time
        papers.append({
            'modal_id': modal_id,
            'start_time': start_time,
            'title': title_link.text,
            'authors': authors,
            'track': track,
        })

    session_title = table.find('div', attrs={'class': 'session-info-in-table'}).contents[0]
    room = table.attrs['data-facet-room']
    # The final presentation ends with the session: use the text after the
    # last '-' of the slot label.
    session_end_time = table.find('div', attrs={'class': 'slot-label'}).text.split('-')[-1].lstrip()
    # A session without any paper row has no final presentation to update.
    if papers:
        papers[-1]['end_time'] = session_end_time
    sessions.append({
        'date': table.attrs['data-facet-date'],
        'title': session_title,
        'room': room,
        'papers': papers,
    })
# Map each paper's modal id to the URL of its detailed information page,
# reading both from the <span class="appended"> elements of the page.
modal2link = {}
for appended in soup.find_all('span', attrs={'class': 'appended'}):
    # The anchor carrying "data-event-star" holds the paper's modal id.
    star_anchor = appended.find('a', {'data-event-star': True})
    paper_id = star_anchor['data-event-star']
    # The link is the href of the second-to-last <a> inside the span.
    detail_url = appended.find_all('a')[-2]['href']
    modal2link[paper_id] = detail_url
# Print the message for every session: the date, a channel description, and
# then each paper with its time slot, linked title, track and authors.
for session in sessions:
    print(session['date'])
    print(f'This channel is for *{session["title"]}* session at *{session["room"]}*.')
    print('The titles and presenters are')
    for paper in session['papers']:
        # A paper whose modal is missing from modal2link has no known link;
        # print its bare title rather than abort all output with KeyError.
        link = modal2link.get(paper['modal_id'])
        if link is None:
            title = f'{paper["title"]} ({paper["track"]})'
        else:
            title = f'[{paper["title"]}]({link}) ({paper["track"]})'
        print(f'* {paper["start_time"]} - {paper["end_time"]} {title}')
        print(' * ' + ', '.join(paper['authors']))
    # Blank line between sessions.
    print()
from bs4 import BeautifulSoup
# Run this script and paste its output into the JavaScript console on the
# APSEC program page.
# After running the JavaScript, save the modified HTML by using the browser's
# inspection feature.
# The input file read here is just the downloaded HTML.
# You can get this file by using curl or wget.
# The encoding is passed explicitly so that non-ASCII text is decoded the same
# way on every platform instead of with the locale's default codec.
# NOTE(review): assumes the downloaded page is UTF-8 -- confirm.
with open('apsec-program.html', encoding='utf-8') as f:
    program_text = f.read()
soup = BeautifulSoup(program_text, 'html.parser')
# For every paper row of every paper session, print JavaScript that clicks the
# paper's modal link and then clicks the last a[data-dismiss='modal'] element
# on the page.
for table in soup.find_all('table'):
    # Only tables with a date facet are sessions.
    if not table.has_attr('data-facet-date'):
        continue
    # Keep only sessions whose event type is exactly 'Paper'.
    event_type = table.find('div', attrs={'class': 'event-type'})
    if not event_type or event_type.text != 'Paper':
        continue

    anchor_seen = False
    for tr in table.find_all('tr'):
        # Paper rows are the <tr> elements after <tr class="firefox-fix">.
        # A <tr> with no (or an empty) class attribute is treated as an
        # ordinary row instead of raising.
        row_classes = tr.get('class') or []
        if row_classes and row_classes[0] == 'firefox-fix':
            anchor_seen = True
            continue
        if not anchor_seen:
            continue

        modal_id = tr.find('a', {"data-event-modal": True}).attrs['data-event-modal']
        # The attribute selector is closed with ']' so the emitted selector is
        # well-formed.
        print(f'document.querySelector("a[data-event-modal=\'{modal_id}\']").click()')
        # NOTE(review): the dismiss commands are emitted once per paper here;
        # confirm this nesting against the original script.
        print('modals = document.querySelectorAll("a[data-dismiss=\'modal\']")')
        print('modals[modals.length - 1].click()')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment