import requests
import pandas as pd
from bs4 import BeautifulSoup
from datetime import date
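Fetching the events with `requests` does not work here: the site rejects the request with a 403 (Forbidden). Instead, run the following in the browser's JavaScript console, then save the returned HTML as `archtober_events.html` (the file read below):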
// POST to the site's AJAX endpoint asking for the 'get_events_blocks' action
// and hand back the jqXHR object that $.ajax returns.
// NOTE(review): jQuery ignores the callbacks' return values, so the HTML is
// presumably read from the returned jqXHR (e.g. zz.responseText) once the
// request has completed — confirm.
getEventsBlocks = function () {
  const settings = {
    url: ajax_obj.ajaxurl,
    type: 'POST',
    dataType: 'html',
    data: { action: 'get_events_blocks' },
    // On success, pass the HTML payload straight through.
    success: (html) => html,
    // On failure, dump everything jQuery reports to the console.
    error: (jqXHR, textStatus, errorThrown) =>
      console.log(jqXHR, textStatus, errorThrown),
  };
  return $.ajax(settings);
};

// Fire the request; zz holds the jqXHR object for inspection in the console.
zz = getEventsBlocks()
# Load the HTML fragment saved from the browser console and collect every
# <div class="event-block"> element it contains.

# Decode explicitly rather than relying on the platform default encoding;
# the event times contain non-ASCII characters (en dashes).
# NOTE(review): assumes the file was saved as UTF-8 — confirm.
with open('archtober_events.html', 'r', encoding='utf-8') as f:
    html = f.read()

# Name the parser explicitly: without it BeautifulSoup emits a warning and
# uses whichever parser happens to be installed, so the resulting tree can
# differ between environments.
soup = BeautifulSoup(html, 'html.parser')

# One <div class="event-block"> per event.
events_raw = soup.find_all('div', class_='event-block')

# Number of event blocks found (displayed as the notebook cell's output).
len(events_raw)
181
# Turn each event <div> into a flat record: the day, id and event type come
# from the div's data-* attributes; the time and title come from child divs;
# the url is the href of the first link inside the block.
event_dict_list = [
    {
        'day': block['data-days'],
        'id': block['data-id'],
        'event_type': block['data-event-type'],
        'time': block.find('div', class_='time').get_text(),
        'title': block.find('div', class_='event-title').get_text(),
        'url': block.find('a')['href'],
    }
    for block in events_raw
]

# One row per event; preview the first rows as the notebook cell's output.
event_df = pd.DataFrame(event_dict_list)
event_df.head()
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
</style>
| | day | event_type | id | time | title | url |
---|---|---|---|---|---|---|
0 | 1 | special-event | 232 | 8am–8pm | World Habitat Day | https://archtober.org/events/world-habitat-day/ |
1 | 9 | conference | 298 | 8am–7pm | 2018 MAS Summit for New York City: Shaping the... | https://archtober.org/events/2018-mas-summit-f... |
2 | 31 | special-event | 382 | 8am–8pm | World Cities Day | https://archtober.org/events/world-cities-day/ |
3 | 18 | conference | 401 | 8:30am–12:30pm | Weathering the Storm: The Intersection of Fina... | https://archtober.org/events/weathering-the-st... |
4 | 12 | panel | 508 | 8–9am | Leadership Breakfast with Andrea Lamberti | https://archtober.org/events/leadership-breakf... |
# Save the scraped events table to archtober.csv (path is relative to the
# current working directory).
event_df.to_csv('archtober.csv')