Skip to content

Instantly share code, notes, and snippets.

@hamletbatista
Last active September 28, 2019 16:38
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save hamletbatista/5f3b61008fa978f66ff75ecf21af85f8 to your computer and use it in GitHub Desktop.
Save hamletbatista/5f3b61008fa978f66ff75ecf21af85f8 to your computer and use it in GitHub Desktop.
# CSS selector for the <script> tags nested inside the list items of the
# forecast slideshow viewport.
script_sel = "#new_gchart_slideshow > div.forecast-viewport > ul > li > script"

# Every element matching the selector. `r` is defined earlier in the file.
# NOTE(review): presumably a requests-html response (it exposes `.html.find`)
# -- confirm against the code that performs the fetch.
all_rides = r.html.find(script_sel)
def extract_dates(data):
    """Extract the chart title and the raw ``addRows`` payload from a script.

    Args:
        data: JavaScript source text of one chart snippet, containing a
            ``dateData.addRows(...);`` call and a ``title: "...",`` option.

    Returns:
        A ``(title, columns)`` tuple.

        ``title`` is the text found between ``title: "`` and ``",``, or
        ``None`` when the script has no such title.

        ``columns`` is the unparsed text passed to ``dateData.addRows``
        (a string such as ``'[[new Date( 2019,6,15,08,00,00 ), 4]]'``), or
        an empty list when the call is not found.
    """
    # Raw strings keep the regex escapes for "(" and ")" out of Python's own
    # string-escape handling (non-raw "\(" is an invalid escape sequence), and
    # the dot is escaped so it only matches a literal ".".
    # The payload itself never contains ";", so "[^;]+" stops at the closing
    # ");" of the addRows call.
    rows_match = re.search(r"dateData\.addRows\(([^;]+)\);", data)
    # The empty-list fallback is kept for backward compatibility with callers.
    columns = rows_match.group(1) if rows_match is not None else []

    # Only double-quoted titles followed by a comma are recognised, which
    # skips single-quoted axis titles such as  title: 'minutes'.
    title_match = re.search(r'title: "([^"]+)",', data)
    title = title_match.group(1) if title_match is not None else None

    return (title, columns)
# The raw text of the first element in all_rides looks like this
# (sample output is reproduced in the comment that follows):
print(all_rides[0].text)
#'$(function() { google.load("visualization", "1", {packages:["corechart"], callback: drawChart137}); }); function drawChart137() { // Create and populate the data table. var dateData = new google.visualization.DataTable(); dateData.addColumn(\'datetime\', \'time\'); //dateData.addColumn(\'number\', \'null\'); dateData.addColumn(\'number\', \'Wait Times the Crowd Calendar Predicted\'); dateData.addRows([[new Date( 2019,6,15,08,00,00 ), 4],[new Date( 2019,6,15,08,15,00 ), 4],[new Date( 2019,6,15,08,30,00 ), 4],[new Date( 2019,6,15,08,45,00 ), 4],[new Date( 2019,6,15,09,00,00 ), 12],[new Date( 2019,6,15,09,15,00 ), 6],[new Date( 2019,6,15,09,30,00 ), 7],[new Date( 2019,6,15,09,45,00 ), 14],[new Date( 2019,6,15,10,00,00 ), 16],[new Date( 2019,6,15,10,15,00 ), 16],[new Date( 2019,6,15,10,30,00 ), 16],[new Date( 2019,6,15,10,45,00 ), 16],[new Date( 2019,6,15,11,00,00 ), 16],[new Date( 2019,6,15,11,15,00 ), 23],[new Date( 2019,6,15,11,30,00 ), 21],[new Date( 2019,6,15,11,45,00 ), 16],[new Date( 2019,6,15,12,00,00 ), 26],[new Date( 2019,6,15,12,15,00 ), 26],[new Date( 2019,6,15,12,30,00 ), 26],[new Date( 2019,6,15,12,45,00 ), 25],[new Date( 2019,6,15,13,00,00 ), 25],[new Date( 2019,6,15,14,30,00 ), 18],[new Date( 2019,6,15,14,45,00 ), 23],[new Date( 2019,6,15,15,00,00 ), 25],[new Date( 2019,6,15,15,15,00 ), 20],[new Date( 2019,6,15,15,30,00 ), 18],[new Date( 2019,6,15,15,45,00 ), 16],[new Date( 2019,6,15,16,00,00 ), 16],[new Date( 2019,6,15,16,15,00 ), 18],[new Date( 2019,6,15,16,30,00 ), 18],[new Date( 2019,6,15,16,45,00 ), 18],[new Date( 2019,6,15,17,00,00 ), 18],[new Date( 2019,6,15,17,15,00 ), 19],[new Date( 2019,6,15,17,30,00 ), 19],[new Date( 2019,6,15,17,45,00 ), 19],[new Date( 2019,6,15,18,00,00 ), 19],[new Date( 2019,6,15,18,15,00 ), 20],[new Date( 2019,6,15,18,30,00 ), 20],[new Date( 2019,6,15,18,45,00 ), 24],[new Date( 2019,6,15,19,00,00 ), 24],[new Date( 2019,6,15,19,15,00 ), 25],[new Date( 2019,6,15,19,30,00 ), 21],[new Date( 2019,6,15,19,45,00 ), 21],[new 
#Date( 2019,6,15,20,00,00 ), 21],[new Date( 2019,6,15,20,15,00 ), 21],[new Date( 2019,6,15,20,30,00 ), 21],[new Date( 2019,6,15,20,45,00 ), 21],[new Date( 2019,6,15,21,00,00 ), 21],[new Date( 2019,6,15,21,15,00 ), 21],[new Date( 2019,6,15,21,30,00 ), 21],[new Date( 2019,6,15,21,45,00 ), 21],[new Date( 2019,6,15,22,00,00 ), 21],[new Date( 2019,6,15,22,15,00 ), 25],[new Date( 2019,6,15,22,30,00 ), 12],[new Date( 2019,6,15,22,45,00 ), 12],[new Date( 2019,6,15,23,00,00 ), 12],[new Date( 2019,6,15,23,15,00 ), 12],[new Date( 2019,6,15,23,30,00 ), 12],[new Date( 2019,6,15,23,45,00 ), 12]]); var userOptions = { title: "Alice in Wonderland - 7/15/19", series: [ // CC Predicted forecasts {color: \'blue\', pointSize: 0, visibleInLegend: true, lineWidth:2 }, ], width: $(\'div#center.column\').width(), height: 320, chartArea:{left:30,top:50,width:"80%",height:"70%"}, legend: {position: \'top\', textStyle: {color: \'black\', fontSize: 11}}, fontSize:10, hAxis: { slantedText: true, slantedTextAngle: 45, viewWindowMode:\'pretty\', format: \'h aa\', maxValue: new Date(2019,6,16,00,00,00) }, vAxis: {format: \'0\', title: \'minutes\', maxValue: 31} // Nothing specified for axis 0 }; var userChart = new google.visualization.AreaChart(document.getElementById(\'google_chart_137\')); userChart.draw(dateData, userOptions); } $("#new_gchart_slideshow").width( $("div#center.column").width() );'
# After running the raw text through extract_dates, we get a cleaner
# (title, rows) tuple (sample output is reproduced in the comment that follows):
print(extract_dates(all_rides[0].text))
#('Alice in Wonderland - 7/15/19',
#'[[new Date( 2019,6,15,08,00,00 ), 4],[new Date( 2019,6,15,08,15,00 ), 4],[new Date( 2019,6,15,08,30,00 ), 4],[new Date( 2019,6,15,08,45,00 ), 4],[new Date( 2019,6,15,09,00,00 ), 12],[new Date( 2019,6,15,09,15,00 ), 6],[new Date( 2019,6,15,09,30,00 ), 7],[new Date( 2019,6,15,09,45,00 ), 14],[new Date( 2019,6,15,10,00,00 ), 16],[new Date( 2019,6,15,10,15,00 ), 16],[new Date( 2019,6,15,10,30,00 ), 16],[new Date( 2019,6,15,10,45,00 ), 16],[new Date( 2019,6,15,11,00,00 ), 16],[new Date( 2019,6,15,11,15,00 ), 23],[new Date( 2019,6,15,11,30,00 ), 21],[new Date( 2019,6,15,11,45,00 ), 16],[new Date( 2019,6,15,12,00,00 ), 26],[new Date( 2019,6,15,12,15,00 ), 26],[new Date( 2019,6,15,12,30,00 ), 26],[new Date( 2019,6,15,12,45,00 ), 25],[new Date( 2019,6,15,13,00,00 ), 25],[new Date( 2019,6,15,14,30,00 ), 18],[new Date( 2019,6,15,14,45,00 ), 23],[new Date( 2019,6,15,15,00,00 ), 25],[new Date( 2019,6,15,15,15,00 ), 20],[new Date( 2019,6,15,15,30,00 ), 18],[new Date( 2019,6,15,15,45,00 ), 16],[new Date( 2019,6,15,16,00,00 ), 16],[new Date( 2019,6,15,16,15,00 ), 18],[new Date( 2019,6,15,16,30,00 ), 18],[new Date( 2019,6,15,16,45,00 ), 18],[new Date( 2019,6,15,17,00,00 ), 18],[new Date( 2019,6,15,17,15,00 ), 19],[new Date( 2019,6,15,17,30,00 ), 19],[new Date( 2019,6,15,17,45,00 ), 19],[new Date( 2019,6,15,18,00,00 ), 19],[new Date( 2019,6,15,18,15,00 ), 20],[new Date( 2019,6,15,18,30,00 ), 20],[new Date( 2019,6,15,18,45,00 ), 24],[new Date( 2019,6,15,19,00,00 ), 24],[new Date( 2019,6,15,19,15,00 ), 25],[new Date( 2019,6,15,19,30,00 ), 21],[new Date( 2019,6,15,19,45,00 ), 21],[new Date( 2019,6,15,20,00,00 ), 21],[new Date( 2019,6,15,20,15,00 ), 21],[new Date( 2019,6,15,20,30,00 ), 21],[new Date( 2019,6,15,20,45,00 ), 21],[new Date( 2019,6,15,21,00,00 ), 21],[new Date( 2019,6,15,21,15,00 ), 21],[new Date( 2019,6,15,21,30,00 ), 21],[new Date( 2019,6,15,21,45,00 ), 21],[new Date( 2019,6,15,22,00,00 ), 21],[new Date( 2019,6,15,22,15,00 ), 25],[new Date( 2019,6,15,22,30,00 ), 12],[new 
#Date( 2019,6,15,22,45,00 ), 12],[new Date( 2019,6,15,23,00,00 ), 12],[new Date( 2019,6,15,23,15,00 ), 12],[new Date( 2019,6,15,23,30,00 ), 12],[new Date( 2019,6,15,23,45,00 ), 12]]')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment