Last active
August 15, 2018 14:26
-
-
Save brucemcpherson/3623968 to your computer and use it in GitHub Desktop.
scraperwiki data
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Option Explicit | |
Public Sub ousefulMashup()
    ' Mash up Tony Hirst's scraperwiki election data with the mySociety
    ' postcode api, marking each row "in" or "out" of its stated ward.
    ' thanks to tony hirst for the data and method.
    ' http://blog.ouseful.info/2013/05/05/questioning-election-data-to-see-if-it-has-a-story-to-tell/
    Dim ds As cDataSet, dr As cDataRow, a As Variant, _
        worksheetName As String, scraperName As String, _
        job As cJobject, joc As cJobject, inWard As String, _
        n As Long
    worksheetName = "questionElection"
    scraperName = "iw_poll_notices_scrape"
    ' get data from Tony's scraperwiki and populate sheet
    With scraperWikiStuff(scraperName, worksheetName)
        Set ds = New cDataSet
        ds.load worksheetName
    End With
    ' add extra columns
    With lastCell(ds.headingRow.where)
        .Offset(, 1).value = "postcode"
        .Offset(, 2).value = "in ward"
    End With
    ds.tearDown
    ' repopulate with new columns
    Set ds = New cDataSet
    With ds.load(worksheetName)
        ' extract post code - assumes it is the last comma-separated
        ' token of the address (TODO confirm against the scraped data)
        For Each dr In ds.rows
            a = Split(dr.toString("address"), ",")
            If arrayLength(a) > 0 Then
                dr.cell("postcode").value = Trim(CStr(a(UBound(a))))
            End If
        Next dr
        .bigCommit
        .tearDown
    End With
    ' use mysociety api to get ward info
    ' these options will not bother trying to populate
    With restQuery(worksheetName, "my society", , "postcode", _
        , , , False, False)
        ' check for jobjects of type UTE
        n = 0
        For Each job In .jObjects
            n = n + 1
            inWard = "out"
            If Not job.childExists("areas") Is Nothing Then
                For Each joc In job.child("areas").children
                    If Not joc.childExists("type") Is Nothing Then
                        If joc.child("type").value = "UTE" Then
                            ' we have the right type, check name matches
                            If makeKey(joc.child("name").value) = _
                                makeKey(.dSet.value(n, "ward")) Then
                                inWard = "in"
                                Exit For
                            End If
                        End If
                    End If
                Next joc
            End If
            ' mark whether it's in or out. this write now happens for every
            ' row - previously it sat inside the "areas" If, so responses
            ' with no areas child were left blank instead of marked "out"
            .dSet.cell(n, "in ward").value = inWard
        Next job
        .dSet.bigCommit
        .tearDown
    End With
End Sub
Public Sub swSeewhatworks()
    ' For each scraper named on the "scraperwiki" sheet, write its default
    ' table sql in the first column after the existing data (blank on failure,
    ' since complaints are suppressed).
    Dim scrapers As New cDataSet
    Dim scraperRow As cDataRow
    scrapers.populateData wholeSheet("scraperwiki"), , , , , , True
    ' suspend recalculation while cells are written one at a time
    Application.Calculation = xlCalculationManual
    For Each scraperRow In scrapers.rows
        With scraperRow
            .where.Resize(, 1).Offset(, .columns.count).value = _
                swGetDefaultTableSql(.toString("short_name"), False)
        End With
    Next scraperRow
    Application.Calculation = xlCalculationAutomatic
    Set scrapers = Nothing
End Sub
Public Sub testScraperWikiInput()
    ' prompt the user for a scraperwiki shortname, then run the data test on it
    Call testScraperWikiData(InputBox("shortname?"))
End Sub
Public Sub testScraperWikiData(shortName As String)
    ' pull the given scraper's data into the "scraperwikidata" sheet
    Call scraperWikiStuff(shortName, "scraperwikidata")
End Sub
Private Function swGetTables(shortName As String) As cRest
    ' Ask the scraperwiki sqlite api which tables and views exist in the
    ' scraper identified by shortName; returns the cRest response wrapper.
    Dim tableDirectory As String
    ' union of the permanent and temporary sqlite catalogs, sorted by name
    tableDirectory = "SELECT name FROM sqlite_master " & _
        "WHERE type IN ('table','view') AND name NOT LIKE 'sqlite_%' " & _
        "Union all " & _
        "SELECT name FROM sqlite_temp_master " & _
        "WHERE type IN ('table','view') " & _
        "ORDER BY 1"
    ' lets see if we can get the tables that exist in this shaperwiki
    Set swGetTables = restQuery(, "scraperwikidata", _
        shortName & "&query=" & tableDirectory, , , , , False)
End Function
Private Function swGetDefaultTableSql(shortName As String, Optional complain As Boolean = True) As String
    ' Build a "select * from 'tablename'" for the first table found in the
    ' given scraperwiki. Returns an empty string when the lookup fails or
    ' no tables exist; complaints via MsgBox can be suppressed with complain:=False.
    Dim cr As cRest
    Set cr = swGetTables(shortName)
    If cr Is Nothing Then
        ' fixed message: previously read "could get info on", dropping the "not"
        MsgBox ("could not get info on " & shortName)
    Else
        If cr.jObject.hasChildren Then
            ' this is hokey - for the moment just take from the first table found
            swGetDefaultTableSql = "select * from '" & _
                cr.jObject.children(1).child("name").toString & "'"
        Else
            If complain Then MsgBox ("could not find any valid tables for " & _
                shortName & "(" & cr.jObject.serialize & ")")
        End If
    End If
End Function
Private Function scraperWikiStuff(shortName As String, ws As String, _
    Optional optSql As String = vbNullString, Optional optLimit As Long = 0) As cDataSet
    ' Fetch rows from a scraperwiki's sqlite api and lay them out on sheet ws,
    ' returning a cDataSet over the populated range (unset on any failure).
    ' - shortName: scraperwiki short name to query
    ' - optSql: sql to run; defaults to select * from the first table found
    ' - optLimit: when non-zero, appended to the query as a &limit= clause
    Dim cr As cRest, ds As cDataSet, job As cJobject, r As Range, _
    cj As cJobject, headJob As cJobject, sql As String, limit As String
    sql = optSql
    If sql = vbNullString Then
        sql = swGetDefaultTableSql(shortName)
    End If
    If optLimit <> 0 Then
        limit = "&limit=" & CStr(optLimit)
    End If
    ' get the data
    Set cr = restQuery(, "scraperwikidata", _
        shortName & "&query=" & sql & limit, , , , , False)
    ' now organize it
    If Not cr Is Nothing Then
        ' get the unique headers and put them to a clean data set
        Set headJob = swGetHeaders(cr.jObject)
        If headJob Is Nothing Then
            MsgBox ("didnt work at all " & cr.jObject.serialize)
        Else
            Set ds = swCleanSheet(headJob, ws)
            If ds Is Nothing Then
                MsgBox ("failed to get the expected data " & cr.jObject.serialize)
            Else
                ' suspend recalculation while cells are written one by one
                Application.Calculation = xlCalculationManual
                With ds
                    Set r = firstCell(.headingRow.where)
                    ' this is the data returned - each array member is a row
                    For Each cj In cr.jObject.children
                        Set r = r.Offset(1)
                        ' each child is a column; look up its position by key so
                        ' rows with missing/reordered fields still land correctly
                        For Each job In cj.children
                            r.Offset(, .headingRow.exists(job.key).column - 1).value = job.value
                        Next job
                    Next cj
                    ' repopulate
                    Set scraperWikiStuff = .rePopulate
                End With
                Application.Calculation = xlCalculationAutomatic
            End If
        End If
    End If
End Function
Private Function swCleanSheet(job As cJobject, ws As String) As cDataSet
    ' Wipe sheet ws and write one heading cell per child key of job, then
    ' return a cDataSet over that single heading row (unset when job is empty).
    Dim headings As New cDataSet
    Dim heading As cJobject
    Dim topLeft As Range
    Set topLeft = firstCell(wholeSheet(ws))
    topLeft.Worksheet.Cells.ClearContents
    If job.children.count > 0 Then
        ' lay the keys out left to right as column headings
        For Each heading In job.children
            topLeft.Offset(, heading.childIndex - 1).value = heading.key
        Next heading
        ' wrap the freshly written heading row in a data set
        Set swCleanSheet = headings.populateData(topLeft.Resize(1, job.children.count))
    End If
End Function
Private Function swGetHeaders(job As cJobject) As cJobject
    ' Walk every row of scraperwiki data and collect the field keys seen,
    ' returning them gathered in a single cJobject used as a key collection.
    Dim rowJob As cJobject, fieldJob As cJobject
    Dim keyBag As New cJobject
    With keyBag.init(Nothing)
        For Each rowJob In job.children
            For Each fieldJob In rowJob.children
                ' we can use a cjobject as a collection
                .add fieldJob.key
            Next fieldJob
        Next rowJob
    End With
    Set swGetHeaders = keyBag
End Function
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
For more information on this, see http://ramblings.mcpher.com/Home/excelquirks/json/rest/scraperwiki