Skip to content

Instantly share code, notes, and snippets.

@brucemcpherson
Last active August 15, 2018 14:26
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 2 You must be signed in to fork a gist
  • Save brucemcpherson/3623968 to your computer and use it in GitHub Desktop.
Save brucemcpherson/3623968 to your computer and use it in GitHub Desktop.
scraperwiki data
Option Explicit
Public Sub ousefulMashup()
' Mashup: pull election poll-notice data from a scraperwiki into a sheet,
' extract each row's postcode from its address field, then query the
' mySociety API and mark whether each postcode lies in its stated ward.
' thanks to tony hirst for the data and method.
' http://blog.ouseful.info/2013/05/05/questioning-election-data-to-see-if-it-has-a-story-to-tell/
Dim ds As cDataSet, dr As cDataRow, a As Variant, _
worksheetName As String, scraperName As String, _
job As cJobject, joc As cJobject, inWard As String, _
n As Long
worksheetName = "questionElection"
scraperName = "iw_poll_notices_scrape"
' get data from Tony's scraperwiki onto the sheet, then wrap the sheet
' in a cDataSet for manipulation
With scraperWikiStuff(scraperName, worksheetName)
Set ds = New cDataSet
ds.load worksheetName
End With
' add two extra heading columns after the last existing heading
With lastCell(ds.headingRow.where)
.Offset(, 1).value = "postcode"
.Offset(, 2).value = "in ward"
End With
ds.tearDown
' reload so the data set picks up the new columns
Set ds = New cDataSet
With ds.load(worksheetName)
' extract post code - assumes it is the last comma-separated
' component of the address field (TODO confirm for all rows)
For Each dr In ds.rows
a = Split(dr.toString("address"), ",")
If arrayLength(a) > 0 Then
dr.cell("postcode").value = Trim(CStr(a(UBound(a))))
End If
Next dr
.bigCommit
.tearDown
End With
' use mysociety api to get ward info for each postcode
' the trailing False arguments suppress sheet population by the query
With restQuery(worksheetName, "my society", , "postcode", _
, , , False, False)
' walk each response looking for area children of type "UTE";
' n tracks the matching 1-based sheet row
n = 0
For Each job In .jObjects
n = n + 1
inWard = "out"
' childExists appears to return the child object or Nothing - TODO confirm
If Not job.childExists("areas") Is Nothing Then
For Each joc In job.child("areas").children
If Not joc.childExists("type") Is Nothing Then
If joc.child("type").value = "UTE" Then
' we have the right type, check the area name matches the ward column
If makeKey(joc.child("name").value) = _
makeKey(.dSet.value(n, "ward")) Then
inWard = "in"
Exit For
End If
End If
End If
Next joc
' mark whether it's in or out
' NOTE(review): this write sits inside the "areas" check, so rows
' whose response has no "areas" child never get an "in ward" value
.dSet.cell(n, "in ward").value = inWard
End If
Next job
.dSet.bigCommit
.tearDown
End With
End Sub
Public Sub swSeewhatworks()
    ' For every scraper listed on the "scraperwiki" sheet, look up its
    ' default SQL and write it into the column just after the data.
    Dim ds As cDataSet
    Dim dr As cDataRow
    Dim target As Range

    Set ds = New cDataSet
    ds.populateData wholeSheet("scraperwiki"), , , , , , True

    ' suspend recalculation while writing cell by cell
    Application.Calculation = xlCalculationManual
    For Each dr In ds.rows
        Set target = dr.where.Resize(, 1).Offset(, dr.columns.count)
        target.value = swGetDefaultTableSql(dr.toString("short_name"), False)
    Next dr
    Application.Calculation = xlCalculationAutomatic

    Set ds = Nothing
End Sub
Public Sub testScraperWikiInput()
    ' Prompt for a scraperwiki short name and run the data test on it.
    Dim shortName As String
    shortName = InputBox("shortname?")
    testScraperWikiData shortName
End Sub
Public Sub testScraperWikiData(shortName As String)
    ' Pull the given scraper's data into the "scraperwikidata" sheet,
    ' discarding the returned data set.
    Call scraperWikiStuff(shortName, "scraperwikidata")
End Sub
Private Function swGetTables(shortName As String) As cRest
    ' Query sqlite_master (and sqlite_temp_master) on the given scraperwiki
    ' to list the tables and views it defines. Returns the cRest response.
    Dim tableListSql As String
    tableListSql = "SELECT name FROM sqlite_master " & _
        "WHERE type IN ('table','view') AND name NOT LIKE 'sqlite_%' " & _
        "Union all " & _
        "SELECT name FROM sqlite_temp_master " & _
        "WHERE type IN ('table','view') " & _
        "ORDER BY 1"
    Set swGetTables = restQuery(, "scraperwikidata", _
        shortName & "&query=" & tableListSql, , , , , False)
End Function
Private Function swGetDefaultTableSql(shortName As String, Optional complain As Boolean = True) As String
' Look up the tables defined in the given scraperwiki and return a
' "select * from 'tablename'" statement for the first table found.
' Returns an empty string when the lookup fails or no tables exist.
' shortName: the scraperwiki short name
' complain: when True, report a missing-table problem via MsgBox
Dim cr As cRest
Set cr = swGetTables(shortName)
If cr Is Nothing Then
' fixed message: previously read "could get info on" (missing "not")
MsgBox ("could not get info on " & shortName)
Else
If cr.jObject.hasChildren Then
' this is hokey - for the moment just take from the first table found
swGetDefaultTableSql = "select * from '" & _
cr.jObject.children(1).child("name").toString & "'"
Else
If complain Then MsgBox ("could not find any valid tables for " & _
shortName & "(" & cr.jObject.serialize & ")")
End If
End If
End Function
Private Function scraperWikiStuff(shortName As String, ws As String, _
Optional optSql As String = vbNullString, Optional optLimit As Long = 0) As cDataSet
' Fetch data from a scraperwiki and populate worksheet ws with it.
' shortName: the scraperwiki short name
' ws: name of the target worksheet (cleared and rewritten)
' optSql: query to run; defaults to select * from the first table found
' optLimit: row limit appended to the query; 0 means no limit clause
' Returns the repopulated cDataSet, or Nothing when any step fails.
Dim cr As cRest, ds As cDataSet, job As cJobject, r As Range, _
cj As cJobject, headJob As cJobject, sql As String, limit As String
sql = optSql
If sql = vbNullString Then
sql = swGetDefaultTableSql(shortName)
End If
If optLimit <> 0 Then
limit = "&limit=" & CStr(optLimit)
End If
' get the data
Set cr = restQuery(, "scraperwikidata", _
shortName & "&query=" & sql & limit, , , , , False)
' now organize it
If Not cr Is Nothing Then
' get the unique headers and put them to a clean data set
Set headJob = swGetHeaders(cr.jObject)
If headJob Is Nothing Then
MsgBox ("didnt work at all " & cr.jObject.serialize)
Else
Set ds = swCleanSheet(headJob, ws)
If ds Is Nothing Then
MsgBox ("failed to get the expected data " & cr.jObject.serialize)
Else
' suspend recalculation while filling the sheet cell by cell
Application.Calculation = xlCalculationManual
With ds
Set r = firstCell(.headingRow.where)
' this is the data returned - each array member is a row
For Each cj In cr.jObject.children
Set r = r.Offset(1)
' each child is a column - place it beneath its matching heading
For Each job In cj.children
r.Offset(, .headingRow.exists(job.key).column - 1).value = job.value
Next job
Next cj
' repopulate the data set from the now-filled sheet
Set scraperWikiStuff = .rePopulate
End With
Application.Calculation = xlCalculationAutomatic
End If
End If
End If
End Function
Private Function swCleanSheet(job As cJobject, ws As String) As cDataSet
    ' Clear worksheet ws, write one heading per child key of job, and
    ' return a cDataSet over the heading row (Nothing when job is empty).
    Dim ds As cDataSet
    Dim cj As cJobject
    Dim anchor As Range

    Set ds = New cDataSet
    Set anchor = firstCell(wholeSheet(ws))
    anchor.Worksheet.Cells.ClearContents

    If job.children.count > 0 Then
        ' lay the keys out left to right as column headings
        For Each cj In job.children
            anchor.Offset(, cj.childIndex - 1).value = cj.key
        Next cj
        ' wrap the heading row in a data set
        Set swCleanSheet = ds.populateData(anchor.Resize(1, job.children.count))
    End If
End Function
Private Function swGetHeaders(job As cJobject) As cJobject
    ' Scan every row of scraperwiki data and collect the column keys
    ' encountered, returned as a cJobject used as a key collection.
    Dim rowJob As cJobject
    Dim colJob As cJobject
    Dim keyBag As cJobject

    Set keyBag = New cJobject
    With keyBag.init(Nothing)
        For Each rowJob In job.children
            For Each colJob In rowJob.children
                ' a cJobject doubles as a collection here
                .add colJob.key
            Next colJob
        Next rowJob
    End With
    Set swGetHeaders = keyBag
End Function
@brucemcpherson
Copy link
Author

@brucemcpherson
Copy link
Author

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment