ScraperWiki code for Google Apps Script
/** @description
 * get data from scraperwiki into google apps script
 * See http://ramblings.mcpher.com/Home/excelquirks/codeuse for more details
 * @author <a href="mailto:bruce@mcpher.com">Bruce McPherson</a><a href="http://ramblings.mcpher.com"> ramblings.mcpher.com</a>
 */

/**
 * swSeewhatworks checks which scraperwikis have tables and updates a list of shortnames (as returned by the scraperwiki rest entry) with the default sql for each
 * @param {string} ws the worksheet name with the scraperwiki list of shortnames
 * @return {void} null
 */
function swSeewhatworks(ws) {
  // read the whole sheet of shortnames into a dataset
  var ds = new cDataSet().populateData(wholeSheet(ws),
    undefined, undefined, undefined, undefined, undefined, true);
  var cache = sheetCache(ds.headingRow().where());
  // write the default sql for each shortname into the column after the last one
  ds.rows().forEach(
    function (dr) {
      cache.setValue(swGetDefaultTableSql(dr.cell("short_name").toString(), false),
        dr.where().getRow(), dr.columns().count() + 1);
    }
  );
  cache.close();
}
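// Hedged usage sketch (not part of the original gist): "scraperwikis" is a
// hypothetical worksheet name whose "short_name" column lists the scraperwiki
// keys to be checked; swSeewhatworks writes the default sql it finds alongside each row.
function testSwSeewhatworks() {
  swSeewhatworks("scraperwikis");
}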
/**
 * swGetTables returns the cRest result of the query for table names
 * @param {string} shortName the scraperWiki key
 * @return {cRest} the result of the query for table names
 */
function swGetTables(shortName) {
  // standard sqlite query to list tables and views, excluding sqlite internals
  var tableDirectory = "SELECT name FROM sqlite_master " +
    "WHERE type IN ('table','view') AND name NOT LIKE 'sqlite_%' " +
    "UNION ALL " +
    "SELECT name FROM sqlite_temp_master " +
    "WHERE type IN ('table','view') " +
    "ORDER BY 1";
  return restQuery(undefined, "scraperwikidata",
    shortName + "&query=" + tableDirectory, undefined, undefined, undefined, undefined, false);
}
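// Hedged usage sketch (not part of the original gist): "some_scraper" is a
// hypothetical scraperwiki short name; the cRest result wraps the parsed JSON
// response, and responseData() gives the raw text for inspection.
function testSwGetTables() {
  var cr = swGetTables("some_scraper");
  if (cr) DebugPrint(cr.responseData());
}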
/**
 * swGetDefaultTableSql looks up which tables are defined in a given scraperwiki and returns sql to get the first one
 * @param {string} shortName the scraperWiki key
 * @param {boolean} optComplain whether to complain if there is a problem
 * @return {string} the sql query to get data from the first table
 */
function swGetDefaultTableSql(shortName, optComplain) {
  var complain = fixOptional(optComplain, true);
  var cr = swGetTables(shortName);
  if (!cr) {
    MsgBox("could not get any info on " + shortName);
  }
  else {
    var job = cr.jObjects().count() ? cr.jObjects().item(1) : null;
    if (job && job.hasChildren()) {
      // this is hokey - for the moment just take from the first table found
      return "select * from '" +
        job.children(1).child("name").toString() + "'";
    }
    else {
      DebugPrint(shortName, " did not return any tables: got this:", cr.responseData());
      if (complain) MsgBox("could not find any valid tables for " +
        shortName + " (" + (job ? job.serialize() : "no data") + ")");
    }
  }
  return "";
}
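// Hedged usage sketch (not part of the original gist): "some_scraper" is a
// hypothetical short name; passing false as optComplain suppresses the MsgBox
// so a failed lookup just returns an empty string.
function testSwGetDefaultTableSql() {
  var sql = swGetDefaultTableSql("some_scraper", false);
  DebugPrint(sql);
}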
/**
 * swCleanSheet creates a clean results sheet with column headings
 * @param {cJobject} job contains the list of column headings as keys
 * @param {string} ws the worksheet name to populate
 * @return {cDataSet} the dataset with the headings populated
 */
function swCleanSheet(job, ws) {
  // put headers to a clean sheet
  var ds = null;
  var cache = sheetCache(ws);
  cache.clear();
  if (job.hasChildren()) {
    job.children().forEach(
      function (cj, n) {
        cache.setValue(cj.key(), 1, n);
      }
    );
    ds = new cDataSet().populateData(vResize(wholeSheet(ws), 1, job.children().count()));
  }
  cache.commit();
  return ds;
}
/**
 * swGetHeaders organizes the headers needed given the scraperWiki response
 * @param {cJobject} job contains the query response
 * @return {cJobject} a jobject with a list of keys for column headings
 */
function swGetHeaders(job) {
  // take scraperwiki data and generate an organized set of keys from the headers found
  var cjKeys = new cJobject().init(null);
  job.children().forEach(
    function (cj) {
      cj.children().forEach(
        function (jo) {
          cjKeys.add(jo.key());
        }
      );
    }
  );
  return cjKeys;
}
/**
 * scraperWikiStuff does the query and populates the data
 * @param {string} shortName the scraperwiki key
 * @param {string} ws the worksheet name to populate
 * @param {string} optSql the optional sql string to get the data
 * @param {number} optLimit the optional limit to the number of rows to get
 * @return {cDataSet} the finished data
 */
function scraperWikiStuff(shortName, ws, optSql, optLimit) {
  // sort out the optional args
  var sql = fixOptional(optSql, swGetDefaultTableSql(shortName));
  var limit = IsMissing(optLimit) ? "" : "&limit=" + CStr(optLimit);
  var ds = null;
  // get the data
  var cr = restQuery(undefined, "scraperwikidata",
    shortName + "&query=" + sql + limit, undefined, undefined, undefined, undefined, false);
  // now organize it
  if (cr) {
    // get the unique headers and put them to a clean data set
    var crj = cr.jObject();
    var headJob = swGetHeaders(crj);
    if (!headJob) {
      MsgBox("didn't work at all " + crj.serialize());
    }
    else {
      ds = swCleanSheet(headJob, ws);
      if (!ds) {
        MsgBox("failed to get the expected data " + crj.serialize());
      }
      else {
        var cache = sheetCache(ds.headingRow().where());
        var r = ds.headingRow().where().getRow();
        // we know how big the cache needs to be so do it once off
        cache.extend(crj.children().count() + 1, ds.columns().count());
        // this is the data returned - each array member is a row
        crj.children().forEach(
          function (cj, rIndex) {
            cj.children().forEach(
              function (job, cIndex) {
                cache.setValue(job.value(), r + rIndex, cIndex);
              }
            );
          }
        );
        cache.close();
      }
    }
  }
  return ds;
}
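// Hedged end-to-end sketch (not part of the original gist): both the short name
// and the sheet name are hypothetical; the sql defaults to the first table found
// and the row count is capped at 100 via the scraperwiki limit parameter.
function testScraperWikiStuff() {
  scraperWikiStuff("some_scraper", "Sheet1", undefined, 100);
}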