Skip to content

Instantly share code, notes, and snippets.

@timtomch
Last active July 2, 2016 14:10
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
Save timtomch/9df3bb7026393ccdf1432e4aaed87789 to your computer and use it in GitHub Desktop.
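OpenRefine operation history (JSON) that builds a search string from the "NOTICE - PUB" and "NOTICE - AUT" columns, queries the swissbib SRU service with it (Dublin Core, JSON and MARCXML responses), and extracts the title of the first matching record - see http://make.opendata.ch/wiki/project:hds_out_of_the_box#openrefine_workflow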
[
{
"op": "core/column-addition",
"description": "Create column query string at index 5 based on column NOTICE - PUB using expression grel:join(with(value.split(\" \"),a,forEach(a,v,v.chomp(\",\").match(/([a-zA-Z\\u00C0-\\u017F-']{4,}|\\d{4})/)[0])),\" \")+\" \"+forNonBlank(cells[\"NOTICE - AUT\"],v,v.value.match(/(.* |)(\\w{2,})/)[1],\" \")",
"engineConfig": {
"mode": "row-based",
"facets": [
{
"mode": "text",
"caseSensitive": false,
"query": "lausanne",
"name": "query string year",
"type": "text",
"columnName": "query string year"
},
{
"omitError": false,
"expression": "isBlank(value)",
"selectBlank": false,
"selection": [
{
"v": {
"v": false,
"l": "false"
}
}
],
"selectError": false,
"invert": false,
"name": "NOTICE - CONTRIB",
"omitBlank": false,
"type": "list",
"columnName": "NOTICE - CONTRIB"
}
]
},
"newColumnName": "query string",
"columnInsertIndex": 5,
"baseColumnName": "NOTICE - PUB",
"expression": "grel:join(with(value.split(\" \"),a,forEach(a,v,v.chomp(\",\").match(/([a-zA-Z\\u00C0-\\u017F-']{4,}|\\d{4})/)[0])),\" \")+\" \"+forNonBlank(cells[\"NOTICE - AUT\"],v,v.value.match(/(.* |)(\\w{2,})/)[1],\" \")",
"onError": "set-to-blank"
},
{
"op": "core/column-addition-by-fetching-urls",
"description": "Create column swissbib dc data at index 6 by fetching URLs based on column query string using expression grel:replace(\"http://sru.swissbib.ch/sru/search/defaultdb?query=+dc.anywhere+%3D+{QUERY}&operation=searchRetrieve&recordSchema=info%3Asru%2Fschema%2F1%2Fdc-v1.1-light&maximumRecords=10&startRecord=0&recordPacking=XML&availableDBs=defaultdb&sortKeys=Submit+query\", \"{QUERY}\", escape(replace(value,/[\\.\\']/,\"\"),'url'))",
"engineConfig": {
"mode": "row-based",
"facets": [
{
"mode": "text",
"caseSensitive": false,
"query": "lausanne",
"name": "query string",
"type": "text",
"columnName": "query string"
},
{
"omitError": false,
"expression": "isBlank(value)",
"selectBlank": false,
"selection": [
{
"v": {
"v": false,
"l": "false"
}
}
],
"selectError": false,
"invert": false,
"name": "NOTICE - CONTRIB",
"omitBlank": false,
"type": "list",
"columnName": "NOTICE - CONTRIB"
}
]
},
"newColumnName": "swissbib dc data",
"columnInsertIndex": 6,
"baseColumnName": "query string",
"urlExpression": "grel:replace(\"http://sru.swissbib.ch/sru/search/defaultdb?query=+dc.anywhere+%3D+{QUERY}&operation=searchRetrieve&recordSchema=info%3Asru%2Fschema%2F1%2Fdc-v1.1-light&maximumRecords=10&startRecord=0&recordPacking=XML&availableDBs=defaultdb&sortKeys=Submit+query\", \"{QUERY}\", escape(replace(value,/[\\.\\']/,\"\"),'url'))",
"onError": "set-to-blank",
"delay": 100
},
{
"op": "core/column-addition-by-fetching-urls",
"description": "Create column swissbib json data at index 6 by fetching URLs based on column query string using expression grel:replace(\"http://sru.swissbib.ch/sru/search/defaultdb?query=+dc.anywhere+%3D+{QUERY}&operation=searchRetrieve&recordSchema=info%3Asru%2Fschema%2Fjson&maximumRecords=10&startRecord=0&recordPacking=XML&availableDBs=defaultdb&sortKeys=Submit+query\", \"{QUERY}\", escape(replace(value,/[\\.\\']/,\"\"),'url'))",
"engineConfig": {
"mode": "row-based",
"facets": [
{
"mode": "text",
"caseSensitive": false,
"query": "lausanne",
"name": "query string",
"type": "text",
"columnName": "query string"
},
{
"omitError": false,
"expression": "isBlank(value)",
"selectBlank": false,
"selection": [
{
"v": {
"v": false,
"l": "false"
}
}
],
"selectError": false,
"invert": false,
"name": "NOTICE - CONTRIB",
"omitBlank": false,
"type": "list",
"columnName": "NOTICE - CONTRIB"
}
]
},
"newColumnName": "swissbib json data",
"columnInsertIndex": 6,
"baseColumnName": "query string",
"urlExpression": "grel:replace(\"http://sru.swissbib.ch/sru/search/defaultdb?query=+dc.anywhere+%3D+{QUERY}&operation=searchRetrieve&recordSchema=info%3Asru%2Fschema%2Fjson&maximumRecords=10&startRecord=0&recordPacking=XML&availableDBs=defaultdb&sortKeys=Submit+query\", \"{QUERY}\", escape(replace(value,/[\\.\\']/,\"\"),'url'))",
"onError": "set-to-blank",
"delay": 100
},
{
"op": "core/column-addition-by-fetching-urls",
"description": "Create column swissbib marcxml data at index 6 by fetching URLs based on column query string using expression grel:replace(\"http://sru.swissbib.ch/sru/search/defaultdb?query=+dc.anywhere+%3D+{QUERY}&operation=searchRetrieve&recordSchema=info%3Asrw%2Fschema%2F1%2Fmarcxml-v1.1-light&maximumRecords=10&startRecord=0&recordPacking=XML&availableDBs=defaultdb&sortKeys=Submit+query\", \"{QUERY}\", escape(replace(value,/[\\.\\']/,\"\"),'url'))",
"engineConfig": {
"mode": "row-based",
"facets": [
{
"mode": "text",
"caseSensitive": false,
"query": "lausanne",
"name": "query string",
"type": "text",
"columnName": "query string"
},
{
"omitError": false,
"expression": "isBlank(value)",
"selectBlank": false,
"selection": [
{
"v": {
"v": false,
"l": "false"
}
}
],
"selectError": false,
"invert": false,
"name": "NOTICE - CONTRIB",
"omitBlank": false,
"type": "list",
"columnName": "NOTICE - CONTRIB"
}
]
},
"newColumnName": "swissbib marcxml data",
"columnInsertIndex": 6,
"baseColumnName": "query string",
"urlExpression": "grel:replace(\"http://sru.swissbib.ch/sru/search/defaultdb?query=+dc.anywhere+%3D+{QUERY}&operation=searchRetrieve&recordSchema=info%3Asrw%2Fschema%2F1%2Fmarcxml-v1.1-light&maximumRecords=10&startRecord=0&recordPacking=XML&availableDBs=defaultdb&sortKeys=Submit+query\", \"{QUERY}\", escape(replace(value,/[\\.\\']/,\"\"),'url'))",
"onError": "set-to-blank",
"delay": 100
},
{
"op": "core/column-addition",
"description": "Create column title match at index 9 based on column swissbib dc data using expression grel:if(value.parseHtml().select(\"numberOfRecords\")[0].htmlText().toNumber()>0,value.parseHtml().select(\"recordData\")[0].select(\"dc|title\")[0].htmlText(), null)",
"engineConfig": {
"mode": "row-based",
"facets": [
{
"mode": "text",
"caseSensitive": false,
"query": "lausanne",
"name": "query string",
"type": "text",
"columnName": "query string"
},
{
"omitError": false,
"expression": "isBlank(value)",
"selectBlank": false,
"selection": [
{
"v": {
"v": false,
"l": "false"
}
}
],
"selectError": false,
"invert": false,
"name": "NOTICE - CONTRIB",
"omitBlank": false,
"type": "list",
"columnName": "NOTICE - CONTRIB"
}
]
},
"newColumnName": "title match",
"columnInsertIndex": 9,
"baseColumnName": "swissbib dc data",
"expression": "grel:if(value.parseHtml().select(\"numberOfRecords\")[0].htmlText().toNumber()>0,value.parseHtml().select(\"recordData\")[0].select(\"dc|title\")[0].htmlText(), null)",
"onError": "set-to-blank"
}
]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment