Last active
July 2, 2016 14:10
-
-
Save timtomch/9df3bb7026393ccdf1432e4aaed87789 to your computer and use it in GitHub Desktop.
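OpenRefine operation history that builds a query string from the publication and author columns, queries the swissbib SRU service (Dublin Core, JSON and MARCXML responses) and extracts the first matching title - see http://make.opendata.ch/wiki/project:hds_out_of_the_box#openrefine_workflow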
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[
  {
    "op": "core/column-addition",
    "description": "Create column query string at index 5 based on column NOTICE - PUB using expression grel:join(with(value.split(\" \"),a,forEach(a,v,v.chomp(\",\").match(/([a-zA-Z\\u00C0-\\u017F-']{4,}|\\d{4})/)[0])),\" \")+\" \"+forNonBlank(cells[\"NOTICE - AUT\"],v,v.value.match(/(.* |)(\\w{2,})/)[1],\" \")",
    "engineConfig": {
      "mode": "row-based",
      "facets": [
        {
          "mode": "text",
          "caseSensitive": false,
          "query": "lausanne",
          "name": "query string year",
          "type": "text",
          "columnName": "query string year"
        },
        {
          "omitError": false,
          "expression": "isBlank(value)",
          "selectBlank": false,
          "selection": [
            {
              "v": {
                "v": false,
                "l": "false"
              }
            }
          ],
          "selectError": false,
          "invert": false,
          "name": "NOTICE - CONTRIB",
          "omitBlank": false,
          "type": "list",
          "columnName": "NOTICE - CONTRIB"
        }
      ]
    },
    "newColumnName": "query string",
    "columnInsertIndex": 5,
    "baseColumnName": "NOTICE - PUB",
    "expression": "grel:join(with(value.split(\" \"),a,forEach(a,v,v.chomp(\",\").match(/([a-zA-Z\\u00C0-\\u017F-']{4,}|\\d{4})/)[0])),\" \")+\" \"+forNonBlank(cells[\"NOTICE - AUT\"],v,v.value.match(/(.* |)(\\w{2,})/)[1],\" \")",
    "onError": "set-to-blank"
  },
  {
    "op": "core/column-addition-by-fetching-urls",
    "description": "Create column swissbib dc data at index 6 by fetching URLs based on column query string using expression grel:replace(\"http://sru.swissbib.ch/sru/search/defaultdb?query=+dc.anywhere+%3D+{QUERY}&operation=searchRetrieve&recordSchema=info%3Asru%2Fschema%2F1%2Fdc-v1.1-light&maximumRecords=10&startRecord=0&recordPacking=XML&availableDBs=defaultdb&sortKeys=Submit+query\", \"{QUERY}\", escape(replace(value,/[\\.\\']/,\"\"),'url'))",
    "engineConfig": {
      "mode": "row-based",
      "facets": [
        {
          "mode": "text",
          "caseSensitive": false,
          "query": "lausanne",
          "name": "query string",
          "type": "text",
          "columnName": "query string"
        },
        {
          "omitError": false,
          "expression": "isBlank(value)",
          "selectBlank": false,
          "selection": [
            {
              "v": {
                "v": false,
                "l": "false"
              }
            }
          ],
          "selectError": false,
          "invert": false,
          "name": "NOTICE - CONTRIB",
          "omitBlank": false,
          "type": "list",
          "columnName": "NOTICE - CONTRIB"
        }
      ]
    },
    "newColumnName": "swissbib dc data",
    "columnInsertIndex": 6,
    "baseColumnName": "query string",
    "urlExpression": "grel:replace(\"http://sru.swissbib.ch/sru/search/defaultdb?query=+dc.anywhere+%3D+{QUERY}&operation=searchRetrieve&recordSchema=info%3Asru%2Fschema%2F1%2Fdc-v1.1-light&maximumRecords=10&startRecord=0&recordPacking=XML&availableDBs=defaultdb&sortKeys=Submit+query\", \"{QUERY}\", escape(replace(value,/[\\.\\']/,\"\"),'url'))",
    "onError": "set-to-blank",
    "delay": 100
  },
  {
    "op": "core/column-addition-by-fetching-urls",
    "description": "Create column swissbib json data at index 6 by fetching URLs based on column query string using expression grel:replace(\"http://sru.swissbib.ch/sru/search/defaultdb?query=+dc.anywhere+%3D+{QUERY}&operation=searchRetrieve&recordSchema=info%3Asru%2Fschema%2Fjson&maximumRecords=10&startRecord=0&recordPacking=XML&availableDBs=defaultdb&sortKeys=Submit+query\", \"{QUERY}\", escape(replace(value,/[\\.\\']/,\"\"),'url'))",
    "engineConfig": {
      "mode": "row-based",
      "facets": [
        {
          "mode": "text",
          "caseSensitive": false,
          "query": "lausanne",
          "name": "query string",
          "type": "text",
          "columnName": "query string"
        },
        {
          "omitError": false,
          "expression": "isBlank(value)",
          "selectBlank": false,
          "selection": [
            {
              "v": {
                "v": false,
                "l": "false"
              }
            }
          ],
          "selectError": false,
          "invert": false,
          "name": "NOTICE - CONTRIB",
          "omitBlank": false,
          "type": "list",
          "columnName": "NOTICE - CONTRIB"
        }
      ]
    },
    "newColumnName": "swissbib json data",
    "columnInsertIndex": 6,
    "baseColumnName": "query string",
    "urlExpression": "grel:replace(\"http://sru.swissbib.ch/sru/search/defaultdb?query=+dc.anywhere+%3D+{QUERY}&operation=searchRetrieve&recordSchema=info%3Asru%2Fschema%2Fjson&maximumRecords=10&startRecord=0&recordPacking=XML&availableDBs=defaultdb&sortKeys=Submit+query\", \"{QUERY}\", escape(replace(value,/[\\.\\']/,\"\"),'url'))",
    "onError": "set-to-blank",
    "delay": 100
  },
  {
    "op": "core/column-addition-by-fetching-urls",
    "description": "Create column swissbib marcxml data at index 6 by fetching URLs based on column query string using expression grel:replace(\"http://sru.swissbib.ch/sru/search/defaultdb?query=+dc.anywhere+%3D+{QUERY}&operation=searchRetrieve&recordSchema=info%3Asrw%2Fschema%2F1%2Fmarcxml-v1.1-light&maximumRecords=10&startRecord=0&recordPacking=XML&availableDBs=defaultdb&sortKeys=Submit+query\", \"{QUERY}\", escape(replace(value,/[\\.\\']/,\"\"),'url'))",
    "engineConfig": {
      "mode": "row-based",
      "facets": [
        {
          "mode": "text",
          "caseSensitive": false,
          "query": "lausanne",
          "name": "query string",
          "type": "text",
          "columnName": "query string"
        },
        {
          "omitError": false,
          "expression": "isBlank(value)",
          "selectBlank": false,
          "selection": [
            {
              "v": {
                "v": false,
                "l": "false"
              }
            }
          ],
          "selectError": false,
          "invert": false,
          "name": "NOTICE - CONTRIB",
          "omitBlank": false,
          "type": "list",
          "columnName": "NOTICE - CONTRIB"
        }
      ]
    },
    "newColumnName": "swissbib marcxml data",
    "columnInsertIndex": 6,
    "baseColumnName": "query string",
    "urlExpression": "grel:replace(\"http://sru.swissbib.ch/sru/search/defaultdb?query=+dc.anywhere+%3D+{QUERY}&operation=searchRetrieve&recordSchema=info%3Asrw%2Fschema%2F1%2Fmarcxml-v1.1-light&maximumRecords=10&startRecord=0&recordPacking=XML&availableDBs=defaultdb&sortKeys=Submit+query\", \"{QUERY}\", escape(replace(value,/[\\.\\']/,\"\"),'url'))",
    "onError": "set-to-blank",
    "delay": 100
  },
  {
    "op": "core/column-addition",
    "description": "Create column title match at index 9 based on column swissbib dc data using expression grel:if(value.parseHtml().select(\"numberOfRecords\")[0].htmlText().toNumber()>0,value.parseHtml().select(\"recordData\")[0].select(\"dc|title\")[0].htmlText(), null)",
    "engineConfig": {
      "mode": "row-based",
      "facets": [
        {
          "mode": "text",
          "caseSensitive": false,
          "query": "lausanne",
          "name": "query string",
          "type": "text",
          "columnName": "query string"
        },
        {
          "omitError": false,
          "expression": "isBlank(value)",
          "selectBlank": false,
          "selection": [
            {
              "v": {
                "v": false,
                "l": "false"
              }
            }
          ],
          "selectError": false,
          "invert": false,
          "name": "NOTICE - CONTRIB",
          "omitBlank": false,
          "type": "list",
          "columnName": "NOTICE - CONTRIB"
        }
      ]
    },
    "newColumnName": "title match",
    "columnInsertIndex": 9,
    "baseColumnName": "swissbib dc data",
    "expression": "grel:if(value.parseHtml().select(\"numberOfRecords\")[0].htmlText().toNumber()>0,value.parseHtml().select(\"recordData\")[0].select(\"dc|title\")[0].htmlText(), null)",
    "onError": "set-to-blank"
  }
]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment