Created
February 8, 2013 15:52
-
-
Save gloriousgeneralist/4739876 to your computer and use it in GitHub Desktop.
JSON for OpenRefine to transform a CV in APA style into columns and query SHERPA/RoMEO for publisher rights. See this blog post for full details: http://www.gloriousgeneralist.com/2013/02/analyzing-cvs-for-publisher-copyrights-and-self-archiving-with-openrefine/
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[ | |
{ | |
"op": "core/column-split", | |
"description": "Split column Column 1 by separator", | |
"engineConfig": { | |
"facets": [], | |
"mode": "row-based" | |
}, | |
"columnName": "Column 1", | |
"guessCellType": true, | |
"removeOriginalColumn": true, | |
"mode": "separator", | |
"separator": " (", | |
"regex": false, | |
"maxColumns": 2 | |
}, | |
{ | |
"op": "core/column-split", | |
"description": "Split column Column 1 2 by separator", | |
"engineConfig": { | |
"facets": [], | |
"mode": "row-based" | |
}, | |
"columnName": "Column 1 2", | |
"guessCellType": true, | |
"removeOriginalColumn": true, | |
"mode": "separator", | |
"separator": "). ", | |
"regex": false, | |
"maxColumns": 2 | |
}, | |
{ | |
"op": "core/column-split", | |
"description": "Split column Column 1 2 2 by separator", | |
"engineConfig": { | |
"facets": [], | |
"mode": "row-based" | |
}, | |
"columnName": "Column 1 2 2", | |
"guessCellType": true, | |
"removeOriginalColumn": true, | |
"mode": "separator", | |
"separator": ". ", | |
"regex": false, | |
"maxColumns": 2 | |
}, | |
{ | |
"op": "core/column-split", | |
"description": "Split column Column 1 2 2 2 by separator", | |
"engineConfig": { | |
"facets": [], | |
"mode": "row-based" | |
}, | |
"columnName": "Column 1 2 2 2", | |
"guessCellType": true, | |
"removeOriginalColumn": true, | |
"mode": "separator", | |
"separator": ", ", | |
"regex": false, | |
"maxColumns": 2 | |
}, | |
{ | |
"op": "core/column-rename", | |
"description": "Rename column Column 1 2 1 to Date", | |
"oldColumnName": "Column 1 2 1", | |
"newColumnName": "Date" | |
}, | |
{ | |
"op": "core/column-rename", | |
"description": "Rename column Column 1 1 to Author", | |
"oldColumnName": "Column 1 1", | |
"newColumnName": "Author" | |
}, | |
{ | |
"op": "core/column-rename", | |
"description": "Rename column Column 1 2 2 1 to Article", | |
"oldColumnName": "Column 1 2 2 1", | |
"newColumnName": "Article" | |
}, | |
{ | |
"op": "core/column-rename", | |
"description": "Rename column Column 1 2 2 2 1 to Journal", | |
"oldColumnName": "Column 1 2 2 2 1", | |
"newColumnName": "Journal" | |
}, | |
{ | |
"op": "core/column-rename", | |
"description": "Rename column Column 1 2 2 2 2 to Bib", | |
"oldColumnName": "Column 1 2 2 2 2", | |
"newColumnName": "Bib" | |
}, | |
{ | |
"op": "core/column-addition-by-fetching-urls", | |
"description": "Create column SHERPA at index 4 by fetching URLs based on column Journal using expression grel:'http://www.sherpa.ac.uk/romeo/api29.php?ak=[YOUR API KEY HERE]&qtype=starts&jtitle=' + escape(value,'url')", | |
"engineConfig": { | |
"facets": [], | |
"mode": "row-based" | |
}, | |
"newColumnName": "SHERPA", | |
"columnInsertIndex": 4, | |
"baseColumnName": "Journal", | |
"urlExpression": "grel:'http://www.sherpa.ac.uk/romeo/api29.php?ak=[YOUR API KEY HERE]&qtype=starts&jtitle=' + escape(value,'url')", | |
"onError": "set-to-blank", | |
"delay": 1000 | |
}, | |
{ | |
"op": "core/column-addition", | |
"description": "Create column Prearchiving at index 5 based on column SHERPA using expression grel:value.parseHtml().select(\"prearchiving\")[0].htmlText()", | |
"engineConfig": { | |
"facets": [], | |
"mode": "row-based" | |
}, | |
"newColumnName": "Prearchiving", | |
"columnInsertIndex": 5, | |
"baseColumnName": "SHERPA", | |
"expression": "grel:value.parseHtml().select(\"prearchiving\")[0].htmlText()", | |
"onError": "set-to-blank" | |
}, | |
{ | |
"op": "core/column-addition", | |
"description": "Create column Postarchiving at index 5 based on column SHERPA using expression grel:value.parseHtml().select(\"postarchiving\")[0].htmlText()", | |
"engineConfig": { | |
"facets": [], | |
"mode": "row-based" | |
}, | |
"newColumnName": "Postarchiving", | |
"columnInsertIndex": 5, | |
"baseColumnName": "SHERPA", | |
"expression": "grel:value.parseHtml().select(\"postarchiving\")[0].htmlText()", | |
"onError": "set-to-blank" | |
}, | |
{ | |
"op": "core/column-addition", | |
"description": "Create column Conditions at index 5 based on column SHERPA using expression grel:forEach(value.parseHtml().select(\"condition\"),v,v.htmlText()).join(\". \")", | |
"engineConfig": { | |
"facets": [], | |
"mode": "row-based" | |
}, | |
"newColumnName": "Conditions", | |
"columnInsertIndex": 5, | |
"baseColumnName": "SHERPA", | |
"expression": "grel:forEach(value.parseHtml().select(\"condition\"),v,v.htmlText()).join(\". \")", | |
"onError": "set-to-blank" | |
}, | |
{ | |
"op": "core/column-removal", | |
"description": "Remove column SHERPA", | |
"columnName": "SHERPA" | |
} | |
] |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment