Skip to content

Instantly share code, notes, and snippets.

@gloriousgeneralist
Created February 8, 2013 15:52
Show Gist options
  • Save gloriousgeneralist/4739876 to your computer and use it in GitHub Desktop.
JSON for OpenRefine to transform a CV in APA style into columns and query SHERPA/RoMEO for publisher rights. See this blog post for full details: http://www.gloriousgeneralist.com/2013/02/analyzing-cvs-for-publisher-copyrights-and-self-archiving-with-openrefine/
[
{
"op": "core/column-split",
"description": "Split column Column 1 by separator",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"columnName": "Column 1",
"guessCellType": true,
"removeOriginalColumn": true,
"mode": "separator",
"separator": " (",
"regex": false,
"maxColumns": 2
},
{
"op": "core/column-split",
"description": "Split column Column 1 2 by separator",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"columnName": "Column 1 2",
"guessCellType": true,
"removeOriginalColumn": true,
"mode": "separator",
"separator": "). ",
"regex": false,
"maxColumns": 2
},
{
"op": "core/column-split",
"description": "Split column Column 1 2 2 by separator",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"columnName": "Column 1 2 2",
"guessCellType": true,
"removeOriginalColumn": true,
"mode": "separator",
"separator": ". ",
"regex": false,
"maxColumns": 2
},
{
"op": "core/column-split",
"description": "Split column Column 1 2 2 2 by separator",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"columnName": "Column 1 2 2 2",
"guessCellType": true,
"removeOriginalColumn": true,
"mode": "separator",
"separator": ", ",
"regex": false,
"maxColumns": 2
},
{
"op": "core/column-rename",
"description": "Rename column Column 1 2 1 to Date",
"oldColumnName": "Column 1 2 1",
"newColumnName": "Date"
},
{
"op": "core/column-rename",
"description": "Rename column Column 1 1 to Author",
"oldColumnName": "Column 1 1",
"newColumnName": "Author"
},
{
"op": "core/column-rename",
"description": "Rename column Column 1 2 2 1 to Article",
"oldColumnName": "Column 1 2 2 1",
"newColumnName": "Article"
},
{
"op": "core/column-rename",
"description": "Rename column Column 1 2 2 2 1 to Journal",
"oldColumnName": "Column 1 2 2 2 1",
"newColumnName": "Journal"
},
{
"op": "core/column-rename",
"description": "Rename column Column 1 2 2 2 2 to Bib",
"oldColumnName": "Column 1 2 2 2 2",
"newColumnName": "Bib"
},
{
"op": "core/column-addition-by-fetching-urls",
"description": "Create column SHERPA at index 4 by fetching URLs based on column Journal using expression grel:'http://www.sherpa.ac.uk/romeo/api29.php?ak=[YOUR API KEY HERE]&qtype=starts&jtitle=' + escape(value,'url')",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"newColumnName": "SHERPA",
"columnInsertIndex": 4,
"baseColumnName": "Journal",
"urlExpression": "grel:'http://www.sherpa.ac.uk/romeo/api29.php?ak=[YOUR API KEY HERE]&qtype=starts&jtitle=' + escape(value,'url')",
"onError": "set-to-blank",
"delay": 1000
},
{
"op": "core/column-addition",
"description": "Create column Prearchiving at index 5 based on column SHERPA using expression grel:value.parseHtml().select(\"prearchiving\")[0].htmlText()",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"newColumnName": "Prearchiving",
"columnInsertIndex": 5,
"baseColumnName": "SHERPA",
"expression": "grel:value.parseHtml().select(\"prearchiving\")[0].htmlText()",
"onError": "set-to-blank"
},
{
"op": "core/column-addition",
"description": "Create column Postarchiving at index 5 based on column SHERPA using expression grel:value.parseHtml().select(\"postarchiving\")[0].htmlText()",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"newColumnName": "Postarchiving",
"columnInsertIndex": 5,
"baseColumnName": "SHERPA",
"expression": "grel:value.parseHtml().select(\"postarchiving\")[0].htmlText()",
"onError": "set-to-blank"
},
{
"op": "core/column-addition",
"description": "Create column Conditions at index 5 based on column SHERPA using expression grel:forEach(value.parseHtml().select(\"condition\"),v,v.htmlText()).join(\". \")",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"newColumnName": "Conditions",
"columnInsertIndex": 5,
"baseColumnName": "SHERPA",
"expression": "grel:forEach(value.parseHtml().select(\"condition\"),v,v.htmlText()).join(\". \")",
"onError": "set-to-blank"
},
{
"op": "core/column-removal",
"description": "Remove column SHERPA",
"columnName": "SHERPA"
}
]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment