Skip to content

Instantly share code, notes, and snippets.

@gauden
Created March 7, 2013 09:25
Show Gist options
  • Save gauden/5106729 to your computer and use it in GitHub Desktop.
OpenRefine Script to Extract Authors and EntrezUIDs from a PubMed CSV file
[
{
"op": "core/column-removal",
"description": "Remove column URL",
"columnName": "URL"
},
{
"op": "core/column-removal",
"description": "Remove column Details",
"columnName": "Details"
},
{
"op": "core/column-removal",
"description": "Remove column ShortDetails",
"columnName": "ShortDetails"
},
{
"op": "core/column-removal",
"description": "Remove column Resource",
"columnName": "Resource"
},
{
"op": "core/column-removal",
"description": "Remove column Type",
"columnName": "Type"
},
{
"op": "core/column-removal",
"description": "Remove column Identifiers",
"columnName": "Identifiers"
},
{
"op": "core/column-removal",
"description": "Remove column Db",
"columnName": "Db"
},
{
"op": "core/column-removal",
"description": "Remove column Properties",
"columnName": "Properties"
},
{
"op": "core/column-removal",
"description": "Remove column Column 12",
"columnName": "Column 12"
},
{
"op": "core/row-removal",
"description": "Remove rows where column Description contains 'Descri' (case-insensitive text filter)",
"engineConfig": {
"facets": [
{
"query": "Descri",
"name": "Description",
"caseSensitive": false,
"columnName": "Description",
"type": "text",
"mode": "text"
}
],
"mode": "row-based"
}
},
{
"op": "core/multivalued-cell-split",
"description": "Split multi-valued cells in column Description on the separator ','",
"columnName": "Description",
"keyColumnName": "Description",
"separator": ",",
"mode": "plain"
},
{
"op": "core/multivalued-cell-split",
"description": "Split multi-valued cells in column Description on the separator ';'",
"columnName": "Description",
"keyColumnName": "Description",
"separator": ";",
"mode": "plain"
},
{
"op": "core/fill-down",
"description": "Fill down cells in column EntrezUID",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"columnName": "EntrezUID"
},
{
"op": "core/mass-edit",
"description": "Mass edit cells in column Description (filtered on 'marmot'): normalize Marmot name variants to 'Marmot MG'",
"engineConfig": {
"facets": [
{
"query": "marmot",
"name": "Description",
"caseSensitive": false,
"columnName": "Description",
"type": "text",
"mode": "text"
}
],
"mode": "row-based"
},
"columnName": "Description",
"expression": "value",
"edits": [
{
"fromBlank": false,
"fromError": false,
"from": [
"Marmot MG",
"Marmot MG."
],
"to": "Marmot MG"
},
{
"fromBlank": false,
"fromError": false,
"from": [
"Marmot M",
"Marmot M."
],
"to": "Marmot MG"
}
]
},
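{
"op": "core/mass-edit",
"description": "Mass edit cells in column Description: normalize spelling variants of the author names Kivimaki M, Ferrie JE, Lawlor DA, Bobak M and Adamková V",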
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"columnName": "Description",
"expression": "value",
"edits": [
{
"fromBlank": false,
"fromError": false,
"from": [
"Kivimaki M",
"Kivimäki M",
"Kivimäki M.",
"Kivimaki M."
],
"to": "Kivimaki M"
},
{
"fromBlank": false,
"fromError": false,
"from": [
"Ferrie JE",
"Ferrie JE."
],
"to": "Ferrie JE"
},
{
"fromBlank": false,
"fromError": false,
"from": [
"Lawlor DA",
"Lawlor DA."
],
"to": "Lawlor DA"
},
{
"fromBlank": false,
"fromError": false,
"from": [
"Bobak M.",
"Bobak M"
],
"to": "Bobak M"
},
{
"fromBlank": false,
"fromError": false,
"from": [
"Adamkova V",
"Adamková V"
],
"to": "Adamková V"
}
]
},
{
"op": "core/row-removal",
"description": "Remove rows where column Description contains 'et al' (case-insensitive text filter)",
"engineConfig": {
"facets": [
{
"query": "et al",
"name": "Description",
"caseSensitive": false,
"columnName": "Description",
"type": "text",
"mode": "text"
}
],
"mode": "row-based"
}
},
{
"op": "core/text-transform",
"description": "Text transform on cells in column Description using expression grel:value.substring(0,-1) (drops the last character of every cell that contains '.')",
"engineConfig": {
"facets": [
{
"query": ".",
"name": "Description",
"caseSensitive": false,
"columnName": "Description",
"type": "text",
"mode": "text"
}
],
"mode": "row-based"
},
"columnName": "Description",
"expression": "grel:value.substring(0,-1)",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/column-rename",
"description": "Rename column Description to CoAuthors",
"oldColumnName": "Description",
"newColumnName": "CoAuthors"
},
{
"op": "core/column-removal",
"description": "Remove column Title",
"columnName": "Title"
}
]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment