Instantly share code, notes, and snippets.
Created
March 7, 2013 09:25
-
Star
(0)
0
You must be signed in to star a gist -
Fork
(0)
0
You must be signed in to fork a gist
-
Save gauden/5106729 to your computer and use it in GitHub Desktop.
OpenRefine Script to Extract Authors and EntrezUIDs from a PubMed CSV file
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[
  {
    "op": "core/column-removal",
    "description": "Remove column URL",
    "columnName": "URL"
  },
  {
    "op": "core/column-removal",
    "description": "Remove column Details",
    "columnName": "Details"
  },
  {
    "op": "core/column-removal",
    "description": "Remove column ShortDetails",
    "columnName": "ShortDetails"
  },
  {
    "op": "core/column-removal",
    "description": "Remove column Resource",
    "columnName": "Resource"
  },
  {
    "op": "core/column-removal",
    "description": "Remove column Type",
    "columnName": "Type"
  },
  {
    "op": "core/column-removal",
    "description": "Remove column Identifiers",
    "columnName": "Identifiers"
  },
  {
    "op": "core/column-removal",
    "description": "Remove column Db",
    "columnName": "Db"
  },
  {
    "op": "core/column-removal",
    "description": "Remove column Properties",
    "columnName": "Properties"
  },
  {
    "op": "core/column-removal",
    "description": "Remove column Column 12",
    "columnName": "Column 12"
  },
  {
    "op": "core/row-removal",
    "description": "Remove rows",
    "engineConfig": {
      "facets": [
        {
          "query": "Descri",
          "name": "Description",
          "caseSensitive": false,
          "columnName": "Description",
          "type": "text",
          "mode": "text"
        }
      ],
      "mode": "row-based"
    }
  },
  {
    "op": "core/multivalued-cell-split",
    "description": "Split multi-valued cells in column Description",
    "columnName": "Description",
    "keyColumnName": "Description",
    "separator": ",",
    "mode": "plain"
  },
  {
    "op": "core/multivalued-cell-split",
    "description": "Split multi-valued cells in column Description",
    "columnName": "Description",
    "keyColumnName": "Description",
    "separator": ";",
    "mode": "plain"
  },
  {
    "op": "core/fill-down",
    "description": "Fill down cells in column EntrezUID",
    "engineConfig": {
      "facets": [],
      "mode": "row-based"
    },
    "columnName": "EntrezUID"
  },
  {
    "op": "core/mass-edit",
    "description": "Mass edit cells in column Description",
    "engineConfig": {
      "facets": [
        {
          "query": "marmot",
          "name": "Description",
          "caseSensitive": false,
          "columnName": "Description",
          "type": "text",
          "mode": "text"
        }
      ],
      "mode": "row-based"
    },
    "columnName": "Description",
    "expression": "value",
    "edits": [
      {
        "fromBlank": false,
        "fromError": false,
        "from": [
          "Marmot MG",
          "Marmot MG."
        ],
        "to": "Marmot MG"
      },
      {
        "fromBlank": false,
        "fromError": false,
        "from": [
          "Marmot M",
          "Marmot M."
        ],
        "to": "Marmot MG"
      }
    ]
  },
  {
    "op": "core/mass-edit",
    "description": "Mass edit cells in column Description",
    "engineConfig": {
      "facets": [],
      "mode": "row-based"
    },
    "columnName": "Description",
    "expression": "value",
    "edits": [
      {
        "fromBlank": false,
        "fromError": false,
        "from": [
          "Kivimaki M",
          "Kivimäki M",
          "Kivimäki M.",
          "Kivimaki M."
        ],
        "to": "Kivimaki M"
      },
      {
        "fromBlank": false,
        "fromError": false,
        "from": [
          "Ferrie JE",
          "Ferrie JE."
        ],
        "to": "Ferrie JE"
      },
      {
        "fromBlank": false,
        "fromError": false,
        "from": [
          "Lawlor DA",
          "Lawlor DA."
        ],
        "to": "Lawlor DA"
      },
      {
        "fromBlank": false,
        "fromError": false,
        "from": [
          "Bobak M.",
          "Bobak M"
        ],
        "to": "Bobak M"
      },
      {
        "fromBlank": false,
        "fromError": false,
        "from": [
          "Adamkova V",
          "Adamková V"
        ],
        "to": "Adamková V"
      }
    ]
  },
  {
    "op": "core/row-removal",
    "description": "Remove rows",
    "engineConfig": {
      "facets": [
        {
          "query": "et al",
          "name": "Description",
          "caseSensitive": false,
          "columnName": "Description",
          "type": "text",
          "mode": "text"
        }
      ],
      "mode": "row-based"
    }
  },
  {
    "op": "core/text-transform",
    "description": "Text transform on cells in column Description using expression grel:if(value.endsWith(\".\"), value.substring(0, -1), value)",
    "engineConfig": {
      "facets": [
        {
          "query": ".",
          "name": "Description",
          "caseSensitive": false,
          "columnName": "Description",
          "type": "text",
          "mode": "text"
        }
      ],
      "mode": "row-based"
    },
    "columnName": "Description",
    "expression": "grel:if(value.endsWith(\".\"), value.substring(0, -1), value)",
    "onError": "keep-original",
    "repeat": false,
    "repeatCount": 10
  },
  {
    "op": "core/column-rename",
    "description": "Rename column Description to CoAuthors",
    "oldColumnName": "Description",
    "newColumnName": "CoAuthors"
  },
  {
    "op": "core/column-removal",
    "description": "Remove column Title",
    "columnName": "Title"
  }
]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment