Created
April 7, 2017 17:56
-
-
Save rhhernandes/80f18ea986c25e6ec663a06c6a6e69dd to your computer and use it in GitHub Desktop.
OpenRefine script for cleaning Crowdtangle historical data CSV
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[
  {
    "op": "core/text-transform",
    "description": "Text transform on cells in column Created using expression value.trim()",
    "engineConfig": {
      "facets": [],
      "mode": "row-based"
    },
    "columnName": "Created",
    "expression": "value.trim()",
    "onError": "keep-original",
    "repeat": false,
    "repeatCount": 10
  },
  {
    "op": "core/column-addition",
    "description": "Create column Created2 at index 3 based on column Created using expression grel:value",
    "engineConfig": {
      "facets": [],
      "mode": "row-based"
    },
    "newColumnName": "Created2",
    "columnInsertIndex": 3,
    "baseColumnName": "Created",
    "expression": "grel:value",
    "onError": "set-to-blank"
  },
  {
    "op": "core/column-split",
    "description": "Split column Created2 by separator",
    "engineConfig": {
      "facets": [],
      "mode": "row-based"
    },
    "columnName": "Created2",
    "guessCellType": true,
    "removeOriginalColumn": true,
    "mode": "separator",
    "separator": " ",
    "regex": false,
    "maxColumns": 0
  },
  {
    "op": "core/column-addition",
    "description": "Create column Date at index 4 based on column Created2 1 using expression grel:cells[\"Created2 1\"].value + \" \" + cells[\"Created2 2\"].value",
    "engineConfig": {
      "facets": [],
      "mode": "row-based"
    },
    "newColumnName": "Date",
    "columnInsertIndex": 4,
    "baseColumnName": "Created2 1",
    "expression": "grel:cells[\"Created2 1\"].value + \" \" + cells[\"Created2 2\"].value",
    "onError": "set-to-blank"
  },
  {
    "op": "core/column-removal",
    "description": "Remove column Created2 1",
    "columnName": "Created2 1"
  },
  {
    "op": "core/column-removal",
    "description": "Remove column Created2 2",
    "columnName": "Created2 2"
  },
  {
    "op": "core/column-removal",
    "description": "Remove column Created2 3",
    "columnName": "Created2 3"
  },
  {
    "op": "core/text-transform",
    "description": "Text transform on cells in column Type using expression value.trim()",
    "engineConfig": {
      "facets": [],
      "mode": "row-based"
    },
    "columnName": "Type",
    "expression": "value.trim()",
    "onError": "keep-original",
    "repeat": false,
    "repeatCount": 10
  },
  {
    "op": "core/text-transform",
    "description": "Text transform on cells in column Message using expression value.replace(/\\s+/,' ')",
    "engineConfig": {
      "facets": [],
      "mode": "row-based"
    },
    "columnName": "Message",
    "expression": "value.replace(/\\s+/,' ')",
    "onError": "keep-original",
    "repeat": false,
    "repeatCount": 10
  },
  {
    "op": "core/text-transform",
    "description": "Text transform on cells in column Message using expression value.trim()",
    "engineConfig": {
      "facets": [],
      "mode": "row-based"
    },
    "columnName": "Message",
    "expression": "value.trim()",
    "onError": "keep-original",
    "repeat": false,
    "repeatCount": 10
  },
  {
    "op": "core/text-transform",
    "description": "Text transform on cells in column Link Text using expression value.replace(/\\s+/,' ')",
    "engineConfig": {
      "facets": [],
      "mode": "row-based"
    },
    "columnName": "Link Text",
    "expression": "value.replace(/\\s+/,' ')",
    "onError": "keep-original",
    "repeat": false,
    "repeatCount": 10
  },
  {
    "op": "core/text-transform",
    "description": "Text transform on cells in column Link Text using expression value.trim()",
    "engineConfig": {
      "facets": [],
      "mode": "row-based"
    },
    "columnName": "Link Text",
    "expression": "value.trim()",
    "onError": "keep-original",
    "repeat": false,
    "repeatCount": 10
  },
  {
    "op": "core/text-transform",
    "description": "Text transform on cells in column Description using expression value.trim()",
    "engineConfig": {
      "facets": [],
      "mode": "row-based"
    },
    "columnName": "Description",
    "expression": "value.trim()",
    "onError": "keep-original",
    "repeat": false,
    "repeatCount": 10
  },
  {
    "op": "core/text-transform",
    "description": "Text transform on cells in column Description using expression value.replace(/\\s+/,' ')",
    "engineConfig": {
      "facets": [],
      "mode": "row-based"
    },
    "columnName": "Description",
    "expression": "value.replace(/\\s+/,' ')",
    "onError": "keep-original",
    "repeat": false,
    "repeatCount": 10
  }
]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment