Skip to content

Instantly share code, notes, and snippets.

@rhhernandes
Created April 7, 2017 17:56
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save rhhernandes/80f18ea986c25e6ec663a06c6a6e69dd to your computer and use it in GitHub Desktop.
OpenRefine script for cleaning CrowdTangle historical data CSV
[
{
"op": "core/text-transform",
"description": "Text transform on cells in column Created using expression value.trim()",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"columnName": "Created",
"expression": "value.trim()",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/column-addition",
"description": "Create column Created2 at index 3 based on column Created using expression grel:value",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"newColumnName": "Created2",
"columnInsertIndex": 3,
"baseColumnName": "Created",
"expression": "grel:value",
"onError": "set-to-blank"
},
{
"op": "core/column-split",
"description": "Split column Created2 by separator",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"columnName": "Created2",
"guessCellType": true,
"removeOriginalColumn": true,
"mode": "separator",
"separator": " ",
"regex": false,
"maxColumns": 0
},
{
"op": "core/column-addition",
"description": "Create column Date at index 4 based on column Created2 1 using expression grel:cells[\"Created2 1\"].value + \" \" + cells[\"Created2 2\"].value",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"newColumnName": "Date",
"columnInsertIndex": 4,
"baseColumnName": "Created2 1",
"expression": "grel:cells[\"Created2 1\"].value + \" \" + cells[\"Created2 2\"].value",
"onError": "set-to-blank"
},
{
"op": "core/column-removal",
"description": "Remove column Created2 1",
"columnName": "Created2 1"
},
{
"op": "core/column-removal",
"description": "Remove column Created2 2",
"columnName": "Created2 2"
},
{
"op": "core/column-removal",
"description": "Remove column Created2 3",
"columnName": "Created2 3"
},
{
"op": "core/text-transform",
"description": "Text transform on cells in column Type using expression value.trim()",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"columnName": "Type",
"expression": "value.trim()",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/text-transform",
"description": "Text transform on cells in column Message using expression value.replace(/\\s+/,' ')",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"columnName": "Message",
"expression": "value.replace(/\\s+/,' ')",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/text-transform",
"description": "Text transform on cells in column Message using expression value.trim()",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"columnName": "Message",
"expression": "value.trim()",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/text-transform",
"description": "Text transform on cells in column Link Text using expression value.replace(/\\s+/,' ')",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"columnName": "Link Text",
"expression": "value.replace(/\\s+/,' ')",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/text-transform",
"description": "Text transform on cells in column Link Text using expression value.trim()",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"columnName": "Link Text",
"expression": "value.trim()",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/text-transform",
"description": "Text transform on cells in column Description using expression value.trim()",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"columnName": "Description",
"expression": "value.trim()",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/text-transform",
"description": "Text transform on cells in column Description using expression value.replace(/\\s+/,' ')",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"columnName": "Description",
"expression": "value.replace(/\\s+/,' ')",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
}
]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment