Skip to content

Instantly share code, notes, and snippets.

@magdmartin
Created October 10, 2014 03:47
Show Gist options
  • Save magdmartin/3df6bd062ae729917eb8 to your computer and use it in GitHub Desktop.
Save magdmartin/3df6bd062ae729917eb8 to your computer and use it in GitHub Desktop.
OpenRefine recipe to parse Apache log
[
{
"op": "core/column-split",
"description": "Split column Column 1 by separator",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"columnName": "Column 1",
"guessCellType": true,
"removeOriginalColumn": true,
"mode": "separator",
"separator": "-",
"regex": false,
"maxColumns": 2
},
{
"op": "core/column-split",
"description": "Split column Column 1 2 by separator",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"columnName": "Column 1 2",
"guessCellType": true,
"removeOriginalColumn": true,
"mode": "separator",
"separator": "]",
"regex": false,
"maxColumns": 2
},
{
"op": "core/column-split",
"description": "Split column Column 1 2 1 by separator",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"columnName": "Column 1 2 1",
"guessCellType": true,
"removeOriginalColumn": true,
"mode": "separator",
"separator": "[",
"regex": false,
"maxColumns": 2
},
{
"op": "core/column-split",
"description": "Split column Column 1 2 2 by separator",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"columnName": "Column 1 2 2",
"guessCellType": true,
"removeOriginalColumn": true,
"mode": "separator",
"separator": "\"",
"regex": false,
"maxColumns": 0
},
{
"op": "core/column-split",
"description": "Split column Column 1 2 2 3 by separator",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"columnName": "Column 1 2 2 3",
"guessCellType": true,
"removeOriginalColumn": true,
"mode": "separator",
"separator": " ",
"regex": false,
"maxColumns": 0
},
{
"op": "core/column-reorder",
"description": "Reorder columns",
"columnNames": [
"Column 1 1",
"Column 1 2 1 1",
"Column 1 2 1 2",
"Column 1 2 2 2",
"Column 1 2 2 3 2",
"Column 1 2 2 3 3",
"Column 1 2 2 4",
"Column 1 2 2 6"
]
},
{
"op": "core/column-rename",
"description": "Rename column Column 1 1 to client IP address",
"oldColumnName": "Column 1 1",
"newColumnName": "client IP address"
},
{
"op": "core/column-rename",
"description": "Rename column Column 1 2 1 1 to htaccess userid",
"oldColumnName": "Column 1 2 1 1",
"newColumnName": "htaccess userid"
},
{
"op": "core/column-rename",
"description": "Rename column Column 1 2 1 2 to timestamp",
"oldColumnName": "Column 1 2 1 2",
"newColumnName": "timestamp"
},
{
"op": "core/column-rename",
"description": "Rename column Column 1 2 2 2 to request line",
"oldColumnName": "Column 1 2 2 2",
"newColumnName": "request line"
},
{
"op": "core/column-rename",
"description": "Rename column Column 1 2 2 3 3 to object size",
"oldColumnName": "Column 1 2 2 3 3",
"newColumnName": "object size"
},
{
"op": "core/column-rename",
"description": "Rename column Column 1 2 2 4 to Referer HTTP request header",
"oldColumnName": "Column 1 2 2 4",
"newColumnName": "Referer HTTP request header"
},
{
"op": "core/column-rename",
"description": "Rename column Column 1 2 2 3 2 to Status Code",
"oldColumnName": "Column 1 2 2 3 2",
"newColumnName": "Status Code"
},
{
"op": "core/column-rename",
"description": "Rename column Column 1 2 2 6 to User-Agent HTTP request header",
"oldColumnName": "Column 1 2 2 6",
"newColumnName": "User-Agent HTTP request header"
},
{
"op": "core/text-transform",
"description": "Text transform on cells in column timestamp using expression grel:value.replace('Jan','01').replace('Feb','02').replace('Mar','03').replace('Apr','04').replace('May','05').replace('Jun','06').replace('Jul','07').replace('Aug','08').replace('Sep','09').replace('Oct','10').replace('Nov','11').replace('Dec','12').toDate('dd/MM/yyyy:HH:mm:ss')",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"columnName": "timestamp",
"expression": "grel:value.replace('Jan','01').replace('Feb','02').replace('Mar','03').replace('Apr','04').replace('May','05').replace('Jun','06').replace('Jul','07').replace('Aug','08').replace('Sep','09').replace('Oct','10').replace('Nov','11').replace('Dec','12').toDate('dd/MM/yyyy:HH:mm:ss')",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
}
]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment