Skip to content

Instantly share code, notes, and snippets.

@chadfennell
Created July 27, 2017 15:51
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save chadfennell/1bf24f37dccdcdb15e22b7a2bf3695ac to your computer and use it in GitHub Desktop.
Save chadfennell/1bf24f37dccdcdb15e22b7a2bf3695ac to your computer and use it in GitHub Desktop.
ETL Hub Mappings for MPR
{
"dataProvider": {
"origins": [
{
"value": "Minnesota Public Radio"
}
]
},
"format": {
"origins": [
{
"value": "news bulletin"
}
]
},
"identifier": {
"origins": [
{
"path": "guid[last()]"
}
]
},
"isShownAt": {
"origins": [
{
"path": "uri"
}
]
},
"object": {
"origins": [
{
"path": "image[first()]"
}
]
},
"provider": {
"origins": [
{
"value": "Minnesota Digital Library"
}
]
},
"record_hash": {
"origins": [
{
"path": "guid[last()]"
},
{
"path": "metadata/dc/source"
}
],
"processors": [
{
"args": [
""
],
"process": "join"
},
{
"process": "to_sha1_hex"
}
]
},
"sourceResource/rights": {
"origins": [
{
"value": "http://minnesota.publicradio.org/about/site/terms/"
}
]
},
"sourceResource/creator": {
"origins": [
{
"path": "author"
}
],
"processors": [
{
"process": "compact"
},
{
"process": "nil_if_empty"
}
]
},
"sourceResource/date/begin": {
"origins": [
{
"path": "publish_date[last()]"
}
],
"processors": [
{
"args": [
"-"
],
"process": "split"
},
{
"process": "first"
}
]
},
"sourceResource/date/displayDate": {
"origins": [
{
"path": "publish_date[last()]"
}
]
},
"sourceResource/date/end": {
"origins": [
{
"path": "publish_date[last()]"
}
],
"processors": [
{
"args": [
"-"
],
"process": "split"
},
{
"process": "first"
}
]
},
"sourceResource/description": {
"origins": [
{
"path": "summary"
}
],
"processors": [
{
"process": "strip"
},
{
"args": [
"\\n"
],
"process": "split"
},
{
"args": [
" "
],
"process": "join"
},
{
"args": {
"pattern": "{%.*?%}",
"replacement": ""
},
"process": "gsub"
},
{
"args": {
"pattern": "\\s{2,}",
"replacement": " "
},
"process": "gsub"
},
{
"process": "html_decode"
},
{
"process": "strip_html"
},
{
"process": "truncate"
}
]
},
"sourceResource/language": {
"origins": [
{
"value": [
{
"iso639_3": "eng",
"name": "English"
}
]
}
]
},
"sourceResource/publisher": {
"origins": [
{
"path": "metadata/dc/source"
}
]
},
"sourceResource/subject": {
"origins": [
{
"path": "tags"
}
],
"processors": [
{
"args": [
"; "
],
"process": "split"
},
{
"process": "flatten"
},
{
"process": "unique"
},
{
"process": "strip"
},
{
"args": {
"pattern": "\\s--\\s",
"replacement": "--"
},
"process": "gsub"
},
{
"args": "name",
"process": "apply_label"
}
]
},
"sourceResource/title": {
"origins": [
{
"path": "title"
}
],
"processors": [
{
"process": "html_decode"
},
{
"process": "strip_html"
}
],
"record_filters": [
{
"run": "after_processors",
"reject_unless": "has_value?"
}
]
},
"title": {
"origins": [
{
"path": "title"
}
],
"processors": [
{
"process": "html_decode"
},
{
"process": "strip_html"
}
]
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment