Skip to content

Instantly share code, notes, and snippets.

@semio
Last active May 20, 2019 06:55
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save semio/63bdc3414336ed6e0be164e115d04169 to your computer and use it in GitHub Desktop.
Save semio/63bdc3414336ed6e0be164e115d04169 to your computer and use it in GitHub Desktop.
DDF Recipe Example
# Recipe metadata: identifies the dataset this recipe produces and lists
# the source datasets it is built from.
info:
  id: ddf--your_company--oil_per_person
  author: your_company
  version: v1
  license: MIT
  language: en
  # Source datasets; each one is referenced by at least one entry under
  # `ingredients`.
  base:
    - ddf--gapminder--population
    - ddf--gapminder--geo_entity_domain
    - ddf--bp--energy
# Recipe-wide settings.
config:
  # NOTE(review): machine-specific absolute path, presumably the directory
  # that holds the source datasets -- adjust it for your environment.
  ddf_dir: /Users/semio/src/work/Gapminder
# Input data for the recipe. Each ingredient selects data from one source
# dataset by its key (dimension columns) and value (columns to load).
ingredients:
  - id: oil-consumption-datapoints
    dataset: ddf--bp--energy
    key: geo, year
    value:
      - oil_consumption_tonnes
  - id: population-datapoints
    dataset: ddf--gapminder--population
    key: country, year
    # `*` is a reserved indicator in YAML (it starts an alias), so it must
    # be quoted when a literal string is meant.
    value: "*"
  - id: bp-geo-entities
    dataset: ddf--bp--energy
    key: geo
    value: "*"
  - id: gapminder-country-entities-synonyms
    dataset: ddf--gapminder--geo_entity_domain
    key: country, synonym
    value: "*"
  # Referenced by the `concepts` cooking step and by `serving`, so it has
  # to be declared here.
  # NOTE(review): dataset and key are inferred from the synonyms ingredient
  # above -- confirm against ddf--gapminder--geo_entity_domain.
  - id: gapminder-country-entities
    dataset: ddf--gapminder--geo_entity_domain
    key: country
    value: "*"
# Procedures that turn the ingredients into the final outputs, grouped by
# the kind of data they produce (datapoints, concepts).
cooking:
  datapoints:
    # Change the dimension name for bp: rename header `geo` to `country`.
    - procedure: translate_header
      ingredients:
        - oil-consumption-datapoints
      options:
        dictionary:
          geo: country
      result: oil-consumption-datapoints-translated
    # Align bp geo entities to gapminder countries.
    - procedure: translate_column
      ingredients:
        - bp-geo-entities
      result: bp-geo-translated
      options:
        # The procedure searches for values in this column ...
        column: geo_name
        # ... and puts the matched value in this column.
        target_column: country
        dictionary:
          base: gapminder-country-entities-synonyms
          # `key` is the column searched for a match of the geo names.
          key: synonym
          # `value` is the column the new value is taken from.
          value: country
    # Align bp datapoints to the newly translated bp entities.
    - procedure: translate_column
      ingredients:
        - oil-consumption-datapoints-translated
      result: oil-consumption-datapoints-aligned
      options:
        column: country
        target_column: country
        dictionary:
          base: bp-geo-translated
          key: geo
          value: country
    # Merge bp and gapminder data, then calculate the result.
    - procedure: merge
      ingredients:
        - oil-consumption-datapoints-aligned
        - population-datapoints
      result: merged-datapoints
    - procedure: run_op
      ingredients:
        - merged-datapoints
      options:
        op:
          # New indicator computed from the merged columns. The name was
          # previously misspelled as `oil_consumption_per_captia`.
          oil_consumption_per_capita: |
            oil_consumption_tonnes / population
      result: datapoints-final
  concepts:
    # NOTE(review): `gapminder-country-entities` must be declared under
    # `ingredients`; no procedure in this recipe produces it.
    - procedure: extract_concepts
      ingredients:
        - datapoints-final
        - gapminder-country-entities
      result: concepts-final
      options:
        # Manually set some concept types.
        overwrite:
          year: time
          country: entity_domain
# Ingredients written to the output dataset, listed by ingredient id.
serving:
  - id: concepts-final
  - id: gapminder-country-entities
  - id: datapoints-final
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment