This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Not Recommended Approach: Scan the entire table and rebuild every day | |
INSERT OVERWRITE TABLE dim_total_bookings PARTITION (ds = '{{ ds }}') | |
SELECT | |
dim_market | |
, SUM(m_bookings) AS m_bookings | |
FROM | |
fct_bookings | |
WHERE | |
ds <= '{{ ds }}' -- this is expensive, and can quickly run into scale issue | |
GROUP BY |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
transforms = [] | |
transforms.append( | |
('select_binary', ColumnSelector(features=binary)) | |
) | |
transforms.append( | |
('numeric', ExtendedPipeline([ | |
('select', ColumnSelector(features=numeric)), | |
('impute', Imputer(missing_values='NaN', strategy='mean', axis=0)), |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
A DAG definition file in Airflow, written in Python. | |
""" | |
from datetime import datetime, timedelta | |
from airflow.models import DAG # Import the DAG class | |
from airflow.operators.bash_operator import BashOperator | |
from airflow.operators.sensors import TimeDeltaSensor | |
default_args = { | |
'owner': 'you', |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import pandas as pd | |
import google_auth_oauthlib.flow | |
import googleapiclient.discovery | |
import googleapiclient.errors | |
import matplotlib | |
scopes = ["https://www.googleapis.com/auth/youtube.readonly"] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Creates the fct_bookings table if it does not already exist.
-- Columns: the listing ID, the owning host's ID, and a booking indicator.
-- The table is partitioned on the string column `ds`
-- (presumably a date stamp -- confirm against the jobs that write it).
-- NOTE: Hive/Spark DDL spells the clause PARTITIONED BY; PARTITION BY is
-- only valid inside window specifications and is rejected in CREATE TABLE.
CREATE TABLE IF NOT EXISTS fct_bookings (
    id_listing BIGINT COMMENT 'Unique ID of the listing'
  , id_host BIGINT COMMENT 'Unique ID of the host who owns the listing'
  , m_bookings BIGINT COMMENT 'Denoted 1 if a booking transaction occurred'
)
PARTITIONED BY ( -- this is how we define partition keys
    ds STRING
);
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{%- if backfill %} | |
INSERT OVERWRITE TABLE bookings_summary PARTITION (ds) | |
{%- else %} | |
INSERT OVERWRITE TABLE bookings_summary PARTITION (ds = '{{ ds }}') | |
{%- endif %} | |
SELECT | |
dim_market | |
, SUM(m_bookings) AS m_bookings | |
{%- if backfill %} | |
, ds |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Define the CREATE TABLE statement here | |
{%- macro create_table() %} | |
... | |
{%- endmacro %} | |
# Main ETL logic, insert the results into a STAGING table | |
{%- macro main() %} | |
... | |
{%- endmacro %} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
SELECT | |
b.dim_market | |
, SUM(a.m_bookings) AS m_bookings | |
FROM ( | |
SELECT | |
id_listing | |
, 1 AS m_bookings | |
, m_a # not used (for illustration only) | |
, m_b # not used (for illustration only) | |
, m_c # not used (for illustration only) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
A DAG docstring might be a good way to explain at a high level | |
what problem space the DAG is looking at. | |
Links to design documents, upstream dependencies etc | |
are highly recommended. | |
""" | |
from datetime import datetime, timedelta | |
from airflow.models import DAG # Import the DAG class | |
from airflow.operators.sensors import NamedHivePartitionSensor | |
from airflow.operators.hive_operator import HiveOperator |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# API endpoints | |
@webapp.route('/api/all')
def api_all():
    """Return every row from ``Events`` as JSON.

    Each event is converted with its ``serialize()`` method and the
    resulting list is placed under the ``json_list`` key of the response.
    """
    serialized = []
    for record in Events.query.all():
        serialized.append(record.serialize())
    return jsonify(json_list=serialized)
@webapp.route('/api/<event_type>')
def api_by_event_type(event_type):
    """Return the events whose ``event_type`` equals the URL segment as JSON.

    ``event_type`` comes from the route path. Matching rows are converted
    with ``serialize()`` and returned under the ``json_list`` key.
    """
    matching = Events.query.filter_by(event_type=event_type).all()
    payload = list(map(lambda record: record.serialize(), matching))
    return jsonify(json_list=payload)
NewerOlder