rasmi/time_series.sql

## time_series.sql
# Copyright 2019 Google LLC.
# SPDX-License-Identifier: Apache-2.0

WITH

# Generate a sample time series from the taxi trips dataset.
sample_time_series AS (
  SELECT
    SUBSTR(taxi_id, 16, 24) AS taxi_id, # Truncate for readability.
    trip_start_timestamp,
    trip_seconds
  FROM
    `bigquery-public-data.chicago_taxi_trips.taxi_trips`
  WHERE
    SUBSTR(taxi_id, 0, 2) = 'cc' # Downsample data based on hash
  ORDER BY
    taxi_id,
    trip_start_timestamp
)

# Predict the trip seconds for the next ride.
# Note that time intervals here are not regular (ex. hourly).
# Some cleanup still required, such as removing next_trip_seconds nulls
# and sequences that are too short.
# Can create a second window for a different level of aggregation for
# time series history.
SELECT
  taxi_id,
  trip_start_timestamp,
  trip_seconds,
  LEAD(trip_seconds, 1) OVER (
    PARTITION BY
      taxi_id
    ORDER BY
      trip_start_timestamp
  ) AS next_trip_seconds,
  # ARRAY_AGG cannot aggreggate nulls.
  ARRAY_AGG(IFNULL(trip_seconds, 0)) OVER (
    PARTITION BY
      taxi_id
    ORDER BY
      # AutoML Tables will treat as an ordered array
      # OFFSET(0) in array should be the oldest element
      trip_start_timestamp
    ROWS BETWEEN 10 PRECEDING AND CURRENT ROW
  ) AS trip_history
FROM
  sample_time_series
	# Copyright 2019 Google LLC.
	# SPDX-License-Identifier: Apache-2.0

	WITH

	# Generate a sample time series from the taxi trips dataset.
	sample_time_series AS (
	SELECT
	SUBSTR(taxi_id, 16, 24) AS taxi_id, # Truncate for readability.
	trip_start_timestamp,
	trip_seconds
	FROM
	`bigquery-public-data.chicago_taxi_trips.taxi_trips`
	WHERE
	SUBSTR(taxi_id, 0, 2) = 'cc' # Downsample data based on hash
	ORDER BY
	taxi_id,
	trip_start_timestamp
	)

	# Predict the trip seconds for the next ride.
	# Note that time intervals here are not regular (ex. hourly).
	# Some cleanup still required, such as removing next_trip_seconds nulls
	# and sequences that are too short.
	# Can create a second window for a different level of aggregation for
	# time series history.
	SELECT
	taxi_id,
	trip_start_timestamp,
	trip_seconds,
	LEAD(trip_seconds, 1) OVER (
	PARTITION BY
	taxi_id
	ORDER BY
	trip_start_timestamp
	) AS next_trip_seconds,
	# ARRAY_AGG cannot aggreggate nulls.
	ARRAY_AGG(IFNULL(trip_seconds, 0)) OVER (
	PARTITION BY
	taxi_id
	ORDER BY
	# AutoML Tables will treat as an ordered array
	# OFFSET(0) in array should be the oldest element
	trip_start_timestamp
	ROWS BETWEEN 10 PRECEDING AND CURRENT ROW
	) AS trip_history
	FROM
	sample_time_series