Skip to content

Instantly share code, notes, and snippets.

@rafburzy
Created March 19, 2022 10:41
Show Gist options
  • Save rafburzy/2f4dec480ec3c7e315fb8cb6af559e2b to your computer and use it in GitHub Desktop.
Save rafburzy/2f4dec480ec3c7e315fb8cb6af559e2b to your computer and use it in GitHub Desktop.
SQL queries for Bike Sharing Data Analysis Project
-- Queries used on bike sharing ride data
-- query used to check if there are no empty records
SELECT
COUNT(*) AS empty_records
FROM
`cyclisticdataanalysis-344115.usage_table.*`
WHERE
started_at IS NULL OR ended_at IS NULL OR ride_id IS NULL
-------------------------------------------------------------------
-- query used to check if there are no duplicates
SELECT
COUNT(DISTINCT(ride_id)) AS distinct_entries,
COUNT(*) AS all_rows
FROM
`cyclisticdataanalysis-344115.usage_table.*`
-------------------------------------------------------------------
-- query used to calculate the ride length and day of week of a trip
-- this query uses temporary table to link rides per month tables into one
WITH all_rides_table AS
(
SELECT
ride_id,
rideable_type,
started_at,
ended_at,
member_casual
FROM
`cyclisticdataanalysis-344115.usage_table.*`
)
SELECT
ride_id,
started_at,
ended_at,
member_casual,
ended_at - started_at AS ride_length,
EXTRACT(DAYOFWEEK FROM started_at) AS start_day -- Sunday as day 1
FROM
all_rides_table
-------------------------------------------------------------------
-- query used to calculate mean and max trip lengths by year and month
WITH all_rides_table AS
(
SELECT
ride_id,
rideable_type,
started_at,
ended_at,
member_casual
FROM
`cyclisticdataanalysis-344115.usage_table.*`
)
SELECT
member_casual,
EXTRACT(YEAR FROM started_at) AS year,
EXTRACT(MONTH FROM started_at) AS month,
AVG(ended_at - started_at) AS avg_ride_length,
MAX(ended_at - started_at) AS max_ride_length,
MIN(ended_at - started_at) AS min_ride_length
FROM
all_rides_table
GROUP BY
member_casual,
year,
month
ORDER BY
year,
month
-- addition of a condition to a temporary table for a cut off at certain ride length
WITH all_rides_table AS
(
SELECT
ride_id,
rideable_type,
started_at,
ended_at,
member_casual
FROM
`cyclisticdataanalysis-344115.usage_table.*`
WHERE
(ended_at - started_at) < MAKE_INTERVAL(hour => 100)
)
SELECT
member_casual,
EXTRACT(YEAR FROM started_at) AS year,
EXTRACT(MONTH FROM started_at) AS month,
AVG(ended_at - started_at) AS avg_ride_length,
MAX(ended_at - started_at) AS max_ride_length,
MIN(ended_at - started_at) AS min_ride_length
FROM
all_rides_table
GROUP BY
member_casual,
year,
month
ORDER BY
year,
month
-- query used to calculate mean and max trip lengths by day of week
WITH all_rides_table AS
(
SELECT
ride_id,
rideable_type,
started_at,
ended_at,
member_casual
FROM
`cyclisticdataanalysis-344115.usage_table.*`
)
SELECT
member_casual,
EXTRACT(DAYOFWEEK FROM started_at) AS start_day,
AVG(ended_at - started_at) AS avg_ride_length,
MAX(ended_at - started_at) AS max_ride_length,
MIN(ended_at - started_at) AS min_ride_length
FROM
all_rides_table
GROUP BY
member_casual,
start_day
ORDER BY
start_day
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment