Created
March 19, 2022 10:41
-
-
Save rafburzy/2f4dec480ec3c7e315fb8cb6af559e2b to your computer and use it in GitHub Desktop.
SQL queries for Bike Sharing Data Analysis Project
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Queries used on bike sharing ride data | |
-- query used to check if there are no empty records | |
SELECT | |
COUNT(*) AS empty_records | |
FROM | |
`cyclisticdataanalysis-344115.usage_table.*` | |
WHERE | |
started_at IS NULL OR ended_at IS NULL OR ride_id IS NULL | |
------------------------------------------------------------------- | |
-- query used to check if there are no duplicates | |
SELECT | |
COUNT(DISTINCT(ride_id)) AS distinct_entries, | |
COUNT(*) AS all_rows | |
FROM | |
`cyclisticdataanalysis-344115.usage_table.*` | |
------------------------------------------------------------------- | |
-- query used to calculate the ride length and day of week of a trip | |
-- this query uses temporary table to link rides per month tables into one | |
WITH all_rides_table AS | |
( | |
SELECT | |
ride_id, | |
rideable_type, | |
started_at, | |
ended_at, | |
member_casual | |
FROM | |
`cyclisticdataanalysis-344115.usage_table.*` | |
) | |
SELECT | |
ride_id, | |
started_at, | |
ended_at, | |
member_casual, | |
ended_at - started_at AS ride_length, | |
EXTRACT(DAYOFWEEK FROM started_at) AS start_day -- Sunday as day 1 | |
FROM | |
all_rides_table | |
------------------------------------------------------------------- | |
-- query used to calculate mean and max trip lengths by year and month | |
WITH all_rides_table AS | |
( | |
SELECT | |
ride_id, | |
rideable_type, | |
started_at, | |
ended_at, | |
member_casual | |
FROM | |
`cyclisticdataanalysis-344115.usage_table.*` | |
) | |
SELECT | |
member_casual, | |
EXTRACT(YEAR FROM started_at) AS year, | |
EXTRACT(MONTH FROM started_at) AS month, | |
AVG(ended_at - started_at) AS avg_ride_length, | |
MAX(ended_at - started_at) AS max_ride_length, | |
MIN(ended_at - started_at) AS min_ride_length | |
FROM | |
all_rides_table | |
GROUP BY | |
member_casual, | |
year, | |
month | |
ORDER BY | |
year, | |
month | |
-- addition of a condition to a temporary table for a cut off at certain ride length | |
WITH all_rides_table AS | |
( | |
SELECT | |
ride_id, | |
rideable_type, | |
started_at, | |
ended_at, | |
member_casual | |
FROM | |
`cyclisticdataanalysis-344115.usage_table.*` | |
WHERE | |
(ended_at - started_at) < MAKE_INTERVAL(hour => 100) | |
) | |
SELECT | |
member_casual, | |
EXTRACT(YEAR FROM started_at) AS year, | |
EXTRACT(MONTH FROM started_at) AS month, | |
AVG(ended_at - started_at) AS avg_ride_length, | |
MAX(ended_at - started_at) AS max_ride_length, | |
MIN(ended_at - started_at) AS min_ride_length | |
FROM | |
all_rides_table | |
GROUP BY | |
member_casual, | |
year, | |
month | |
ORDER BY | |
year, | |
month | |
-- query used to calculate mean and max trip lengths by day of week | |
WITH all_rides_table AS | |
( | |
SELECT | |
ride_id, | |
rideable_type, | |
started_at, | |
ended_at, | |
member_casual | |
FROM | |
`cyclisticdataanalysis-344115.usage_table.*` | |
) | |
SELECT | |
member_casual, | |
EXTRACT(DAYOFWEEK FROM started_at) AS start_day, | |
AVG(ended_at - started_at) AS avg_ride_length, | |
MAX(ended_at - started_at) AS max_ride_length, | |
MIN(ended_at - started_at) AS min_ride_length | |
FROM | |
all_rides_table | |
GROUP BY | |
member_casual, | |
start_day | |
ORDER BY | |
start_day |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment