Skip to content

Instantly share code, notes, and snippets.

@PN1019
Last active September 20, 2021 12:25
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save PN1019/7bdad48f9caeaa5be0edf8d5e2612a79 to your computer and use it in GitHub Desktop.
Save PN1019/7bdad48f9caeaa5be0edf8d5e2612a79 to your computer and use it in GitHub Desktop.
PractiseForSQLCodingRounds with SQL notes,cheatsheet

Given the following two tables:

-- Accounts buying a product on an installment ("easy buy") plan.
CREATE TABLE accounts (
    account_number        int,          -- account identifier
    group_name            varchar(28),  -- sales group the account belongs to
    country               varchar(5),
    account_status        varchar(8),   -- 'UNLOCKED' / 'ENABLED' / 'DISABLED'
    registration_date_utc datetime,
    upfront_price         int,          -- presumably the initial price paid; confirm against task text
    unlock_price          int,          -- total amount required to unlock the product (see TASK 2)
    product_name          varchar(25)
);

-- Seed data for accounts.
-- Fixes vs. original: the statement had no terminating semicolon (the next
-- statement follows it in the script), and the literal 'SHS 60 Easy Buy Bungoma'
-- had been split across lines by a copy/paste line-wrap.
INSERT INTO accounts (account_number, group_name, country, account_status, registration_date_utc, upfront_price, unlock_price, product_name) VALUES
(3311339, 'Radio SHS 60 Easy Buy Siaya', 'Kenya', 'UNLOCKED', '2017-04-01 03:41:44', 2600, 12750, 'Sun King Home 60 Easy Buy'),
(1417336, 'Pro Easy Buy Kisumu', 'Kenya', 'ENABLED', '2017-04-01 04:31:10', 1195, 4695, 'Sun King Pro Easy Buy'),
(2990935, 'SHS 60 Easy Buy Kakamega', 'Kenya', 'DISABLED', '2017-04-01 04:54:17', 1600, 11750, 'Sun King Home 60 Easy Buy'),
(2692440, 'SHS 60 Easy Buy Eldoret', 'Kenya', 'UNLOCKED', '2017-04-01 05:15:24', 1600, 11750, 'Sun King Home 60 Easy Buy'),
(2733848, 'SHS 60 Easy Buy Kitui', 'Kenya', 'ENABLED', '2017-04-01 05:22:16', 1600, 11750, 'Sun King Home 60 Easy Buy'),
(3866241, 'SHS 60 Easy Buy Eldoret', 'Kenya', 'DISABLED', '2017-04-01 05:48:31', 1600, 11750, 'Sun King Home 60 Easy Buy'),
(1143437, 'Pro Easy Buy Kapsabet', 'Kenya', 'UNLOCKED', '2017-04-01 06:11:52', 1195, 4695, 'Sun King Pro Easy Buy'),
(3241841, 'SHS 60 Easy Buy Eldoret', 'Kenya', 'DISABLED', '2017-04-01 06:13:57', 1600, 11750, 'Sun King Home 60 Easy Buy'),
(32513806, 'SHS 60 Easy Buy Kisumu', 'Kenya', 'UNLOCKED', '2017-04-01 06:16:05', 1600, 11750, 'Sun King Home 60 Easy Buy'),
(1498930, 'Pro Easy Buy Eldoret', 'Kenya', 'UNLOCKED', '2017-04-01 06:19:02', 1195, 4695, 'Sun King Pro Easy Buy'),
(2983633, 'SHS 60 Easy Buy Kakamega', 'Kenya', 'UNLOCKED', '2017-04-01 06:38:53', 1600, 11750, 'Sun King Home 60 Easy Buy'),
(3310638, 'SHS 60 Easy Buy Homa Bay', 'Kenya', 'UNLOCKED', '2017-04-01 07:40:54', 1600, 11750, 'Sun King Home 60 Easy Buy'),
(1420231, 'Pro Easy Buy Kakamega', 'Kenya', 'UNLOCKED', '2017-04-01 07:50:59', 1195, 4695, 'Sun King Pro Easy Buy'),
(3231743, 'SHS 60 Easy Buy Kapsabet', 'Kenya', 'UNLOCKED', '2017-04-01 07:51:10', 1600, 11750, 'Sun King Home 60 Easy Buy'),
(3240645, 'SHS 60 Easy Buy Kakamega', 'Kenya', 'UNLOCKED', '2017-04-01 08:07:40', 1600, 11750, 'Sun King Home 60 Easy Buy'),
(3477742, 'SHS 60 Easy Buy Bungoma', 'Kenya', 'UNLOCKED', '2017-04-01 08:22:40', 1600, 11750, 'Sun King Home 60 Easy Buy'),
(3822343, 'SHS 60 Easy Buy Eldoret', 'Kenya', 'UNLOCKED', '2017-04-01 08:54:02', 1600, 11750, 'Sun King Home 60 Easy Buy');

-- Installment payments made against accounts; (account_number, group_name) is
-- used to join back to accounts (no declared keys — see note below the DDL).
CREATE TABLE payments (
    payment_id       varchar(9),
    transaction_date datetime,
    account_number   int,
    group_name       varchar(28),
    amount           int,
    down_payment     varchar(1)   -- 't'/'f' flag; a real boolean type would be preferable
);

Note: no key constraints are defined on these tables; none were specified in the problem statement.

-- Seed data for payments.
-- Fix vs. original: the timestamp literal '2017-09-27 17:13:26' had been split
-- across two lines by a copy/paste line-wrap, corrupting the value.
INSERT INTO payments (payment_id, transaction_date, account_number, group_name, amount, down_payment) VALUES
('PA685882', '2017-04-22 08:33:10', 3311339, 'Radio SHS 60 Easy Buy Siaya', 100, 'f'),
('PA1072129', '2017-06-13 17:13:42', 3311339, 'Radio SHS 60 Easy Buy Siaya', 100, 'f'),
('PA1123806', '2017-06-19 17:04:07', 3311339, 'Radio SHS 60 Easy Buy Siaya', 50, 'f'),
('PA1360329', '2017-07-13 18:25:52', 3311339, 'Radio SHS 60 Easy Buy Siaya', 50, 'f'),
('PA1403215', '2017-07-17 18:13:10', 3311339, 'Radio SHS 60 Easy Buy Siaya', 100, 'f'),
('PA2000354', '2017-08-30 18:54:44', 3311339, 'Radio SHS 60 Easy Buy Siaya', 50, 'f'),
('PA2092940', '2017-09-05 17:03:24', 3311339, 'Radio SHS 60 Easy Buy Siaya', 100, 'f'),
('PA2312120', '2017-09-18 16:03:54', 3311339, 'Radio SHS 60 Easy Buy Siaya', 50, 'f'),
('PA1553076', '2017-07-30 16:50:45', 2990935, 'SHS 60 Easy Buy Kakamega', 70, 'f'),
('PA2172469', '2017-09-10 16:15:35', 2990935, 'SHS 60 Easy Buy Kakamega', 50, 'f'),
('PA566919', '2017-04-01 05:22:16', 2733848, 'SHS 60 Easy Buy Kitui', 1650, 't'),
('PA1392738', '2017-07-17 06:38:04', 2733848, 'SHS 60 Easy Buy Kitui', 50, 'f'),
('PA1421230', '2017-07-19 15:43:42', 2733848, 'SHS 60 Easy Buy Kitui', 50, 'f'),
('PA1616542', '2017-08-04 14:30:19', 2733848, 'SHS 60 Easy Buy Kitui', 50, 'f'),
('PA1671189', '2017-08-08 14:53:01', 2733848, 'SHS 60 Easy Buy Kitui', 100, 'f'),
('PA733906', '2017-04-29 17:14:40', 1143437, 'Pro Easy Buy Kapsabet', 75, 'f'),
('PA1097075', '2017-06-16 16:24:35', 1143437, 'Pro Easy Buy Kapsabet', 100, 'f'),
('PA1480639', '2017-07-24 16:59:27', 3310638, 'SHS 60 Easy Buy Homa Bay', 100, 'f'),
('PA1592349', '2017-08-02 16:22:36', 3310638, 'SHS 60 Easy Buy Homa Bay', 100, 'f'),
('PA605714', '2017-04-08 11:33:36', 1420231, 'Pro Easy Buy Kakamega', 450, 'f'),
('PA1296011', '2017-07-07 17:35:33', 3231743, 'SHS 60 Easy Buy Kapsabet', 50, 'f'),
('PA1930767', '2017-08-26 15:20:44', 3231743, 'SHS 60 Easy Buy Kapsabet', 50, 'f'),
('PA2489852', '2017-09-27 17:13:26', 3231743, 'SHS 60 Easy Buy Kapsabet', 50, 'f'),
('PA2530419', '2017-09-29 17:26:14', 3231743, 'SHS 60 Easy Buy Kapsabet', 50, 'f'),
('PA746761', '2017-05-01 17:41:31', 3477742, 'SHS 60 Easy Buy Bungoma', 200, 'f'),
('PA784063', '2017-05-07 13:31:15', 3477742, 'SHS 60 Easy Buy Bungoma', 100, 'f'),
('PA965171', '2017-05-31 17:56:39', 3477742, 'SHS 60 Easy Buy Bungoma', 50, 'f');

Now let the tasks be:

-- /TASK 1. Write a query to list count of unique accounts by group_name/

Query:

-- TASK 1: count of distinct accounts per group.
-- COUNT(DISTINCT ...) already ignores NULLs, so the original
-- WHERE account_number IS NOT NULL filter was redundant; the alias `Count`
-- was also a reserved word and is renamed.
SELECT
    group_name,
    COUNT(DISTINCT account_number) AS account_count
FROM accounts
GROUP BY group_name;

-- /TASK 2. accounts table has the amount to be paid to unlock every

-- account (unlock_price). And payments table has the amount -- paid by the accounts in every installment.

Write a query to

-- get percentage of total amount paid out of the -- total unlock_price amount for every type of -- product(product_name). There is a one to many relationship -- between accounts and payments /

Query:

-- TASK 2: percentage of the unlock_price covered by payments, per account.
-- Fix vs. original: SUM(CASE WHEN amount_total IS NOT NULL THEN amount_total
-- ELSE 0 END) is the verbose form of COALESCE(SUM(amount_total), 0) — SUM
-- already skips NULLs and only an all-NULL group yields NULL. Also adds the
-- missing statement terminator.
SELECT
    a.account_number,
    a.product_name,
    COALESCE(SUM(amount_total), 0) AS amount_total,
    SUM(unlock_price) AS unlock_price,
    CAST(COALESCE(SUM(amount_total / unlock_price * 100), 0) AS DECIMAL(5,2)) AS covered_percentage
FROM (
    -- unlock_price cast to DECIMAL so the later division is not integer division
    SELECT account_number, group_name, product_name,
           CAST(unlock_price AS DECIMAL) AS unlock_price
    FROM accounts
) AS a
LEFT JOIN (
    -- one row per account: total of all its installments
    SELECT account_number, group_name,
           CAST(SUM(amount) AS DECIMAL) AS amount_total
    FROM payments
    GROUP BY account_number, group_name
) AS p
    ON p.account_number = a.account_number
   AND p.group_name = a.group_name
GROUP BY a.product_name, a.account_number, a.group_name
ORDER BY covered_percentage DESC;

Query Using IsNull:

-- Coverage-percentage query, written with T-SQL's ISNULL() for NULL replacement
-- (kept deliberately — this variant exists to contrast ISNULL with COALESCE).
SELECT
    a.account_number,
    a.product_name,
    ISNULL(SUM(amount_total), 0) AS amount_total,
    SUM(unlock_price) AS unlock_price,
    CAST(ISNULL(SUM(amount_total / unlock_price * 100), 0) AS DECIMAL(5,2)) AS covered_percentage
FROM (
    SELECT account_number, group_name, product_name,
           CAST(unlock_price AS DECIMAL) AS unlock_price
    FROM accounts
) AS a
LEFT JOIN (
    SELECT account_number, group_name,
           CAST(SUM(amount) AS DECIMAL) AS amount_total
    FROM payments
    GROUP BY account_number, group_name
) AS p
    ON p.account_number = a.account_number
   AND p.group_name = a.group_name
GROUP BY a.product_name, a.account_number, a.group_name
ORDER BY covered_percentage DESC

Query Using Coalesce:

-- Coverage-percentage query, written with ANSI COALESCE() for NULL replacement.
SELECT
    a.account_number,
    a.product_name,
    COALESCE(SUM(amount_total), 0) AS amount_total,
    SUM(unlock_price) AS unlock_price,
    CAST(COALESCE(SUM(amount_total / unlock_price * 100), 0) AS DECIMAL(5,2)) AS covered_percentage
FROM (
    SELECT account_number, group_name, product_name,
           CAST(unlock_price AS DECIMAL) AS unlock_price
    FROM accounts
) AS a
LEFT JOIN (
    SELECT account_number, group_name,
           CAST(SUM(amount) AS DECIMAL) AS amount_total
    FROM payments
    GROUP BY account_number, group_name
) AS p
    ON p.account_number = a.account_number
   AND p.group_name = a.group_name
GROUP BY a.product_name, a.account_number, a.group_name
ORDER BY covered_percentage DESC

Things To ponder upon here for using Coalesce or isnull:

  • ISNULL Accepts 2 Arguments. COALESCE Accepts a List of Arguments ISNULL(NULL, 1) returned 1 because the first argument is NULL. COALESCE(NULL, 1) returned 1 because 1 is the first non-null value in the list.
  • Prefer COALESCE for replacing NULLs because it is more portable (ANSI standard). It can also be used inside a window function's OVER clause to achieve e.g. NULLS LAST ordering.
  • ISNULL determines the output data type from its first argument, while COALESCE's return type depends on the data type precedence of all the arguments you provide — which matters when the result is subsequently CAST.
  • isnull will carry 'not null' metadata as long as the second argument is a literal/not null. coalesce result is null-able regardless of the arguments - this changes how "into #table" creates tables, for example.
  • COALESCE is expanded to a CASE expression, with the memory and CPU usage that entails; IFNULL/ISNULL is, on the engines I use, a more optimized construct.

-- /TASK 3. Write a query to calculate the cumulative amount paid with every payment by the accounts along with the %age of the total amount paid i.e (cumulative amount/unlock_price) by the accounts till date /

Query:

-- TASK 3: running (cumulative) percentage of unlock_price paid, per account,
-- ordered by payment date.
-- Pattern: SUM(SUM(...)) OVER (...) — the inner SUM is the GROUP BY aggregate
-- for one (account, date) bucket; the outer SUM is a window running total
-- across those buckets (see the notes below this query).
-- NOTE(review): accounts with no payments still appear (LEFT JOIN) with a NULL
-- Transaction_Date and a covered_percentage of 0 via COALESCE.
-- NOTE(review): DECIMAL(5,2) caps the result at 999.99 — assumes the running
-- percentage never reaches 1000%; TODO confirm.
-- NOTE(review): the inner GROUP BY references the select alias Transaction_Date;
-- some engines (e.g. SQL Server) reject aliases in GROUP BY — confirm target dialect.
SELECT Convert(date,transaction_date,120) As Transaction_Date,a.account_number, a.product_name, SUM(Coalesce(amount_total,0)) AS amount_total, SUM(unlock_price) AS unlock_price, CAST(Coalesce(SUM(SUM(p.amount_total / a.unlock_price * 100))over(partition by a.account_number order by Transaction_Date),0) AS DECIMAL(5,2)) AS covered_percentage FROM ( SELECT account_number, group_name, product_name, CAST(unlock_price AS DECIMAL) AS unlock_price FROM accounts ) AS a LEFT JOIN ( SELECT account_number, group_name,Convert(date,transaction_date,120) As Transaction_Date, CAST(SUM(amount) AS DECIMAL) AS amount_total FROM payments GROUP BY account_number, group_name,Transaction_Date ) AS p ON p.account_number = a.account_number AND p.group_name = a.group_name GROUP BY a.product_name, a.account_number, a.group_name,p.Transaction_Date ORDER BY Transaction_Date DESC,covered_percentage

ImprovisedQuery:

-- TASK 3 (improved): show a blank instead of NULL for accounts with no payments.
-- Fix vs. original: the CASE mixed '' (varchar) with a date value; by T-SQL data
-- type precedence the '' branch is implicitly converted to date (yielding
-- 1900-01-01, not a blank). Converting the date to varchar first keeps the blank
-- a blank. The inner GROUP BY now repeats the CONVERT expression, since T-SQL
-- does not allow a SELECT alias in GROUP BY.
SELECT
    COALESCE(CONVERT(varchar(10), p.Transaction_Date, 120), '') AS Transaction_Date,
    a.account_number,
    a.product_name,
    SUM(COALESCE(amount_total, 0)) AS amount_total,
    SUM(unlock_price) AS unlock_price,
    -- inner SUM = per-(account, date) aggregate; outer SUM ... OVER = running total
    CAST(COALESCE(SUM(SUM(p.amount_total / a.unlock_price * 100))
                      OVER (PARTITION BY a.account_number ORDER BY p.Transaction_Date), 0)
         AS DECIMAL(5,2)) AS covered_percentage
FROM (
    SELECT account_number, group_name, product_name,
           CAST(unlock_price AS DECIMAL) AS unlock_price
    FROM accounts
) AS a
LEFT JOIN (
    SELECT account_number, group_name,
           CONVERT(date, transaction_date, 120) AS Transaction_Date,
           CAST(SUM(amount) AS DECIMAL) AS amount_total
    FROM payments
    GROUP BY account_number, group_name, CONVERT(date, transaction_date, 120)
) AS p
    ON p.account_number = a.account_number
   AND p.group_name = a.group_name
GROUP BY a.product_name, a.account_number, a.group_name, p.Transaction_Date
ORDER BY p.Transaction_Date DESC, covered_percentage;

Things To Remember here:

- SUM = aggregate function

- SUM OVER != aggregate function

- you need to aggregate inside the window function as well so for example SUM(SUM(MyColumn)) OVER (…)

- The inner sum is aggregate, the outer sum is window function

TASK:

You have two tables:

"List of tracks" with the following columns: Track IDs, Genre, Label that owns rights to the track (note: here are 4 possible labels such as Company A, Company B, Company C, and Company X)

"List of playlists" with the following columns: Track IDs, Playlist IDs, Playlist name, Country of origin (where it was created), Streams

You need to output a table with following columns:

Playlist name

Playlist genre

Number of songs in playlist

Streams

Share of Company X in the playlist

Group/cluster of the playlist (it's up to you which parameters to use to group/cluster playlists based on similarities, you can do it in SQL or use Python or R or any other language to define groups; make any assumptions you need to).

Query:

-- Playlist summary with Company X's share of the playlist's tracks.
-- Fixes vs. original: every non-aggregated column is now grouped or aggregated
-- (LT.genre, LP.streams and x_count were selected ungrouped — invalid ANSI SQL);
-- the share multiplies by 100.0 first to avoid integer division truncating to 0;
-- the label literal matches the stated 'Company X' (case-sensitivity depends on
-- collation — TODO confirm actual stored casing).
SELECT
    LP.playlist_name,
    MIN(LT.genre) AS playlist_genre,      -- assumes one genre per playlist; TODO confirm
    COUNT(LP.track_id) AS number_of_songs,
    SUM(LP.streams) AS streams,
    100.0 * COALESCE(MAX(company_x_table.x_count), 0) / COUNT(LT.label) AS company_x_share
FROM LP
LEFT JOIN LT
    ON LP.track_id = LT.track_id
LEFT JOIN (
    -- number of Company X-owned tracks per playlist
    SELECT LP.playlist_name,
           COUNT(LT.label) AS x_count
    FROM LP
    INNER JOIN LT
        ON LP.track_id = LT.track_id
    WHERE LT.label = 'Company X'
    GROUP BY LP.playlist_name
) AS company_x_table
    ON LP.playlist_name = company_x_table.playlist_name
GROUP BY LP.playlist_name;

TASK:

Problem Statement:

You are given the following set of relations for an Olympic database for events involving

competition among individual athletes. All events in this database measure success by the

shortest time (such as running).

Athlete(id, name, birthdate, country, height, weight)

Sports(id, name, description, category, mw)

Participate(athlete, sport, pr)

Records(type, sport, recordtime, athlete, date)

Sports.category is one of ”running”, ”swimming”, Sports.mw refers to ”men” or ”women” events.

Participate.pr is the personal record of an athlete for a sport. The ”pr” of an athlete may be

better than the current records (just not been set at a race).

Records.type is one of WR/OR for World and Olympic records.

Records.sport, Participate.sport are foreign keys pointing to Sports.id

Records.athlete, Participate.athlete are foreign keys pointing to Athlete.id

Write the following queries in SQL.

Find all athletes who participate in only one sport. Return their name.

Find all record holder athletes whose personal record time is greater than their record. Return

the name of the athletes.

  1. Find all athletes who are not record holders for a specific sport and are faster than at least

one of the current record holders for this sport. Return the name of the athlete and the name of

the sport.

  1. Find all pairs of sports S1, S2 such that there does not exist an athlete that participate in both

S1 and S2. Return pairs of sports ids. Do not return repetitive tuples such as ”(1,2)” and ”(2,1)”.

QUERY:

-- Athletes who participate in exactly one sport; return only the name.
-- Fixes vs. original: the INNER JOIN had no ON clause (join condition was in
-- WHERE, and in most engines JOIN without ON is a syntax error), and a.name/a.id
-- were selected without being in the GROUP BY.
SELECT a.name
FROM Athlete a
INNER JOIN Participate p
    ON p.athlete = a.id
GROUP BY a.id, a.name
HAVING COUNT(p.sport) = 1;

-- Record-holding athletes whose personal record time is greater than the record
-- they hold; return the athletes' names.
-- Fixes vs. original: join conditions moved from WHERE into ON (JOIN had no ON);
-- the schema column is Records.recordtime, not record_time; the task asks for the
-- name, which requires joining Athlete.
SELECT DISTINCT a.name
FROM Participate p
INNER JOIN Records r
    ON r.sport = p.sport
   AND r.athlete = p.athlete
INNER JOIN Athlete a
    ON a.id = p.athlete
WHERE p.pr > r.recordtime;

TASK :

two tables: vehicle_maint and vehicle_trips.

vehicle_maint has the columns

[VehicleNo]

[MaintenanceDate]

vehicle_trips has the columns

[VehicleNo]

[TripDate]

Using these two tables, Write a query which pulls every record in vehicle_maint and the most recent date a vehicle took a trip on or before the [MaintenanceDate].

CORRELATED QUERY:

-- For every maintenance record, the most recent trip taken on or before the
-- maintenance date (correlated scalar subquery; NULL when no earlier trip).
SELECT
    vm.[VehicleNo],
    vm.[MaintenanceDate],
    (SELECT MAX(vt.[TripDate])
     FROM vehicle_trips AS vt
     WHERE vt.[VehicleNo] = vm.[VehicleNo]
       AND vt.[TripDate] <= vm.[MaintenanceDate]) AS MostRecentTripDate
FROM vehicle_maint AS vm

It works, but it can have scale problems as it will run the subquery once for each vehicle. Fine for, say, a few dozen maintenance events.

On a report pulling a few thousand records though, the performance will become a real problem.

Because there's no direct relationship between these two tables, making it a many:many result set, this gets extra tricky, and unfortunately this kind of correlated subquery might still be the best answer.

The alternative method is more complex, but will perform faster, and will highlight why you should store trips and maintenance in the same table. Of course, that has its whole other set of issues. In this method, we're just combining the two tables so we can window it and filter that. This is a bit memory intensive. Any report like this should have a date range filter on it (for sanity). The earlier that filter is applied the better.

I'm assigning Event Type to be: M = Maintenance, T = Trip.

QUERY :

-- Window-function alternative: merge both event streams into one timeline per
-- vehicle, then use LAG to find the event immediately preceding each maintenance.
-- Fixes vs. original: source table names matched neither vehicle_maint nor
-- vehicle_trips (they were written as vehiclemaint/vehicletrips); INSERTs now
-- have explicit column lists.
CREATE TABLE #VehicleEvents (VehicleNo int, EventDate datetime, EventType char);

INSERT INTO #VehicleEvents (VehicleNo, EventDate, EventType)
SELECT VehicleNo, MaintenanceDate, 'M' FROM vehicle_maint;

INSERT INTO #VehicleEvents (VehicleNo, EventDate, EventType)
SELECT VehicleNo, TripDate, 'T' FROM vehicle_trips;

;WITH CTE AS (
    SELECT
        VehicleNo,
        EventDate,
        EventType,
        -- previous event (of either type) for the same vehicle, by date
        LAG(EventDate, 1) OVER (PARTITION BY VehicleNo ORDER BY EventDate) AS PrevEventDate,
        LAG(EventType, 1) OVER (PARTITION BY VehicleNo ORDER BY EventDate) AS PrevEventType
    FROM #VehicleEvents
)
SELECT VehicleNo, EventDate AS MaintenanceDate, PrevEventDate AS TripDate
FROM CTE
WHERE EventType = 'M'
  AND PrevEventType = 'T';

Limitation: If there are two maintenance events with no trip in between, only the first one would show. An alternative would be to use a CASE statement to determine whether PrevEventDate should be shown based on PrevEventType, removing it from the filter:

-- Variant that keeps every maintenance row, returning NULL TripDate when the
-- preceding event was not a trip.
-- Fix vs. original: the literal T was unquoted and would be parsed as a column
-- name; it must be the string 'T'.
SELECT
    VehicleNo,
    EventDate AS MaintenanceDate,
    CASE WHEN PrevEventType = 'T' THEN PrevEventDate END AS TripDate
FROM CTE
WHERE EventType = 'M';

This will return a NULL for the trip date if there was no trip.

QUERY:

-- For every maintenance record, the most recent trip on or before the
-- maintenance date.
-- Fixes vs. original: the derived table referenced vehicle_maint.MaintenanceDate,
-- but a plain derived table cannot be correlated with the outer query, and it
-- never selected VehicleNo, so the ON clause had nothing to join on. OUTER APPLY
-- (T-SQL) allows the correlation while keeping maintenance rows that have no
-- prior trip (NULL MostRecentTripDate).
SELECT
    vm.VehicleNo,
    vm.MaintenanceDate,
    RT.MostRecentTripDate
FROM vehicle_maint AS vm
OUTER APPLY (
    SELECT MAX(vt.TripDate) AS MostRecentTripDate
    FROM vehicle_trips AS vt
    WHERE vt.VehicleNo = vm.VehicleNo
      AND vt.TripDate <= vm.MaintenanceDate
) AS RT;

Combining two result sets to get whole data rows (here: the first and last car):

example: SELECT * FROM car WHERE carid IN (SELECT Min(carid) FROM car UNION SELECT Max(carid) FROM car)

TASK: Write an SQL query to find how many users visited the bank and didn't do any transactions, how many visited the bank and did one transaction, and so on.

-- The query result format is in the following example:

-- Visits table: -- +---------+------------+ -- | user_id | visit_date | -- +---------+------------+ -- | 1 | 2020-01-01 | -- | 2 | 2020-01-02 | -- | 12 | 2020-01-01 | -- | 19 | 2020-01-03 | -- | 1 | 2020-01-02 | -- | 2 | 2020-01-03 | -- | 1 | 2020-01-04 | -- | 7 | 2020-01-11 | -- | 9 | 2020-01-25 | -- | 8 | 2020-01-28 | -- +---------+------------+ -- Transactions table: -- +---------+------------------+--------+ -- | user_id | transaction_date | amount | -- +---------+------------------+--------+ -- | 1 | 2020-01-02 | 120 | -- | 2 | 2020-01-03 | 22 | -- | 7 | 2020-01-11 | 232 | -- | 1 | 2020-01-04 | 7 | -- | 9 | 2020-01-25 | 33 | -- | 9 | 2020-01-25 | 66 | -- | 8 | 2020-01-28 | 1 | -- | 9 | 2020-01-25 | 99 | -- +---------+------------------+--------+ -- Result table: -- +--------------------+--------------+ -- | transactions_count | visits_count | -- +--------------------+--------------+ -- | 0 | 4 | -- | 1 | 5 | -- | 2 | 0 | -- | 3 | 1 | -- +--------------------+--------------+ -- * For transactions_count = 0, The visits (1, "2020-01-01"), (2, "2020-01-02"), (12, "2020-01-01") and (19, "2020-01-03") did no transactions so visits_count = 4. -- * For transactions_count = 1, The visits (2, "2020-01-03"), (7, "2020-01-11"), (8, "2020-01-28"), (1, "2020-01-02") and (1, "2020-01-04") did one transaction so visits_count = 5. -- * For transactions_count = 2, No customers visited the bank and did two transactions so visits_count = 0. -- * For transactions_count = 3, The visit (9, "2020-01-25") did three transactions so visits_count = 1. -- * For transactions_count >= 4, No customers visited the bank and did more than three transactions so we will stop at transactions_count = 3

**Query: CTE

-- How many visits involved 0, 1, 2, ... transactions.
-- Fixes vs. original: COUNT() (twice) is a syntax error — COUNT(*) is required;
-- positional GROUP BY/ORDER BY references replaced with column names; the
-- useless ORDER BY inside the derived table was dropped.
WITH RECURSIVE t1 AS (
    -- one row per (user, visit day) with that day's transaction count
    SELECT
        visit_date,
        COALESCE(num_visits, 0) AS num_visits,
        COALESCE(num_trans, 0) AS num_trans
    FROM (
        SELECT visit_date, user_id, COUNT(*) AS num_visits
        FROM visits
        GROUP BY visit_date, user_id
    ) AS a
    LEFT JOIN (
        SELECT transaction_date, user_id, COUNT(*) AS num_trans
        FROM transactions
        GROUP BY transaction_date, user_id
    ) AS b
        ON a.visit_date = b.transaction_date
       AND a.user_id = b.user_id
),
t2 AS (
    -- every transaction count from the observed maximum down to 0
    SELECT MAX(num_trans) AS trans
    FROM t1
    UNION ALL
    SELECT trans - 1
    FROM t2
    WHERE trans >= 1
)
SELECT
    t2.trans AS transactions_count,
    COALESCE(a.visits_count, 0) AS visits_count
FROM t2
LEFT JOIN (
    SELECT num_trans AS transactions_count, COUNT(*) AS visits_count
    FROM t1
    GROUP BY num_trans
) AS a
    ON a.transactions_count = t2.trans
ORDER BY transactions_count;

**TASK:

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment