The following advanced-level SQL problems.
LeetCode 1479
Sales by Day of the Week (Hard) [link]
Idea: use dayofweek
to determine the day of the week. Use RIGHT JOIN
to include all item categories. Note that the dayofweek
start from 1 indicating Sunday.
SELECT i.item_category AS Category,
IFNULL(SUM(CASE WHEN dayofweek(o.order_date) = 2 THEN o.quantity ELSE 0 END),0) AS Monday,
IFNULL(SUM(CASE WHEN dayofweek(o.order_date) = 3 THEN o.quantity ELSE 0 END),0) AS Tuesday,
IFNULL(SUM(CASE WHEN dayofweek(o.order_date) = 4 THEN o.quantity ELSE 0 END),0) AS Wednesday,
IFNULL(SUM(CASE WHEN dayofweek(o.order_date) = 5 THEN o.quantity ELSE 0 END),0) AS Thursday,
IFNULL(SUM(CASE WHEN dayofweek(o.order_date) = 6 THEN o.quantity ELSE 0 END),0) AS Friday,
IFNULL(SUM(CASE WHEN dayofweek(o.order_date) = 7 THEN o.quantity ELSE 0 END),0) AS Saturday,
IFNULL(SUM(CASE WHEN dayofweek(o.order_date) = 1 THEN o.quantity ELSE 0 END),0) AS Sunday
FROM Orders o
RIGHT JOIN Items i
ON o.item_id = i.item_id
GROUP BY Category
ORDER BY Category;
LeetCode 1645
Hopper Company Queries II (Hard) [link]
Use recursion to create a list of 12 month
with recursive months as
(
SELECT 1 mon
UNION ALL
SELECT mon+1 mon
FROM months
WHERE mon+1 <= 12
);
This is how we count the number of available drivers. Drivers with d.join_date < 2020
are available for all months in 2020. Drivers with (year(d.join_date)=2020 AND month(d.join_date)<=months.mon)
are available for all months before they joined.
SELECT months.mon, count(d.driver_id) drivers
FROM months
LEFT JOIN drivers d
ON YEAR(d.join_date) < 2020
OR (year(d.join_date)=2020 AND month(d.join_date)<=months.mon)
GROUP BY months.mon;
This is how we count the number of accepted drivers.
SELECT months.mon, count(distinct a.driver_id) drivers
FROM months
LEFT JOIN rides r
ON year(r.requested_at)=2020
AND month(r.requested_at)=months.mon
LEFT JOIN acceptedRides a
ON a.ride_id=r.ride_id
GROUP BY months.mon;
Finally we join the three temporary tables together and calculate the metric, either using CASE WHEN
or IF
to deal with zero denominator
SELECT m.mon month,
# ROUND(case when av.drivers=0 then 0 else 100 * ac.drivers/av.drivers end, 2) as working_percentage
ROUND(IF(av.drivers=0, 0, 100 * ac.drivers/av.drivers),2) as working_percentage
FROM months m
JOIN available av ON m.mon=av.mon
JOIN accepted ac ON m.mon=ac.mon;
Full solution:
with recursive months as
(
SELECT 1 mon
UNION ALL
SELECT mon+1 mon
FROM months
WHERE mon+1 <= 12
)
,available as
(
SELECT months.mon, count(d.driver_id) drivers
FROM months
LEFT JOIN drivers d
ON YEAR(d.join_date) < 2020
OR (year(d.join_date)=2020 AND month(d.join_date)<=months.mon)
GROUP BY months.mon
)
,accepted as
(
SELECT months.mon, count(distinct a.driver_id) drivers
FROM months
LEFT JOIN rides r
ON year(r.requested_at)=2020 AND month(r.requested_at)=months.mon
LEFT JOIN acceptedRides a
ON a.ride_id=r.ride_id
GROUP BY months.mon
)
SELECT m.mon month,
# ROUND(case when av.drivers=0 then 0.0 else 100.0 * ac.drivers/av.drivers end, 2) as working_percentage
ROUND(IF(av.drivers=0, 0, 100 * ac.drivers/av.drivers),2) as working_percentage
FROM months m
JOIN available av ON m.mon=av.mon
JOIN accepted ac ON m.mon=ac.mon;
LeetCode 1651
Hopper Company Queries III (Hard) [link]
with recursive months as(
select 1 mon
union all
select mon+1
from months
where mon+1 <= 12
)
SELECT months.mon month,
ROUND(IFNULL(SUM(a.ride_distance)/3, 0), 2) AS average_ride_distance,
ROUND(IFNULL(SUM(a.ride_duration)/3, 0), 2) AS average_ride_duration
FROM acceptedrides a
LEFT JOIN rides r
ON a.ride_id = r.ride_id
RIGHT JOIN months
ON YEAR(r.requested_at)=2020
AND MONTH(r.requested_at) BETWEEN months.mon AND months.mon+2 # how we count 3-month window
GROUP BY months.mon
ORDER BY months.mon
LIMIT 10; # keep first 10 months
LeetCode 1892
Page Recommendations II (Hard) [link]
Using WHERE NOT EXISTS
to add some restriction.
SELECT f.user1_id AS user_id,
l.page_id,
COUNT(DISTINCT f.user2_id) AS friends_likes
FROM (
SELECT user1_id, user2_id
FROM Friendship
UNION ALL
SELECT user2_id, user1_id
FROM Friendship
) f
LEFT JOIN Likes l
ON f.user2_id = l.user_id
WHERE NOT EXISTS (
SELECT *
FROM likes l1
WHERE f.user1_id = l1.user_id
AND l.page_id = l1.page_id
)
GROUP BY f.user1_id, l.page_id;
LeetCode 1917
Leetcodify Friends Recommendations (Hard) [link]
Difference between NOT EXISTS
and WHERE NOT IN
?
where (x,y) not in (select x,y from table)
is better when inside-table is smaller.
where not exsist(select x,y from table where x=x,y=y)
is better when inside-table is larger.
However, NOT IN
can result in NULL
values, and the operation is smaller.
Using JOIN
.
SELECT DISTINCT l1.user_id, l2.user_id AS recommended_id
FROM Listens l1
JOIN Listens l2
ON l1.song_id = l2.song_id
AND l1.user_id != l2.user_id
AND l1.day = l2.day
AND NOT EXISTS (
SELECT *
FROM (
SELECT user1_id, user2_id
FROM Friendship
UNION ALL
SELECT user2_id, user1_id
FROM Friendship
) t
WHERE l1.user_id = t.user1_id
AND l2.user_id = t.user2_id
)
GROUP BY l1.user_id, l2.user_id, l1.day
HAVING COUNT(DISTINCT l1.song_id) >= 3;
Using WHERE
# Write your MySQL query statement below
SELECT DISTINCT l1.user_id, l2.user_id AS recommended_id
FROM Listens l1, Listens l2
WHERE l1.song_id = l2.song_id
AND l1.user_id != l2.user_id
AND l1.day = l2.day
AND NOT EXISTS (
SELECT *
FROM (
SELECT user1_id, user2_id
FROM Friendship
UNION ALL
SELECT user2_id, user1_id
FROM Friendship
) t
WHERE l1.user_id = t.user1_id
AND l2.user_id = t.user2_id
)
GROUP BY l1.user_id, l2.user_id, l1.day
HAVING COUNT(DISTINCT l1.song_id) >= 3;