The following advanced-level SQL problems are about ‘Recursion, Dependency, Nesting’. Focusing on creating new tables.
LeetCode 1336
Number of Transactions per Visit (Hard) [link]
with Results as (
SELECT transactions_count, COUNT(user_id) AS visits_count
FROM(
SELECT v.user_id, COUNT(t.transaction_date) as transactions_count
FROM Visits v
LEFT JOIN Transactions t
ON v.visit_date = t.transaction_date
AND v.user_id = t.user_id
GROUP BY v.user_id, v.visit_date
) t
GROUP BY transactions_count
)
SELECT *
FROM(
with recursive Numbers(n) as(
SELECT 0
UNION ALL
SELECT n + 1
FROM Numbers
WHERE n < (
SELECT max(transactions_count)
FROM Results
)
)
SELECT n.n AS transactions_count, IFNULL(r.visits_count, 0) AS visits_count
FROM Numbers n
LEFT JOIN Results r
ON n.n = r.transactions_count
) t;
LeetCode 1384
Total Sales Amount by Year (Hard) [link]
with recursive t(n) as (
SELECT 0
UNION ALL
SELECT n+1
FROM t
WHERE n < (
SELECT max(DATEDIFF(period_end, period_start))
FROM Sales
)
)
SELECT
s.product_id,
p.product_name,
DATE_FORMAT(DATE_ADD(s.period_start, INTERVAL t.n DAY), '%Y') AS report_year,
SUM(s.average_daily_sales) AS total_amount
FROM Sales s
JOIN t
ON t.n <= DATEDIFF(s.period_end, s.period_start)
JOIN Product p
ON p.product_id = s.product_id
GROUP BY 1,2,3
ORDER BY 1,3;
LeetCode 1613
Find the Missing IDs (Medium) [link]
with recursive tmp as (
SELECT 1 AS n
UNION ALL
SELECT n+1
FROM tmp
WHERE n <= 100
)
SELECT n AS ids
FROM tmp
WHERE n NOT IN (
SELECT customer_id
FROM Customers
) AND n <= (
SELECT MAX(customer_id)
FROM Customers
);
LeetCode 1635
Hopper Company Queries I (Hard) [link]
with recursive tmp(month) as (
SELECT 1
UNION ALL
SELECT month+1
FROM tmp
WHERE month <= 11
)
SELECT
t1.month, IFNULL(t1.active_drivers,0) AS active_drivers, IFNULL(t2.accepted_rides, 0) AS accepted_rides
FROM (
SELECT tmp.month, COUNT(d.driver_id) AS active_drivers
FROM tmp
LEFT JOIN Drivers d
ON '202000' + tmp.month >= DATE_FORMAT(d.join_date, '%Y%m')
GROUP BY tmp.month
) t1
LEFT JOIN (
SELECT MONTH(r.requested_at) AS month, r.ride_id, COUNT(r.ride_id) AS accepted_rides
FROM Rides r
JOIN AcceptedRides a
ON r.ride_id = a.ride_id
WHERE YEAR(r.requested_at) = '2020'
GROUP BY 1
) t2
ON t1.month = t2.month;
LeetCode 1645
Hopper Company Queries II (Hard) [link]
Use recursion to create a list of 12 month
with recursive months as
(
SELECT 1 mon
UNION ALL
SELECT mon+1 mon
FROM months
WHERE mon+1 <= 12
);
This is how we count the number of available drivers. Drivers with d.join_date < 2020
are available for all months in 2020. Drivers with (year(d.join_date)=2020 AND month(d.join_date)<=months.mon)
are available for all months before they joined.
SELECT months.mon, count(d.driver_id) drivers
FROM months
LEFT JOIN drivers d
ON YEAR(d.join_date) < 2020
OR (year(d.join_date)=2020 AND month(d.join_date)<=months.mon)
GROUP BY months.mon;
This is how we count the number of accepted drivers.
SELECT months.mon, count(distinct a.driver_id) drivers
FROM months
LEFT JOIN rides r
ON year(r.requested_at)=2020
AND month(r.requested_at)=months.mon
LEFT JOIN acceptedRides a
ON a.ride_id=r.ride_id
GROUP BY months.mon;
Finally we join the three temporary tables together and calculate the metric, either using CASE WHEN
or IF
to deal with zero denominator
SELECT m.mon month,
# ROUND(case when av.drivers=0 then 0 else 100 * ac.drivers/av.drivers end, 2) as working_percentage
ROUND(IF(av.drivers=0, 0, 100 * ac.drivers/av.drivers),2) as working_percentage
FROM months m
JOIN available av ON m.mon=av.mon
JOIN accepted ac ON m.mon=ac.mon;
Full solution:
with recursive months as
(
SELECT 1 mon
UNION ALL
SELECT mon+1 mon
FROM months
WHERE mon+1 <= 12
)
,available as
(
SELECT months.mon, count(d.driver_id) drivers
FROM months
LEFT JOIN drivers d
ON YEAR(d.join_date) < 2020
OR (year(d.join_date)=2020 AND month(d.join_date)<=months.mon)
GROUP BY months.mon
)
,accepted as
(
SELECT months.mon, count(distinct a.driver_id) drivers
FROM months
LEFT JOIN rides r
ON year(r.requested_at)=2020 AND month(r.requested_at)=months.mon
LEFT JOIN acceptedRides a
ON a.ride_id=r.ride_id
GROUP BY months.mon
)
SELECT m.mon month,
# ROUND(case when av.drivers=0 then 0.0 else 100.0 * ac.drivers/av.drivers end, 2) as working_percentage
ROUND(IF(av.drivers=0, 0, 100 * ac.drivers/av.drivers),2) as working_percentage
FROM months m
JOIN available av ON m.mon=av.mon
JOIN accepted ac ON m.mon=ac.mon;
LeetCode 1651
Hopper Company Queries III (Hard) [link]
with recursive months as(
select 1 mon
union all
select mon+1
from months
where mon+1 <= 12
)
SELECT months.mon month,
ROUND(IFNULL(SUM(a.ride_distance)/3, 0), 2) AS average_ride_distance,
ROUND(IFNULL(SUM(a.ride_duration)/3, 0), 2) AS average_ride_duration
FROM acceptedrides a
LEFT JOIN rides r
ON a.ride_id = r.ride_id
RIGHT JOIN months
ON YEAR(r.requested_at)=2020
AND MONTH(r.requested_at) BETWEEN months.mon AND months.mon+2 # how we count 3-month window
GROUP BY months.mon
ORDER BY months.mon
LIMIT 10; # keep first 10 months
LeetCode 1767
Find the subtasks that did not execute (Hard) [link]
with recursive tmp(task_id, subtask_id) as (
SELECT task_id, subtasks_count FROM Tasks
UNION ALL
SELECT task_id, subtask_id - 1
FROM tmp
WHERE subtask_id - 1 > 0
)
SELECT tmp.task_id, tmp.subtask_id
FROM tmp
LEFT JOIN Executed e
ON tmp.task_id = e.task_id AND tmp.subtask_id = e.subtask_id
WHERE e.subtask_id IS NULL;
Template of Recursion
https://dev.mysql.com/doc/refman/8.0/en/with.html#common-table-expressions-recursive
WITH RECURSIVE cte (n) AS
(
SELECT 1
UNION ALL
SELECT n + 1 FROM cte WHERE n < 5
)
SELECT * FROM cte;
# +------+
# | n |
# +------+
# | 1 |
# | 2 |
# | 3 |
# | 4 |
# | 5 |
# +------+
WITH RECURSIVE cte AS
(
SELECT 1 AS n, CAST('abc' AS CHAR(20)) AS str
UNION ALL
SELECT n + 1, CONCAT(str, str) FROM cte WHERE n < 3
)
SELECT * FROM cte;
# +------+--------------+
# | n | str |
# +------+--------------+
# | 1 | abc |
# | 2 | abcabc |
# | 3 | abcabcabcabc |
# +------+--------------+
WITH RECURSIVE cte AS
(
SELECT 1 AS n, 1 AS p, -1 AS q
UNION ALL
SELECT n + 1, q * 2, p * 2 FROM cte WHERE n < 5
)
SELECT * FROM cte;
# +------+------+------+
# | n | p | q |
# +------+------+------+
# | 1 | 1 | -1 |
# | 2 | -2 | 2 |
# | 3 | 4 | -4 |
# | 4 | -8 | 8 |
# | 5 | 16 | -16 |
# +------+------+------+