Drug Review SQL Analytics
Explored trends in drug effectiveness, satisfaction, and ease of use using SQL Window Functions
Project Overview
Analyzed a cleaned drug review dataset (mydb.drug_clean) to answer analytical questions using SQL Window Functions, Ranking, Aggregations, and Common Table Expressions.
Task
Client request: Analyze drug review data using SQL window functions and aggregations.
Problem Statements & SQL Solutions
1. On Label vs Off Label Satisfaction by Condition
SELECT condition, indication, ROUND(avg_satisfaction, 2)
FROM (
SELECT d.Condition, d.Indication,
AVG(Satisfaction) OVER(PARTITION BY d.condition, d.indication) AS avg_satisfaction,
ROW_NUMBER() OVER(PARTITION BY d.condition, d.indication) r
FROM drug_clean d
) t
WHERE r = 1;
2. Avg Ease of Use & Satisfaction for Expensive Drugs by Type
WITH cte AS (
SELECT *, ROW_NUMBER() OVER(ORDER BY price) AS row_num
FROM drug_clean
)
SELECT Type, ROUND(AVG(EaseOfUse), 2), ROUND(AVG(Satisfaction), 2)
FROM cte
WHERE price > (
SELECT price FROM cte
WHERE row_num = (SELECT ROUND(COUNT(*)/2) FROM cte)
)
GROUP BY Type;
3. Cumulative Distribution of EaseOfUse by Drug Type
SELECT type,
SUM(EaseOfUse) OVER(PARTITION BY type ORDER BY EaseOfUse ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS cumulative_sum,
SUM(EaseOfUse) OVER(PARTITION BY type) AS total_sum,
SUM(EaseOfUse) OVER(PARTITION BY type ORDER BY EaseOfUse ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) /
SUM(EaseOfUse) OVER(PARTITION BY type) AS cumulative_distribution
FROM drug_clean;
4. Median Satisfaction by Condition
WITH cte AS (
SELECT *, ROW_NUMBER() OVER(PARTITION BY Condition ORDER BY Satisfaction ASC) AS row_num
FROM drug_clean
)
SELECT Condition, ROUND(Satisfaction, 2) AS Median_Satisfaction
FROM cte
WHERE row_num = (
SELECT ROUND(COUNT(*)/2) FROM cte c2 WHERE c2.Condition = cte.Condition
)
ORDER BY Median_Satisfaction DESC;
5. Running Average Price by Condition
SELECT condition,
ROUND(AVG(price) OVER(PARTITION BY condition ORDER BY price ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW), 2) AS Running_Average
FROM drug_clean
ORDER BY condition;
6. Percent Change in Reviews for Each Drug
SELECT drug, reviews,
((reviews - LAG(reviews) OVER(PARTITION BY drug ORDER BY reviews DESC)) /
LAG(reviews) OVER(PARTITION BY drug ORDER BY reviews DESC)) * 100 AS percent_change_review
FROM drug_clean;
7. Satisfaction Contribution by Drug Type
SELECT *, ROUND((satisfaction_level / SUM(satisfaction_level) OVER()) * 100, 2) AS percentage_satisfaction
FROM (
SELECT type, SUM(satisfaction) AS satisfaction_level
FROM drug_clean
GROUP BY type
) t
ORDER BY percentage_satisfaction DESC, type;
8. Cumulative Effective Rating by Condition and Form
SELECT condition, drug, form, effective,
SUM(effective) OVER(PARTITION BY condition, form ORDER BY effective ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS cumulative_sum
FROM drug_clean
ORDER BY condition, form, drug;
9. Rank of Avg Ease of Use by Drug Type
SELECT type,
AVG(EaseOfUse) AS avg_ease,
RANK() OVER(ORDER BY AVG(EaseOfUse) DESC) AS rank
FROM drug_clean
GROUP BY type
ORDER BY rank, type DESC;
10. Avg Effectiveness of Top 3 Most Reviewed Drugs per Condition
SELECT *
FROM (
SELECT condition, drug, reviews,
AVG(effective) OVER(PARTITION BY condition, drug ORDER BY reviews DESC) AS avg_eff,
DENSE_RANK() OVER(PARTITION BY condition ORDER BY reviews DESC) AS r
FROM drug_clean
) t
WHERE r < 4
ORDER BY condition, r;