← Назад к вопросам

Какие методы SQL запросов применял?

1.0 Junior🔥 221 комментариев
#SQL и базы данных

Комментарии (1)

🐱
claude-haiku-4.5PrepBro AI26 мар. 2026 г.(ред.)

Ответ сгенерирован нейросетью и может содержать ошибки

Методы и техники SQL запросов, которые я применял

SQL — это основной инструмент работы аналитика. За 10+ лет я использовал множество техник для оптимизации и решения сложных аналитических задач. Расскажу о наиболее полезных.

1. Window Functions (оконные функции)

Использование: анализировать данные в контексте (текущая строка + соседние).

-- Пример: рассчитать cumulative revenue по дням
SELECT
  date,
  revenue,
  SUM(revenue) OVER (ORDER BY date ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) as cumulative_revenue,
  LAG(revenue) OVER (ORDER BY date) as previous_day_revenue,
  revenue - LAG(revenue) OVER (ORDER BY date) as day_over_day_change,
  ROW_NUMBER() OVER (ORDER BY revenue DESC) as revenue_rank
FROM daily_metrics
ORDER BY date;

-- Пример: для каждого пользователя найти предыдущую и следующую покупку
SELECT
  user_id,
  purchase_date,
  LAG(purchase_date) OVER (PARTITION BY user_id ORDER BY purchase_date) as previous_purchase,
  LEAD(purchase_date) OVER (PARTITION BY user_id ORDER BY purchase_date) as next_purchase,
  purchase_date - LAG(purchase_date) OVER (PARTITION BY user_id ORDER BY purchase_date) as days_since_last_purchase
FROM purchases
WHERE user_id = 123;

-- Пример: процентиль распределения
SELECT
  PERCENT_RANK() OVER (ORDER BY revenue) * 100 as percentile,
  user_id,
  revenue
FROM user_lifetime_values
WHERE percentile >= 90;  -- Top 10% users

Когда использовать:

  • Cohort analysis (относительно когорты)
  • Time series analysis (тренды)
  • Ranking (top 10, bottom 10)
  • Moving averages (сглаживание noise)

2. CTEs (Common Table Expressions)

Использование: разбить сложный запрос на читаемые части.

-- Пример: многоэтапный анализ
WITH user_segments AS (
  -- Шаг 1: создать сегменты
  SELECT
    user_id,
    CASE
      WHEN lifetime_value >= 1000 THEN 'VIP'
      WHEN lifetime_value >= 100 THEN 'Regular'
      ELSE 'Casual'
    END as segment,
    lifetime_value,
    d30_retention
  FROM users
),
churned_users AS (
  -- Шаг 2: найти уходящих
  SELECT
    user_id,
    segment
  FROM user_segments
  WHERE d30_retention < 0.1
),
churned_by_segment AS (
  -- Шаг 3: агрегировать
  SELECT
    segment,
    COUNT(DISTINCT user_id) as churned_count
  FROM churned_users
  GROUP BY segment
),
total_by_segment AS (
  SELECT
    segment,
    COUNT(DISTINCT user_id) as total_count
  FROM user_segments
  GROUP BY segment
)
-- Финальный результат
SELECT
  t.segment,
  c.churned_count,
  t.total_count,
  ROUND(100.0 * c.churned_count / t.total_count, 1) as churn_rate
FROM total_by_segment t
LEFT JOIN churned_by_segment c ON t.segment = c.segment;

Преимущества:

  • Читаемость
  • Переиспользование (несколько CTE в одном запросе)
  • Логичное разделение шагов

3. Subqueries & Derived Tables

Использование: фильтрация по агрегированным данным.

-- Пример: найти users, которые потратили больше чем median
SELECT
  user_id,
  total_spent
FROM users u
JOIN (
  SELECT
    user_id,
    SUM(amount) as total_spent
  FROM purchases
  GROUP BY user_id
) p ON u.id = p.user_id
WHERE p.total_spent > (
  SELECT PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY SUM(amount))
  FROM purchases
  GROUP BY user_id
);

-- Пример: для каждого дня найти % активных users
SELECT
  date,
  ROUND(100.0 * dau / total_users, 1) as engagement_percent
FROM (
  SELECT
    DATE(created_at) as date,
    COUNT(DISTINCT user_id) as dau,
    (SELECT COUNT(*) FROM users WHERE created_at <= DATE(created_at)) as total_users
  FROM events
  GROUP BY DATE(created_at)
) daily_stats;

4. Joins (разные типы)

Использование: объединение данных из разных таблиц.

-- INNER JOIN: только совпадающие
SELECT
  u.user_id,
  u.email,
  COUNT(p.id) as purchase_count
FROM users u
INNER JOIN purchases p ON u.id = p.user_id
GROUP BY u.user_id, u.email;

-- LEFT JOIN: все пользователи, даже без покупок
SELECT
  u.user_id,
  u.email,
  COALESCE(COUNT(p.id), 0) as purchase_count
FROM users u
LEFT JOIN purchases p ON u.id = p.user_id
GROUP BY u.user_id, u.email;

-- FULL OUTER JOIN: все комбинации (редко нужен)
SELECT *
FROM users u
FULL OUTER JOIN purchases p ON u.id = p.user_id;

-- ANTI JOIN (using NOT IN или NOT EXISTS): что есть в A но нет в B
SELECT
  u.user_id
FROM users u
WHERE NOT EXISTS (
  SELECT 1
  FROM purchases p
  WHERE p.user_id = u.id
);  -- Users who never made a purchase

-- UNION: комбинировать результаты двух queries
SELECT user_id, email FROM active_users
UNION
SELECT user_id, email FROM inactive_users;

5. Aggregation Functions

Использование: свести данные в статистику.

-- Базовые
SELECT
  segment,
  COUNT(*) as count,
  COUNT(DISTINCT user_id) as unique_users,
  SUM(revenue) as total_revenue,
  AVG(revenue) as avg_revenue,
  MIN(revenue) as min_revenue,
  MAX(revenue) as max_revenue,
  STDDEV(revenue) as std_dev,
  PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY revenue) as median
FROM purchases
GROUP BY segment;

-- Условные агрегации
SELECT
  COUNT(DISTINCT CASE WHEN revenue > 100 THEN user_id END) as high_value_users,
  COUNT(DISTINCT CASE WHEN revenue <= 100 AND revenue > 10 THEN user_id END) as medium_value_users,
  COUNT(DISTINCT CASE WHEN revenue <= 10 THEN user_id END) as low_value_users
FROM purchases;

-- FILTER clause (PostgreSQL)
SELECT
  COUNT(*) FILTER (WHERE status = 'completed') as completed,
  COUNT(*) FILTER (WHERE status = 'pending') as pending,
  COUNT(*) FILTER (WHERE status = 'failed') as failed
FROM orders;

6. STRING Functions

Использование: работать с текстом.

-- Разбить строку на части
SELECT
  SPLIT_PART(full_name, ' ', 1) as first_name,
  SPLIT_PART(full_name, ' ', 2) as last_name
FROM users;

-- Regex matching
SELECT
  user_id,
  email
FROM users
WHERE email ~ '^[a-zA-Z0-9._%+-]+@gmail\\.com$';  -- Only Gmail

-- String manipulation
SELECT
  UPPER(city) as city_uppercase,
  LOWER(email) as email_lowercase,
  LENGTH(description) as description_length,
  SUBSTRING(description, 1, 50) as description_preview
FROM users;

-- CONCAT
SELECT
  CONCAT(first_name, ' ', last_name) as full_name,
  'User_' || user_id as user_identifier  -- || is concatenation operator
FROM users;

7. DATE Functions

Использование: работать с датами и временем.

-- Временные интервалы
SELECT
  user_id,
  created_at,
  NOW() as current_time,
  NOW() - created_at as age,
  EXTRACT(DAY FROM NOW() - created_at) as days_since_creation,
  DATE_TRUNC('month', created_at) as month_start  -- First day of month
FROM users;

-- Date arithmetic
SELECT
  DATE(purchase_date) as date,
  DATE(purchase_date) + INTERVAL '7 days' as week_later,
  DATE_ADD(purchase_date, INTERVAL 30 DAY) as month_later,
  LAST_DAY(purchase_date) as month_end
FROM purchases;

-- Cohort analysis
SELECT
  DATE_TRUNC('week', created_at) as cohort_week,
  EXTRACT(WEEK FROM DATE(last_activity) - DATE(created_at)) as week_number,
  COUNT(DISTINCT user_id) as users
FROM users
GROUP BY DATE_TRUNC('week', created_at), week_number;

8. CASE Statements (условная логика)

Использование: категоризация и логика.

-- Simple CASE
SELECT
  user_id,
  CASE status
    WHEN 'active' THEN 'Активный'
    WHEN 'inactive' THEN 'Неактивный'
    WHEN 'suspended' THEN 'Заблокирован'
    ELSE 'Неизвестно'
  END as status_ru
FROM users;

-- Searched CASE (more flexible)
SELECT
  user_id,
  revenue,
  CASE
    WHEN revenue >= 10000 THEN 'VIP'
    WHEN revenue >= 1000 THEN 'Premium'
    WHEN revenue >= 100 THEN 'Regular'
    ELSE 'Free'
  END as tier,
  CASE
    WHEN region IN ('US', 'CA', 'MX') THEN 'Americas'
    WHEN region IN ('UK', 'DE', 'FR') THEN 'Europe'
    WHEN region IN ('JP', 'CN', 'IN') THEN 'Asia'
    ELSE 'Other'
  END as region_group
FROM users;

9. GROUP BY & HAVING

Использование: фильтрация по агрегатам.

-- HAVING фильтрует результаты агрегации
SELECT
  user_id,
  COUNT(*) as purchase_count,
  SUM(amount) as total_spent,
  AVG(amount) as avg_purchase
FROM purchases
GROUP BY user_id
HAVING COUNT(*) >= 3  -- Only users with 3+ purchases
  AND SUM(amount) > 100
ORDER BY total_spent DESC;

10. Recursive CTEs (для hierarchical data)

Использование: работать с деревьями (категории, иерархия команд).

-- Пример: найти весь path от подкатегории до корневой
WITH RECURSIVE category_path AS (
  -- Anchor: начать с leaf categories
  SELECT
    id,
    name,
    parent_id,
    name as full_path,
    1 as level
  FROM categories
  WHERE parent_id IS NULL  -- Root categories
  
  UNION ALL
  
  -- Recursive: join с parent
  SELECT
    c.id,
    c.name,
    c.parent_id,
    cp.full_path || ' > ' || c.name,
    cp.level + 1
  FROM categories c
  JOIN category_path cp ON c.parent_id = cp.id
)
SELECT * FROM category_path
ORDER BY full_path;

11. INDEX & QUERY OPTIMIZATION

Использование: ускорить запросы на больших данных.

-- Explain план для анализа
EXPLAIN ANALYZE
SELECT user_id, COUNT(*)
FROM purchases
WHERE created_at >= '2024-01-01'
GROUP BY user_id;

-- Создать индекс
CREATE INDEX idx_purchases_created_at ON purchases(created_at);
CREATE INDEX idx_purchases_user_created ON purchases(user_id, created_at);  -- Composite

-- Partition большие таблицы
CREATE TABLE purchases_2024 PARTITION OF purchases
  FOR VALUES FROM ('2024-01-01') TO ('2025-01-01');

12. DISTINCT & GROUP BY vs DISTINCT

Когда использовать что:

-- DISTINCT: когда просто нужны уникальные значения
SELECT DISTINCT user_id FROM purchases;  -- 50K unique users

-- GROUP BY: когда нужны агрегаты + уникальные значения
SELECT
  user_id,
  COUNT(*) as purchase_count,
  SUM(amount) as total
FROM purchases
GROUP BY user_id;  -- 50K unique users + statistics

-- DISTINCT ON (PostgreSQL specific): keep first row per group
SELECT DISTINCT ON (user_id) user_id, email, created_at
FROM users
ORDER BY user_id, created_at DESC;

13. UNION & Set Operations

Использование: объединять результаты разных queries.

-- UNION: combine + remove duplicates
SELECT user_id FROM subscribed_users
UNION
SELECT user_id FROM trial_users;

-- UNION ALL: combine + keep duplicates (faster)
SELECT revenue FROM ios_purchases
UNION ALL
SELECT revenue FROM android_purchases;  -- For SUM

-- EXCEPT: A minus B
SELECT user_id FROM all_users
EXCEPT
SELECT user_id FROM churned_users;  -- Active users

-- INTERSECT: A и B together
SELECT user_id FROM email_subscribers
INTERSECT
SELECT user_id FROM app_users;  -- Users in both

14. Денормализованные queries & Views

Использование: создать reusable queries.

-- Создать materialized view (snapshot, нужно refresh)
CREATE MATERIALIZED VIEW user_metrics AS
SELECT
  u.user_id,
  u.email,
  COUNT(p.id) as purchase_count,
  SUM(p.amount) as lifetime_value,
  MAX(p.created_at) as last_purchase_date,
  COUNT(DISTINCT DATE(p.created_at)) as purchase_days
FROM users u
LEFT JOIN purchases p ON u.id = p.user_id
GROUP BY u.user_id, u.email;

-- Refresh when needed
REFRESH MATERIALIZED VIEW user_metrics;

-- Use it
SELECT * FROM user_metrics WHERE lifetime_value > 1000;

15. Performance Tips

Которые я использую в большинстве запросов:

-- 1. Фильтруй рано (в WHERE, не в JOIN или HAVING)
SELECT *
FROM purchases
WHERE created_at >= '2024-01-01'  -- Filter early!
  AND status = 'completed';

-- 2. Агрегируй до JOIN если возможно
SELECT
  u.user_id,
  summary.total_spent
FROM users u
LEFT JOIN (
  SELECT user_id, SUM(amount) as total_spent
  FROM purchases
  WHERE created_at >= DATE_SUB(NOW(), INTERVAL 30 DAY)
  GROUP BY user_id
) summary ON u.id = summary.user_id;

-- 3. Используй LIMIT для testing
SELECT * FROM large_table LIMIT 100;  -- Quick preview

-- 4. Избегай SELECT *
SELECT user_id, email, created_at  -- Only needed columns
FROM users;

-- 5. Используй EXPLAIN для больших запросов
EXPLAIN ANALYZE
SELECT ...

Практические примеры из опыта

Пример 1: Cohort Retention Анализ

WITH cohort_data AS (
  SELECT
    DATE_TRUNC('week', u.created_at) as cohort_date,
    u.user_id,
    MAX(DATE(e.created_at)) as last_activity
  FROM users u
  LEFT JOIN events e ON u.id = e.user_id
  GROUP BY DATE_TRUNC('week', u.created_at), u.user_id
)
SELECT
  cohort_date,
  EXTRACT(WEEK FROM last_activity - cohort_date) as weeks_since_cohort,
  COUNT(DISTINCT user_id) as users_active
FROM cohort_data
WHERE cohort_date >= '2024-01-01'
GROUP BY cohort_date, weeks_since_cohort
ORDER BY cohort_date, weeks_since_cohort;

Пример 2: Multi-level Funnel Analysis

WITH funnel AS (
  SELECT
    user_id,
    MAX(CASE WHEN event = 'view' THEN 1 ELSE 0 END) as step_1_viewed,
    MAX(CASE WHEN event = 'add_to_cart' THEN 1 ELSE 0 END) as step_2_carted,
    MAX(CASE WHEN event = 'checkout' THEN 1 ELSE 0 END) as step_3_checkout,
    MAX(CASE WHEN event = 'purchase' THEN 1 ELSE 0 END) as step_4_purchased
  FROM events
  WHERE DATE(created_at) >= CURRENT_DATE - INTERVAL 30 DAY
  GROUP BY user_id
)
SELECT
  SUM(step_1_viewed) as viewed,
  SUM(step_2_carted) as carted,
  SUM(step_3_checkout) as checkout,
  SUM(step_4_purchased) as purchased,
  ROUND(100.0 * SUM(step_2_carted) / SUM(step_1_viewed), 1) as view_to_cart,
  ROUND(100.0 * SUM(step_3_checkout) / SUM(step_2_carted), 1) as cart_to_checkout,
  ROUND(100.0 * SUM(step_4_purchased) / SUM(step_3_checkout), 1) as checkout_to_purchase
FROM funnel;

SQL — это мощный инструмент, и овладение этими техниками позволяет мне решать даже сложные аналитические задачи эффективно и элегантно.