← Назад к вопросам

SQL: Анализ Customer Journey

2.4 Senior🔥 221 комментариев
#SQL и базы данных#Атрибуция и маркетинг#Метрики продукта

Условие

У вас есть таблица touchpoints:

  • user_id (integer)
  • touchpoint_type (varchar) - email, social, search, direct
  • touchpoint_time (timestamp)
  • converted (boolean)

Проанализируйте пути пользователей к конверсии.

Ожидаемый результат:

  1. Самые частые последовательности касаний
  2. Среднее количество касаний до конверсии
  3. Конверсия по первому и последнему касанию

Источник: анализ customer journey

Комментарии (1)

🐱
claude-haiku-4.5PrepBro AI23 мар. 2026 г.(ред.)

Ответ сгенерирован нейросетью и может содержать ошибки

Решение: SQL-анализ Customer Journey

Бизнес-задача

Анализ путей, по которым пользователи приходят к конверсии. Это критично для оптимизации маркетинга: нужно понять, какие каналы и последовательности наиболее эффективны.

1. Самые частые последовательности касаний

-- Вариант 1: Последовательности до конверсии (топ-10)
WITH user_journeys AS (
    SELECT
        user_id,
        converted,
        -- Формируем последовательность touchpoints
        STRING_AGG(touchpoint_type, ' → ' ORDER BY touchpoint_time) AS journey_path,
        COUNT(*) AS touchpoint_count,
        MAX(touchpoint_time) AS last_interaction
    FROM touchpoints
    GROUP BY user_id, converted
),
conversion_journeys AS (
    SELECT
        journey_path,
        COUNT(*) AS path_frequency,
        ROUND(100.0 * COUNT(*) / SUM(COUNT(*)) OVER(), 2) AS pct
    FROM user_journeys
    WHERE converted = TRUE
    GROUP BY journey_path
)
SELECT
    ROW_NUMBER() OVER (ORDER BY path_frequency DESC) AS rank,
    journey_path,
    path_frequency,
    pct || '%' AS percentage
FROM conversion_journeys
ORDER BY path_frequency DESC
LIMIT 10;

-- Результат пример:
-- rank | journey_path           | path_frequency | percentage
-- 1    | email → social → search | 1,250         | 15.2%
-- 2    | email → direct         | 980           | 11.9%
-- 3    | search → email → search | 756           | 9.2%

2. Среднее количество касаний до конверсии

WITH user_touchpoints AS (
    SELECT
        user_id,
        converted,
        COUNT(*) AS total_touches
    FROM touchpoints
    GROUP BY user_id, converted
)
SELECT
    CASE converted
        WHEN TRUE THEN 'Converted'
        ELSE 'Not Converted'
    END AS segment,
    COUNT(*) AS user_count,
    ROUND(AVG(total_touches), 2) AS avg_touches,
    MIN(total_touches) AS min_touches,
    MAX(total_touches) AS max_touches,
    PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY total_touches) AS median_touches,
    PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY total_touches) AS p95_touches
FROM user_touchpoints
GROUP BY converted
ORDER BY converted DESC;

-- Результат пример:
-- segment        | user_count | avg_touches | min | max | median | p95
-- Converted      | 8,230      | 4.35       | 1   | 25  | 3      | 10
-- Not Converted  | 41,770     | 2.87       | 1   | 18  | 2      | 7

3. Конверсия по первому и последнему касанию

-- First-touch attribution
WITH first_touch AS (
    SELECT
        user_id,
        touchpoint_type AS first_channel,
        converted,
        ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY touchpoint_time ASC) AS touch_rank
    FROM touchpoints
),
first_touch_only AS (
    SELECT
        first_channel,
        converted
    FROM first_touch
    WHERE touch_rank = 1
)
SELECT
    first_channel,
    COUNT(*) AS total_first_touches,
    COUNT(*) FILTER (WHERE converted = TRUE) AS conversions,
    ROUND(100.0 * COUNT(*) FILTER (WHERE converted = TRUE) / COUNT(*), 2) AS conversion_rate,
    ROUND(100.0 * COUNT(*) / SUM(COUNT(*)) OVER(), 2) AS channel_share
FROM first_touch_only
GROUP BY first_channel
ORDER BY conversion_rate DESC;

-- Last-touch attribution
WITH last_touch AS (
    SELECT
        user_id,
        touchpoint_type AS last_channel,
        converted,
        ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY touchpoint_time DESC) AS touch_rank
    FROM touchpoints
),
last_touch_only AS (
    SELECT
        last_channel,
        converted
    FROM last_touch
    WHERE touch_rank = 1
)
SELECT
    last_channel,
    COUNT(*) AS total_last_touches,
    COUNT(*) FILTER (WHERE converted = TRUE) AS conversions,
    ROUND(100.0 * COUNT(*) FILTER (WHERE converted = TRUE) / COUNT(*), 2) AS conversion_rate,
    ROUND(100.0 * COUNT(*) / SUM(COUNT(*)) OVER(), 2) AS channel_share
FROM last_touch_only
GROUP BY last_channel
ORDER BY conversion_rate DESC;

-- Результат пример (First-touch):
-- channel | total_first_touches | conversions | conversion_rate | channel_share
-- email   | 18,500              | 3,050       | 16.5%          | 28.2%
-- search  | 15,200              | 1,880       | 12.4%          | 23.1%
-- social  | 12,300              | 1,230       | 10.0%          | 18.7%
-- direct  | 20,500              | 2,070       | 10.1%          | 31.2%

-- Результат пример (Last-touch):
-- channel | total_last_touches | conversions | conversion_rate | channel_share
-- email   | 12,100             | 4,200       | 34.7%          | 15.5%
-- search  | 20,300             | 3,100       | 15.3%          | 26.0%
-- social  | 8,500              | 950         | 11.2%          | 10.9%
-- direct  | 37,600             | 300         | 0.8%           | 48.2%

4. Расширенный анализ

Multi-touch Attribution (правильный подход)

-- Linear attribution (каждый канал получает равный вклад)
WITH user_journey_data AS (
    SELECT
        user_id,
        converted,
        touchpoint_type,
        touchpoint_time,
        ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY touchpoint_time) AS touch_number,
        COUNT(*) OVER (PARTITION BY user_id) AS total_touches
    FROM touchpoints
),
linear_attribution AS (
    SELECT
        user_id,
        touchpoint_type,
        converted,
        1.0 / total_touches AS attribution_weight
    FROM user_journey_data
    WHERE converted = TRUE
)
SELECT
    touchpoint_type AS channel,
    COUNT(DISTINCT user_id) AS unique_users,
    ROUND(SUM(attribution_weight), 2) AS attributed_conversions,
    ROUND(100.0 * SUM(attribution_weight) / SUM(SUM(attribution_weight)) OVER(), 2) AS pct_of_total
FROM linear_attribution
GROUP BY touchpoint_type
ORDER BY attributed_conversions DESC;

Time Decay Attribution (последние касания важнее)

-- Weighting by position (последний касание весит 40%, первый 10%)
WITH weighted_attribution AS (
    SELECT
        user_id,
        touchpoint_type,
        converted,
        touch_number,
        total_touches,
        CASE
            WHEN touch_number = 1 THEN 0.10  -- First touch 10%
            WHEN touch_number = total_touches THEN 0.40  -- Last touch 40%
            ELSE 0.50 / (total_touches - 2)  -- Middle touches 50%
        END AS weight
    FROM user_journey_data
    WHERE converted = TRUE
)
SELECT
    touchpoint_type,
    ROUND(SUM(weight), 2) AS attributed_conversions,
    ROUND(100.0 * SUM(weight) / SUM(SUM(weight)) OVER(), 2) AS pct_of_total
FROM weighted_attribution
GROUP BY touchpoint_type
ORDER BY attributed_conversions DESC;

5. Канальное взаимодействие (Co-occurrence)

-- Какие каналы часто встречаются вместе у конвертирующихся пользователей?
WITH converted_users AS (
    SELECT DISTINCT user_id
    FROM touchpoints
    WHERE converted = TRUE
),
user_channels AS (
    SELECT
        t.user_id,
        ARRAY_AGG(DISTINCT t.touchpoint_type) AS channels
    FROM touchpoints t
    INNER JOIN converted_users c ON t.user_id = c.user_id
    GROUP BY t.user_id
)
SELECT
    channels,
    COUNT(*) AS frequency
FROM user_channels
GROUP BY channels
ORDER BY frequency DESC
LIMIT 20;

6. Time-to-Conversion анализ

-- Как долго занимает путь от первого до последнего касания?
WITH journey_duration AS (
    SELECT
        user_id,
        converted,
        MAX(touchpoint_time) - MIN(touchpoint_time) AS journey_duration_hours,
        COUNT(*) AS total_touches
    FROM touchpoints
    GROUP BY user_id, converted
)
SELECT
    CASE
        WHEN journey_duration_hours < 1 THEN '< 1 hour'
        WHEN journey_duration_hours < 24 THEN '1-24 hours'
        WHEN journey_duration_hours < 168 THEN '1-7 days'
        WHEN journey_duration_hours < 720 THEN '1-30 days'
        ELSE '> 30 days'
    END AS duration_bucket,
    COUNT(*) AS user_count,
    COUNT(*) FILTER (WHERE converted = TRUE) AS conversions,
    ROUND(100.0 * COUNT(*) FILTER (WHERE converted = TRUE) / COUNT(*), 2) AS conversion_rate,
    ROUND(AVG(total_touches), 2) AS avg_touches
FROM journey_duration
GROUP BY duration_bucket
ORDER BY MIN(journey_duration_hours);

-- Результат пример:
-- duration_bucket | user_count | conversions | conversion_rate | avg_touches
-- < 1 hour        | 5,200      | 1,560       | 30.0%          | 2.1
-- 1-24 hours      | 8,300      | 2,150       | 25.9%          | 3.2
-- 1-7 days        | 12,400     | 2,380       | 19.2%          | 4.5
-- 1-30 days       | 8,100      | 1,140       | 14.1%          | 5.1
-- > 30 days       | 8,770      | 1,000       | 11.4%          | 6.2

7. Выводы и рекомендации

На основе анализа:

  1. Email → Social → Search — самая эффективная последовательность (15.2%)
  2. Конвертирующиеся пользователи в среднем имеют 4.35 касаний (vs 2.87 не конвертирующихся)
  3. Email как первый касание конвертирует в 16.5%, как последний — в 34.7%
  4. Путь за 1 час имеет конверсию 30%, за > 30 дней — 11.4%

Рекомендации:

  • Приоритизировать email в начале воронки (высокая стартовая конверсия)
  • Направлять социальный трафик как второе касание
  • Усилить ретаргетинг для долгих путей (> 7 дней)
  • Увеличить frequency email касаний (3.2 на пути)

Этот анализ даёт data-driven основу для оптимизации маркетинга и понимания эффективности каналов.