← Назад к вопросам
Как проводить анализ оттока пользователей (churn analysis)?
2.0 Middle🔥 121 комментариев
#SQL и базы данных#Метрики продукта
Комментарии (1)
🐱
claude-haiku-4.5PrepBro AI26 мар. 2026 г.(ред.)
Ответ сгенерирован нейросетью и может содержать ошибки
Анализ оттока пользователей (Churn Analysis)
Churn — показатель, сколько пользователей прекратили использовать продукт в определённый период. Это критическая метрика для SaaS, подписочных и игровых сервисов.
Определение и расчёт Churn
Формула Churn Rate
Churn Rate = (Пользователи, ушедшие за период / Активные пользователи на начало периода) × 100%
Примеры:
- Месячный churn (Monthly Active Churn): какой % подписчиков отменил подписку в месяц
- Дневной churn (Daily Active Churn): для игр — какой % ежедневных игроков не вернулся на следующий день
SQL: расчёт churn
-- Monthly churn per subscription cohort.
-- A cohort is every user whose subscription started in the same calendar month;
-- a user counts as churned when the cancellation falls in the calendar month
-- immediately after the cohort month.
WITH monthly_cohorts AS (
    SELECT
        DATE_TRUNC('month', subscription_start_date) AS cohort_month,
        user_id,
        -- Carried through so churn_events can test it; the original CTE dropped
        -- this column, which made the query fail to compile.
        -- Assumes subscriptions.cancellation_date exists (NULL = still subscribed) — confirm against the schema.
        cancellation_date
    FROM subscriptions
    WHERE subscription_start_date < NOW()
),
churn_events AS (
    SELECT
        cohort_month,
        COUNT(DISTINCT user_id) AS users_at_start,
        -- A NULL cancellation_date never satisfies the equality, so no explicit
        -- IS NOT NULL guard is needed.
        COUNT(DISTINCT user_id) FILTER (
            WHERE DATE_TRUNC('month', cancellation_date) = cohort_month + INTERVAL '1 month'
        ) AS churned_users
    FROM monthly_cohorts
    GROUP BY cohort_month
)
SELECT
    cohort_month,
    users_at_start,
    churned_users,
    -- users_at_start is at least 1 for every group, so the division is safe.
    ROUND(100.0 * churned_users / users_at_start, 2) AS churn_rate_pct
FROM churn_events
ORDER BY cohort_month DESC;
1. Когортный анализ (Cohort Analysis)
Проследить судьбу пользователей, вошедших в один период (когорта):
-- Cohort activity table: for each signup month, the number of distinct users
-- who produced at least one event N months after signing up.
WITH user_cohorts AS (
    SELECT
        user_id,
        DATE_TRUNC('month', created_at) AS signup_cohort
    FROM users
),
user_activity AS (
    SELECT
        uc.user_id,
        uc.signup_cohort,
        DATE_TRUNC('month', e.event_date) AS activity_month,
        -- Whole calendar months between signup month and event month.
        -- PostgreSQL has no interval / interval operator, so the difference is
        -- built from the year and month fields instead.
        CAST(
            (EXTRACT(YEAR FROM e.event_date) - EXTRACT(YEAR FROM uc.signup_cohort)) * 12
            + (EXTRACT(MONTH FROM e.event_date) - EXTRACT(MONTH FROM uc.signup_cohort))
            AS integer
        ) AS months_since_signup
    FROM user_cohorts AS uc
    -- INNER JOIN: users without events have no activity month to report, and the
    -- filter below discarded their NULL rows anyway.
    INNER JOIN events AS e
        ON e.user_id = uc.user_id
)
SELECT
    signup_cohort,
    months_since_signup,
    COUNT(DISTINCT user_id) AS active_users
FROM user_activity
-- Drops events stamped before the signup month.
WHERE months_since_signup >= 0
GROUP BY signup_cohort, months_since_signup
ORDER BY signup_cohort DESC, months_since_signup;
Результат (когортная таблица):
signup_cohort | Month 0 | Month 1 | Month 2 | Month 3 | Month 4 | Month 5
2024-01 | 1000 | 600 | 420 | 294 | 206 | 144
2024-02 | 1200 | 660 | 462 | 323 | 226 | 159
2024-03 | 1100 | 539 | 377 | 264 | 185 | 130
Можно увидеть:
- Какая когорта "лучше" удерживает пользователей
- Тренд: улучшается ли retention со временем
- На каком месяце самый большой отток (обычно месяц 1-3)
2. Анализ факторов оттока (Churn Drivers)
Сегментация по поведению
-- Compare the behaviour of retained vs churned users who signed up in the last 6 months.
-- Sessions and purchase events are aggregated per user in separate CTEs before
-- joining to users: joining sessions to events first repeats every session once
-- per event and inflates SUM(session_duration_sec).
WITH session_stats AS (
    SELECT
        user_id,
        COUNT(DISTINCT session_id) AS total_sessions,
        COUNT(DISTINCT DATE(session_start)) AS active_days,
        SUM(session_duration_sec) AS total_time_spent_sec
    FROM sessions
    GROUP BY user_id
),
purchase_stats AS (
    SELECT
        s.user_id,
        COUNT(DISTINCT e.event_id) AS purchases
    FROM sessions AS s
    INNER JOIN events AS e
        ON e.session_id = s.session_id
    WHERE e.event_type = 'purchase'
    GROUP BY s.user_id
),
user_stats AS (
    SELECT
        u.user_id,
        CASE WHEN u.cancellation_date IS NOT NULL THEN 'churned' ELSE 'active' END AS status,
        -- Users with no sessions count as zeros so every average below is taken
        -- over the same set of users.
        COALESCE(ss.total_sessions, 0) AS total_sessions,
        COALESCE(ss.active_days, 0) AS active_days,
        COALESCE(ss.total_time_spent_sec, 0) AS total_time_spent_sec,
        COALESCE(ps.purchases, 0) AS purchases,
        -- Days as a user: up to cancellation for churned users, up to now for
        -- active ones (previously NULL for every active user).
        -- Cast to integer so the outer ROUND(AVG(...), 0) works on numeric.
        CAST(
            EXTRACT(DAY FROM COALESCE(u.cancellation_date, NOW()) - u.created_at)
            AS integer
        ) AS days_active
    FROM users AS u
    LEFT JOIN session_stats AS ss
        ON ss.user_id = u.user_id
    LEFT JOIN purchase_stats AS ps
        ON ps.user_id = u.user_id
    WHERE u.created_at > NOW() - INTERVAL '6 months'
)
SELECT
    status,
    COUNT(*) AS user_count,
    ROUND(AVG(total_sessions), 1) AS avg_sessions,
    ROUND(AVG(active_days), 1) AS avg_active_days,
    ROUND(AVG(total_time_spent_sec), 0) AS avg_time_spent_sec,
    ROUND(AVG(purchases), 2) AS avg_purchases,
    ROUND(AVG(days_active), 0) AS avg_days_as_user
FROM user_stats
GROUP BY status;
Результат: insights
status | user_count | avg_sessions | avg_active_days | avg_time_spent_sec | avg_purchases
active | 5000 | 12.5 | 8.2 | 1800 | 2.3
churned | 1200 | 2.1 | 1.5 | 180 | 0.1
Вывод: неактивные пользователи (< 3 сессий, < 2 дней) — группа риска.
3. Модель прогнозирования оттока (Predictive Churn Model)
Создание признаков (features)
-- Per-user feature set for a churn model, covering users who signed up
-- between 90 and 30 days ago.
-- Sessions, events and orders are each aggregated per user before joining:
-- joining all three to users at once multiplies rows and inflates the sums,
-- click counts and the average session duration.
-- NOTE(review): features are measured over the 30 days before NOW(), i.e. possibly
-- after a user already cancelled — confirm this does not leak the label.
WITH recent_sessions AS (
    SELECT
        session_id,
        user_id,
        session_start,
        session_duration_sec
    FROM sessions
    WHERE session_start >= NOW() - INTERVAL '30 days'
),
session_features AS (
    SELECT
        user_id,
        COUNT(DISTINCT session_id) AS sessions_last_30d,
        COUNT(DISTINCT DATE(session_start)) AS active_days_last_30d,
        AVG(session_duration_sec) AS avg_session_duration
    FROM recent_sessions
    GROUP BY user_id
),
event_features AS (
    SELECT
        rs.user_id,
        COUNT(*) FILTER (WHERE e.event_type = 'feature_a') AS feature_a_clicks,
        MAX(DATE(e.event_date)) AS last_activity_date
    FROM recent_sessions AS rs
    INNER JOIN events AS e
        ON e.session_id = rs.session_id
    GROUP BY rs.user_id
),
order_features AS (
    SELECT
        user_id,
        COUNT(DISTINCT order_id) AS orders_last_30d,
        SUM(COALESCE(amount, 0)) AS spend_last_30d
    FROM orders
    WHERE created_at >= NOW() - INTERVAL '30 days'
    GROUP BY user_id
),
user_features AS (
    SELECT
        u.user_id,
        u.created_at AS signup_date,
        -- Behaviour
        COALESCE(sf.sessions_last_30d, 0) AS sessions_last_30d,
        COALESCE(sf.active_days_last_30d, 0) AS active_days_last_30d,
        sf.avg_session_duration,
        -- Interactions
        COALESCE(ef.feature_a_clicks, 0) AS feature_a_clicks,
        ef.last_activity_date,
        -- date - date yields whole days as an integer; an interval cannot be
        -- cast to numeric. NULL when the user has no events in the window.
        CURRENT_DATE - ef.last_activity_date AS days_since_last_activity,
        -- Purchases
        COALESCE(orf.orders_last_30d, 0) AS orders_last_30d,
        COALESCE(orf.spend_last_30d, 0) AS spend_last_30d,
        -- Target variable
        CASE
            WHEN u.cancellation_date IS NOT NULL
                AND u.cancellation_date <= NOW()
                THEN 1
            ELSE 0
        END AS churned
    FROM users AS u
    LEFT JOIN session_features AS sf
        ON sf.user_id = u.user_id
    LEFT JOIN event_features AS ef
        ON ef.user_id = u.user_id
    LEFT JOIN order_features AS orf
        ON orf.user_id = u.user_id
    WHERE u.created_at >= NOW() - INTERVAL '90 days'
        AND u.created_at <= NOW() - INTERVAL '30 days' -- historical data only
)
SELECT
    user_id,
    signup_date,
    sessions_last_30d,
    active_days_last_30d,
    avg_session_duration,
    feature_a_clicks,
    last_activity_date,
    days_since_last_activity,
    orders_last_30d,
    spend_last_30d,
    churned
FROM user_features;
Экспорт в Python для ML модели
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
# Train a churn classifier on the SQL feature set and score users who have not cancelled.
# Expects `connection` to be an open DB connection and `user_features` to be
# queryable (e.g. saved as a view or table from the feature query).

# Single source of truth for model inputs, so training and scoring use the same
# columns in the same order.
feature_columns = [
    'sessions_last_30d', 'active_days_last_30d', 'avg_session_duration',
    'feature_a_clicks', 'days_since_last_activity', 'orders_last_30d', 'spend_last_30d',
]

# Stand-in for "no activity inside the 30-day feature window". Filling this
# column with 0 would tell the model the user was active today.
NO_ACTIVITY_DAYS = 31


def _fill_missing_features(frame):
    """Replace NULL feature values in place: recency gets the sentinel, the rest get 0."""
    frame['days_since_last_activity'] = frame['days_since_last_activity'].fillna(NO_ACTIVITY_DAYS)
    frame[feature_columns] = frame[feature_columns].fillna(0)
    return frame


# Load the training data from SQL
df = pd.read_sql_query("""
SELECT * FROM user_features
""", connection)

# Fill NULL values
df = _fill_missing_features(df)

# Split into X (features) and y (target variable)
X = df[feature_columns]
y = df['churned']

# Train/test split comes first, so the scaler never sees test rows.
# Stratify keeps the churned/active ratio equal in both parts.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

# Scaling: fit on the training part only, then apply the same transform to test
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Model
model = RandomForestClassifier(n_estimators=100, max_depth=10, random_state=42)
model.fit(X_train_scaled, y_train)

# Evaluation
# NOTE(review): accuracy can look good on imbalanced churn data even for a useless
# model — consider recall / ROC AUC as well.
accuracy = model.score(X_test_scaled, y_test)
print(f"Accuracy: {accuracy:.3f}")

# Feature importance
feature_importance = pd.DataFrame({
    'feature': feature_columns,
    'importance': model.feature_importances_
}).sort_values('importance', ascending=False)
print(feature_importance)

# Prediction for current users (those without a cancellation date).
# NOT EXISTS instead of NOT IN: NOT IN returns no rows if the subquery yields a NULL.
current_users = pd.read_sql_query("""
SELECT
    uf.user_id,
    uf.sessions_last_30d,
    uf.active_days_last_30d,
    uf.avg_session_duration,
    uf.feature_a_clicks,
    uf.days_since_last_activity,
    uf.orders_last_30d,
    uf.spend_last_30d
FROM user_features AS uf
WHERE NOT EXISTS (
    SELECT 1
    FROM users AS u
    WHERE u.user_id = uf.user_id
      AND u.cancellation_date IS NOT NULL
)
""", connection)
current_users = _fill_missing_features(current_users)

current_users_scaled = scaler.transform(current_users[feature_columns])
churn_probability = model.predict_proba(current_users_scaled)[:, 1]

current_users['churn_risk'] = churn_probability
# include_lowest=True puts a probability of exactly 0.0 into 'low' instead of NaN.
current_users['risk_level'] = pd.cut(churn_probability,
                                     bins=[0, 0.2, 0.5, 1.0],
                                     labels=['low', 'medium', 'high'],
                                     include_lowest=True)

print(current_users.nlargest(10, 'churn_risk')[['user_id', 'churn_risk', 'risk_level']])
4. Анализ причин оттока (Exit Surveys)
-- Users who cancelled in the last 90 days, with their exit-survey reasons (if any).
-- One row per user and reason; users without a survey row keep a NULL reason
-- and zero counters because of the LEFT JOIN.
-- The original referenced the reason column under two names (cs.cancellation_reason
-- and cs.reason); a single name is used here.
-- Assumes the column is cancellation_surveys.cancellation_reason — confirm against the schema.
SELECT
    u.user_id,
    u.cancellation_date,
    EXTRACT(DAY FROM u.cancellation_date - u.created_at) AS customer_lifetime_days,
    cs.cancellation_reason,
    COUNT(*) FILTER (WHERE cs.cancellation_reason = 'price') AS reason_price,
    COUNT(*) FILTER (WHERE cs.cancellation_reason = 'not_needed') AS reason_not_needed,
    COUNT(*) FILTER (WHERE cs.cancellation_reason = 'poor_performance') AS reason_performance,
    COUNT(*) FILTER (WHERE cs.cancellation_reason = 'switched_competitor') AS reason_competitor
FROM users AS u
LEFT JOIN cancellation_surveys AS cs
    ON cs.user_id = u.user_id
WHERE u.cancellation_date > NOW() - INTERVAL '90 days'
GROUP BY u.user_id, u.cancellation_date, u.created_at, cs.cancellation_reason;
5. Кривая удержания (Retention Chart)
-- Retention curve: percentage of each signup cohort still active N months after signup.
-- A user counts as retained in month N when their last recorded activity falls in
-- month N or later, so the curve never increases within a cohort. Counting only
-- users whose last activity is exactly month N would show who left, not who stayed.
WITH cohorts AS (
    SELECT
        user_id,
        CAST(DATE_TRUNC('month', created_at) AS date) AS cohort_month,
        CAST(DATE_TRUNC('month', last_activity_date) AS date) AS last_active_month
    FROM users
),
user_lifetimes AS (
    SELECT
        user_id,
        cohort_month,
        -- Whole calendar months between signup month and last active month.
        -- date - date is an integer, which cannot be divided by an interval,
        -- so the difference is built from the year and month fields.
        CAST(
            (EXTRACT(YEAR FROM last_active_month) - EXTRACT(YEAR FROM cohort_month)) * 12
            + (EXTRACT(MONTH FROM last_active_month) - EXTRACT(MONTH FROM cohort_month))
            AS integer
        ) AS months_since_signup
    FROM cohorts
    -- Excludes users with no recorded activity and activity dated before signup.
    WHERE last_active_month >= cohort_month
),
last_seen AS (
    SELECT
        cohort_month,
        months_since_signup,
        COUNT(DISTINCT user_id) AS users_last_seen
    FROM user_lifetimes
    GROUP BY cohort_month, months_since_signup
),
retention AS (
    SELECT
        cohort_month,
        months_since_signup,
        -- Running total from the latest month backwards: everyone last seen in
        -- this month or any later one. Months in which nobody was last seen
        -- produce no row.
        SUM(users_last_seen) OVER (
            PARTITION BY cohort_month
            ORDER BY months_since_signup DESC
            ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
        ) AS retained_users
    FROM last_seen
),
cohort_size AS (
    SELECT
        cohort_month,
        COUNT(DISTINCT user_id) AS cohort_size
    FROM cohorts
    GROUP BY cohort_month
)
SELECT
    r.cohort_month,
    r.months_since_signup,
    r.retained_users,
    cs.cohort_size,
    ROUND(100.0 * r.retained_users / cs.cohort_size, 1) AS retention_pct
FROM retention AS r
INNER JOIN cohort_size AS cs
    ON cs.cohort_month = r.cohort_month
ORDER BY r.cohort_month DESC, r.months_since_signup;
6. Метрики для отслеживания
| Метрика | Формула | Интерпретация |
|---|---|---|
| Monthly Churn Rate | (ушедшие в месяц / активные в начале) × 100% | % подписчиков отменил подписку |
| Retention Rate | 100% - Churn Rate | % пользователей остался |
| Customer Lifetime Value (LTV) | ARPU / Monthly Churn Rate | Сколько выручки приносит пользователь за всё время жизни (средний срок жизни в месяцах = 1 / Monthly Churn Rate) |
| Payback Period | Customer Acquisition Cost / Monthly ARPU | За сколько месяцев окупится привлечение |
| D-Day Retention | % пользователей активен на день D | Удержание на конкретный день |
7. Рекомендации по снижению оттока
-- Find non-cancelled users who have been inactive for 7+ days, for proactive outreach.
-- Inactivity is computed once in a CTE as whole days (date - date is an integer);
-- the original cast an interval to numeric, which PostgreSQL rejects.
WITH user_activity AS (
    SELECT
        u.user_id,
        u.email,
        u.created_at,
        CURRENT_DATE - CAST(MAX(e.event_date) AS date) AS days_since_activity,
        COUNT(DISTINCT s.session_id) AS total_sessions
    FROM users AS u
    LEFT JOIN sessions AS s
        ON s.user_id = u.user_id
    LEFT JOIN events AS e
        ON e.session_id = s.session_id
    WHERE u.cancellation_date IS NULL
    GROUP BY u.user_id, u.email, u.created_at
)
SELECT
    user_id,
    email,
    created_at,
    days_since_activity,
    total_sessions,
    -- Branches are evaluated top to bottom, so 'dormant' is only reached by users
    -- with 5 or more sessions; the original order is kept.
    CASE
        -- No activity for 14+ days AND fewer than 5 sessions
        WHEN days_since_activity >= 14 AND total_sessions < 5 THEN 'at_risk_inactive'
        WHEN days_since_activity >= 7 AND total_sessions < 2 THEN 'at_risk_new_user'
        WHEN days_since_activity >= 30 THEN 'dormant'
        ELSE 'active'
    END AS risk_segment
FROM user_activity
-- Inactive for 7+ days. Users with no events at all have a NULL
-- days_since_activity and are excluded here, as in the original HAVING clause.
WHERE days_since_activity >= 7
ORDER BY days_since_activity DESC;
Чеклист анализа оттока
- ☑ Сформулировать определение churn (отмена подписки / отсутствие активности 30+ дней)
- ☑ Рассчитать churn rate по месяцам
- ☑ Провести когортный анализ retention
- ☑ Найти различия между churned и active пользователями
- ☑ Построить predictive churn model
- ☑ Определить группы риска для проактивных действий
- ☑ Собрать feedback от ушедших пользователей (survey)
- ☑ Отслеживать корреляцию: какой feature снижает churn
- ☑ A/B тестировать меры удержания (win-back campaigns, price adjustments)
- ☑ Мониторить trend: улучшается ли или ухудшается retention