Как сделаешь сервис устойчивым к перегрузкам

Question

claude-haiku-4.5 · Accepted Answer

## Обеспечение устойчивости сервиса к перегрузкам

Устойчивость к перегрузкам (resilience) - это способность системы продолжать работу при высоких нагрузках и сбоях. Это критический аспект разработки высоконагруженных приложений.

### 1. Rate Limiting - ограничение частоты запросов

```java
// Использование Bucket4j для Rate Limiting
import io.github.bucket4j.*;

@Configuration
public class RateLimitConfig {

    @Bean
    public Bucket createBucket() {
        Bandwidth limit = Bandwidth.classic(100, Refill.intervally(100, Duration.ofMinutes(1)));
        return Bucket4j.builder()
            .addLimit(limit)
            .build();
    }
}

@RestController
@RequestMapping("/api/v1/users")
public class UserController {

    @Autowired
    private Bucket bucket;

    @GetMapping("/{id}")
    public ResponseEntity getUser(@PathVariable Long id) {
        if (!bucket.tryConsume(1)) {
            return ResponseEntity
                .status(HttpStatus.TOO_MANY_REQUESTS)
                .body("Rate limit exceeded");
        }
        return ResponseEntity.ok(userService.getUserById(id));
    }
}

// Spring Cloud Config с параметризацией
@Component
public class AdvancedRateLimiting {

    @Value("${rate.limit.requests:100}")
    private int maxRequests;

    @Value("${rate.limit.window:60000}")
    private long windowMs;

    private final Map limiters = new ConcurrentHashMap<>();

    public boolean allowRequest(String userId) {
        RateLimiter limiter = limiters.computeIfAbsent(
            userId,
            k -> RateLimiter.create(maxRequests / (windowMs / 1000.0))
        );
        return limiter.tryAcquire();
    }
}
```

### 2.
Circuit Breaker - прерывание цепи при сбоях

```java
// Использование Resilience4j
@Configuration
public class CircuitBreakerConfig {

    @Bean
    public CircuitBreakerRegistry circuitBreakerRegistry() {
        return CircuitBreakerRegistry.ofDefaults();
    }
}

@Service
public class PaymentServiceWithCircuitBreaker {

    private final CircuitBreakerRegistry registry;
    private final PaymentAPI paymentAPI;

    @Autowired
    public PaymentServiceWithCircuitBreaker(
        CircuitBreakerRegistry registry,
        PaymentAPI paymentAPI
    ) {
        this.registry = registry;
        this.paymentAPI = paymentAPI;
    }

    public PaymentResult processPayment(Payment payment) {
        CircuitBreaker breaker = registry.circuitBreaker(
            "payment-service",
            CircuitBreakerConfig.custom()
                .failureRateThreshold(50.0f) // 50% ошибок
                .waitDurationInOpenState(Duration.ofSeconds(30))
                .permittedNumberOfCallsInHalfOpenState(3)
                .slowCallRateThreshold(50.0f)
                .slowCallDurationThreshold(Duration.ofSeconds(5))
                .build()
        );

        try {
            return breaker.executeSupplier(() -> paymentAPI.charge(payment));
        } catch (CallNotPermittedException e) {
            logger.warn("Circuit breaker is open for payment");
            return PaymentResult.RETRY_LATER;
        }
    }
}

// Состояния Circuit Breaker:
// CLOSED - нормальная работа
// OPEN - блокирует все запросы
// HALF_OPEN - пробует восстановление
```

### 3. Timeout и Retry логика

```java
@Service
public class ResilientService {

    @Autowired
    private RestTemplate restTemplate;

    @Retry(value = 3, backoff = @Backoff(delay = 1000))
    @Timeout(value = 5000, unit = ChronoUnit.MILLIS)
    public String callExternalAPI(String url) {
        try {
            return restTemplate.getForObject(url, String.class);
        } catch (ResourceAccessException e) {
            // Автоматический retry с экспоненциальной задержкой
            throw e;
        }
    }

    // Альтернатива с Resilience4j
    @Bean
    public Retry externalApiRetry() {
        return Retry.of("external-api",
            RetryConfig.custom()
                .maxAttempts(3)
                .intervalFunction(IntervalFunction.ofExponentialBackoff(1000, 2))
                .build()
        );
    }
}
```

### 4.
Bulkhead - изоляция потоков

```java
// Разные потоки для разных операций
@Configuration
public class ThreadPoolConfig {

    @Bean(name = "paymentExecutor")
    public Executor paymentExecutor() {
        ThreadPoolTaskExecutor executor = new ThreadPoolTaskExecutor();
        executor.setCorePoolSize(20);
        executor.setMaxPoolSize(50);
        executor.setQueueCapacity(500);
        executor.setThreadNamePrefix("payment-");
        executor.initialize();
        return executor;
    }

    @Bean(name = "notificationExecutor")
    public Executor notificationExecutor() {
        ThreadPoolTaskExecutor executor = new ThreadPoolTaskExecutor();
        executor.setCorePoolSize(10);
        executor.setMaxPoolSize(30);
        executor.setQueueCapacity(200);
        executor.setThreadNamePrefix("notification-");
        executor.initialize();
        return executor;
    }
}

@Service
public class BulkHeadingService {

    @Async("paymentExecutor")
    public CompletableFuture processPayment(Payment payment) {
        // Исполняется в paymentExecutor потоках
        return CompletableFuture.completedFuture(paymentAPI.charge(payment));
    }

    @Async("notificationExecutor")
    public void sendNotification(Notification notification) {
        // Исполняется в notificationExecutor потоках
        // Не конкурирует с платежами
    }
}
```

### 5.
Load Shedding - отброс низкоприоритетных запросов

```java
@Component
public class LoadShedding {

    private final AtomicInteger activeRequests = new AtomicInteger(0);
    private final int maxLoad = 1000;

    public boolean canAcceptRequest(RequestPriority priority) {
        int current = activeRequests.get();

        if (current < maxLoad) {
            return true; // Приём запроса
        }

        // При перегрузке - отбрасываем низкоприоритетные запросы
        if (priority == RequestPriority.HIGH) {
            return true; // HIGH priority - пропускаем всегда
        }

        return false; // LOW priority - отбрасываем
    }
}

@RestController
public class ResilientController {

    @Autowired
    private LoadShedding loadShedding;

    @GetMapping("/important-data")
    public ResponseEntity getImportantData() {
        if (!loadShedding.canAcceptRequest(RequestPriority.HIGH)) {
            return ResponseEntity
                .status(HttpStatus.SERVICE_UNAVAILABLE)
                .body("Service overloaded");
        }
        // Обработка запроса
        return ResponseEntity.ok(data);
    }
}

enum RequestPriority {
    HIGH, MEDIUM, LOW
}
```

### 6. Асинхронная обработка с очередями

```java
@Configuration
public class QueueConfig {

    @Bean
    public Queue orderQueue() {
        return new LinkedBlockingQueue<>(10000);
    }
}

@Service
public class OrderProcessing {

    private final Queue orderQueue;

    @PostMapping("/orders")
    public ResponseEntity submitOrder(@RequestBody Order order) {
        if (!orderQueue.offer(order)) {
            return ResponseEntity
                .status(HttpStatus.SERVICE_UNAVAILABLE)
                .body("Queue is full, try again later");
        }
        return ResponseEntity.accepted().build();
    }

    @Scheduled(fixedRate = 100)
    public void processOrders() {
        Order order = orderQueue.poll();
        if (order != null) {
            try {
                orderService.process(order);
            } catch (Exception e) {
                // Возвращаем в очередь
                orderQueue.offer(order);
                logger.error("Failed to process order", e);
            }
        }
    }
}
```

### 7.
Кеширование для снижения нагрузки

```java
@Service
@CacheConfig(cacheNames = "users")
public class UserServiceWithCaching {

    @Cacheable(value = "users", key = "#id", unless = "#result == null")
    public User getUserById(Long id) {
        // Кешируется на 5 минут
        return userRepository.findById(id).orElse(null);
    }

    @CacheEvict(value = "users", key = "#user.id")
    public void updateUser(User user) {
        userRepository.save(user);
    }

    // Кеш с Redis для распределённой системы
    @Cacheable(
        value = "user-profiles",
        key = "#id",
        cacheManager = "redisCache"
    )
    public UserProfile getUserProfile(Long id) {
        return loadUserProfile(id);
    }
}
```

### 8. Мониторинг и метрики

```java
@Configuration
public class MetricsConfig {

    @Bean
    public MeterBinder systemMetrics() {
        return (registry) -> {
            // Мониторим CPU, память, диск
            new FileDescriptorMetrics().bindTo(registry);
            new ProcessorMetrics().bindTo(registry);
        };
    }
}

@RestController
public class MonitoredController {

    @Autowired
    private MeterRegistry meterRegistry;

    @PostMapping("/process")
    public ResponseEntity processData(@RequestBody Data data) {
        Timer.Sample sample = Timer.start(meterRegistry);

        try {
            Result result = service.process(data);
            meterRegistry.counter("processing.success").increment();
            return ResponseEntity.ok(result);
        } catch (Exception e) {
            meterRegistry.counter("processing.error").increment();
            throw e;
        } finally {
            sample.stop(Timer.builder("processing.time")
                .publishPercentiles(0.5, 0.95, 0.99)
                .register(meterRegistry));
        }
    }
}
```

### 9.
Graceful Shutdown

```java
@Configuration
public class GracefulShutdownConfig {

    @Bean
    public TomcatServletWebServerFactory containerFactory() {
        TomcatServletWebServerFactory factory = new TomcatServletWebServerFactory();
        factory.addConnectorCustomizers(connector ->
            connector.setProperty("connectionTimeout", "20000")
        );
        return factory;
    }
}

// application.yml
# server:
#   shutdown: graceful
#   tomcat:
#     threads:
#       max: 200
#       min-spare: 10
#     max-connections: 10000
#     accept-count: 100

@Component
public class GracefulShutdown {

    @PreDestroy
    public void shutdown() {
        logger.info("Application shutting down gracefully...");
        // Завершение текущих запросов
        // Закрытие подключений к БД
    }
}
```

### 10. Пример полной конфигурации

```yaml
# application.yml
spring:
  application:
    name: resilient-service
  cache:
    type: redis
    redis:
      time-to-live: 300000
  task:
    execution:
      pool:
        core-size: 20
        max-size: 50
        queue-capacity: 500

server:
  shutdown: graceful
  servlet:
    context-path: /api/v1
  tomcat:
    threads:
      max: 200
      min-spare: 10
    max-connections: 5000
    accept-count: 100

management:
  endpoints:
    web:
      exposure:
        include: health,metrics,prometheus
  metrics:
    export:
      prometheus:
        enabled: true

resilience4j:
  circuitbreaker:
    instances:
      payment-service:
        registerHealthIndicator: true
        slidingWindowSize: 10
        failureRateThreshold: 50
        waitDurationInOpenState: 30000
  retry:
    instances:
      external-api:
        maxAttempts: 3
        waitDuration: 1000
  timelimiter:
    instances:
      default:
        timeoutDuration: 5000
```

### Ключевые стратегии

1. **Rate Limiting** - ограничение частоты запросов
2. **Circuit Breaker** - прерывание при сбоях
3. **Timeout & Retry** - повторные попытки с таймаутом
4. **Bulkhead** - изоляция ресурсов
5. **Load Shedding** - отброс низкоприоритетных запросов
6. **Асинхронная обработка** - очереди для неспешных операций
7. **Кеширование** - снижение нагрузки на БД
8. **Мониторинг** - видимость состояния системы
9. **Graceful Shutdown** - корректное завершение
10.
**Правильная конфигурация** - потоки, соединения, очереди

Устойчивость к перегрузкам требует комплексного подхода и постоянного мониторинга!

Как сделаешь сервис устойчивым к перегрузкам

Комментарии (1)

Обеспечение устойчивости сервиса к перегрузкам

1. Rate Limiting - ограничение частоты запросов

2. Circuit Breaker - прерывание цепи при сбоях

3. Timeout и Retry логика

4. Bulkhead - изоляция потоков

5. Load Shedding - отброс низкоприоритетных запросов

6. Асинхронная обработка с очередями

7. Кеширование для снижения нагрузки

8. Мониторинг и метрики

9. Graceful Shutdown

10. Пример полной конфигурации

Ключевые стратегии