openzeppelin_relayer/services/provider/
retry.rs

//! # RPC Provider Retry Module
//!
//! This module implements retry mechanisms for RPC calls with exponential backoff,
//! jitter, and provider failover capabilities.
//!
//! ## Key Features
//!
//! - **Exponential Backoff**: Gradually increases retry delays to avoid overwhelming services
//! - **Randomized Jitter**: Prevents retry storms by randomizing delay times
//! - **Provider Failover**: Automatically switches to alternative providers when one fails
//! - **Configurable Behavior**: Customizable retry counts, delays, and failover strategies
//!
//! ## Main Components
//!
//! - [`RetryConfig`]: Configuration parameters for retry behavior
//! - [`retry_rpc_call`]: Core function that handles retry logic with provider failover
//! - [`calculate_retry_delay`]: Function that calculates delay with exponential backoff and jitter
//!
//! ## Usage
//!
//! The retry mechanism works with any RPC provider type: it automatically retries
//! transient errors and fails over between providers, maximizing the chance that
//! an operation eventually succeeds.
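//!
//! A minimal sketch of a call site (illustrative only, not a doctest; `MyError`,
//! `MyProvider`, and the closure bodies are assumptions, not fixed API):
//!
//! ```rust,ignore
//! let result: Result<u64, MyError> = retry_rpc_call(
//!     &selector,                     // RpcSelector over the configured URLs
//!     "eth_blockNumber",             // operation name, used for logging
//!     |e| e.is_transient(),          // which errors are worth retrying
//!     |e| e.is_fatal_for_provider(), // which errors should fail the provider
//!     |url| MyProvider::new(url),    // provider initializer
//!     |provider| async move { provider.block_number().await },
//!     None,                          // fall back to RetryConfig::from_env()
//! )
//! .await;
//! ```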
use rand::Rng;
use std::future::Future;
use std::time::Duration;

use super::rpc_selector::RpcSelector;
use crate::config::ServerConfig;
use crate::constants::RETRY_JITTER_PERCENT;

/// Calculate the retry delay using exponential backoff with jitter
///
/// # Arguments
/// * `attempt` - The retry attempt number (0 = first attempt)
/// * `base_delay_ms` - Base delay in milliseconds
/// * `max_delay_ms` - Maximum delay in milliseconds
///
/// # Returns
/// Duration to wait before the next retry
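///
/// # Example
/// A worked sketch of the backoff arithmetic (jitter makes the exact value
/// nondeterministic, so only the pre-jitter targets are shown):
///
/// ```rust,ignore
/// // base = 100 ms, cap = 1_000 ms:
/// //   attempt 0 -> 100 ms, attempt 1 -> 200 ms, attempt 2 -> 400 ms,
/// //   attempt 3 -> 800 ms, attempt 4+ -> capped at 1_000 ms,
/// // each then randomized by +/- RETRY_JITTER_PERCENT.
/// let delay = calculate_retry_delay(3, 100, 1_000);
/// ```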
pub fn calculate_retry_delay(attempt: u8, base_delay_ms: u64, max_delay_ms: u64) -> Duration {
    if base_delay_ms == 0 || max_delay_ms == 0 {
        return Duration::from_millis(0);
    }

    // 1u64 << attempt overflows once attempt > 63 (2^63 is the largest power of
    // two a u64 can hold), so jump straight to the cap for larger attempts.
    let exp_backoff = if attempt > 63 {
        max_delay_ms
    } else {
        // Defensive: checked_shl cannot fail here given the guard above, but
        // saturate rather than panic if it ever does.
        let multiplier = 1u64.checked_shl(attempt as u32).unwrap_or(u64::MAX);
        base_delay_ms.saturating_mul(multiplier)
    };

    let delay_ms = exp_backoff.min(max_delay_ms);

    apply_jitter(delay_ms)
}

/// Applies jitter to a delay value based on RETRY_JITTER_PERCENT
///
/// This creates a randomized delay within the range:
/// delay_ms × (1 ± RETRY_JITTER_PERCENT)
///
/// # Arguments
/// * `delay_ms` - The base delay in milliseconds to apply jitter to
///
/// # Returns
/// A Duration with jitter applied, guaranteed to be within
/// the range [delay_ms × (1-RETRY_JITTER_PERCENT), delay_ms × (1+RETRY_JITTER_PERCENT)]
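///
/// For example, assuming RETRY_JITTER_PERCENT were 0.1 (the actual value lives
/// in `crate::constants`), a 1000 ms delay would come back in [900, 1100] ms.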
fn apply_jitter(delay_ms: u64) -> Duration {
    if delay_ms == 0 {
        return Duration::from_millis(0);
    }

    // Calculate jitter range (how much we can add/subtract)
    let jitter_range = (delay_ms as f64 * RETRY_JITTER_PERCENT).floor() as u64;

    if jitter_range == 0 {
        return Duration::from_millis(delay_ms);
    }

    let mut rng = rand::rng();
    let jitter_value = rng.random_range(0..=jitter_range);

    let final_delay = if rng.random_bool(0.5) {
        delay_ms.saturating_add(jitter_value)
    } else {
        delay_ms.saturating_sub(jitter_value)
    };

    Duration::from_millis(final_delay)
}

/// Internal error type to distinguish specific retry outcomes
#[derive(Debug)]
enum InternalRetryError<E> {
    NonRetriable(E),
    RetriesExhausted(E),
}

/// Configuration for retry behavior
#[derive(Debug, Clone)]
pub struct RetryConfig {
    /// Maximum number of retry attempts per provider
    pub max_retries: u8,
    /// Maximum number of provider failovers to attempt
    pub max_failovers: u8,
    /// Base delay in milliseconds for exponential backoff
    pub base_delay_ms: u64,
    /// Maximum delay in milliseconds for exponential backoff
    pub max_delay_ms: u64,
}

impl RetryConfig {
    /// Create a new RetryConfig with specified values
    ///
    /// # Arguments
    /// * `max_retries` - Maximum number of retry attempts per provider (0-255)
    /// * `max_failovers` - Maximum number of provider failovers (0-255)
    /// * `base_delay_ms` - Base delay in milliseconds for exponential backoff
    /// * `max_delay_ms` - Maximum delay in milliseconds (should be >= base_delay_ms)
    ///
    /// # Panics
    /// * If `max_delay_ms` < `base_delay_ms` when both are non-zero
    /// * If only one of the delay values is zero (both should be zero or both non-zero)
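    ///
    /// # Example
    /// ```rust,ignore
    /// // Up to 3 attempts per provider, 2 failovers, 100 ms base delay
    /// // backing off to at most 10 s.
    /// let config = RetryConfig::new(3, 2, 100, 10_000);
    /// ```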
    pub fn new(max_retries: u8, max_failovers: u8, base_delay_ms: u64, max_delay_ms: u64) -> Self {
        // Validate delay consistency: both zero or both non-zero
        if (base_delay_ms == 0) != (max_delay_ms == 0) {
            panic!(
                "Delay values must be consistent: both zero (no delays) or both non-zero. Got base_delay_ms={}, max_delay_ms={}",
                base_delay_ms, max_delay_ms
            );
        }

        // Validate delay ordering when both are non-zero
        if base_delay_ms > 0 && max_delay_ms > 0 && max_delay_ms < base_delay_ms {
            panic!(
                "max_delay_ms ({}) must be >= base_delay_ms ({}) when both are non-zero",
                max_delay_ms, base_delay_ms
            );
        }

        Self {
            max_retries,
            max_failovers,
            base_delay_ms,
            max_delay_ms,
        }
    }

    /// Create a RetryConfig from environment variables
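    ///
    /// Reads the provider settings exposed by `ServerConfig::from_env`; the
    /// tests below drive these via the `PROVIDER_MAX_RETRIES`,
    /// `PROVIDER_MAX_FAILOVERS`, `PROVIDER_RETRY_BASE_DELAY_MS`, and
    /// `PROVIDER_RETRY_MAX_DELAY_MS` environment variables.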
    pub fn from_env() -> Self {
        let config = ServerConfig::from_env();
        Self::new(
            config.provider_max_retries,
            config.provider_max_failovers,
            config.provider_retry_base_delay_ms,
            config.provider_retry_max_delay_ms,
        )
    }
}

/// Generic RPC call retry function that handles retrying operations with exponential backoff
/// and provider failover.
///
/// This function will:
/// 1. Get a provider using the provider_initializer
/// 2. Try the operation up to provider_max_retries times with that provider
///    (retrying only on retriable errors)
/// 3. If all retries fail or a non-retriable error occurs, mark the provider as failed and get a new provider
/// 4. Continue up to provider_max_failovers times (capped by total available providers)
///
/// # Type Parameters
/// * `P` - The provider type
/// * `T` - The result type of the operation
/// * `E` - The error type that implements From<String>
/// * `F` - The function type that takes a provider and returns a future
/// * `Fut` - The future type returned by the operation
/// * `I` - The provider initializer function type
///
/// # Arguments
/// * `selector` - RPC selector for managing and selecting providers
/// * `operation_name` - Name of the operation for logging
/// * `is_retriable_error` - Function that determines if an error is retriable
/// * `should_mark_provider_failed` - Function that determines if an error should mark the provider as failed
/// * `provider_initializer` - Function that initializes a provider from a URL
/// * `operation` - A future-returning closure that makes the RPC call
/// * `config` - Optional configuration parameters for retry behavior
///
/// # Returns
/// * The result of the operation if successful, or an error
pub async fn retry_rpc_call<P, T, E, F, Fut, I>(
    selector: &RpcSelector,
    operation_name: &str,
    is_retriable_error: impl Fn(&E) -> bool,
    should_mark_provider_failed: impl Fn(&E) -> bool,
    provider_initializer: I,
    operation: F,
    config: Option<RetryConfig>,
) -> Result<T, E>
where
    P: Clone,
    E: std::fmt::Display + From<String>,
    F: Fn(P) -> Fut,
    Fut: Future<Output = Result<T, E>>,
    I: Fn(&str) -> Result<P, E>,
{
    let config = config.unwrap_or_else(RetryConfig::from_env);
    let total_providers = selector.provider_count();
    // Cap failovers at providers - 1; saturating_sub guards the (normally
    // impossible) zero-provider case rather than underflowing.
    let max_failovers = std::cmp::min(
        config.max_failovers as usize,
        total_providers.saturating_sub(1),
    );
    let mut failover_count = 0;
    let mut total_attempts = 0;
    let mut last_error = None;

    log::debug!(
        "Starting RPC call '{}' with max_retries={}, max_failovers={}, available_providers={}",
        operation_name,
        config.max_retries,
        max_failovers,
        total_providers
    );

    while failover_count <= max_failovers && selector.available_provider_count() > 0 {
        // Try to get and initialize a provider
        let (provider, provider_url) =
            match get_provider(selector, operation_name, &provider_initializer) {
                Ok((provider, url)) => (provider, url),
                Err(e) => {
                    last_error = Some(e);
                    failover_count += 1;

                    // If we've exhausted all providers or reached max failovers, stop
                    if failover_count > max_failovers || selector.available_provider_count() == 0 {
                        break;
                    }

                    // Mark current as failed to get a different one next time
                    selector.mark_current_as_failed();
                    continue;
                }
            };

        log::debug!(
            "Selected provider: {} for operation '{}'",
            provider_url,
            operation_name
        );

        // Try the operation with this provider with retries
        match try_with_retries(
            &provider,
            &provider_url,
            operation_name,
            &operation,
            &is_retriable_error,
            &config,
            &mut total_attempts,
        )
        .await
        {
            Ok(result) => {
                log::debug!(
                    "RPC call '{}' succeeded with provider '{}' (total attempts: {})",
                    operation_name,
                    provider_url,
                    total_attempts
                );
                return Ok(result);
            }
            Err(internal_err) => {
                match internal_err {
                    InternalRetryError::NonRetriable(original_err) => {
                        // Check if this non-retriable error should mark the provider as failed
                        if should_mark_provider_failed(&original_err)
                            && selector.available_provider_count() > 1
                        {
                            log::warn!(
                                "Non-retriable error '{}' for provider '{}' on operation '{}' should mark provider as failed. Marking as failed and switching to next provider...",
                                original_err,
                                provider_url,
                                operation_name
                            );
                            selector.mark_current_as_failed();
                        }
                        return Err(original_err);
                    }
                    InternalRetryError::RetriesExhausted(original_err) => {
                        last_error = Some(original_err);

                        // If retries are exhausted, we always intend to mark the provider as failed,
                        // unless it's the last available one.
                        if selector.available_provider_count() > 1 {
                            log::warn!(
                                "All {} retry attempts failed for provider '{}' on operation '{}'. Error: {}. Marking as failed and switching to next provider (failover {}/{})...",
                                config.max_retries,
                                provider_url,
                                operation_name,
                                last_error.as_ref().unwrap(),
                                failover_count + 1,
                                max_failovers
                            );
                            selector.mark_current_as_failed();
                            failover_count += 1;
                        } else {
                            log::warn!(
                                "All {} retry attempts failed for provider '{}' on operation '{}'. Error: {}. This is the last available provider, not marking as failed.",
                                config.max_retries,
                                provider_url,
                                operation_name,
                                last_error.as_ref().unwrap()
                            );
                            break;
                        }
                    }
                }
            }
        }
    }

    let error_message = match &last_error {
        Some(e) => format!(
            "RPC call '{}' failed after {} total attempts across {} providers: {}",
            operation_name,
            total_attempts,
            failover_count,
            e
        ),
        None => format!(
            "RPC call '{}' failed after {} total attempts across {} providers with no error details",
            operation_name,
            total_attempts,
            failover_count
        )
    };

    log::error!("{}", error_message);

    // If we're here, all retries with all attempted providers failed
    Err(last_error.unwrap_or_else(|| E::from(error_message)))
}

/// Helper function to get and initialize a provider
fn get_provider<P, E, I>(
    selector: &RpcSelector,
    operation_name: &str,
    provider_initializer: &I,
) -> Result<(P, String), E>
where
    E: std::fmt::Display + From<String>,
    I: Fn(&str) -> Result<P, E>,
{
    // Get the next provider URL from the selector
    let provider_url = selector
        .get_client(|url| Ok::<_, eyre::Report>(url.to_string()))
        .map_err(|e| {
            let err_msg = format!("Failed to get provider URL for {}: {}", operation_name, e);
            log::warn!("{}", err_msg);
            E::from(err_msg)
        })?;

    // Initialize the provider
    let provider = provider_initializer(&provider_url).map_err(|e| {
        log::warn!(
            "Failed to initialize provider '{}' for operation '{}': {}",
            provider_url,
            operation_name,
            e
        );
        e
    })?;

    Ok((provider, provider_url))
}

/// Helper function to try an operation with retries
async fn try_with_retries<P, T, E, F, Fut>(
    provider: &P,
    provider_url: &str,
    operation_name: &str,
    operation: &F,
    is_retriable_error: &impl Fn(&E) -> bool,
    config: &RetryConfig,
    total_attempts: &mut usize,
) -> Result<T, InternalRetryError<E>>
where
    P: Clone,
    E: std::fmt::Display + From<String>,
    F: Fn(P) -> Fut,
    Fut: Future<Output = Result<T, E>>,
{
    // For max_retries of 0 or 1, we don't retry - just attempt once. Any
    // failure here is surfaced as NonRetriable, since no retries were possible.
    if config.max_retries <= 1 {
        *total_attempts += 1;
        return operation(provider.clone())
            .await
            .map_err(InternalRetryError::NonRetriable);
    }

    for current_attempt_idx in 0..config.max_retries {
        *total_attempts += 1;

        match operation(provider.clone()).await {
            Ok(result) => {
                log::debug!(
                    "RPC call '{}' succeeded with provider '{}' (attempt {}/{}, total attempts: {})",
                    operation_name,
                    provider_url,
                    current_attempt_idx + 1,
                    config.max_retries,
                    *total_attempts
                );
                return Ok(result);
            }
            Err(e) => {
                let is_retriable = is_retriable_error(&e);
                let is_last_attempt = current_attempt_idx + 1 >= config.max_retries;

                log::warn!(
                    "RPC call '{}' failed with provider '{}' (attempt {}/{}): {} [{}]",
                    operation_name,
                    provider_url,
                    current_attempt_idx + 1,
                    config.max_retries,
                    e,
                    if is_retriable {
                        "retriable"
                    } else {
                        "non-retriable"
                    }
                );

                if !is_retriable {
                    return Err(InternalRetryError::NonRetriable(e));
                }

                if is_last_attempt {
                    log::warn!(
                        "All {} retries exhausted for RPC call '{}' with provider '{}'. Last error: {}",
                        config.max_retries, operation_name, provider_url, e
                    );
                    return Err(InternalRetryError::RetriesExhausted(e));
                }

                // Calculate and apply delay before next retry
                let delay = calculate_retry_delay(
                    current_attempt_idx + 1,
                    config.base_delay_ms,
                    config.max_delay_ms,
                );

                log::debug!(
                    "Retrying RPC call '{}' with provider '{}' after {:?} delay (attempt {}/{})",
                    operation_name,
                    provider_url,
                    delay,
                    current_attempt_idx + 2,
                    config.max_retries
                );
                tokio::time::sleep(delay).await;
            }
        }
    }

    unreachable!(
        "Loop should have returned if max_retries > 1; max_retries=0 or 1 case is handled above."
    );
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::models::RpcConfig;
    use lazy_static::lazy_static;
    use std::cmp::Ordering;
    use std::env;
    use std::sync::atomic::{AtomicU8, Ordering as AtomicOrdering};
    use std::sync::Arc;
    use std::sync::Mutex;

    // Use a mutex to ensure tests don't run in parallel when modifying env vars
    lazy_static! {
        static ref RETRY_TEST_ENV_MUTEX: Mutex<()> = Mutex::new(());
    }

    // Define a simple error type for testing
    #[derive(Debug, Clone)]
    struct TestError(String);

    impl std::fmt::Display for TestError {
        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
            write!(f, "TestError: {}", self.0)
        }
    }

    impl From<String> for TestError {
        fn from(msg: String) -> Self {
            TestError(msg)
        }
    }

    // Helper struct to ensure environment variables are reset after tests
    struct EnvGuard {
        keys: Vec<String>,
        old_values: Vec<Option<String>>,
    }

    impl EnvGuard {
        fn new() -> Self {
            Self {
                keys: Vec::new(),
                old_values: Vec::new(),
            }
        }

        fn set(&mut self, key: &str, value: &str) {
            let old_value = env::var(key).ok();
            self.keys.push(key.to_string());
            self.old_values.push(old_value);
            env::set_var(key, value);
        }
    }

    impl Drop for EnvGuard {
        fn drop(&mut self) {
            for i in 0..self.keys.len() {
                match &self.old_values[i] {
                    Some(value) => env::set_var(&self.keys[i], value),
                    None => env::remove_var(&self.keys[i]),
                }
            }
        }
    }

    // Set up test environment variables
    fn setup_test_env() -> EnvGuard {
        let mut guard = EnvGuard::new();
        guard.set("API_KEY", "fake-api-key-for-tests-01234567890123456789");
        guard.set("PROVIDER_MAX_RETRIES", "2");
        guard.set("PROVIDER_MAX_FAILOVERS", "1");
        guard.set("PROVIDER_RETRY_BASE_DELAY_MS", "1");
        guard.set("PROVIDER_RETRY_MAX_DELAY_MS", "5");
        guard.set("REDIS_URL", "redis://localhost:6379");
        guard.set(
            "RELAYER_PRIVATE_KEY",
            "0x1234567890123456789012345678901234567890123456789012345678901234",
        );
        guard
    }

    #[test]
    fn test_calculate_retry_delay() {
        // Test exponential backoff pattern
        let base_delay_ms = 10;
        let max_delay_ms = 10000;

        let expected_backoffs = [
            10,  // 10 * 2^0
            20,  // 10 * 2^1
            40,  // 10 * 2^2
            80,  // 10 * 2^3
            160, // 10 * 2^4
            320, // 10 * 2^5
        ];

        for (i, expected) in expected_backoffs.iter().enumerate() {
            let attempt = i as u8;
            let delay = calculate_retry_delay(attempt, base_delay_ms, max_delay_ms);

            let min_expected = (*expected as f64 * (1.0 - RETRY_JITTER_PERCENT)).floor() as u128;
            let max_expected = (*expected as f64 * (1.0 + RETRY_JITTER_PERCENT)).ceil() as u128;

            assert!(
                (min_expected..=max_expected).contains(&delay.as_millis()),
                "Delay {} outside expected range {}..={}",
                delay.as_millis(),
                min_expected,
                max_expected
            );
        }

        // Test max delay capping
        let base_delay_ms = 100;
        let max_delay_ms = 1000;
        let delay = calculate_retry_delay(4, base_delay_ms, max_delay_ms);
        let min_expected = (max_delay_ms as f64 * (1.0 - RETRY_JITTER_PERCENT)).floor() as u128;
        let max_expected = (max_delay_ms as f64 * (1.0 + RETRY_JITTER_PERCENT)).ceil() as u128;
        assert!(
            (min_expected..=max_expected).contains(&delay.as_millis()),
            "Delay {} outside expected range {}..={}",
            delay.as_millis(),
            min_expected,
            max_expected
        );

        // Test edge cases
        assert_eq!(calculate_retry_delay(5, 0, 1000).as_millis(), 0);
        assert_eq!(calculate_retry_delay(5, 100, 0).as_millis(), 0);
        assert_eq!(calculate_retry_delay(5, 0, 0).as_millis(), 0);

        // Test with max attempt (u8::MAX)
        let max_delay_ms = 10_000;
        let delay = calculate_retry_delay(u8::MAX, 1, max_delay_ms);
        assert!(
            delay.as_millis()
                <= (max_delay_ms as f64 * (1.0 + RETRY_JITTER_PERCENT)).ceil() as u128
        );
    }

    #[test]
    fn test_apply_jitter() {
        let base_delay = 1000;
        let jittered = apply_jitter(base_delay);

        let min_expected = (base_delay as f64 * (1.0 - RETRY_JITTER_PERCENT)).floor() as u64;
        let max_expected = (base_delay as f64 * (1.0 + RETRY_JITTER_PERCENT)).ceil() as u64;

        assert!(
            (min_expected as u128..=max_expected as u128).contains(&jittered.as_millis()),
            "Jittered value {} outside expected range {}..={}",
            jittered.as_millis(),
            min_expected,
            max_expected
        );

        // Test edge cases
        assert_eq!(apply_jitter(0).as_millis(), 0);

        // Test small values where jitter might be 0
        for delay in 1..5 {
            let jittered = apply_jitter(delay);
            let jitter_range = (delay as f64 * RETRY_JITTER_PERCENT).floor() as u64;

            if jitter_range == 0 {
                assert_eq!(jittered.as_millis(), delay as u128);
            } else {
                let min_expected = delay.saturating_sub(jitter_range);
                let max_expected = delay.saturating_add(jitter_range);
                assert!(
                    (min_expected as u128..=max_expected as u128).contains(&jittered.as_millis()),
                    "Jittered value {} outside expected range {}..={}",
                    jittered.as_millis(),
                    min_expected,
                    max_expected
                );
            }
        }

        let base_delay = 10000;
        let iterations = 200;
        let mut additions = 0;
        let mut subtractions = 0;

        for _ in 0..iterations {
            let jittered = apply_jitter(base_delay);
            let j_millis = jittered.as_millis();
            let b_delay = base_delay as u128;

            match j_millis.cmp(&b_delay) {
                Ordering::Greater => {
                    additions += 1;
                }
                Ordering::Less => {
                    subtractions += 1;
                }
                Ordering::Equal => {}
            }
        }

        assert!(additions > 0, "No additions were observed");
        assert!(subtractions > 0, "No subtractions were observed");
    }

    #[test]
    fn test_retry_config() {
        let config = RetryConfig::new(5, 2, 100, 5000);
        assert_eq!(config.max_retries, 5);
        assert_eq!(config.max_failovers, 2);
        assert_eq!(config.base_delay_ms, 100);
        assert_eq!(config.max_delay_ms, 5000);
    }

    #[test]
    fn test_retry_config_from_env() {
        let _lock = RETRY_TEST_ENV_MUTEX
            .lock()
            .unwrap_or_else(|e| e.into_inner());
        // setup_test_env already sets REDIS_URL and RELAYER_PRIVATE_KEY,
        // which ServerConfig requires.
        let _guard = setup_test_env();

        let config = RetryConfig::from_env();
        assert_eq!(config.max_retries, 2);
        assert_eq!(config.max_failovers, 1);
        assert_eq!(config.base_delay_ms, 1);
        assert_eq!(config.max_delay_ms, 5);
    }

    #[test]
    fn test_calculate_retry_delay_edge_cases() {
        // Test attempt = 0 (should be base_delay * 2^0 = base_delay)
        let delay = calculate_retry_delay(0, 100, 1000);
        let min_expected = (100.0 * (1.0 - RETRY_JITTER_PERCENT)).floor() as u128;
        let max_expected = (100.0 * (1.0 + RETRY_JITTER_PERCENT)).ceil() as u128;
        assert!(
            (min_expected..=max_expected).contains(&delay.as_millis()),
            "Delay {} outside expected range {}..={}",
            delay.as_millis(),
            min_expected,
            max_expected
        );

        // Test equal base and max delays
        let delay = calculate_retry_delay(5, 100, 100);
        let min_expected = (100.0 * (1.0 - RETRY_JITTER_PERCENT)).floor() as u128;
        let max_expected = (100.0 * (1.0 + RETRY_JITTER_PERCENT)).ceil() as u128;
        assert!(
            (min_expected..=max_expected).contains(&delay.as_millis()),
            "Delay {} outside expected range {}..={}",
            delay.as_millis(),
            min_expected,
            max_expected
        );

        // Test very large delays (near overflow protection)
        let delay = calculate_retry_delay(60, 1000, u64::MAX);
        assert!(delay.as_millis() > 0);

        // Test minimum values
        let delay = calculate_retry_delay(1, 1, 1);
        assert_eq!(delay.as_millis(), 1);
    }

    #[test]
    fn test_retry_config_validation() {
        // Valid configurations should work
        let _config = RetryConfig::new(3, 1, 100, 1000);
        let _config = RetryConfig::new(3, 1, 0, 0); // Both zero is valid
        let _config = RetryConfig::new(3, 1, 100, 100); // Equal values are valid
        let _config = RetryConfig::new(0, 0, 1, 1); // Minimum non-zero values
        let _config = RetryConfig::new(255, 255, 1, 1000); // Maximum u8 values
    }

    #[test]
    #[should_panic(
        expected = "max_delay_ms (50) must be >= base_delay_ms (100) when both are non-zero"
    )]
    fn test_retry_config_validation_panic_delay_ordering() {
        // This should panic because max_delay_ms < base_delay_ms
        let _config = RetryConfig::new(3, 1, 100, 50);
    }

    #[test]
    #[should_panic(
        expected = "Delay values must be consistent: both zero (no delays) or both non-zero"
    )]
    fn test_retry_config_validation_panic_inconsistent_delays_base_zero() {
        // This should panic because only base_delay_ms is zero
        let _config = RetryConfig::new(3, 1, 0, 1000);
    }

    #[test]
    #[should_panic(
        expected = "Delay values must be consistent: both zero (no delays) or both non-zero"
    )]
    fn test_retry_config_validation_panic_inconsistent_delays_max_zero() {
        // This should panic because only max_delay_ms is zero
        let _config = RetryConfig::new(3, 1, 100, 0);
    }

    #[test]
    fn test_get_provider() {
        let _guard = setup_test_env();

        let configs = vec![
            RpcConfig::new("http://localhost:8545".to_string()),
            RpcConfig::new("http://localhost:8546".to_string()),
        ];
        let selector = RpcSelector::new(configs).expect("Failed to create selector");

        let initializer =
            |url: &str| -> Result<String, TestError> { Ok(format!("provider-{}", url)) };

        let result = get_provider(&selector, "test_operation", &initializer);
        assert!(result.is_ok());
        let (provider, url) = result.unwrap();
        assert_eq!(url, "http://localhost:8545");
        assert_eq!(provider, "provider-http://localhost:8545");

        let initializer = |_: &str| -> Result<String, TestError> {
            Err(TestError("Failed to initialize".to_string()))
        };

        let result = get_provider(&selector, "test_operation", &initializer);
        assert!(result.is_err());
        let err = result.unwrap_err();
        assert!(format!("{}", err).contains("Failed to initialize"));
    }

    #[tokio::test]
    async fn test_try_with_retries() {
        let provider = "test_provider".to_string();
        let provider_url = "http://localhost:8545";
        let mut total_attempts = 0;
        let config = RetryConfig::new(3, 1, 5, 10);

        let operation = |p: String| async move {
            assert_eq!(p, "test_provider");
            Ok::<_, TestError>(42)
        };

        let result = try_with_retries(
            &provider,
            provider_url,
            "test_operation",
            &operation,
            &|_| false,
            &config,
            &mut total_attempts,
        )
        .await;

        assert!(result.is_ok());
        assert_eq!(result.unwrap(), 42);
        assert_eq!(total_attempts, 1);

        let attempts = Arc::new(AtomicU8::new(0));
        let attempts_clone = attempts.clone();
        let operation = move |_: String| {
            let attempts = attempts_clone.clone();
            async move {
                let current = attempts.fetch_add(1, AtomicOrdering::SeqCst);
                if current < 2 {
                    Err(TestError("Retriable error".to_string()))
                } else {
                    Ok(42)
                }
            }
        };

        let mut total_attempts = 0;
        let result = try_with_retries(
            &provider,
            provider_url,
            "test_operation",
            &operation,
            &|_| true,
            &config,
            &mut total_attempts,
        )
        .await;

        assert!(result.is_ok());
        assert_eq!(result.unwrap(), 42);
        assert_eq!(total_attempts, 3);

        // Test non-retriable error
        let operation = |_: String| async { Err(TestError("Non-retriable error".to_string())) };

        let mut total_attempts = 0;
        let result: Result<i32, InternalRetryError<TestError>> = try_with_retries(
            &provider,
            provider_url,
            "test_operation",
            &operation,
            &|_| false,
            &config,
            &mut total_attempts,
        )
        .await;

        assert!(result.is_err());
        assert_eq!(total_attempts, 1);
        let err = result.unwrap_err();
        assert!(matches!(err, InternalRetryError::NonRetriable(_)));

        // Test exhausting all retries
        let operation = |_: String| async { Err(TestError("Always fails".to_string())) };

        let mut total_attempts = 0;
        let result: Result<i32, InternalRetryError<TestError>> = try_with_retries(
            &provider,
            provider_url,
            "test_operation",
            &operation,
            &|_| true,
            &config,
            &mut total_attempts,
        )
        .await;

        assert!(result.is_err());
        assert_eq!(total_attempts, 3); // Should try 3 times (max_retries)
        let error = result.unwrap_err();
        assert!(matches!(error, InternalRetryError::RetriesExhausted(_)));
    }

    #[tokio::test]
    async fn test_try_with_retries_max_retries_zero() {
        let provider = "test_provider".to_string();
        let provider_url = "http://localhost:8545";
        let mut total_attempts = 0;
        let config = RetryConfig::new(0, 1, 5, 10);

        // Test successful operation with max_retries = 0
        let operation = |_p: String| async move { Ok::<_, TestError>(42) };

        let result = try_with_retries(
            &provider,
            provider_url,
            "test_operation",
            &operation,
            &|_| false,
            &config,
            &mut total_attempts,
        )
        .await;

        assert!(result.is_ok());
        assert_eq!(result.unwrap(), 42);

        // Test failing operation with max_retries = 0
        let operation = |_: String| async { Err(TestError("Always fails".to_string())) };

        let mut total_attempts = 0;
        let result: Result<i32, InternalRetryError<TestError>> = try_with_retries(
            &provider,
            provider_url,
            "test_operation",
            &operation,
            &|_| true,
            &config,
            &mut total_attempts,
        )
        .await;

        assert!(result.is_err());
        let error = result.unwrap_err();
        assert!(matches!(error, InternalRetryError::NonRetriable(_))); // Should be NonRetriable due to max_retries <= 1
    }

    #[tokio::test]
    async fn test_try_with_retries_max_retries_one() {
        let provider = "test_provider".to_string();
        let provider_url = "http://localhost:8545";
        let mut total_attempts = 0;
        let config = RetryConfig::new(1, 1, 5, 10);

        // Test successful operation with max_retries = 1
        let operation = |p: String| async move {
            assert_eq!(p, "test_provider");
            Ok::<_, TestError>(42)
        };

        let result = try_with_retries(
            &provider,
            provider_url,
            "test_operation",
            &operation,
            &|_| false,
            &config,
            &mut total_attempts,
        )
        .await;

        assert!(result.is_ok());
        assert_eq!(result.unwrap(), 42);

        // Test failing operation with max_retries = 1
        let operation = |_: String| async { Err(TestError("Always fails".to_string())) };

        let mut total_attempts = 0;
        let result: Result<i32, InternalRetryError<TestError>> = try_with_retries(
            &provider,
            provider_url,
            "test_operation",
            &operation,
            &|_| true,
            &config,
            &mut total_attempts,
        )
        .await;

        assert!(result.is_err());
        let error = result.unwrap_err();
        assert!(matches!(error, InternalRetryError::NonRetriable(_))); // Should be NonRetriable due to max_retries <= 1
    }

    #[tokio::test]
    async fn test_non_retriable_error_does_not_mark_provider_failed() {
        let _guard = setup_test_env();

        let configs = vec![
            RpcConfig::new("http://localhost:8545".to_string()),
            RpcConfig::new("http://localhost:8546".to_string()),
        ];
        let selector = RpcSelector::new(configs).expect("Failed to create selector");

        let provider_initializer = |url: &str| -> Result<String, TestError> { Ok(url.to_string()) };

        // Operation that always fails with a non-retriable error
        let operation =
            |_provider: String| async move { Err(TestError("Non-retriable error".to_string())) };

        let config = RetryConfig::new(3, 1, 0, 0);

        // Get initial provider count
        let initial_available_count = selector.available_provider_count();

        let result: Result<i32, TestError> = retry_rpc_call(
            &selector,
            "test_operation",
            |_| false, // Error is NOT retriable
            |_| false, // Error should NOT mark the provider as failed
            provider_initializer,
            operation,
            Some(config),
        )
        .await;

        assert!(result.is_err());

        // Provider should NOT be marked as failed for non-retriable errors
        let final_available_count = selector.available_provider_count();
        assert_eq!(
            initial_available_count, final_available_count,
            "Provider count should remain the same for non-retriable errors"
        );
    }

    #[tokio::test]
    async fn test_retriable_error_marks_provider_failed_after_retries_exhausted() {
        let _guard = setup_test_env();

        let configs = vec![
            RpcConfig::new("http://localhost:8545".to_string()),
            RpcConfig::new("http://localhost:8546".to_string()),
        ];
        let selector = RpcSelector::new(configs).expect("Failed to create selector");

        let provider_initializer = |url: &str| -> Result<String, TestError> { Ok(url.to_string()) };

        // Operation that always fails with a retriable error
        let operation = |_provider: String| async { Err(TestError("Retriable error".to_string())) };

        let config = RetryConfig::new(2, 1, 0, 0); // 2 retries, 1 failover

        // Get initial provider count
        let initial_available_count = selector.available_provider_count();

        let result: Result<i32, TestError> = retry_rpc_call(
            &selector,
            "test_operation",
            |_| true, // Error IS retriable
            |_| true, // Error SHOULD mark provider as failed
            provider_initializer,
            operation,
            Some(config),
        )
        .await;

        assert!(result.is_err());

        // At least one provider should be marked as failed after retries are exhausted
        let final_available_count = selector.available_provider_count();
        assert!(final_available_count < initial_available_count,
            "At least one provider should be marked as failed after retriable errors exhaust retries");
    }

    #[tokio::test]
    async fn test_retry_rpc_call_success() {
        let _guard = setup_test_env();

        let configs = vec![
            RpcConfig::new("http://localhost:8545".to_string()),
            RpcConfig::new("http://localhost:8546".to_string()),
        ];
        let selector = RpcSelector::new(configs).expect("Failed to create selector");

        let attempts = Arc::new(AtomicU8::new(0));
        let attempts_clone = attempts.clone();

        let provider_initializer =
            |_url: &str| -> Result<String, TestError> { Ok("mock_provider".to_string()) };

        let operation = move |_provider: String| {
            let attempts = attempts_clone.clone();
            async move {
                attempts.fetch_add(1, AtomicOrdering::SeqCst);
                Ok::<_, TestError>(42)
            }
        };

        let config = RetryConfig::new(1, 1, 0, 0);

        let result = retry_rpc_call(
            &selector,
            "test_operation",
            |_| false, // No errors are retriable
            |_| false, // No errors mark the provider as failed
            provider_initializer,
            operation,
            Some(config),
        )
        .await;

        assert!(result.is_ok(), "Expected OK result but got: {:?}", result);
        assert_eq!(result.unwrap(), 42);
        assert_eq!(attempts.load(AtomicOrdering::SeqCst), 1); // Should be called once
    }

    #[tokio::test]
    async fn test_retry_rpc_call_with_provider_failover() {
        let _guard = setup_test_env();

        let configs = vec![
            RpcConfig::new("http://localhost:8545".to_string()),
            RpcConfig::new("http://localhost:8546".to_string()),
        ];
        let selector = RpcSelector::new(configs).expect("Failed to create selector");

        let current_provider = Arc::new(Mutex::new(String::new()));
        let current_provider_clone = current_provider.clone();

        let provider_initializer = move |url: &str| -> Result<String, TestError> {
            let mut provider = current_provider_clone.lock().unwrap();
            *provider = url.to_string();
            Ok(url.to_string())
        };

        let operation = move |provider: String| async move {
            if provider.contains("8545") {
                Err(TestError("First provider error".to_string()))
            } else {
                Ok(42)
            }
        };

        let config = RetryConfig::new(2, 1, 0, 0); // Set max_retries to 2 to enable retry exhaustion

        let result = retry_rpc_call(
            &selector,
            "test_operation",
            |_| true, // Errors are retriable to trigger RetriesExhausted and failover
            |_| true, // Errors SHOULD mark provider as failed to enable failover
            provider_initializer,
            operation,
            Some(config),
        )
        .await;

        assert!(result.is_ok(), "Expected OK result but got: {:?}", result);
        assert_eq!(result.unwrap(), 42);

        // Final provider should be the second one
        let final_provider = current_provider.lock().unwrap().clone();
        assert!(
            final_provider.contains("8546"),
            "Wrong provider selected: {}",
            final_provider
        );
    }

    #[tokio::test]
    async fn test_retry_rpc_call_all_providers_fail() {
        let _guard = setup_test_env();

        let configs = vec![
            RpcConfig::new("http://localhost:8545".to_string()),
            RpcConfig::new("http://localhost:8546".to_string()),
        ];
        let selector = RpcSelector::new(configs).expect("Failed to create selector");

        let provider_initializer =
            |_: &str| -> Result<String, TestError> { Ok("mock_provider".to_string()) };

        let operation = |_: String| async { Err(TestError("Always fails".to_string())) };

        let config = RetryConfig::new(2, 1, 0, 0); // Set max_retries to 2 to enable retry exhaustion

        let result: Result<i32, TestError> = retry_rpc_call(
            &selector,
            "test_operation",
            |_| true,  // Errors are retriable to trigger RetriesExhausted and failover
            |_| false, // should_mark_provider_failed; ignored once retries are exhausted
            provider_initializer,
            operation,
            Some(config),
        )
        .await;

        assert!(result.is_err(), "Expected an error but got: {:?}", result);
    }

    #[tokio::test]
    async fn test_retry_rpc_call_with_default_config() {
        let (_guard, selector) = {
            let _lock = RETRY_TEST_ENV_MUTEX
                .lock()
                .unwrap_or_else(|e| e.into_inner());
            let guard = setup_test_env();

            let configs = vec![RpcConfig::new("http://localhost:8545".to_string())];
            let selector = RpcSelector::new(configs).expect("Failed to create selector");
            (guard, selector)
        };

        let provider_initializer =
            |_url: &str| -> Result<String, TestError> { Ok("mock_provider".to_string()) };

        let operation = |_provider: String| async move { Ok::<_, TestError>(42) };

        // Test with None config (should use default from env)
        let result = retry_rpc_call(
            &selector,
            "test_operation",
            |_| false,
            |_| false,
            provider_initializer,
            operation,
            None, // Use default config
        )
        .await;

        assert!(result.is_ok());
        assert_eq!(result.unwrap(), 42);
    }

    #[tokio::test]
    async fn test_retry_rpc_call_provider_initialization_failures() {
        let _guard = setup_test_env();

        let configs = vec![
            RpcConfig::new("http://localhost:8545".to_string()),
            RpcConfig::new("http://localhost:8546".to_string()),
        ];
        let selector = RpcSelector::new(configs).expect("Failed to create selector");

        let attempt_count = Arc::new(AtomicU8::new(0));
        let attempt_count_clone = attempt_count.clone();

        let provider_initializer = move |url: &str| -> Result<String, TestError> {
            let count = attempt_count_clone.fetch_add(1, AtomicOrdering::SeqCst);
            if count == 0 && url.contains("8545") {
                Err(TestError("First provider init failed".to_string()))
            } else {
                Ok(url.to_string())
            }
        };

        let operation = |_provider: String| async move { Ok::<_, TestError>(42) };

        let config = RetryConfig::new(2, 1, 0, 0);

        let result = retry_rpc_call(
            &selector,
            "test_operation",
            |_| true,
            |_| false,
            provider_initializer,
            operation,
            Some(config),
        )
        .await;

        assert!(result.is_ok());
        assert_eq!(result.unwrap(), 42);
        assert!(attempt_count.load(AtomicOrdering::SeqCst) >= 2); // Should have tried multiple providers
    }

    #[test]
    fn test_get_provider_selector_errors() {
        let _guard = setup_test_env();

        // Create selector with a single provider, select it, then mark it as failed
        let configs = vec![RpcConfig::new("http://localhost:8545".to_string())];
        let selector = RpcSelector::new(configs).expect("Failed to create selector");

        // First select the provider to make it current, then mark it as failed
        let _ = selector.get_current_url().unwrap(); // This selects the provider
        selector.mark_current_as_failed(); // Now mark it as failed

        let provider_initializer =
            |url: &str| -> Result<String, TestError> { Ok(format!("provider-{}", url)) };

        // Now get_provider should fail because the only provider is marked as failed
        let result = get_provider(&selector, "test_operation", &provider_initializer);
        assert!(result.is_err());
    }

    #[tokio::test]
    async fn test_last_provider_never_marked_as_failed() {
        let _guard = setup_test_env();

        // Test with a single provider
        let configs = vec![RpcConfig::new("http://localhost:8545".to_string())];
        let selector = RpcSelector::new(configs).expect("Failed to create selector");

        let provider_initializer = |url: &str| -> Result<String, TestError> { Ok(url.to_string()) };

        // Operation that always fails with a retriable error
        let operation = |_provider: String| async { Err(TestError("Always fails".to_string())) };

        let config = RetryConfig::new(2, 1, 0, 0); // 2 retries, 1 failover

        // Get initial provider count
        let initial_available_count = selector.available_provider_count();
        assert_eq!(initial_available_count, 1);

        let result: Result<i32, TestError> = retry_rpc_call(
            &selector,
            "test_operation",
            |_| true, // Error IS retriable
            |_| true, // Error SHOULD mark provider as failed, but last provider should be preserved
            provider_initializer,
            operation,
            Some(config),
        )
        .await;

        assert!(result.is_err());

        // The last provider should NOT be marked as failed
        let final_available_count = selector.available_provider_count();
        assert_eq!(
            final_available_count, initial_available_count,
            "Last provider should never be marked as failed"
        );
        assert_eq!(
            final_available_count, 1,
            "Should still have 1 provider available"
        );
    }

    #[tokio::test]
    async fn test_last_provider_behavior_with_multiple_providers() {
        let _guard = setup_test_env();

        // Test with multiple providers, but mark all but one as failed
        let configs = vec![
            RpcConfig::new("http://localhost:8545".to_string()),
            RpcConfig::new("http://localhost:8546".to_string()),
            RpcConfig::new("http://localhost:8547".to_string()),
        ];
        let selector = RpcSelector::new(configs).expect("Failed to create selector");

        let provider_initializer = |url: &str| -> Result<String, TestError> { Ok(url.to_string()) };

        // Operation that always fails with a retriable error
        let operation = |_provider: String| async { Err(TestError("Always fails".to_string())) };

        let config = RetryConfig::new(2, 2, 0, 0); // 2 retries, 2 failovers

        // Get initial provider count
        let initial_available_count = selector.available_provider_count();
        assert_eq!(initial_available_count, 3);

        let result: Result<i32, TestError> = retry_rpc_call(
            &selector,
            "test_operation",
            |_| true, // Error IS retriable
            |_| true, // Error SHOULD mark provider as failed, but last provider should be preserved
            provider_initializer,
            operation,
            Some(config),
        )
        .await;

        assert!(result.is_err());

        // Should have marked 2 providers as failed, but kept the last one
        let final_available_count = selector.available_provider_count();
        assert_eq!(
            final_available_count, 1,
            "Should have exactly 1 provider left (the last one should not be marked as failed)"
        );
    }

    #[tokio::test]
    async fn test_non_retriable_error_should_mark_provider_failed() {
        let _guard = setup_test_env();

        let configs = vec![
            RpcConfig::new("http://localhost:8545".to_string()),
            RpcConfig::new("http://localhost:8546".to_string()),
        ];
        let selector = RpcSelector::new(configs).expect("Failed to create selector");

        let provider_initializer = |url: &str| -> Result<String, TestError> { Ok(url.to_string()) };

        // Operation that fails with a non-retriable error that SHOULD mark provider as failed
        let operation = |_provider: String| async move {
            Err(TestError("Critical non-retriable error".to_string()))
        };

        let config = RetryConfig::new(3, 1, 0, 0);

        // Get initial provider count
        let initial_available_count = selector.available_provider_count();
        assert_eq!(initial_available_count, 2);

        let result: Result<i32, TestError> = retry_rpc_call(
            &selector,
            "test_operation",
            |_| false,                    // Error is NOT retriable
            |e| e.0.contains("Critical"), // Error SHOULD mark provider as failed if it contains "Critical"
            provider_initializer,
            operation,
            Some(config),
        )
        .await;

        assert!(result.is_err());

        // Provider should be marked as failed because should_mark_provider_failed returned true
        let final_available_count = selector.available_provider_count();
        assert_eq!(final_available_count, 1,
            "Provider should be marked as failed when should_mark_provider_failed returns true for non-retriable error");
    }
1423
1424    #[tokio::test]
1425    async fn test_non_retriable_error_should_not_mark_provider_failed() {
1426        let _guard = setup_test_env();
1427
1428        let configs = vec![
1429            RpcConfig::new("http://localhost:8545".to_string()),
1430            RpcConfig::new("http://localhost:8546".to_string()),
1431        ];
1432        let selector = RpcSelector::new(configs).expect("Failed to create selector");
1433
1434        let provider_initializer = |url: &str| -> Result<String, TestError> { Ok(url.to_string()) };
1435
1436        // Operation that fails with a non-retriable error that should NOT mark provider as failed
1437        let operation = |_provider: String| async move {
1438            Err(TestError("Minor non-retriable error".to_string()))
1439        };
1440
1441        let config = RetryConfig::new(3, 1, 0, 0);

        // Get initial provider count
        let initial_available_count = selector.available_provider_count();
        assert_eq!(initial_available_count, 2);

        let result: Result<i32, TestError> = retry_rpc_call(
            &selector,
            "test_operation",
            |_| false,                    // Error is NOT retriable
            |e| e.0.contains("Critical"), // Error should NOT mark provider as failed (doesn't contain "Critical")
            provider_initializer,
            operation,
            Some(config),
        )
        .await;

        assert!(result.is_err());

        // Provider should NOT be marked as failed because should_mark_provider_failed returned false
        let final_available_count = selector.available_provider_count();
        assert_eq!(
            final_available_count, initial_available_count,
            "Provider should NOT be marked as failed when should_mark_provider_failed returns false for non-retriable error"
        );
    }

    #[tokio::test]
    async fn test_retriable_error_ignores_should_mark_provider_failed() {
        let _guard = setup_test_env();

        let configs = vec![
            RpcConfig::new("http://localhost:8545".to_string()),
            RpcConfig::new("http://localhost:8546".to_string()),
        ];
        let selector = RpcSelector::new(configs).expect("Failed to create selector");

        let provider_initializer = |url: &str| -> Result<String, TestError> { Ok(url.to_string()) };

        // Operation that always fails with a retriable error
        let operation =
            |_provider: String| async { Err(TestError("Retriable network error".to_string())) };

        let config = RetryConfig::new(2, 1, 0, 0); // 2 retries, 1 failover

        // Get initial provider count
        let initial_available_count = selector.available_provider_count();
        assert_eq!(initial_available_count, 2);

        let result: Result<i32, TestError> = retry_rpc_call(
            &selector,
            "test_operation",
            |_| true,  // Error IS retriable
            |_| false, // should_mark_provider_failed returns false, but should be IGNORED for retriable errors
            provider_initializer,
            operation,
            Some(config),
        )
        .await;

        assert!(result.is_err());

        // Provider should be marked as failed despite should_mark_provider_failed returning false,
        // because retriable errors that exhaust retries always mark the provider as failed
        let final_available_count = selector.available_provider_count();
        assert!(
            final_available_count < initial_available_count,
            "Provider should be marked as failed when retriable errors exhaust retries, regardless of should_mark_provider_failed"
        );
    }

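    // Taken together, the surrounding tests pin down the provider-marking rules:
    //   * a retriable error that exhausts its retries always marks the provider
    //     as failed, regardless of should_mark_provider_failed;
    //   * a non-retriable error marks the provider as failed only when
    //     should_mark_provider_failed returns true for it;
    //   * the last available provider is never marked as failed (see the
    //     last-provider protection tests below).
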
    #[tokio::test]
    async fn test_mixed_error_scenarios_with_different_marking_behavior() {
        let _guard = setup_test_env();

        // Test scenario 1: Non-retriable error that should mark provider as failed
        let configs = vec![
            RpcConfig::new("http://localhost:8545".to_string()),
            RpcConfig::new("http://localhost:8546".to_string()),
        ];
        let selector = RpcSelector::new(configs).expect("Failed to create selector");

        let provider_initializer = |url: &str| -> Result<String, TestError> { Ok(url.to_string()) };

        let operation =
            |_provider: String| async move { Err(TestError("Critical network error".to_string())) };

        let config = RetryConfig::new(1, 1, 0, 0); // 1 retry, 1 failover
        let initial_count = selector.available_provider_count();

        let result: Result<i32, TestError> = retry_rpc_call(
            &selector,
            "test_operation",
            |_| false,                    // Non-retriable
            |e| e.0.contains("Critical"), // Should mark as failed
            provider_initializer,
            operation,
            Some(config.clone()),
        )
        .await;

        assert!(result.is_err());
        let after_critical_count = selector.available_provider_count();
        assert_eq!(
            after_critical_count,
            initial_count - 1,
            "Critical error should mark provider as failed"
        );

        // Test scenario 2: Non-retriable error that should NOT mark provider as failed
        let operation =
            |_provider: String| async move { Err(TestError("Minor validation error".to_string())) };

        let result: Result<i32, TestError> = retry_rpc_call(
            &selector,
            "test_operation",
            |_| false,                    // Non-retriable
            |e| e.0.contains("Critical"), // Should NOT mark as failed (doesn't contain "Critical")
            provider_initializer,
            operation,
            Some(config),
        )
        .await;

        assert!(result.is_err());
        let final_count = selector.available_provider_count();
        assert_eq!(
            final_count, after_critical_count,
            "Minor error should NOT mark provider as failed"
        );
    }

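    // The selector never marks its only remaining provider as failed, even when
    // should_mark_provider_failed returns true: dropping the last provider would
    // leave nothing to serve subsequent calls.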
    #[tokio::test]
    async fn test_should_mark_provider_failed_respects_last_provider_protection() {
        let _guard = setup_test_env();

        // Test with a single provider (last provider protection)
        let configs = vec![RpcConfig::new("http://localhost:8545".to_string())];
        let selector = RpcSelector::new(configs).expect("Failed to create selector");

        let provider_initializer = |url: &str| -> Result<String, TestError> { Ok(url.to_string()) };

        // Operation that fails with a non-retriable error that SHOULD mark provider as failed
        let operation =
            |_provider: String| async move { Err(TestError("Critical network error".to_string())) };

        let config = RetryConfig::new(1, 1, 0, 0); // 1 retry, 1 failover

        // Get initial provider count
        let initial_available_count = selector.available_provider_count();
        assert_eq!(initial_available_count, 1);

        let result: Result<i32, TestError> = retry_rpc_call(
            &selector,
            "test_operation",
            |_| false,                    // Error is NOT retriable
            |e| e.0.contains("Critical"), // Error SHOULD mark provider as failed
            provider_initializer,
            operation,
            Some(config),
        )
        .await;

        assert!(result.is_err());

        // Last provider should NEVER be marked as failed, even if should_mark_provider_failed returns true
        let final_available_count = selector.available_provider_count();
        assert_eq!(
            final_available_count, initial_available_count,
            "Last provider should never be marked as failed, regardless of should_mark_provider_failed"
        );
        assert_eq!(
            final_available_count, 1,
            "Should still have 1 provider available"
        );
    }

    #[tokio::test]
    async fn test_should_mark_provider_failed_with_multiple_providers_last_protection() {
        let _guard = setup_test_env();

        // Test with multiple providers, but ensure last one is protected
        let configs = vec![
            RpcConfig::new("http://localhost:8545".to_string()),
            RpcConfig::new("http://localhost:8546".to_string()),
        ];
        let selector = RpcSelector::new(configs).expect("Failed to create selector");

        let attempt_count = Arc::new(AtomicU8::new(0));
        let attempt_count_clone = attempt_count.clone();

        let provider_initializer = |url: &str| -> Result<String, TestError> { Ok(url.to_string()) };

        // Operation that always fails with errors that should mark provider as failed
        let operation = move |_provider: String| {
            let attempt_count = attempt_count_clone.clone();
            async move {
                let count = attempt_count.fetch_add(1, AtomicOrdering::SeqCst);
                Err(TestError(format!("Critical error #{}", count)))
            }
        };

        let config = RetryConfig::new(1, 1, 0, 0); // 1 retry, 1 failover

        // Get initial provider count
        let initial_available_count = selector.available_provider_count();
        assert_eq!(initial_available_count, 2);

        let result: Result<i32, TestError> = retry_rpc_call(
            &selector,
            "test_operation",
            |_| false,                    // All errors are non-retriable
            |e| e.0.contains("Critical"), // All errors should mark provider as failed
            provider_initializer,
            operation,
            Some(config),
        )
        .await;

        assert!(result.is_err());

        // First provider should be marked as failed, but last provider should be protected
        let final_available_count = selector.available_provider_count();
        assert_eq!(
            final_available_count, 1,
            "First provider should be marked as failed, but last provider should be protected"
        );
    }
}