openzeppelin_relayer/metrics/
mod.rs

1//! Metrics module for the application.
2//!
3//! - This module contains the global Prometheus registry.
4//! - Defines specific metrics for the application.
5
6pub mod middleware;
7use lazy_static::lazy_static;
8use prometheus::{
9    CounterVec, Encoder, Gauge, HistogramOpts, HistogramVec, Opts, Registry, TextEncoder,
10};
11use sysinfo::{Disks, System};
12
13lazy_static! {
14    // Global Prometheus registry.
15    pub static ref REGISTRY: Registry = Registry::new();
16
17    // Counter: Total HTTP requests.
18    pub static ref REQUEST_COUNTER: CounterVec = {
19        let opts = Opts::new("requests_total", "Total number of HTTP requests");
20        let counter_vec = CounterVec::new(opts, &["endpoint", "method", "status"]).unwrap();
21        REGISTRY.register(Box::new(counter_vec.clone())).unwrap();
22        counter_vec
23    };
24
25    // Counter: Total HTTP requests by raw URI.
26    pub static ref RAW_REQUEST_COUNTER: CounterVec = {
27      let opts = Opts::new("raw_requests_total", "Total number of HTTP requests by raw URI");
28      let counter_vec = CounterVec::new(opts, &["raw_uri", "method", "status"]).unwrap();
29      REGISTRY.register(Box::new(counter_vec.clone())).unwrap();
30      counter_vec
31    };
32
33    // Histogram for request latency in seconds.
34    pub static ref REQUEST_LATENCY: HistogramVec = {
35      let histogram_opts = HistogramOpts::new("request_latency_seconds", "Request latency in seconds")
36          .buckets(vec![0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0, 25.0, 50.0, 100.0]);
37      let histogram_vec = HistogramVec::new(histogram_opts, &["endpoint", "method", "status"]).unwrap();
38      REGISTRY.register(Box::new(histogram_vec.clone())).unwrap();
39      histogram_vec
40    };
41
42    // Counter for error responses.
43    pub static ref ERROR_COUNTER: CounterVec = {
44        let opts = Opts::new("error_requests_total", "Total number of error responses");
45        // Using "status" to record the HTTP status code (or a special label like "service_error")
46        let counter_vec = CounterVec::new(opts, &["endpoint", "method", "status"]).unwrap();
47        REGISTRY.register(Box::new(counter_vec.clone())).unwrap();
48        counter_vec
49    };
50
51    // Gauge for CPU usage percentage.
52    pub static ref CPU_USAGE: Gauge = {
53      let gauge = Gauge::new("cpu_usage_percentage", "Current CPU usage percentage").unwrap();
54      REGISTRY.register(Box::new(gauge.clone())).unwrap();
55      gauge
56    };
57
58    // Gauge for memory usage percentage.
59    pub static ref MEMORY_USAGE_PERCENT: Gauge = {
60      let gauge = Gauge::new("memory_usage_percentage", "Memory usage percentage").unwrap();
61      REGISTRY.register(Box::new(gauge.clone())).unwrap();
62      gauge
63    };
64
65    // Gauge for memory usage in bytes.
66    pub static ref MEMORY_USAGE: Gauge = {
67        let gauge = Gauge::new("memory_usage_bytes", "Memory usage in bytes").unwrap();
68        REGISTRY.register(Box::new(gauge.clone())).unwrap();
69        gauge
70    };
71
72    // Gauge for total memory in bytes.
73    pub static ref TOTAL_MEMORY: Gauge = {
74      let gauge = Gauge::new("total_memory_bytes", "Total memory in bytes").unwrap();
75      REGISTRY.register(Box::new(gauge.clone())).unwrap();
76      gauge
77    };
78
79    // Gauge for available memory in bytes.
80    pub static ref AVAILABLE_MEMORY: Gauge = {
81        let gauge = Gauge::new("available_memory_bytes", "Available memory in bytes").unwrap();
82        REGISTRY.register(Box::new(gauge.clone())).unwrap();
83        gauge
84    };
85
86    // Gauge for used disk space in bytes.
87    pub static ref DISK_USAGE: Gauge = {
88      let gauge = Gauge::new("disk_usage_bytes", "Used disk space in bytes").unwrap();
89      REGISTRY.register(Box::new(gauge.clone())).unwrap();
90      gauge
91    };
92
93    // Gauge for disk usage percentage.
94    pub static ref DISK_USAGE_PERCENT: Gauge = {
95      let gauge = Gauge::new("disk_usage_percentage", "Disk usage percentage").unwrap();
96      REGISTRY.register(Box::new(gauge.clone())).unwrap();
97      gauge
98    };
99}
100
101/// Gather all metrics and encode into the provided format.
102pub fn gather_metrics() -> Result<Vec<u8>, Box<dyn std::error::Error>> {
103    let encoder = TextEncoder::new();
104    let metric_families = REGISTRY.gather();
105    let mut buffer = Vec::new();
106    encoder.encode(&metric_families, &mut buffer)?;
107    Ok(buffer)
108}
109
110/// Updates the system metrics for CPU and memory usage.
111pub fn update_system_metrics() {
112    let mut sys = System::new_all();
113    sys.refresh_all();
114
115    // Overall CPU usage.
116    let cpu_usage = sys.global_cpu_usage();
117    CPU_USAGE.set(cpu_usage as f64);
118
119    // Total memory (in bytes).
120    let total_memory = sys.total_memory();
121    TOTAL_MEMORY.set(total_memory as f64);
122
123    // Available memory (in bytes).
124    let available_memory = sys.available_memory();
125    AVAILABLE_MEMORY.set(available_memory as f64);
126
127    // Used memory (in bytes).
128    let memory_usage = sys.used_memory();
129    MEMORY_USAGE.set(memory_usage as f64);
130
131    // Calculate memory usage percentage
132    let memory_percentage = if total_memory > 0 {
133        (memory_usage as f64 / total_memory as f64) * 100.0
134    } else {
135        0.0
136    };
137    MEMORY_USAGE_PERCENT.set(memory_percentage);
138
139    // Calculate disk usage:
140    // Sum total space and available space across all disks.
141    let disks = Disks::new_with_refreshed_list();
142    let mut total_disk_space: u64 = 0;
143    let mut total_disk_available: u64 = 0;
144    for disk in disks.list() {
145        total_disk_space += disk.total_space();
146        total_disk_available += disk.available_space();
147    }
148    // Used disk space is total minus available ( in bytes).
149    let used_disk_space = total_disk_space.saturating_sub(total_disk_available);
150    DISK_USAGE.set(used_disk_space as f64);
151
152    // Calculate disk usage percentage.
153    let disk_percentage = if total_disk_space > 0 {
154        (used_disk_space as f64 / total_disk_space as f64) * 100.0
155    } else {
156        0.0
157    };
158    DISK_USAGE_PERCENT.set(disk_percentage);
159}
160
161#[cfg(test)]
162mod actix_tests {
163    use super::*;
164    use actix_web::{
165        dev::{Service, ServiceRequest, ServiceResponse, Transform},
166        http, test, Error, HttpResponse,
167    };
168    use futures::future::{self};
169    use middleware::MetricsMiddleware;
170    use prometheus::proto::MetricFamily;
171    use std::{
172        pin::Pin,
173        task::{Context, Poll},
174    };
175
176    // Dummy service that always returns a successful response (HTTP 200 OK).
177    struct DummySuccessService;
178
179    impl Service<ServiceRequest> for DummySuccessService {
180        type Response = ServiceResponse;
181        type Error = Error;
182        type Future = Pin<Box<dyn future::Future<Output = Result<Self::Response, Self::Error>>>>;
183
184        fn poll_ready(&self, _cx: &mut Context<'_>) -> Poll<Result<(), Self::Error>> {
185            Poll::Ready(Ok(()))
186        }
187
188        fn call(&self, req: ServiceRequest) -> Self::Future {
189            let resp = req.into_response(HttpResponse::Ok().finish());
190            Box::pin(async move { Ok(resp) })
191        }
192    }
193
194    // Dummy service that always returns an error.
195    struct DummyErrorService;
196
197    impl Service<ServiceRequest> for DummyErrorService {
198        type Response = ServiceResponse;
199        type Error = Error;
200        type Future = Pin<Box<dyn future::Future<Output = Result<Self::Response, Self::Error>>>>;
201
202        fn poll_ready(&self, _cx: &mut Context<'_>) -> Poll<Result<(), Self::Error>> {
203            Poll::Ready(Ok(()))
204        }
205
206        fn call(&self, _req: ServiceRequest) -> Self::Future {
207            Box::pin(async move { Err(actix_web::error::ErrorInternalServerError("dummy error")) })
208        }
209    }
210
211    // Helper function to find a metric family by name.
212    fn find_metric_family<'a>(
213        name: &str,
214        families: &'a [MetricFamily],
215    ) -> Option<&'a MetricFamily> {
216        families.iter().find(|mf| mf.name() == name)
217    }
218
219    #[actix_rt::test]
220    async fn test_gather_metrics_contains_expected_names() {
221        // Update system metrics
222        update_system_metrics();
223
224        // Increment request counters to ensure they appear in output
225        REQUEST_COUNTER
226            .with_label_values(&["/test", "GET", "200"])
227            .inc();
228        RAW_REQUEST_COUNTER
229            .with_label_values(&["/test?param=value", "GET", "200"])
230            .inc();
231        REQUEST_LATENCY
232            .with_label_values(&["/test", "GET", "200"])
233            .observe(0.1);
234        ERROR_COUNTER
235            .with_label_values(&["/test", "GET", "500"])
236            .inc();
237
238        let metrics = gather_metrics().expect("failed to gather metrics");
239        let output = String::from_utf8(metrics).expect("metrics output is not valid UTF-8");
240
241        // System metrics
242        assert!(output.contains("cpu_usage_percentage"));
243        assert!(output.contains("memory_usage_percentage"));
244        assert!(output.contains("memory_usage_bytes"));
245        assert!(output.contains("total_memory_bytes"));
246        assert!(output.contains("available_memory_bytes"));
247        assert!(output.contains("disk_usage_bytes"));
248        assert!(output.contains("disk_usage_percentage"));
249
250        // Request metrics
251        assert!(output.contains("requests_total"));
252        assert!(output.contains("raw_requests_total"));
253        assert!(output.contains("request_latency_seconds"));
254        assert!(output.contains("error_requests_total"));
255    }
256
257    #[actix_rt::test]
258    async fn test_update_system_metrics() {
259        // Reset metrics to ensure clean state
260        CPU_USAGE.set(0.0);
261        TOTAL_MEMORY.set(0.0);
262        AVAILABLE_MEMORY.set(0.0);
263        MEMORY_USAGE.set(0.0);
264        MEMORY_USAGE_PERCENT.set(0.0);
265        DISK_USAGE.set(0.0);
266        DISK_USAGE_PERCENT.set(0.0);
267
268        // Call the function we're testing
269        update_system_metrics();
270
271        // Verify that metrics have been updated with reasonable values
272        let cpu_usage = CPU_USAGE.get();
273        assert!(
274            (0.0..=100.0).contains(&cpu_usage),
275            "CPU usage should be between 0-100%, got {}",
276            cpu_usage
277        );
278
279        let memory_usage = MEMORY_USAGE.get();
280        assert!(
281            memory_usage >= 0.0,
282            "Memory usage should be >= 0, got {}",
283            memory_usage
284        );
285
286        let memory_percent = MEMORY_USAGE_PERCENT.get();
287        assert!(
288            (0.0..=100.0).contains(&memory_percent),
289            "Memory usage percentage should be between 0-100%, got {}",
290            memory_percent
291        );
292
293        let total_memory = TOTAL_MEMORY.get();
294        assert!(
295            total_memory > 0.0,
296            "Total memory should be > 0, got {}",
297            total_memory
298        );
299
300        let available_memory = AVAILABLE_MEMORY.get();
301        assert!(
302            available_memory >= 0.0,
303            "Available memory should be >= 0, got {}",
304            available_memory
305        );
306
307        let disk_usage = DISK_USAGE.get();
308        assert!(
309            disk_usage >= 0.0,
310            "Disk usage should be >= 0, got {}",
311            disk_usage
312        );
313
314        let disk_percent = DISK_USAGE_PERCENT.get();
315        assert!(
316            (0.0..=100.0).contains(&disk_percent),
317            "Disk usage percentage should be between 0-100%, got {}",
318            disk_percent
319        );
320
321        // Verify that memory usage doesn't exceed total memory
322        assert!(
323            memory_usage <= total_memory,
324            "Memory usage should be <= total memory, got {}",
325            memory_usage
326        );
327
328        // Verify that available memory plus used memory doesn't exceed total memory
329        assert!(
330            (available_memory + memory_usage) <= total_memory,
331            "Available memory plus used memory should be <= total memory {}, got {}",
332            total_memory,
333            available_memory + memory_usage
334        );
335    }
336
337    #[actix_rt::test]
338    async fn test_middleware_success() {
339        let req = test::TestRequest::with_uri("/test_success").to_srv_request();
340
341        let middleware = MetricsMiddleware;
342        let service = middleware.new_transform(DummySuccessService).await.unwrap();
343
344        let resp = service.call(req).await.unwrap();
345        assert_eq!(resp.response().status(), http::StatusCode::OK);
346
347        let families = REGISTRY.gather();
348        let counter_fam = find_metric_family("requests_total", &families)
349            .expect("requests_total metric family not found");
350
351        let mut found = false;
352        for m in counter_fam.get_metric() {
353            let labels = m.get_label();
354            if labels
355                .iter()
356                .any(|l| l.name() == "endpoint" && l.value() == "/test_success")
357            {
358                found = true;
359                assert!(m.get_counter().value() >= 1.0);
360            }
361        }
362        assert!(
363            found,
364            "Expected metric with endpoint '/test_success' not found"
365        );
366    }
367
368    #[actix_rt::test]
369    async fn test_middleware_error() {
370        let req = test::TestRequest::with_uri("/test_error").to_srv_request();
371
372        let middleware = MetricsMiddleware;
373        let service = middleware.new_transform(DummyErrorService).await.unwrap();
374
375        let result = service.call(req).await;
376        assert!(result.is_err());
377
378        let families = REGISTRY.gather();
379        let error_counter_fam = find_metric_family("error_requests_total", &families)
380            .expect("error_requests_total metric family not found");
381
382        let mut found = false;
383        for m in error_counter_fam.get_metric() {
384            let labels = m.get_label();
385            if labels
386                .iter()
387                .any(|l| l.name() == "endpoint" && l.value() == "/test_error")
388            {
389                found = true;
390                assert!(m.get_counter().value() >= 1.0);
391            }
392        }
393        assert!(
394            found,
395            "Expected error metric with endpoint '/test_error' not found"
396        );
397    }
398}
399
400#[cfg(test)]
401mod property_tests {
402    use proptest::{prelude::*, test_runner::Config};
403
404    // A helper function to compute percentage used from total.
405    fn compute_percentage(used: u64, total: u64) -> f64 {
406        if total > 0 {
407            (used as f64 / total as f64) * 100.0
408        } else {
409            0.0
410        }
411    }
412
413    proptest! {
414        // Set the number of cases to 1000
415        #![proptest_config(Config {
416          cases: 1000, ..Config::default()
417        })]
418
419        #[test]
420        fn prop_compute_percentage((total, used) in {
421            (1u64..1_000_000u64).prop_flat_map(|total| {
422                (Just(total), 0u64..=total)
423            })
424        }) {
425            let percentage = compute_percentage(used, total);
426            prop_assert!(percentage >= 0.0);
427            prop_assert!(percentage <= 100.0);
428        }
429
430        #[test]
431        fn prop_labels_are_reasonable(
432              endpoint in ".*",
433              method in prop::sample::select(vec![
434                "GET".to_string(),
435                "POST".to_string(),
436                "PUT".to_string(),
437                "DELETE".to_string()
438                ])
439            ) {
440            let endpoint_label = if endpoint.is_empty() { "/".to_string() } else { endpoint.clone() };
441            let method_label = method;
442
443            prop_assert!(endpoint_label.chars().count() <= 1024, "Endpoint label too long");
444            prop_assert!(method_label.chars().count() <= 16, "Method label too long");
445
446            let status = "200".to_string();
447            let labels = vec![endpoint_label, method_label, status];
448
449            for label in labels {
450                prop_assert!(!label.is_empty());
451                prop_assert!(label.len() < 1024);
452            }
453        }
454    }
455}