manta_server/server/handlers/
mod.rs

1//! Top-level Axum handlers module.
2//!
3//! `mod.rs` keeps:
4//! - request extractors (`BearerToken`, `SiteName`, `RequestCtx`, `SiteHeader`)
5//! - the `ErrorResponse` body type + error mappers (`to_handler_error`,
6//!   `serialize_or_500`)
7//! - guard helpers (`require_vault`, `require_k8s_url`,
8//!   `validate_repo_list_lengths`, `parse_iso_datetime`)
9//! - the cross-handler `resolve_xnames_from_request` helper
10//! - the `health` endpoint
11//!
12//! Every other handler lives in a per-resource sub-module (mirroring
13//! the `service/` layout) and is re-exported here so `routes.rs` and
14//! `api_doc.rs` can keep referencing `handlers::X` unchanged.
15
16use std::sync::Arc;
17
18use axum::{
19  Json,
20  extract::FromRequestParts,
21  http::{StatusCode, header, request::Parts},
22  response::IntoResponse,
23};
24use manta_backend_dispatcher::error::Error as BackendError;
25use serde::Serialize;
26use utoipa::{IntoParams, ToSchema};
27
28use super::ServerState;
29use super::common::app_context::InfraContext;
30
31mod analysis;
32mod auth;
33mod boot_parameters;
34mod cluster;
35mod configuration;
36mod console;
37mod ephemeral_env;
38mod group;
39mod hardware;
40mod hw_cluster;
41mod image;
42mod kernel_parameters;
43mod migrate;
44mod node;
45mod power;
46mod redfish_endpoints;
47mod sat_file;
48mod session;
49mod template;
50
51pub use analysis::*;
52pub use auth::*;
53pub use boot_parameters::*;
54pub use cluster::*;
55pub use configuration::*;
56pub use console::*;
57pub use ephemeral_env::*;
58pub use group::*;
59pub use hardware::*;
60pub use hw_cluster::*;
61pub use image::*;
62pub use kernel_parameters::*;
63pub use migrate::*;
64pub use node::*;
65pub use power::*;
66pub use redfish_endpoints::*;
67pub use sat_file::*;
68pub use session::*;
69pub use template::*;
70
71// ---------------------------------------------------------------------------
72// Bearer-token extractor — eliminates token-extraction boilerplate
73// ---------------------------------------------------------------------------
74
75/// Axum extractor that pulls the token from `Authorization: Bearer <token>`.
76pub struct BearerToken(pub String);
77
78impl<S: Send + Sync> FromRequestParts<S> for BearerToken {
79  type Rejection = (StatusCode, Json<ErrorResponse>);
80
81  async fn from_request_parts(
82    parts: &mut Parts,
83    _state: &S,
84  ) -> Result<Self, Self::Rejection> {
85    let auth_header = parts
86      .headers
87      .get(header::AUTHORIZATION)
88      .and_then(|v| v.to_str().ok())
89      .ok_or_else(|| {
90        (
91          StatusCode::UNAUTHORIZED,
92          Json(ErrorResponse {
93            error: "Missing Authorization header".to_string(),
94          }),
95        )
96      })?;
97
98    let token = auth_header
99      .strip_prefix("Bearer ")
100      .or_else(|| auth_header.strip_prefix("bearer "))
101      .ok_or_else(|| {
102        (
103          StatusCode::UNAUTHORIZED,
104          Json(ErrorResponse {
105            error: "Authorization header must use Bearer scheme".to_string(),
106          }),
107        )
108      })?;
109
110    Ok(BearerToken(token.to_string()))
111  }
112}
113
114/// Axum extractor that reads the target site name from `X-Manta-Site`.
115///
116/// Every handler that touches backend APIs requires this header so the server
117/// knows which site's CA certificate, base URL, and credentials to use.
118pub struct SiteName(pub String);
119
120impl<S: Send + Sync> FromRequestParts<S> for SiteName {
121  type Rejection = (StatusCode, Json<ErrorResponse>);
122
123  async fn from_request_parts(
124    parts: &mut Parts,
125    _state: &S,
126  ) -> Result<Self, Self::Rejection> {
127    let site = parts
128      .headers
129      .get("X-Manta-Site")
130      .and_then(|v| v.to_str().ok())
131      .ok_or_else(|| {
132        (
133          StatusCode::BAD_REQUEST,
134          Json(ErrorResponse {
135            error: "Missing X-Manta-Site header".to_string(),
136          }),
137        )
138      })?;
139    Ok(SiteName(site.to_string()))
140  }
141}
142
/// Required header parameter present on every authenticated endpoint.
///
/// Tells the server which cluster to route the request to.
/// **Not** an authentication mechanism — documented as a plain header parameter.
///
/// The field is consumed by the `utoipa::IntoParams` derive macro at compile
/// time to generate the OpenAPI spec; the runtime extractor is [`SiteName`].
#[derive(IntoParams)]
#[into_params(parameter_in = Header)]
// `dead_code` is allowed because nothing reads the field at runtime; the
// struct exists only as input to the derive above.
#[allow(dead_code)]
pub struct SiteHeader {
  /// Name of the target cluster (matches a site configured in the server).
  #[param(required = true, rename = "X-Manta-Site")]
  pub x_manta_site: String,
}
158
159// ---------------------------------------------------------------------------
160// RequestCtx — bundles the State + BearerToken + SiteName extractors that
161// every authenticated handler opens with. Plus `infra()` for the
162// `state.infra_context(&site_name).map_err(to_handler_error)?` line that
163// follows. Each handler shrinks by 3-4 lines.
164// ---------------------------------------------------------------------------
165
166/// Bundled extractor for `State<Arc<ServerState>>` + `BearerToken` +
167/// `SiteName`. Use it in handler signatures instead of the three
168/// individual extractors when all three are needed (the typical case).
169///
170/// The unauthenticated `/auth/*` handlers and the health endpoint
171/// still use explicit extractors — they don't need a Bearer token.
172pub struct RequestCtx {
173  /// Shared server state (backend dispatcher, per-site config, TLS
174  /// material, optional Vault + k8s URLs).
175  pub state: Arc<ServerState>,
176  /// Bearer token extracted from the inbound `Authorization` header.
177  pub token: String,
178  /// Site name extracted from the inbound `X-Manta-Site` header;
179  /// used to pick the right `[sites.X]` entry from `state`.
180  pub site_name: String,
181}
182
183impl FromRequestParts<Arc<ServerState>> for RequestCtx {
184  type Rejection = (StatusCode, Json<ErrorResponse>);
185
186  async fn from_request_parts(
187    parts: &mut Parts,
188    state: &Arc<ServerState>,
189  ) -> Result<Self, Self::Rejection> {
190    let BearerToken(token) =
191      BearerToken::from_request_parts(parts, state).await?;
192    let SiteName(site_name) =
193      SiteName::from_request_parts(parts, state).await?;
194    // Validate the site resolves to a configured backend NOW, so the
195    // per-handler `ctx.infra()` call below cannot fail. Returning the
196    // 404-mapped error from extraction is the same shape the handler
197    // would have produced.
198    state.infra_context(&site_name).map_err(to_handler_error)?;
199    Ok(Self {
200      state: Arc::clone(state),
201      token,
202      site_name,
203    })
204  }
205}
206
207impl RequestCtx {
208  /// Borrow the per-site infrastructure (backend, base URLs, root
209  /// cert, optional Vault + k8s URLs). Infallible — the site was
210  /// validated during extraction; a missing site would have failed
211  /// the request before the handler body ran.
212  pub fn infra(&self) -> InfraContext<'_> {
213    self
214      .state
215      .infra_context(&self.site_name)
216      .expect("site validated during RequestCtx extraction")
217  }
218}
219
/// Flatten an error and everything reachable through `source()` into a
/// single multi-line string.
///
/// The first line is the error's own `Display` output. Each further
/// cause is appended on its own line, prefixed with `"  caused by: "`.
/// An error without a source renders as just its own message. This
/// keeps the full causal context in the server log even when the
/// top-level `Display` only prints the outermost message.
fn format_with_causes(e: &(dyn std::error::Error + 'static)) -> String {
  // Lazily walk the `source()` links, starting from the first cause.
  let causes = std::iter::successors(e.source(), |&cause| cause.source());

  causes.fold(e.to_string(), |mut rendered, cause| {
    rendered.push_str("\n  caused by: ");
    rendered.push_str(&cause.to_string());
    rendered
  })
}
237
238/// Convert a `BackendError` into the best-fitting HTTP error response.
239///
240/// `pub` (rather than `pub(crate)`) so the integration tests in
241/// `crates/manta-server/tests/` can exercise the mapping directly.
242//
243// `e` is consumed via `e.to_string()` at the end; technically it could
244// take `&BackendError`, but the canonical call shape is
245// `.map_err(to_handler_error)?` which threads the value through.
246// Switching to a reference would force every site to write
247// `.map_err(|e| to_handler_error(&e))?` — losing the point-free form
248// across hundreds of handler call sites is a worse trade than the
249// ineffectual `Drop` here.
250#[allow(clippy::needless_pass_by_value)]
251pub fn to_handler_error(e: BackendError) -> (StatusCode, Json<ErrorResponse>) {
252  let status = match &e {
253    BackendError::NotFound(_)
254    | BackendError::SessionNotFound
255    | BackendError::ConfigurationNotFound => StatusCode::NOT_FOUND,
256    BackendError::Conflict(_)
257    | BackendError::ConfigurationAlreadyExistsError(_) => StatusCode::CONFLICT,
258    BackendError::BadRequest(_)
259    | BackendError::InvalidPattern(_)
260    | BackendError::UnsupportedBackend(_)
261    | BackendError::InvalidNodeId(_) => StatusCode::BAD_REQUEST,
262    BackendError::AuthenticationTokenNotFound(_)
263    | BackendError::JwtMalformed(_) => StatusCode::UNAUTHORIZED,
264    BackendError::InsufficientResources(_) => StatusCode::UNPROCESSABLE_ENTITY,
265    // Backend HTTP errors carry the originating status code (CSM or
266    // ochami). Propagate it verbatim when it's a valid HTTP status, so
267    // a 404 from the backend surfaces as a 404 from manta-server (not a
268    // generic 500). Fall back to 502 Bad Gateway if the embedded code
269    // is outside the HTTP status range — that's the canonical "upstream
270    // returned something nonsensical" signal.
271    BackendError::CsmError { status, .. } => {
272      StatusCode::from_u16(*status).unwrap_or(StatusCode::BAD_GATEWAY)
273    }
274    // The CSM-side reqwest client times out via `NetError(reqwest::Error)`;
275    // surface that as 504 Gateway Timeout so the CLI sees a distinct
276    // status (not a generic 500) and the body explicitly names the hop.
277    BackendError::NetError(rqe) if rqe.is_timeout() => {
278      StatusCode::GATEWAY_TIMEOUT
279    }
280    _ => StatusCode::INTERNAL_SERVER_ERROR,
281  };
282  let chain = format_with_causes(&e);
283  if status == StatusCode::INTERNAL_SERVER_ERROR {
284    tracing::error!("Internal error: {}", chain);
285  } else {
286    tracing::debug!("Service error {}: {}", status, chain);
287  }
288  let error_body = categorise_backend_error_body(&e);
289  (status, Json(ErrorResponse { error: error_body }))
290}
291
292/// Rewrite the error body when the underlying `BackendError` is a
293/// timeout or connect failure on the manta-server -> CSM hop. The
294/// rewritten body leads with which hop timed out so the operator
295/// (and the CLI's own `categorise_server_error`) can name it.
296fn categorise_backend_error_body(e: &BackendError) -> String {
297  match e {
298    BackendError::NetError(rqe) if rqe.is_timeout() => {
299      format!(
300        "manta-server -> CSM call timed out (csm-rs reqwest \
301         HTTP_REQUEST_TIMEOUT, default 15 min). CSM did not send \
302         response headers in time. Original: {rqe}"
303      )
304    }
305    BackendError::NetError(rqe) if rqe.is_connect() => {
306      format!(
307        "manta-server -> CSM connect failed. Could not establish a \
308         TCP/TLS connection to the configured CSM endpoint. Check \
309         the site's backend URL and network reachability. Original: {rqe}"
310      )
311    }
312    _ => e.to_string(),
313  }
314}
315
316pub(super) fn serialize_or_500<T: Serialize>(
317  v: &T,
318) -> Result<serde_json::Value, (StatusCode, Json<ErrorResponse>)> {
319  serde_json::to_value(v).map_err(|e| {
320    let chain = format_with_causes(&e);
321    tracing::error!("Failed to serialize: {}", chain);
322    (
323      StatusCode::INTERNAL_SERVER_ERROR,
324      Json(ErrorResponse {
325        error: format!("Failed to serialize: {e}"),
326      }),
327    )
328  })
329}
330
331pub(super) fn require_vault(
332  url: Option<&str>,
333) -> Result<&str, (StatusCode, Json<ErrorResponse>)> {
334  url.ok_or_else(|| {
335    (
336      StatusCode::NOT_IMPLEMENTED,
337      Json(ErrorResponse {
338        error: "vault_base_url not configured on this server".into(),
339      }),
340    )
341  })
342}
343
344pub(super) fn require_k8s_url(
345  url: Option<&str>,
346) -> Result<&str, (StatusCode, Json<ErrorResponse>)> {
347  url.ok_or_else(|| {
348    (
349      StatusCode::NOT_IMPLEMENTED,
350      Json(ErrorResponse {
351        error: "k8s_api_url not configured on this server".into(),
352      }),
353    )
354  })
355}
356
357pub(super) fn validate_repo_list_lengths(
358  repo_names: &[String],
359  repo_last_commit_ids: &[String],
360) -> Result<(), (StatusCode, Json<ErrorResponse>)> {
361  if repo_names.len() != repo_last_commit_ids.len() {
362    return Err((
363      StatusCode::BAD_REQUEST,
364      Json(ErrorResponse {
365        error: format!(
366          "repo_names ({}) and repo_last_commit_ids ({}) must have the same length",
367          repo_names.len(),
368          repo_last_commit_ids.len()
369        ),
370      }),
371    ));
372  }
373  Ok(())
374}
375
376pub(super) fn parse_iso_datetime(
377  field: &str,
378  value: &str,
379) -> Result<chrono::NaiveDateTime, (StatusCode, Json<ErrorResponse>)> {
380  chrono::NaiveDateTime::parse_from_str(value, "%Y-%m-%dT%H:%M:%S").map_err(
381    |e| {
382      (
383        StatusCode::BAD_REQUEST,
384        Json(ErrorResponse {
385          error: format!("Invalid '{field}' datetime '{value}': {e}"),
386        }),
387      )
388    },
389  )
390}
391
392// ---------------------------------------------------------------------------
393// Shared response types
394// ---------------------------------------------------------------------------
395
/// Standard JSON error body returned by all failed endpoints.
// NOTE(review): the `///` comments on this type are presumably picked up
// by the `ToSchema` derive as OpenAPI descriptions — confirm before
// rewording them.
#[derive(Serialize, ToSchema)]
pub struct ErrorResponse {
  /// Human-readable explanation of the failure. Never includes
  /// stack traces, credentials, or internal type names.
  pub error: String,
}
403
404// ---------------------------------------------------------------------------
405// Health check
406// ---------------------------------------------------------------------------
407
408/// GET /health — liveness probe; returns `{"status":"ok"}`.
409#[utoipa::path(get, path = "/health", tag = "system",
410  responses(
411    (status = 200, description = "Server is healthy"),
412  )
413)]
414#[tracing::instrument(skip_all)]
415pub async fn health() -> impl IntoResponse {
416  Json(serde_json::json!({ "status": "ok" }))
417}
418
419// ---------------------------------------------------------------------------
420// Shared helpers
421// ---------------------------------------------------------------------------
422
423/// Resolve target xnames from an explicit list or an HSM group name.
424/// Returns 400 if neither is provided.
425async fn resolve_xnames_from_request(
426  infra: &crate::server::common::app_context::InfraContext<'_>,
427  token: &str,
428  xnames_expression: Option<&str>,
429  group_name_opt: Option<&str>,
430) -> Result<Vec<String>, (StatusCode, Json<ErrorResponse>)> {
431  if let Some(expr) = xnames_expression
432    && !expr.is_empty()
433  {
434    return crate::service::node_ops::from_user_hosts_expression_to_xname_vec(
435      infra, token, expr, false,
436    )
437    .await
438    .map_err(to_handler_error);
439  }
440  if let Some(group) = group_name_opt {
441    return crate::service::node_ops::resolve_target_nodes(
442      infra,
443      token,
444      None,
445      Some(group),
446      None,
447    )
448    .await
449    .map_err(to_handler_error);
450  }
451  Err((
452    StatusCode::BAD_REQUEST,
453    Json(ErrorResponse {
454      error: "At least one of 'xnames' or 'hsm_group' must be provided"
455        .to_string(),
456    }),
457  ))
458}
459
#[cfg(test)]
mod tests {
  //! Unit tests for the pure helpers of this module — nothing here
  //! needs a live router. Route- and error-mapping coverage lives in
  //! `crates/manta-server/tests/server_routes.rs`.

  use super::format_with_causes;
  use std::error::Error;
  use std::fmt;

  /// Minimal error type that forwards `source()` to an optional inner
  /// error, letting the tests assemble chains of a known depth.
  #[derive(Debug)]
  struct Chain {
    msg: &'static str,
    src: Option<Box<Chain>>,
  }

  impl Chain {
    /// Error with no underlying cause.
    fn leaf(msg: &'static str) -> Self {
      Self { msg, src: None }
    }

    /// Error whose `source()` is `inner`.
    fn wrapping(msg: &'static str, inner: Chain) -> Self {
      Self {
        msg,
        src: Some(Box::new(inner)),
      }
    }
  }

  impl fmt::Display for Chain {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
      write!(f, "{}", self.msg)
    }
  }

  impl Error for Chain {
    fn source(&self) -> Option<&(dyn Error + 'static)> {
      let inner: &Chain = self.src.as_deref()?;
      Some(inner)
    }
  }

  #[test]
  fn format_with_causes_single_error_has_no_caused_by() {
    let rendered = format_with_causes(&Chain::leaf("boom"));
    assert_eq!(rendered, "boom");
  }

  #[test]
  fn format_with_causes_two_level_chain_is_indented() {
    let e = Chain::wrapping("outer", Chain::leaf("inner"));
    assert_eq!(format_with_causes(&e), "outer\n  caused by: inner");
  }

  #[test]
  fn format_with_causes_walks_to_the_root() {
    // Three levels deep — emulates anyhow's `with_context()` stack.
    let e = Chain::wrapping(
      "top",
      Chain::wrapping("middle", Chain::leaf("root")),
    );
    assert_eq!(
      format_with_causes(&e),
      "top\n  caused by: middle\n  caused by: root"
    );
  }
}
527}