manta_server/service/hw_cluster/
apply.rs

1//! High-level coordinators: `apply_hw_configuration` (pin/unpin),
2//! `add_hw_component`, `delete_hw_component`. These are the functions
3//! the server handlers call directly.
4
5use std::collections::HashMap;
6
7use manta_backend_dispatcher::{
8  error::Error, interfaces::hsm::group::GroupTrait, types::Group,
9};
10
11use super::{
12  AddHwResult, ApplyHwResult, DeleteHwResult, HwClusterMode,
13  MEMORY_CAPACITY_LCM, pin_unpin, scoring,
14};
15use crate::server::common::app_context::InfraContext;
16
17/// Pin or unpin nodes between `parent_group_name` and
18/// `target_group_name` so the target group satisfies `pattern`.
19///
20/// The flow is parse → ensure → score → resolve → apply: the
21/// component pattern is parsed into a counts map, the target group is
22/// created on demand (or refused when `create_target_group` is false
23/// and the group is missing), parent and target hardware inventories
24/// are fetched, the resource-sufficiency check rejects patterns that
25/// ask for more of a component than exists in the pool, and `mode`
26/// (`Pin` / `Unpin`) picks which selection algorithm runs. `dryrun`
27/// shortcuts every backend mutation but still returns the would-be
28/// final memberships so the operator sees the plan.
29/// Parameters for [`apply_hw_configuration`].
30pub struct ApplyHwConfigurationParams<'a> {
31  /// `Pin` (capacity-aware selection) or `Unpin` (release all).
32  pub mode: HwClusterMode,
33  /// Destination HSM group that will receive nodes matching `pattern`.
34  pub target_group_name: &'a str,
35  /// Source HSM group nodes are drawn from when honouring `pattern`.
36  pub parent_group_name: &'a str,
37  /// Hardware-component request string, e.g. `"a100:8,milan:2"`.
38  pub pattern: &'a str,
39  /// When `true`, plan the moves but skip every backend mutation; the
40  /// returned `ApplyHwResult` still reflects the would-be membership.
41  pub dryrun: bool,
42  /// Create `target_group_name` if it doesn't already exist.
43  pub create_target_group: bool,
44  /// Delete the parent group when the move leaves it with no members.
45  pub delete_empty_parent_group: bool,
46}
47
48/// Service entry point for `POST /hardware-clusters/{target}/configuration`.
49pub async fn apply_hw_configuration(
50  infra: &InfraContext<'_>,
51  shasta_token: &str,
52  p: ApplyHwConfigurationParams<'_>,
53) -> Result<ApplyHwResult, Error> {
54  let ApplyHwConfigurationParams {
55    mode,
56    target_group_name,
57    parent_group_name,
58    pattern,
59    dryrun,
60    create_target_group,
61    delete_empty_parent_group,
62  } = p;
63  let (user_defined_hw_component_vec, user_defined_hw_component_count_hashmap) =
64    pin_unpin::parse_hw_pattern_usize(target_group_name, pattern)?;
65
66  pin_unpin::ensure_target_group_exists(
67    infra,
68    shasta_token,
69    target_group_name,
70    dryrun,
71    create_target_group,
72  )
73  .await?;
74
75  let (
76    target_hsm_group_member_vec,
77    target_hsm_node_hw_component_count_vec,
78    target_hsm_hw_component_summary,
79  ) = scoring::fetch_group_hw_inventory(
80    infra,
81    shasta_token,
82    &user_defined_hw_component_vec,
83    target_group_name,
84    MEMORY_CAPACITY_LCM,
85  )
86  .await?;
87
88  tracing::info!(
89    "HSM group '{}' hw component summary: {:?}",
90    target_group_name,
91    target_hsm_hw_component_summary
92  );
93
94  let (
95    parent_hsm_group_member_vec,
96    parent_hsm_node_hw_component_count_vec,
97    _parent_summary,
98  ) = scoring::fetch_group_hw_inventory(
99    infra,
100    shasta_token,
101    &user_defined_hw_component_vec,
102    parent_group_name,
103    MEMORY_CAPACITY_LCM,
104  )
105  .await?;
106
107  pin_unpin::validate_resource_sufficiency(
108    &target_hsm_node_hw_component_count_vec,
109    &parent_hsm_node_hw_component_count_vec,
110    &user_defined_hw_component_count_hashmap,
111  )?;
112
113  let (
114    target_hsm_node_hw_component_count_vec,
115    parent_hsm_node_hw_component_count_vec,
116  ) = scoring::resolve_hw_description_to_xnames(
117    mode,
118    target_hsm_node_hw_component_count_vec,
119    parent_hsm_node_hw_component_count_vec,
120    &user_defined_hw_component_count_hashmap,
121  )?;
122
123  let target_hsm_node_vec: Vec<String> = target_hsm_node_hw_component_count_vec
124    .into_iter()
125    .map(|(xname, _)| xname)
126    .collect();
127
128  let parent_hsm_node_vec: Vec<String> = parent_hsm_node_hw_component_count_vec
129    .into_iter()
130    .map(|(xname, _)| xname)
131    .collect();
132
133  pin_unpin::apply_group_updates(
134    infra,
135    shasta_token,
136    pin_unpin::GroupUpdate {
137      target_group: target_group_name,
138      parent_group: parent_group_name,
139      old_target_members: &target_hsm_group_member_vec,
140      old_parent_members: &parent_hsm_group_member_vec,
141      new_target_members: &target_hsm_node_vec,
142      new_parent_members: &parent_hsm_node_vec,
143      dryrun,
144      delete_empty_parent: delete_empty_parent_group,
145    },
146  )
147  .await?;
148
149  Ok(ApplyHwResult {
150    target_nodes: target_hsm_node_vec,
151    parent_nodes: parent_hsm_node_vec,
152  })
153}
154
155// ── add_hw_component ─────────────────────────────────────────────────────────
156
157/// Ensure the target HSM group exists for add-hw-component, creating it if needed.
158async fn ensure_add_target_group_exists(
159  infra: &InfraContext<'_>,
160  shasta_token: &str,
161  target_hsm_group_name: &str,
162  dryrun: bool,
163  create_hsm_group: bool,
164) -> Result<(), Error> {
165  if infra
166    .backend
167    .get_group(shasta_token, target_hsm_group_name)
168    .await
169    .is_ok()
170  {
171    tracing::debug!("The group '{}' exists, good.", target_hsm_group_name);
172    return Ok(());
173  }
174  if !create_hsm_group {
175    return Err(Error::NotFound(format!(
176      "Group '{target_hsm_group_name}' does not exist, but the \
177       option to create the group was NOT \
178       specified, cannot continue."
179    )));
180  }
181  tracing::info!(
182    "Group '{}' does not exist, but the option \
183     to create the group has been selected, \
184     creating it now.",
185    target_hsm_group_name
186  );
187  if dryrun {
188    return Err(Error::BadRequest(
189      "Dryrun selected, cannot create \
190       the new group and continue."
191        .to_string(),
192    ));
193  }
194  let group = Group {
195    label: target_hsm_group_name.to_string(),
196    description: None,
197    tags: None,
198    members: None,
199    exclusive_group: Some("false".to_string()),
200  };
201  infra.backend.add_group(shasta_token, group).await?;
202  Ok(())
203}
204
205/// Compute the final parent HSM hw component summary after subtracting user-requested deltas.
206//
207// `deltas` carries signed counters (`isize`) because callers compute
208// the difference between current and target counts; in practice the
209// values are non-negative HW component subtractions. The
210// `*counter as usize` cast is guarded by the explicit
211// `if *counter > current as isize` overflow check above each call site.
212#[allow(clippy::cast_sign_loss)]
213fn compute_final_parent_summary(
214  current_summary: &HashMap<String, usize>,
215  deltas: &HashMap<String, isize>,
216  parent_group_name: &str,
217) -> Result<HashMap<String, usize>, Error> {
218  let mut final_summary: HashMap<String, usize> = HashMap::new();
219
220  for (hw_component, counter) in deltas {
221    let current = *current_summary.get(hw_component).unwrap_or(&0);
222    if *counter > current.cast_signed() {
223      return Err(Error::InsufficientResources(format!(
224        "Cannot remove more hw component '{}' \
225         ({}) than available in parent group \
226         '{}' ({})",
227        hw_component, *counter, parent_group_name, current
228      )));
229    }
230    let new_counter = current - *counter as usize;
231    final_summary.insert(hw_component.clone(), new_counter);
232  }
233
234  Ok(final_summary)
235}
236
237/// Move enough nodes out of `parent_group_name` into
238/// `target_group_name` to add the components described by
239/// `pattern` (`<component>:<delta>` pairs) to the target.
240///
241/// The target group is created on demand when `create_group` is
242/// set; missing it otherwise yields `NotFound`. The parent group's
243/// post-move hw component summary is computed up front so the
244/// algorithm can reject patterns that would over-draw the parent
245/// (`InsufficientResources`). Selection uses scarcity-weighted scores
246/// so common components get pulled first and rare ones are preserved.
247/// In `dryrun` mode the planned move is returned without any backend
248/// mutation.
249pub async fn add_hw_component(
250  infra: &InfraContext<'_>,
251  shasta_token: &str,
252  target_group_name: &str,
253  parent_group_name: &str,
254  pattern: &str,
255  dryrun: bool,
256  create_group: bool,
257) -> Result<AddHwResult, Error> {
258  ensure_add_target_group_exists(
259    infra,
260    shasta_token,
261    target_group_name,
262    dryrun,
263    create_group,
264  )
265  .await?;
266
267  let pattern_str = format!("{target_group_name}:{pattern}");
268  let pattern_lowercase = pattern_str.to_lowercase();
269  let mut pattern_element_vec: Vec<&str> =
270    pattern_lowercase.split(':').collect();
271  let target_name = pattern_element_vec.remove(0);
272
273  let (
274    user_defined_delta_hw_component_vec,
275    user_defined_delta_hw_component_count_hashmap,
276  ) = scoring::parse_hw_pattern(&pattern_element_vec)?;
277
278  let (
279    _parent_member_vec,
280    mut parent_hsm_node_hw_component_count_vec,
281    parent_hsm_hw_component_summary,
282  ) = scoring::fetch_group_hw_inventory(
283    infra,
284    shasta_token,
285    &user_defined_delta_hw_component_vec,
286    parent_group_name,
287    MEMORY_CAPACITY_LCM,
288  )
289  .await?;
290
291  let final_parent_hsm_hw_component_summary = compute_final_parent_summary(
292    &parent_hsm_hw_component_summary,
293    &user_defined_delta_hw_component_count_hashmap,
294    parent_group_name,
295  )?;
296
297  let scarcity_scores = scoring::calculate_hw_component_scarcity_scores(
298    &parent_hsm_node_hw_component_count_vec,
299  );
300
301  let hw_counters_to_move = pin_unpin::calculate_target_group_unpin(
302    &final_parent_hsm_hw_component_summary,
303    &final_parent_hsm_hw_component_summary
304      .keys()
305      .cloned()
306      .collect::<Vec<String>>(),
307    &mut parent_hsm_node_hw_component_count_vec,
308    &scarcity_scores,
309  )?;
310
311  let nodes_to_move: Vec<String> = hw_counters_to_move
312    .iter()
313    .map(|(xname, _)| xname.clone())
314    .collect();
315
316  let mut target_hsm_node_vec: Vec<String> = infra
317    .backend
318    .get_member_vec_from_group_name_vec(
319      shasta_token,
320      &[target_name.to_string()],
321    )
322    .await?;
323
324  target_hsm_node_vec.extend(nodes_to_move.clone());
325  target_hsm_node_vec.sort();
326
327  if !dryrun {
328    for xname in &nodes_to_move {
329      infra
330        .backend
331        .delete_member_from_group(shasta_token, parent_group_name, xname)
332        .await?;
333
334      infra
335        .backend
336        .add_members_to_group(shasta_token, target_name, &[xname.as_str()])
337        .await?;
338    }
339  }
340
341  let parent_nodes: Vec<String> = parent_hsm_node_hw_component_count_vec
342    .iter()
343    .map(|(xname, _)| xname.clone())
344    .collect();
345
346  Ok(AddHwResult {
347    nodes_moved: nodes_to_move,
348    target_nodes: target_hsm_node_vec,
349    parent_nodes,
350  })
351}
352
353// ── delete_hw_component ──────────────────────────────────────────────────────
354
355/// Handle the case when target HSM group is already empty.
356async fn handle_empty_target(
357  infra: &InfraContext<'_>,
358  shasta_token: &str,
359  target_hsm_group_name: &str,
360  dryrun: bool,
361  delete_hsm_group: bool,
362) -> Result<(), Error> {
363  tracing::info!(
364    "The target HSM group {} is already empty, cannot \
365     remove hardware from it.",
366    target_hsm_group_name
367  );
368
369  if dryrun || !delete_hsm_group {
370    tracing::info!(
371      "The option to delete empty groups has NOT been \
372       selected, or the dryrun has been enabled. We \
373       are done with this action."
374    );
375    return Ok(());
376  }
377
378  tracing::info!(
379    "The option to delete empty groups has been \
380     selected, removing it."
381  );
382  match infra
383    .backend
384    .delete_group(shasta_token, target_hsm_group_name)
385    .await
386  {
387    Ok(_) => {
388      tracing::info!(
389        "HSM group removed successfully, we are \
390         done with this action."
391      );
392    }
393    Err(e) => tracing::debug!(
394      "Error removing the HSM group. This always \
395       fails, ignore please. Reported: {}",
396      e
397    ),
398  }
399  Ok(())
400}
401
402/// Compute the final target HSM hw component summary after subtracting deltas.
403//
404// Same `isize → usize` cast rationale as `compute_final_parent_summary`:
405// callers compute non-negative HW deltas; the cast preserves intent.
406#[allow(clippy::cast_sign_loss)]
407fn compute_delete_final_summary(
408  current_summary: &HashMap<String, usize>,
409  deltas: &HashMap<String, isize>,
410) -> Result<HashMap<String, usize>, Error> {
411  let mut final_summary: HashMap<String, usize> = HashMap::new();
412
413  for (hw_component, counter) in deltas {
414    let current = *current_summary.get(hw_component).ok_or_else(|| {
415      Error::NotFound(format!(
416        "hw component '{hw_component}' not found in target HSM \
417           hw component summary"
418      ))
419    })?;
420
421    final_summary.insert(hw_component.clone(), current - *counter as usize);
422  }
423
424  Ok(final_summary)
425}
426
427/// Move nodes between HSM groups: delete from target, add to parent.
428async fn apply_node_moves(
429  infra: &InfraContext<'_>,
430  shasta_token: &str,
431  target_group: &str,
432  parent_group: &str,
433  nodes: &[String],
434  target_will_be_empty: bool,
435  delete_hsm_group: bool,
436) -> Result<(), Error> {
437  for xname in nodes {
438    infra
439      .backend
440      .delete_member_from_group(shasta_token, target_group, xname.as_str())
441      .await?;
442
443    infra
444      .backend
445      .add_members_to_group(shasta_token, parent_group, &[xname.as_str()])
446      .await?;
447  }
448
449  if target_will_be_empty {
450    if delete_hsm_group {
451      tracing::info!(
452        "HSM group {} is now empty and the option to \
453         delete empty groups has been selected, \
454         removing it.",
455        target_group
456      );
457      match infra.backend.delete_group(shasta_token, target_group).await {
458        Ok(_) => tracing::info!("HSM group removed successfully."),
459        Err(e) => tracing::debug!(
460          "Error removing the HSM group. This always \
461           fails, ignore please. Reported: {}",
462          e
463        ),
464      }
465    } else {
466      tracing::debug!(
467        "HSM group {} is now empty and the option to \
468         delete empty groups has NOT been selected, \
469         will not remove it.",
470        target_group
471      );
472    }
473  }
474
475  Ok(())
476}
477
478/// Move enough nodes out of `target_group_name` back into
479/// `parent_group_name` to remove the components described by
480/// `pattern` from the target.
481///
482/// The target group must already exist (returns `NotFound`
483/// otherwise). When the target is already empty the routine
484/// short-circuits, optionally deleting the empty group if
485/// `delete_group` is set. Selection scores combine both groups'
486/// scarcity, so the move keeps the most scarce hardware in the target
487/// group whenever possible. After moving, the function deletes the
488/// target group if it ended up empty and `delete_group` is true.
489/// `dryrun` returns the planned move without touching the backend.
490pub async fn delete_hw_component(
491  infra: &InfraContext<'_>,
492  token: &str,
493  target_group_name: &str,
494  parent_group_name: &str,
495  pattern: &str,
496  dryrun: bool,
497  delete_group: bool,
498) -> Result<DeleteHwResult, Error> {
499  match infra.backend.get_group(token, target_group_name).await {
500    Ok(_) => {}
501    Err(_) => {
502      return Err(Error::NotFound(format!(
503        "HSM group {target_group_name} does not exist, cannot remove hw from it."
504      )));
505    }
506  }
507
508  let pattern_str = format!("{target_group_name}:{pattern}");
509  let pattern_lowercase = pattern_str.to_lowercase();
510  let mut pattern_element_vec: Vec<&str> =
511    pattern_lowercase.split(':').collect();
512  let target_name = pattern_element_vec.remove(0);
513
514  let (
515    user_defined_delta_hw_component_vec,
516    user_defined_delta_hw_component_count_hashmap,
517  ) = scoring::parse_hw_pattern(&pattern_element_vec)?;
518
519  let (
520    target_hsm_group_member_vec,
521    mut target_hsm_node_hw_component_count_vec,
522    target_hsm_hw_component_summary,
523  ) = scoring::fetch_group_hw_inventory(
524    infra,
525    token,
526    &user_defined_delta_hw_component_vec,
527    target_name,
528    MEMORY_CAPACITY_LCM,
529  )
530  .await?;
531
532  if target_hsm_node_hw_component_count_vec.is_empty() {
533    handle_empty_target(infra, token, target_name, dryrun, delete_group)
534      .await?;
535    return Ok(DeleteHwResult {
536      nodes_moved: vec![],
537      target_nodes: vec![],
538      parent_nodes: vec![],
539    });
540  }
541
542  let (
543    parent_hsm_group_member_vec,
544    parent_hsm_node_hw_component_count_vec,
545    _parent_summary,
546  ) = scoring::fetch_group_hw_inventory(
547    infra,
548    token,
549    &user_defined_delta_hw_component_vec,
550    parent_group_name,
551    MEMORY_CAPACITY_LCM,
552  )
553  .await?;
554
555  let combined = [
556    target_hsm_node_hw_component_count_vec.clone(),
557    parent_hsm_node_hw_component_count_vec.clone(),
558  ]
559  .concat();
560  let scarcity_scores =
561    scoring::calculate_hw_component_scarcity_scores(&combined);
562
563  let final_target_summary = compute_delete_final_summary(
564    &target_hsm_hw_component_summary,
565    &user_defined_delta_hw_component_count_hashmap,
566  )?;
567
568  let hw_counters_to_move = pin_unpin::calculate_target_group_unpin(
569    &final_target_summary,
570    &final_target_summary
571      .keys()
572      .cloned()
573      .collect::<Vec<String>>(),
574    &mut target_hsm_node_hw_component_count_vec,
575    &scarcity_scores,
576  )?;
577
578  let nodes_to_move: Vec<String> = hw_counters_to_move
579    .iter()
580    .map(|(xname, _)| xname.clone())
581    .collect();
582
583  let mut parent_nodes: Vec<String> = parent_hsm_group_member_vec;
584  parent_nodes.extend(nodes_to_move.clone());
585  parent_nodes.sort();
586
587  let target_nodes: Vec<String> = target_hsm_node_hw_component_count_vec
588    .iter()
589    .map(|(xname, _)| xname.clone())
590    .collect();
591
592  if !dryrun {
593    apply_node_moves(
594      infra,
595      token,
596      target_name,
597      parent_group_name,
598      &nodes_to_move,
599      target_hsm_group_member_vec.len() == nodes_to_move.len(),
600      delete_group,
601    )
602    .await?;
603  }
604
605  Ok(DeleteHwResult {
606    nodes_moved: nodes_to_move,
607    target_nodes,
608    parent_nodes,
609  })
610}