Merge pull request #404 from gyptazy/feature/373-add-resource-reservation-for-nodes

Feature/resource reservation (#380)
This commit is contained in:
gyptazy
2025-12-24 09:10:12 +01:00
committed by GitHub
7 changed files with 76 additions and 3 deletions

View File

@@ -0,0 +1,2 @@
added:
- Add resource reservation support for PVE nodes (@Chipmonk2). [#373]

View File

@@ -290,6 +290,7 @@ The following options can be set in the configuration file `proxlb.yaml`:
| | method | | memory | `Str` | The balancing method that should be used. [values: `memory` (default), `cpu`, `disk`]|
| | mode | | used | `Str` | The balancing mode that should be used. [values: `used` (default), `assigned`, `psi` (pressure)] |
| | balance_larger_guests_first | | False | `Bool` | Option to prefer larger/smaller guests first |
| | node_resource_reserve | | { defaults: { memory: 4 }, node01: { memory: 6 } } | `Dict` | A dict of resource reservations (in GB) per node. The `defaults` entry applies to all nodes unless a node specific entry overrides it |
| | psi | | { nodes: { memory: { pressure_full: 0.20, pressure_some: 0.20, pressure_spikes: 1.00 }}} | `Dict` | A dict of PSI based thresholds for nodes and guests |
| | pools | | pools: { dev: { type: affinity }, de-nbg01-db: { type: anti-affinity }} | `Dict` | A dict of pool names and their type for creating affinity/anti-affinity rules |
| `service` | | | | | |
@@ -338,6 +339,11 @@ balancing:
method: memory
mode: used
balance_larger_guests_first: False
node_resource_reserve:
defaults:
memory: 4
node01:
memory: 6
# # PSI thresholds only apply when using mode 'psi'
# # PSI based balancing is currently in beta and req. PVE >= 9
# psi:

View File

@@ -33,6 +33,11 @@ balancing:
method: memory # 'memory' | 'cpu' | 'disk'
mode: used # 'assigned' | 'used' | 'psi'
balance_larger_guests_first: False # Option to prioritize balancing of larger or smaller guests first
node_resource_reserve: # Optional: Define resource reservations for nodes (in GB)
defaults: # Default reservation values applying to all nodes (unless explicitly overridden)
memory: 4 # Default: 4 GB memory reserved per node
node01: # Specific node reservation override for node 'node01'
memory: 6 # Specific: 6 GB memory reserved for node 'node01'
# # PSI thresholds only apply when using mode 'psi'
# psi:
# nodes:

View File

@@ -90,6 +90,7 @@ def main():
# Update the initial node resource assignments
# by the previously created groups.
Calculations.set_node_assignments(proxlb_data)
Helper.log_node_metrics(proxlb_data, init=False)
Calculations.set_node_hot(proxlb_data)
Calculations.set_guest_hot(proxlb_data)
Calculations.get_most_free_node(proxlb_data, cli_args.best_node)

View File

@@ -753,10 +753,10 @@ class Calculations:
guest_disk_required = proxlb_data["guests"][guest_name]["disk_used"]
if guest_memory_required < node_memory_free:
logger.debug(f"Node '{node_target}' has sufficient resources for guest '{guest_name}'.")
logger.debug(f"Node '{node_target}' has sufficient resources ({node_memory_free / (1024 ** 3):.2f} GB free) for guest '{guest_name}'.")
logger.debug("Finished: validate_node_resources.")
return True
else:
logger.debug(f"Node '{node_target}' lacks sufficient resources for guest '{guest_name}'.")
logger.debug(f"Node '{node_target}' lacks sufficient resources ({node_memory_free / (1024 ** 3):.2f} GB free) for guest '{guest_name}'.")
logger.debug("Finished: validate_node_resources.")
return False

View File

@@ -24,6 +24,7 @@ __license__ = "GPL-3.0"
import time
from typing import Dict, Any
from utils.logger import SystemdLogger
from utils.helper import Helper
logger = SystemdLogger()
@@ -77,7 +78,7 @@ class Nodes:
nodes["nodes"][node["node"]]["cpu_pressure_some_spikes_percent"] = Nodes.get_node_rrd_data(proxmox_api, node["node"], "cpu", "some", spikes=True)
nodes["nodes"][node["node"]]["cpu_pressure_full_spikes_percent"] = Nodes.get_node_rrd_data(proxmox_api, node["node"], "cpu", "full", spikes=True)
nodes["nodes"][node["node"]]["cpu_pressure_hot"] = False
nodes["nodes"][node["node"]]["memory_total"] = node["maxmem"]
nodes["nodes"][node["node"]]["memory_total"] = Nodes.set_node_resource_reservation(node["node"], node["maxmem"], proxlb_config, "memory")
nodes["nodes"][node["node"]]["memory_assigned"] = 0
nodes["nodes"][node["node"]]["memory_used"] = node["mem"]
nodes["nodes"][node["node"]]["memory_free"] = node["maxmem"] - node["mem"]
@@ -253,3 +254,59 @@ class Nodes:
logger.debug(f"Got version {version['version']} for node {node_name}.")
logger.debug("Finished: get_node_pve_version.")
return version["version"]
@staticmethod
def set_node_resource_reservation(node_name, resource_value, proxlb_config, resource_type) -> int:
    """
    Apply a configured resource reservation to a node's total resource value.

    The reservation is looked up in `balancing.node_resource_reserve` of the ProxLB
    configuration. A valid node specific entry (including an explicit 0, which disables
    the reservation for that node) takes precedence over the `defaults` entry. If neither
    is configured, or the reservation exceeds the resource value, the value is returned
    unchanged.

    Args:
        node_name (str): The name of the node.
        resource_value (int): The total resource value in bytes.
        proxlb_config (Dict[str, Any]): A dictionary containing the ProxLB configuration.
        resource_type (str): The type of resource ('memory', 'disk', etc.).

    Returns:
        int: The resource value in bytes after applying any configured reservation.
    """
    logger.debug("Starting: set_node_resource_reservation.")
    bytes_per_gb = 1024 ** 3

    def _section(parent, key):
        # Empty YAML keys are parsed as None; treat anything that is not a dict as empty.
        value = parent.get(key) if isinstance(parent, dict) else None
        return value if isinstance(value, dict) else {}

    def _is_valid(value):
        # bool is a subclass of int, so it must be excluded explicitly; negative
        # values (and NaN, which fails the comparison) are treated as not configured.
        return isinstance(value, (int, float)) and not isinstance(value, bool) and value >= 0

    reserve_cfg = _section(_section(proxlb_config, "balancing"), "node_resource_reserve")
    node_resource_reservation = _section(reserve_cfg, node_name).get(resource_type)
    default_resource_reservation = _section(reserve_cfg, "defaults").get(resource_type)

    # Pick the reservation to apply: node specific first, then the default.
    # The labels keep the log messages of both variants distinguishable.
    if _is_valid(node_resource_reservation):
        reservation_gb, config_label, apply_label = node_resource_reservation, "resource", "node specific"
    elif _is_valid(default_resource_reservation):
        reservation_gb, config_label, apply_label = default_resource_reservation, "default", "default"
    else:
        reservation_gb, config_label, apply_label = 0, None, None

    if not reservation_gb:
        logger.debug(f"No default or node specific resource reservation for node {node_name} found. Skipping...")
        resource_value_new = resource_value
    elif resource_value < reservation_gb * bytes_per_gb:
        logger.critical(f"Configured {config_label} reservation for node {node_name} of type {resource_type} with {reservation_gb} GB is higher than available resource value {resource_value / bytes_per_gb:.2f} GB. Not applying...")
        resource_value_new = resource_value
    else:
        logger.debug(f"Applying {apply_label} reservation for {node_name} of type {resource_type} with {reservation_gb} GB.")
        # Convert to whole bytes so fractional GB reservations still yield an int.
        resource_value_new = resource_value - int(reservation_gb * bytes_per_gb)
        logger.debug(f"Switched resource value for node {node_name} of type {resource_type} from {resource_value / bytes_per_gb:.2f} GB to {resource_value_new / bytes_per_gb:.2f} GB after applying reservation.")
        logger.debug(f"Before: {resource_value} | After: {resource_value_new}")

    logger.debug("Finished: set_node_resource_reservation.")
    return resource_value_new

View File

@@ -81,6 +81,7 @@ class Helper:
"""
logger.debug("Starting: log_node_metrics.")
nodes_usage_memory = " | ".join([f"{key}: {value['memory_used_percent']:.2f}%" for key, value in proxlb_data["nodes"].items()])
nodes_assigned_memory = " | ".join([f"{key}: {value['memory_assigned_percent']:.2f}%" for key, value in proxlb_data["nodes"].items()])
nodes_usage_cpu = " | ".join([f"{key}: {value['cpu_used_percent']:.2f}%" for key, value in proxlb_data["nodes"].items()])
nodes_usage_disk = " | ".join([f"{key}: {value['disk_used_percent']:.2f}%" for key, value in proxlb_data["nodes"].items()])
@@ -90,6 +91,7 @@ class Helper:
proxlb_data["meta"]["statistics"]["after"] = {"memory": nodes_usage_memory, "cpu": nodes_usage_cpu, "disk": nodes_usage_disk}
logger.debug(f"Nodes usage memory: {nodes_usage_memory}")
logger.debug(f"Nodes usage memory assigned: {nodes_assigned_memory}")
logger.debug(f"Nodes usage cpu: {nodes_usage_cpu}")
logger.debug(f"Nodes usage disk: {nodes_usage_disk}")
logger.debug("Finished: log_node_metrics.")