mirror of
https://github.com/gyptazy/ProxLB.git
synced 2026-04-05 20:31:57 +02:00
Merge pull request #404 from gyptazy/feature/373-add-resource-reservation-for-nodes
Feature/resource reservation (#380)
This commit is contained in:
@@ -0,0 +1,2 @@
|
||||
added:
|
||||
- Add resource reservation support for PVE nodes (@Chipmonk2). [#373]
|
||||
@@ -290,6 +290,7 @@ The following options can be set in the configuration file `proxlb.yaml`:
|
||||
| | method | | memory | `Str` | The balancing method that should be used. [values: `memory` (default), `cpu`, `disk`]|
|
||||
| | mode | | used | `Str` | The balancing mode that should be used. [values: `used` (default), `assigned`, `psi` (pressure)] |
|
||||
| | balance_larger_guests_first | | False | `Bool` | Option to prefer larger/smaller guests first |
|
||||
| | node_resource_reserve | | { defaults: { memory: 4 }, node01: { memory: 6 } } | `Dict` | A dict of node names (or `defaults`) mapping to per-resource reservations in GB that are subtracted from the node's available resources |
|
||||
| | psi | | { nodes: { memory: { pressure_full: 0.20, pressure_some: 0.20, pressure_spikes: 1.00 }}} | `Dict` | A dict of PSI based thresholds for nodes and guests |
|
||||
| | pools | | pools: { dev: { type: affinity }, de-nbg01-db: { type: anti-affinity }} | `Dict` | A dict of pool names and their type for creating affinity/anti-affinity rules |
|
||||
| `service` | | | | | |
|
||||
@@ -338,6 +339,11 @@ balancing:
|
||||
method: memory
|
||||
mode: used
|
||||
balance_larger_guests_first: False
|
||||
node_resource_reserve:
|
||||
defaults:
|
||||
memory: 4
|
||||
node01:
|
||||
memory: 6
|
||||
# # PSI thresholds only apply when using mode 'psi'
|
||||
# # PSI based balancing is currently in beta and req. PVE >= 9
|
||||
# psi:
|
||||
|
||||
@@ -33,6 +33,11 @@ balancing:
|
||||
method: memory # 'memory' | 'cpu' | 'disk'
|
||||
mode: used # 'assigned' | 'used' | 'psi'
|
||||
balance_larger_guests_first: False # Option to prioritize balancing of larger or smaller guests first
|
||||
node_resource_reserve: # Optional: Define resource reservations for nodes (in GB)
|
||||
defaults: # Default reservation values applying to all nodes (unless explicitly overridden)
|
||||
memory: 4 # Default: 4 GB memory reserved per node
|
||||
node01: # Specific node reservation override for node 'node01'
|
||||
memory: 6 # Specific: 6 GB memory reserved for node 'node01'
|
||||
# # PSI thresholds only apply when using mode 'psi'
|
||||
# psi:
|
||||
# nodes:
|
||||
|
||||
@@ -90,6 +90,7 @@ def main():
|
||||
# Update the initial node resource assignments
|
||||
# by the previously created groups.
|
||||
Calculations.set_node_assignments(proxlb_data)
|
||||
Helper.log_node_metrics(proxlb_data, init=False)
|
||||
Calculations.set_node_hot(proxlb_data)
|
||||
Calculations.set_guest_hot(proxlb_data)
|
||||
Calculations.get_most_free_node(proxlb_data, cli_args.best_node)
|
||||
|
||||
@@ -753,10 +753,10 @@ class Calculations:
|
||||
guest_disk_required = proxlb_data["guests"][guest_name]["disk_used"]
|
||||
|
||||
if guest_memory_required < node_memory_free:
|
||||
logger.debug(f"Node '{node_target}' has sufficient resources for guest '{guest_name}'.")
|
||||
logger.debug(f"Node '{node_target}' has sufficient resources ({node_memory_free / (1024 ** 3):.2f} GB free) for guest '{guest_name}'.")
|
||||
logger.debug("Finished: validate_node_resources.")
|
||||
return True
|
||||
else:
|
||||
logger.debug(f"Node '{node_target}' lacks sufficient resources for guest '{guest_name}'.")
|
||||
logger.debug(f"Node '{node_target}' lacks sufficient resources ({node_memory_free / (1024 ** 3):.2f} GB free) for guest '{guest_name}'.")
|
||||
logger.debug("Finished: validate_node_resources.")
|
||||
return False
|
||||
|
||||
@@ -24,6 +24,7 @@ __license__ = "GPL-3.0"
|
||||
import time
|
||||
from typing import Dict, Any
|
||||
from utils.logger import SystemdLogger
|
||||
from utils.helper import Helper
|
||||
|
||||
logger = SystemdLogger()
|
||||
|
||||
@@ -77,7 +78,7 @@ class Nodes:
|
||||
nodes["nodes"][node["node"]]["cpu_pressure_some_spikes_percent"] = Nodes.get_node_rrd_data(proxmox_api, node["node"], "cpu", "some", spikes=True)
|
||||
nodes["nodes"][node["node"]]["cpu_pressure_full_spikes_percent"] = Nodes.get_node_rrd_data(proxmox_api, node["node"], "cpu", "full", spikes=True)
|
||||
nodes["nodes"][node["node"]]["cpu_pressure_hot"] = False
|
||||
nodes["nodes"][node["node"]]["memory_total"] = node["maxmem"]
|
||||
nodes["nodes"][node["node"]]["memory_total"] = Nodes.set_node_resource_reservation(node["node"], node["maxmem"], proxlb_config, "memory")
|
||||
nodes["nodes"][node["node"]]["memory_assigned"] = 0
|
||||
nodes["nodes"][node["node"]]["memory_used"] = node["mem"]
|
||||
nodes["nodes"][node["node"]]["memory_free"] = node["maxmem"] - node["mem"]
|
||||
@@ -253,3 +254,59 @@ class Nodes:
|
||||
logger.debug(f"Got version {version['version']} for node {node_name}.")
|
||||
logger.debug("Finished: get_node_pve_version.")
|
||||
return version["version"]
|
||||
|
||||
@staticmethod
def set_node_resource_reservation(node_name, resource_value, proxlb_config, resource_type) -> int:
    """
    Check if there is a configured resource reservation for the current node and apply it as needed.

    A node specific reservation takes precedence over the cluster-wide 'defaults'
    reservation; when neither is configured the value is returned unchanged.
    Reservation values are interpreted as GB and subtracted from the given
    resource value (which is in bytes). A reservation larger than the available
    resource value is rejected and logged as critical instead of being applied.

    Args:
        node_name (str): The name of the node.
        resource_value (int): The total resource value in bytes.
        proxlb_config (Dict[str, Any]): A dictionary containing the ProxLB configuration.
        resource_type (str): The type of resource ('memory', 'disk', etc.).

    Returns:
        int: The resource value in bytes after applying any configured reservations.
    """
    logger.debug("Starting: set_node_resource_reservation.")

    # 'or {}' guards against YAML keys that are present but null (e.g. 'node01:'
    # with no value), which would otherwise raise AttributeError on .get().
    balancing_cfg = proxlb_config.get("balancing", {}) or {}
    reserve_cfg = balancing_cfg.get("node_resource_reserve", {}) or {}

    def _reservation_gb(config_key):
        # Read a reservation value (GB) for the given config key, tolerating
        # missing or malformed entries. bool is excluded explicitly because it
        # is a subclass of int and 'memory: true' must not reserve 1 GB.
        value = (reserve_cfg.get(config_key) or {}).get(resource_type, 0)
        if isinstance(value, bool) or not isinstance(value, (int, float)):
            return 0
        return value

    node_reservation = _reservation_gb(node_name)
    default_reservation = _reservation_gb("defaults")

    # Node specific reservation wins over the cluster-wide default.
    if node_reservation > 0:
        scope = "node specific"
        reservation_gb = node_reservation
    elif default_reservation > 0:
        scope = "default"
        reservation_gb = default_reservation
    else:
        logger.debug(f"No default or node specific resource reservation for node {node_name} found. Skipping...")
        logger.debug("Finished: set_node_resource_reservation.")
        return resource_value

    # int() keeps the return type integral even for fractional GB reservations.
    reservation_bytes = int(reservation_gb * 1024 ** 3)
    if resource_value < reservation_bytes:
        # Refuse to apply a reservation that exceeds the node's total resource.
        logger.critical(f"Configured {scope} reservation for node {node_name} of type {resource_type} with {reservation_gb} GB is higher than available resource value {resource_value / (1024 ** 3):.2f} GB. Not applying...")
        logger.debug("Finished: set_node_resource_reservation.")
        return resource_value

    resource_value_new = resource_value - reservation_bytes
    logger.debug(f"Applying {scope} reservation for {node_name} of type {resource_type} with {reservation_gb} GB.")
    logger.debug(f"Switched resource value for node {node_name} of type {resource_type} from {resource_value / (1024 ** 3):.2f} GB to {resource_value_new / (1024 ** 3):.2f} GB after applying reservation.")
    logger.debug(f"Before: {resource_value} | After: {resource_value_new}")
    logger.debug("Finished: set_node_resource_reservation.")
    return resource_value_new
|
||||
|
||||
@@ -81,6 +81,7 @@ class Helper:
|
||||
"""
|
||||
logger.debug("Starting: log_node_metrics.")
|
||||
nodes_usage_memory = " | ".join([f"{key}: {value['memory_used_percent']:.2f}%" for key, value in proxlb_data["nodes"].items()])
|
||||
nodes_assigned_memory = " | ".join([f"{key}: {value['memory_assigned_percent']:.2f}%" for key, value in proxlb_data["nodes"].items()])
|
||||
nodes_usage_cpu = " | ".join([f"{key}: {value['cpu_used_percent']:.2f}%" for key, value in proxlb_data["nodes"].items()])
|
||||
nodes_usage_disk = " | ".join([f"{key}: {value['disk_used_percent']:.2f}%" for key, value in proxlb_data["nodes"].items()])
|
||||
|
||||
@@ -90,6 +91,7 @@ class Helper:
|
||||
proxlb_data["meta"]["statistics"]["after"] = {"memory": nodes_usage_memory, "cpu": nodes_usage_cpu, "disk": nodes_usage_disk}
|
||||
|
||||
logger.debug(f"Nodes usage memory: {nodes_usage_memory}")
|
||||
logger.debug(f"Nodes usage memory assigned: {nodes_assigned_memory}")
|
||||
logger.debug(f"Nodes usage cpu: {nodes_usage_cpu}")
|
||||
logger.debug(f"Nodes usage disk: {nodes_usage_disk}")
|
||||
logger.debug("Finished: log_node_metrics.")
|
||||
|
||||
Reference in New Issue
Block a user