Feature/resource reservation (#380)

Add resource reservation of memory for nodes
This commit is contained in:
Chipmonk2
2025-12-23 15:08:29 +01:00
committed by GitHub
parent b8093454d7
commit 2ce3d73262
6 changed files with 106 additions and 1 deletions

4
.gitignore vendored
View File

@@ -5,3 +5,7 @@ build/
dist/
*.egg-info/
proxlb_dev.yaml
log.log
proxlb/log.log
proxlb.yaml
proxlb_reservation.yaml

View File

@@ -530,7 +530,7 @@ Connect with us in our dedicated chat room for immediate support and live intera
| Support Channel | Link |
|------|:------:|
| Matrix | [#proxlb:gyptazy.com](https://matrix.to/#/#proxlb:gyptazy.com) |
| Discord | [Discord](https://discord.gg/JemGu7WbfQ) |
| Discord | [Discord](https://discord.gg/JemGu7WbfQ) |
| GitHub Community | [GitHub Community](https://github.com/gyptazy/ProxLB/discussions/) |
| GitHub | [ProxLB GitHub](https://github.com/gyptazy/ProxLB/issues) |

View File

@@ -69,6 +69,15 @@ balancing:
pin: # Define a pinning of guests to specific node(s)
- virt66
- virt77
# Reserve some resources for Proxmox and/or other applications on the nodes
node_resource_reserve:
# defaults - cpu in %, memory in GB, disk in %
defaults:
cpu: 10
memory: 4
disk: 20
node1:
memory: 4
service:
daemon: True

View File

@@ -90,6 +90,7 @@ def main():
# Update the initial node resource assignments
# by the previously created groups.
Calculations.set_node_assignments(proxlb_data)
Helper.log_node_metrics(proxlb_data, init=False)
Calculations.set_node_hot(proxlb_data)
Calculations.set_guest_hot(proxlb_data)
Calculations.get_most_free_node(proxlb_data, cli_args.best_node)

View File

@@ -24,6 +24,7 @@ __license__ = "GPL-3.0"
import time
from typing import Dict, Any
from utils.logger import SystemdLogger
from utils.helper import Helper
logger = SystemdLogger()
@@ -102,6 +103,7 @@ class Nodes:
nodes["nodes"][node["node"]]["disk_pressure_full_spikes_percent"] = Nodes.get_node_rrd_data(proxmox_api, node["node"], "disk", "full", spikes=True)
nodes["nodes"][node["node"]]["disk_pressure_hot"] = False
Nodes.apply_resource_reservation(nodes["nodes"][node["node"]]["name"], proxlb_config, nodes["nodes"][node["node"]])
# Evaluate if node should be set to maintenance mode
if Nodes.set_node_maintenance(proxmox_api, proxlb_config, node["node"]):
nodes["nodes"][node["node"]]["maintenance"] = True
@@ -253,3 +255,62 @@ class Nodes:
logger.debug(f"Got version {version['version']} for node {node_name}.")
logger.debug("Finished: get_node_pve_version.")
return version["version"]
@staticmethod
def apply_resource_reservation(node_name, proxlb_config: Dict[str, Any], node_data: Dict[str, Any]) -> None:
    """
    Apply the configured memory reservation to the gathered data of a single node.

    The reservation is looked up in `balancing.node_resource_reserve` of the ProxLB
    configuration. A node specific `memory` value takes precedence over the value in
    `defaults`; if neither is configured nothing is reserved. Only the `memory` key
    is evaluated by this function. The reservation is applied by lowering
    `node_data["memory_total"]` in place and recomputing the node's percentage values.

    Invalid values (non-numeric, non-finite or negative) are never applied: an invalid
    node value falls back to the default and an invalid default falls back to 0. A
    reservation that would consume the node's whole memory is skipped, because a total
    of 0 Bytes cannot be used to calculate percentages.

    Args:
        node_name: (str) the name of the node
        proxlb_config (Dict[str, Any]): A dictionary containing the ProxLB configuration.
        node_data: (Dict[str, Any]): Dict containing the current node's data (modified in place)

    Returns: none
    """
    import math  # local import: only needed for the finiteness check below

    def _is_number(value) -> bool:
        # bool is a subclass of int and must not be accepted as a size;
        # NaN/inf floats cannot be converted to a byte count.
        if isinstance(value, bool):
            return False
        return isinstance(value, int) or (isinstance(value, float) and math.isfinite(value))

    logger.debug("Starting: Resource reservation")
    logger.debug(f"Processing resource reservation for node {node_name}")

    # A key that is present but empty in the YAML file yields None instead of a
    # dict, so every level is checked before it is accessed.
    balancing_cfg = proxlb_config.get("balancing")
    if not isinstance(balancing_cfg, dict):
        balancing_cfg = {}
    reserve_cfg = balancing_cfg.get("node_resource_reserve")
    if not isinstance(reserve_cfg, dict):
        reserve_cfg = {}
    node_cfg = reserve_cfg.get(node_name)
    default_cfg = reserve_cfg.get("defaults")

    reserved_memory_gb_node = node_cfg.get("memory") if isinstance(node_cfg, dict) else None
    reserved_memory_gb_default = default_cfg.get("memory", 0) if isinstance(default_cfg, dict) else 0

    # Validate the default first, because it is the fallback for the node value.
    if not _is_number(reserved_memory_gb_default):
        if reserved_memory_gb_default is not None:
            logger.info("Invalid default memory reservation: expected a numeric value - skipping default reservation")
        reserved_memory_gb_default = 0
    elif reserved_memory_gb_default < 0:
        logger.info("Invalid default memory reservation: negative value - skipping default reservation")
        reserved_memory_gb_default = 0

    # A missing node value (None) silently uses the default; anything else that is
    # not a valid size is reported before falling back.
    if not _is_number(reserved_memory_gb_node):
        if reserved_memory_gb_node is not None:
            logger.info(f"{node_name}: Invalid memory reservation: expected a numeric value - applying current default of {reserved_memory_gb_default} GB")
        reserved_memory_gb_node = reserved_memory_gb_default
    elif reserved_memory_gb_node < 0:
        logger.info(f"{node_name}: Invalid assigned memory reservation, applying defaults")
        reserved_memory_gb_node = reserved_memory_gb_default

    # Convert the reservation from GB to Bytes and fetch the node's total memory.
    reserved_memory_node = int(round(reserved_memory_gb_node * 1024 ** 3))
    total_mem = node_data.get("memory_total")

    if not _is_number(total_mem) or total_mem <= 0:
        logger.info(f"{node_name}: No usable total memory value ({total_mem}) - skipping reservation")
        logger.debug("End: Resource reservation")
        return

    # The reservation must leave some memory: a remaining total of 0 Bytes would
    # cause a division by zero when the percentages are recomputed.
    if reserved_memory_node >= total_mem:
        logger.info(f"{node_name}: Reservation of {reserved_memory_node} Bytes is not smaller than the total memory of {total_mem} Bytes - skipping reservation")
        reserved_memory_node = 0
        reserved_memory_gb_node = 0

    logger.debug(f"Reserved Memory: {reserved_memory_gb_node} GB ({reserved_memory_node} Bytes)")
    # NOTE(review): only memory_total is lowered; memory_free / memory_used are left
    # as gathered - confirm that this is the intended accounting.
    node_data["memory_total"] -= reserved_memory_node
    Helper.update_node_resource_percentages(node_data)
    logger.debug("End: Resource reservation")
    return

View File

@@ -81,6 +81,7 @@ class Helper:
"""
logger.debug("Starting: log_node_metrics.")
nodes_usage_memory = " | ".join([f"{key}: {value['memory_used_percent']:.2f}%" for key, value in proxlb_data["nodes"].items()])
nodes_assigned_memory = " | ".join([f"{key}: {value['memory_assigned_percent']:.2f}%" for key, value in proxlb_data["nodes"].items()])
nodes_usage_cpu = " | ".join([f"{key}: {value['cpu_used_percent']:.2f}%" for key, value in proxlb_data["nodes"].items()])
nodes_usage_disk = " | ".join([f"{key}: {value['disk_used_percent']:.2f}%" for key, value in proxlb_data["nodes"].items()])
@@ -90,6 +91,7 @@ class Helper:
proxlb_data["meta"]["statistics"]["after"] = {"memory": nodes_usage_memory, "cpu": nodes_usage_cpu, "disk": nodes_usage_disk}
logger.debug(f"Nodes usage memory: {nodes_usage_memory}")
logger.debug(f"Nodes usage memory assigned: {nodes_assigned_memory}")
logger.debug(f"Nodes usage cpu: {nodes_usage_cpu}")
logger.debug(f"Nodes usage disk: {nodes_usage_disk}")
logger.debug("Finished: log_node_metrics.")
@@ -333,3 +335,31 @@ class Helper:
return (rc == 0, rc if rc != 0 else None)
finally:
test_socket.close()
@staticmethod
def update_node_resource_percentages(node_data: Dict[str, any]) -> None:
    """
    Recompute the percentage values of a node's resources in its respective dict.

    For each of the resources memory, cpu and disk the keys
    `<resource>_assigned_percent`, `<resource>_free_percent` and
    `<resource>_used_percent` are recalculated from the absolute values
    `<resource>_assigned`, `<resource>_free`, `<resource>_used` and
    `<resource>_total`. The dict is modified in place.

    A resource whose total is 0 (or unset) is skipped and keeps its previous
    percentage values, because a percentage of nothing cannot be calculated.

    Args:
        node_data: (Dict[str, any]) dict containing the node's data to be updated

    Returns: none
    """
    # NOTE(review): the annotation uses the builtin `any`; `typing.Any` is probably
    # what was meant - confirm that it is imported in this module before changing it.
    logger.debug("Starting: Update resource percentages")
    logger.debug(f"node data: {node_data}")

    for resource in ("memory", "cpu", "disk"):
        total = node_data[f"{resource}_total"]
        if not total:
            # Avoid a ZeroDivisionError / TypeError for a missing or empty total.
            logger.debug(f"Total of {resource} is {total} - skipping percentage update for this resource")
            continue
        for metric in ("assigned", "free", "used"):
            node_data[f"{resource}_{metric}_percent"] = node_data[f"{resource}_{metric}"] / total * 100

    logger.debug(f"node data: {node_data}")
    logger.debug("End: Update resource percentages")
    return