Compare commits

...

2 Commits

Author SHA1 Message Date
Florian Paul Azim Hoberg
23eff5069e Merge branch 'main' into feature/auto-node-upgrade 2024-08-07 13:12:24 +02:00
Florian Paul Azim Hoberg
e3fdc506f9 feature: Add node auto-update support / rolling updates 2024-08-07 13:05:51 +02:00
15 changed files with 2824 additions and 30 deletions

View File

@@ -0,0 +1,12 @@
#!/bin/bash
# Build DEB and RPM packages via CMake/CPack and collect them in ./packages.
#
# Fixes over the previous version:
#  * set -euo pipefail: abort on the first failing command instead of
#    copying stale or missing artifacts afterwards.
#  * mkdir -p: re-running the script no longer fails when the
#    'packages' or 'build' directories already exist.
set -euo pipefail

mkdir -p packages
mkdir -p build
cd build

# Configure once, then generate both package formats from the same build tree.
cmake ..
cpack -G DEB .
cpack -G RPM .

cp *.deb ../packages
cp *.rpm ../packages

cd ..
rm -rf build
echo "Packages created. Packages can be found in directory: packages"

View File

@@ -0,0 +1,37 @@
cmake_minimum_required(VERSION 3.16)
project(proxmox-rebalancing-service VERSION 1.0.0)

# Ship the patched Proxmox API endpoint file that adds the upgrade/reboot
# commands used by ProxLB's rolling-update feature.
install(FILES perl5/PVE/API2/Nodes.pm DESTINATION /usr/share/perl5/PVE/API2/)

# General
set(CPACK_PACKAGE_NAME "proxlb-additions")
set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/../LICENSE")
set(CPACK_RESOURCE_FILE_README "${CMAKE_CURRENT_SOURCE_DIR}/../README.md")
set(CPACK_DEBIAN_PACKAGE_MAINTAINER "Florian Paul Azim <gyptazy> Hoberg <gyptazy@gyptazy.ch>")
set(CPACK_PACKAGE_CONTACT "Florian Paul Azim Hoberg <gyptazy@gyptazy.ch>")
set(CPACK_PACKAGE_VENDOR "gyptazy")

# RPM packaging
set(CPACK_PACKAGE_VERSION "${CMAKE_PROJECT_VERSION}")
set(CPACK_GENERATOR "RPM")
# Fix: RPM spells the 64-bit x86 architecture "x86_64"; "amd64" is the
# Debian naming and would produce a wrongly tagged RPM.
set(CPACK_RPM_PACKAGE_ARCHITECTURE "x86_64")
set(CPACK_RPM_PACKAGE_SUMMARY "ProxLB Additions - Additional optional patched libraries for ProxLB.")
set(CPACK_RPM_PACKAGE_DESCRIPTION "ProxLB Additions - Additional optional patched libraries for ProxLB.")
set(CPACK_RPM_CHANGELOG_FILE "${CMAKE_CURRENT_SOURCE_DIR}/changelog_redhat")
set(CPACK_PACKAGE_RELEASE 1)
set(CPACK_RPM_PACKAGE_LICENSE "GPL 3.0")
# NOTE(review): on current RHEL-family distros the interpreter package is
# usually "python3"; "python >= 3.2.0" may be unresolvable — verify against
# the target distributions.
set(CPACK_RPM_PACKAGE_REQUIRES "python >= 3.2.0")

# DEB packaging
set(CPACK_DEBIAN_FILE_NAME DEB-DEFAULT)
set(CPACK_DEBIAN_PACKAGE_ARCHITECTURE "amd64")
set(CPACK_DEBIAN_PACKAGE_SUMMARY "ProxLB Additions - Additional optional patched libraries for ProxLB.")
set(CPACK_DEBIAN_PACKAGE_DESCRIPTION "ProxLB Additions - Additional optional patched libraries for ProxLB.")
# NOTE(review): CONTROL_EXTRA installs extra files into the .deb control
# archive (postinst/conffiles etc.); a Debian changelog is normally shipped
# compressed under /usr/share/doc/<pkg>/ — confirm this is intended.
set(CPACK_DEBIAN_PACKAGE_CONTROL_EXTRA "${CMAKE_CURRENT_SOURCE_DIR}/changelog_debian")
set(CPACK_DEBIAN_PACKAGE_DEPENDS "python3")
set(CPACK_DEBIAN_PACKAGE_LICENSE "GPL 3.0")

# Install
set(CPACK_PACKAGING_INSTALL_PREFIX "${CMAKE_INSTALL_PREFIX}")
include(CPack)

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,14 @@
## Build packages
Building the packages requires cmake, deb and rpm.
To build the packages, simply run the following commands:
```
mkdir build
cd build
cmake ..
cpack -G RPM .
cpack -G DEB .
```
When running on Debian/Ubuntu you can directly call `01_package.sh`
to create your own packages.

View File

@@ -0,0 +1,11 @@
proxlb (1.0.0) unstable; urgency=low
* Initial release of ProxLB.
-- Florian Paul Azim Hoberg <gyptazy@gyptazy.ch> Thu, 01 Aug 2024 17:04:12 +0200
proxlb (0.9.0) unstable; urgency=low
* Initial development release of ProxLB as a tech preview.
-- Florian Paul Azim Hoberg <gyptazy@gyptazy.ch> Sun, 07 Jul 2024 05:38:41 +0200

View File

@@ -0,0 +1,5 @@
* Thu Aug 01 2024 Florian Paul Azim Hoberg <gyptazy@gyptazy.ch>
- Initial release of ProxLB.
* Sun Jul 07 2024 Florian Paul Azim Hoberg <gyptazy@gyptazy.ch>
- Initial development release of ProxLB as a tech preview.

169
proxlb
View File

@@ -96,7 +96,7 @@ def pre_validations(config_path):
logging.info(f'{info_prefix} All pre-validations done.')
def post_validations():
def post_validations(api_object, node_requires_reboot):
""" Run post-validations as sanity checks. """
error_prefix = 'Error: [post-validations]:'
info_prefix = 'Info: [post-validations]:'
@@ -105,6 +105,8 @@ def post_validations():
logging.critical(f'{error_prefix} Not all post-validations succeeded. Please validate!')
else:
logging.info(f'{info_prefix} All post-validations succeeded.')
# Reboot node if necessary and all validations were performed.
run_node_reboot(api_object, node_requires_reboot)
def validate_daemon(daemon, schedule):
@@ -270,6 +272,97 @@ def validate_cluster_master(cluster_master):
return True
<<<<<<< HEAD
def get_node_update_status(api_object):
    """ Get the current update status of the current executing host node in the cluster.

    Parameters:
        api_object: proxmoxer-style API connection used to query the node's
                    pending APT updates.

    Returns:
        bool: True when at least one update is pending, False otherwise.

    Exits the process with status 2 when the executing host is unknown to
    the cluster.
    """
    info_prefix = 'Info: [node-update-status-getter]:'
    error_prefix = 'Error: [node-update-status-getter]:'
    node_executor_hostname = socket.gethostname()

    logging.info(f'{info_prefix} Get update status for node: {node_executor_hostname}.')
    try:
        update_status_object = api_object.nodes(node_executor_hostname).apt().update.get()
    except proxmoxer.core.ResourceException:
        # Bug fix: this is a critical error and previously used info_prefix,
        # leaving error_prefix unused and mislabelling the log line.
        logging.critical(f'{error_prefix} Unknown node in cluster: {node_executor_hostname}.')
        sys.exit(2)

    if len(update_status_object) > 0:
        logging.info(f'{info_prefix} Updates available for node: {node_executor_hostname}.')
        return True

    logging.info(f'{info_prefix} No updates available for node: {node_executor_hostname}.')
    return False
def run_node_update(api_object, node_requires_updates):
    """ Run the update execution on node.

    Parameters:
        api_object: proxmoxer-style API connection used to trigger the
                    upgrade command on the executing node.
        node_requires_updates (bool): result of get_node_update_status();
                    when False this function is a no-op.

    Exits the process with status 2 when the patched API endpoint
    (package proxlb-additions) is missing on the node.
    """
    info_prefix = 'Info: [node-update-executor]:'
    error_prefix = 'Error: [node-update-executor]:'
    node_executor_hostname = socket.gethostname()

    if node_requires_updates:
        logging.info(f'{info_prefix} Execute updates on node: {node_executor_hostname}.')
        try:
            # Return value was previously bound to an unused local; the call
            # is made purely for its side effect.
            api_object.nodes(node_executor_hostname).status().post(command='upgrade')
        except proxmoxer.core.ResourceException:
            logging.critical(f'{error_prefix} Missing API endpoint on node: {node_executor_hostname}. Please make sure to have the package proxlb-additions installed.')
            sys.exit(2)
        # Bug fix: corrected "Sucessfully" typo in the log message.
        logging.info(f'{info_prefix} Successfully integrated updates to node: {node_executor_hostname}.')
def extend_ignore_node_list(ignore_nodes):
    """ Extend the node ignore list by this node.

    Parameters:
        ignore_nodes (str): comma separated list of node names to ignore.

    Returns:
        str: the list with the executing node's hostname appended.
    """
    info_prefix = 'Info: [node-ignore-list-adder]:'
    node_executor_hostname = socket.gethostname()

    logging.info(f'{info_prefix} Adding node {node_executor_hostname} to ignore list.')
    # Robustness: avoid a leading comma (and a resulting empty list entry
    # after split(',')) when the incoming list is empty.
    if ignore_nodes:
        ignore_nodes = ignore_nodes + f',{node_executor_hostname}'
    else:
        ignore_nodes = node_executor_hostname
    logging.info(f'{info_prefix} Ignored nodes are now: {ignore_nodes}.')
    return ignore_nodes
def get_node_reboot_status():
    """ Get the current reboot status of the current executing host node in the cluster.

    Checks for the Debian-style /var/run/reboot-required marker file.

    Returns:
        bool: True when the marker file exists (reboot pending), False otherwise.
    """
    info_prefix = 'Info: [node-reboot-status-getter]:'
    # Debian/Ubuntu convention: present when installed updates need a reboot.
    reboot_status_file = '/var/run/reboot-required'
    node_executor_hostname = socket.gethostname()

    logging.info(f'{info_prefix} Get reboot status for node: {node_executor_hostname}.')
    if os.path.exists(reboot_status_file):
        logging.info(f'{info_prefix} Reboot required for node: {node_executor_hostname}.')
        return True

    logging.info(f'{info_prefix} No reboot required for node: {node_executor_hostname}.')
    return False
def run_node_reboot(api_object, node_requires_reboot):
    """ Run the reboot execution on node.

    (Docstring fix: previously copy-pasted from run_node_update and
    described an update instead of a reboot.)

    Parameters:
        api_object: proxmoxer-style API connection used to trigger the
                    reboot command on the executing node.
        node_requires_reboot (bool): result of get_node_reboot_status();
                    when False this function is a no-op.

    Exits the process with status 2 when the patched API endpoint
    (package proxlb-additions) is missing on the node.
    """
    info_prefix = 'Info: [node-reboot-executor]:'
    error_prefix = 'Error: [node-reboot-executor]:'
    node_executor_hostname = socket.gethostname()

    if node_requires_reboot:
        logging.info(f'{info_prefix} Execute reboot on node: {node_executor_hostname}.')
        try:
            # Return value was previously bound to an unused local; the call
            # is made purely for its side effect.
            api_object.nodes(node_executor_hostname).status().post(command='reboot')
        except proxmoxer.core.ResourceException:
            logging.critical(f'{error_prefix} Missing API endpoint on node: {node_executor_hostname}. Please make sure to have the package proxlb-additions installed.')
            sys.exit(2)
        logging.info(f'{info_prefix} Rebooting node now: {node_executor_hostname}.')
=======
>>>>>>> main
def get_node_statistics(api_object, ignore_nodes):
""" Get statistics of cpu, memory and disk for each node in the cluster. """
info_prefix = 'Info: [node-statistics]:'
@@ -404,27 +497,29 @@ def get_vm_statistics(api_object, ignore_vms, balancing_type):
return vm_statistics
def update_node_statistics(node_statistics, vm_statistics):
def update_node_statistics(node_statistics, vm_statistics, ignore_nodes):
""" Update node statistics by VMs statistics. """
info_prefix = 'Info: [node-update-statistics]:'
warn_prefix = 'Warning: [node-update-statistics]:'
info_prefix = 'Info: [node-update-statistics]:'
warn_prefix = 'Warning: [node-update-statistics]:'
ignore_nodes_list = ignore_nodes.split(',')
for vm, vm_value in vm_statistics.items():
node_statistics[vm_value['node_parent']]['cpu_assigned'] = node_statistics[vm_value['node_parent']]['cpu_assigned'] + int(vm_value['cpu_total'])
node_statistics[vm_value['node_parent']]['cpu_assigned_percent'] = (node_statistics[vm_value['node_parent']]['cpu_assigned'] / node_statistics[vm_value['node_parent']]['cpu_total']) * 100
node_statistics[vm_value['node_parent']]['memory_assigned'] = node_statistics[vm_value['node_parent']]['memory_assigned'] + int(vm_value['memory_total'])
node_statistics[vm_value['node_parent']]['memory_assigned_percent'] = (node_statistics[vm_value['node_parent']]['memory_assigned'] / node_statistics[vm_value['node_parent']]['memory_total']) * 100
node_statistics[vm_value['node_parent']]['disk_assigned'] = node_statistics[vm_value['node_parent']]['disk_assigned'] + int(vm_value['disk_total'])
node_statistics[vm_value['node_parent']]['disk_assigned_percent'] = (node_statistics[vm_value['node_parent']]['disk_assigned'] / node_statistics[vm_value['node_parent']]['disk_total']) * 100
if not vm_value['node_parent'] in ignore_nodes_list:
node_statistics[vm_value['node_parent']]['cpu_assigned'] = node_statistics[vm_value['node_parent']]['cpu_assigned'] + int(vm_value['cpu_total'])
node_statistics[vm_value['node_parent']]['cpu_assigned_percent'] = (node_statistics[vm_value['node_parent']]['cpu_assigned'] / node_statistics[vm_value['node_parent']]['cpu_total']) * 100
node_statistics[vm_value['node_parent']]['memory_assigned'] = node_statistics[vm_value['node_parent']]['memory_assigned'] + int(vm_value['memory_total'])
node_statistics[vm_value['node_parent']]['memory_assigned_percent'] = (node_statistics[vm_value['node_parent']]['memory_assigned'] / node_statistics[vm_value['node_parent']]['memory_total']) * 100
node_statistics[vm_value['node_parent']]['disk_assigned'] = node_statistics[vm_value['node_parent']]['disk_assigned'] + int(vm_value['disk_total'])
node_statistics[vm_value['node_parent']]['disk_assigned_percent'] = (node_statistics[vm_value['node_parent']]['disk_assigned'] / node_statistics[vm_value['node_parent']]['disk_total']) * 100
if node_statistics[vm_value['node_parent']]['cpu_assigned_percent'] > 99:
logging.warning(f'{warn_prefix} Node {vm_value["node_parent"]} is overprovisioned for CPU by {int(node_statistics[vm_value["node_parent"]]["cpu_assigned_percent"])}%.')
if node_statistics[vm_value['node_parent']]['cpu_assigned_percent'] > 99:
logging.warning(f'{warn_prefix} Node {vm_value["node_parent"]} is overprovisioned for CPU by {int(node_statistics[vm_value["node_parent"]]["cpu_assigned_percent"])}%.')
if node_statistics[vm_value['node_parent']]['memory_assigned_percent'] > 99:
logging.warning(f'{warn_prefix} Node {vm_value["node_parent"]} is overprovisioned for memory by {int(node_statistics[vm_value["node_parent"]]["memory_assigned_percent"])}%.')
if node_statistics[vm_value['node_parent']]['memory_assigned_percent'] > 99:
logging.warning(f'{warn_prefix} Node {vm_value["node_parent"]} is overprovisioned for memory by {int(node_statistics[vm_value["node_parent"]]["memory_assigned_percent"])}%.')
if node_statistics[vm_value['node_parent']]['disk_assigned_percent'] > 99:
logging.warning(f'{warn_prefix} Node {vm_value["node_parent"]} is overprovisioned for disk by {int(node_statistics[vm_value["node_parent"]]["disk_assigned_percent"])}%.')
if node_statistics[vm_value['node_parent']]['disk_assigned_percent'] > 99:
logging.warning(f'{warn_prefix} Node {vm_value["node_parent"]} is overprovisioned for disk by {int(node_statistics[vm_value["node_parent"]]["disk_assigned_percent"])}%.')
logging.info(f'{info_prefix} Updated node resource assignments by all VMs.')
logging.debug('node_statistics')
@@ -484,7 +579,7 @@ def __get_proxlb_groups(vm_tags):
return group_include, group_exclude, vm_ignore
def balancing_calculations(balancing_method, balancing_mode, balancing_mode_option, node_statistics, vm_statistics, balanciness, rebalance, processed_vms):
def balancing_calculations(balancing_method, balancing_mode, balancing_mode_option, node_statistics, vm_statistics, balanciness, ignore_nodes, rebalance, processed_vms):
""" Calculate re-balancing of VMs on present nodes across the cluster. """
info_prefix = 'Info: [rebalancing-calculator]:'
@@ -501,14 +596,14 @@ def balancing_calculations(balancing_method, balancing_mode, balancing_mode_opti
# Update resource statistics for VMs and nodes.
node_statistics, vm_statistics = __update_resource_statistics(resources_vm_most_used, resources_node_most_free,
vm_statistics, node_statistics, balancing_method, balancing_mode)
vm_statistics, node_statistics, balancing_method, balancing_mode, ignore_nodes)
# Start recursion until we do not have any needs to rebalance anymore.
balancing_calculations(balancing_method, balancing_mode, balancing_mode_option, node_statistics, vm_statistics, balanciness, rebalance, processed_vms)
balancing_calculations(balancing_method, balancing_mode, balancing_mode_option, node_statistics, vm_statistics, balanciness, ignore_nodes, rebalance, processed_vms)
# Honour groupings for include and exclude groups for rebalancing VMs.
node_statistics, vm_statistics = __get_vm_tags_include_groups(vm_statistics, node_statistics, balancing_method, balancing_mode)
node_statistics, vm_statistics = __get_vm_tags_exclude_groups(vm_statistics, node_statistics, balancing_method, balancing_mode)
node_statistics, vm_statistics = __get_vm_tags_include_groups(vm_statistics, node_statistics, balancing_method, balancing_mode, ignore_nodes)
node_statistics, vm_statistics = __get_vm_tags_exclude_groups(vm_statistics, node_statistics, balancing_method, balancing_mode, ignore_nodes)
# Remove VMs that are not being relocated.
vms_to_remove = [vm_name for vm_name, vm_info in vm_statistics.items() if 'node_rebalance' in vm_info and vm_info['node_rebalance'] == vm_info.get('node_parent')]
@@ -632,11 +727,12 @@ def __get_most_free_resources_node(balancing_method, balancing_mode, balancing_m
return node
def __update_resource_statistics(resource_highest_used_resources_vm, resource_highest_free_resources_node, vm_statistics, node_statistics, balancing_method, balancing_mode):
def __update_resource_statistics(resource_highest_used_resources_vm, resource_highest_free_resources_node, vm_statistics, node_statistics, balancing_method, balancing_mode, ignore_nodes):
""" Update VM and node resource statistics. """
info_prefix = 'Info: [rebalancing-resource-statistics-update]:'
ignore_nodes_list = ignore_nodes.split(',')
if resource_highest_used_resources_vm[1]['node_parent'] != resource_highest_free_resources_node[0]:
if resource_highest_used_resources_vm[1]['node_parent'] != resource_highest_free_resources_node[0] and resource_highest_used_resources_vm[1]['node_parent'] not in ignore_nodes_list:
vm_name = resource_highest_used_resources_vm[0]
vm_node_parent = resource_highest_used_resources_vm[1]['node_parent']
vm_node_rebalance = resource_highest_free_resources_node[0]
@@ -668,7 +764,7 @@ def __update_resource_statistics(resource_highest_used_resources_vm, resource_hi
return node_statistics, vm_statistics
def __get_vm_tags_include_groups(vm_statistics, node_statistics, balancing_method, balancing_mode):
def __get_vm_tags_include_groups(vm_statistics, node_statistics, balancing_method, balancing_mode, ignore_nodes):
""" Get VMs tags for include groups. """
info_prefix = 'Info: [rebalancing-tags-group-include]:'
tags_include_vms = {}
@@ -697,13 +793,13 @@ def __get_vm_tags_include_groups(vm_statistics, node_statistics, balancing_metho
vm_node_rebalance = vm_statistics[vm_name]['node_rebalance']
else:
_mocked_vm_object = (vm_name, vm_statistics[vm_name])
node_statistics, vm_statistics = __update_resource_statistics(_mocked_vm_object, [vm_node_rebalance], vm_statistics, node_statistics, balancing_method, balancing_mode)
node_statistics, vm_statistics = __update_resource_statistics(_mocked_vm_object, [vm_node_rebalance], vm_statistics, node_statistics, balancing_method, balancing_mode, ignore_nodes)
processed_vm.append(vm_name)
return node_statistics, vm_statistics
def __get_vm_tags_exclude_groups(vm_statistics, node_statistics, balancing_method, balancing_mode):
def __get_vm_tags_exclude_groups(vm_statistics, node_statistics, balancing_method, balancing_mode, ignore_nodes):
""" Get VMs tags for exclude groups. """
info_prefix = 'Info: [rebalancing-tags-group-exclude]:'
tags_exclude_vms = {}
@@ -736,7 +832,7 @@ def __get_vm_tags_exclude_groups(vm_statistics, node_statistics, balancing_metho
random_node = random.choice(list(node_statistics.keys()))
else:
_mocked_vm_object = (vm_name, vm_statistics[vm_name])
node_statistics, vm_statistics = __update_resource_statistics(_mocked_vm_object, [random_node], vm_statistics, node_statistics, balancing_method, balancing_mode)
node_statistics, vm_statistics = __update_resource_statistics(_mocked_vm_object, [random_node], vm_statistics, node_statistics, balancing_method, balancing_mode, ignore_nodes)
processed_vm.append(vm_name)
return node_statistics, vm_statistics
@@ -891,20 +987,33 @@ def main():
validate_daemon(daemon, schedule)
continue
<<<<<<< HEAD
# Validate for node auto update in cluster for rolling updates.
# Note: This requires proxlb-additions with a patched Proxmox API!
#rolling_updates = 1
if bool(int(rolling_updates)):
node_requires_updates = get_node_update_status(api_object)
run_node_update(api_object, node_requires_updates)
node_requires_reboot = get_node_reboot_status()
if node_requires_reboot:
ignore_nodes = extend_ignore_node_list(ignore_nodes)
=======
>>>>>>> main
# Get metric & statistics for vms and nodes.
node_statistics = get_node_statistics(api_object, ignore_nodes)
vm_statistics = get_vm_statistics(api_object, ignore_vms, balancing_type)
node_statistics = update_node_statistics(node_statistics, vm_statistics)
node_statistics = update_node_statistics(node_statistics, vm_statistics, ignore_nodes)
# Calculate rebalancing of vms.
node_statistics_rebalanced, vm_statistics_rebalanced = balancing_calculations(balancing_method, balancing_mode, balancing_mode_option,
node_statistics, vm_statistics, balanciness, rebalance=False, processed_vms=[])
node_statistics, vm_statistics, balanciness, ignore_nodes, rebalance=False, processed_vms=[])
# Rebalance vms to new nodes within the cluster.
run_vm_rebalancing(api_object, vm_statistics_rebalanced, app_args, parallel_migrations)
# Validate for any errors.
post_validations()
post_validations(api_object, node_requires_reboot)
# Validate daemon service.
validate_daemon(daemon, schedule)