#!/usr/bin/env python3

# ProxLB
# ProxLB (re)balances VM workloads across nodes in Proxmox clusters.
# ProxLB obtains current metrics from all nodes within the cluster for
# further auto balancing by memory, disk or cpu and rebalances the VMs
# over all available nodes in a cluster by having an equal resource usage.
# Copyright (C) 2024 Florian Paul Azim Hoberg @gyptazy <gyptazy@gyptazy.ch>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

import argparse
import configparser
import json
import logging
import os
import random
import re
import sys
import time

try:
    # Guard the third-party imports so that a missing dependency can be
    # reported cleanly in __validate_imports() instead of crashing here.
    import proxmoxer
    import requests
    import urllib3
    _imports = True
except ImportError:
    _imports = False


# Constants
__appname__ = "ProxLB"
__version__ = "0.9.9"
__author__ = "Florian Paul Azim Hoberg <gyptazy@gyptazy.ch> @gyptazy"
__errors__ = False


# Classes
## Logging class
class SystemdHandler(logging.Handler):
    """ Class to handle logging options. """
    PREFIX = {
        logging.CRITICAL: "<2> " + __appname__ + ": ",
        logging.ERROR: "<3> " + __appname__ + ": ",
        logging.WARNING: "<4> " + __appname__ + ": ",
        logging.INFO: "<6> " + __appname__ + ": ",
        logging.DEBUG: "<7> " + __appname__ + ": ",
        logging.NOTSET: "<7> " + __appname__ + ": ",
    }

    def __init__(self, stream=sys.stdout):
        self.stream = stream
        logging.Handler.__init__(self)

    def emit(self, record):
        try:
            msg = self.PREFIX[record.levelno] + self.format(record) + "\n"
            self.stream.write(msg)
            self.stream.flush()
        except Exception:
            self.handleError(record)


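# Note: the numeric "<N> " prefixes above follow the sd-daemon(3)/systemd
# convention for log priorities (e.g. <3> = error, <6> = info, <7> = debug),
# which lets journald filter and colorize the messages by severity.

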
# Functions
def initialize_logger(log_level):
    """ Initialize the ProxLB logging handler. """
    info_prefix = 'Info: [logger]:'

    root_logger = logging.getLogger()
    root_logger.setLevel(log_level)
    root_logger.addHandler(SystemdHandler())
    logging.info(f'{info_prefix} Logger initialized.')


def pre_validations(config_path):
    """ Run pre-validations as sanity checks. """
    info_prefix = 'Info: [pre-validations]:'

    __validate_imports()
    __validate_config_file(config_path)
    logging.info(f'{info_prefix} All pre-validations done.')


def post_validations():
    """ Run post-validations as sanity checks. """
    error_prefix = 'Error: [post-validations]:'
    info_prefix = 'Info: [post-validations]:'

    if __errors__:
        logging.critical(f'{error_prefix} Not all post-validations succeeded. Please validate!')
    else:
        logging.info(f'{info_prefix} All post-validations succeeded.')


def validate_daemon(daemon, schedule):
    """ Validate if ProxLB runs as a daemon. """
    info_prefix = 'Info: [daemon]:'

    if bool(int(daemon)):
        logging.info(f'{info_prefix} Running in daemon mode. Next run in {schedule} hours.')
        # The schedule is defined in hours; convert it to seconds for sleep.
        time.sleep(int(schedule) * 60 * 60)
    else:
        logging.info(f'{info_prefix} Not running in daemon mode. Quitting.')
        sys.exit(0)


def __validate_imports():
    """ Validate if all Python imports succeeded. """
    error_prefix = 'Error: [python-imports]:'
    info_prefix = 'Info: [python-imports]:'

    if not _imports:
        logging.critical(f'{error_prefix} Could not import all dependencies. Please install "proxmoxer", "requests" and "urllib3".')
        sys.exit(2)
    else:
        logging.info(f'{info_prefix} All required dependencies were imported.')


def __validate_config_file(config_path):
    """ Validate that the config file is present at the given path. """
    error_prefix = 'Error: [config]:'
    info_prefix = 'Info: [config]:'

    if not os.path.isfile(config_path):
        logging.critical(f'{error_prefix} Could not find config file in: {config_path}.')
        sys.exit(2)
    else:
        logging.info(f'{info_prefix} Configuration file found at: {config_path}.')


def initialize_args():
    """ Initialize given arguments for ProxLB. """
    argparser = argparse.ArgumentParser(description='ProxLB')
    # The config file is optional on the cli; initialize_config_path()
    # falls back to a default path when the option is omitted.
    argparser.add_argument('-c', '--config', type=str, help='Path to config file.', required=False)
    argparser.add_argument('-d', '--dry-run', help='Perform a dry-run without doing any actions.', action='store_true', required=False)
    argparser.add_argument('-j', '--json', help='Return a JSON of the VM movement.', action='store_true', required=False)
    return argparser.parse_args()


def initialize_config_path(app_args):
    """ Initialize path to ProxLB config file. """
    info_prefix = 'Info: [config]:'

    config_path = app_args.config
    if app_args.config is None:
        config_path = '/etc/proxlb/proxlb.conf'
        logging.info(f'{info_prefix} No config file provided. Falling back to: {config_path}.')
    else:
        logging.info(f'{info_prefix} Using config file: {config_path}.')
    return config_path


def initialize_config_options(config_path):
    """ Read configuration from given config file for ProxLB. """
    error_prefix = 'Error: [config]:'
    info_prefix = 'Info: [config]:'

    try:
        config = configparser.ConfigParser()
        config.read(config_path)
        # Proxmox config
        proxmox_api_host = config['proxmox']['api_host']
        proxmox_api_user = config['proxmox']['api_user']
        proxmox_api_pass = config['proxmox']['api_pass']
        proxmox_api_ssl_v = config['proxmox']['verify_ssl']
        # Balancing
        balancing_method = config['balancing'].get('method', 'memory')
        balanciness = config['balancing'].get('balanciness', 10)
        ignore_nodes = config['balancing'].get('ignore_nodes', None)
        ignore_vms = config['balancing'].get('ignore_vms', None)
        # Service
        daemon = config['service'].get('daemon', 1)
        schedule = config['service'].get('schedule', 24)
    except configparser.NoSectionError:
        logging.critical(f'{error_prefix} Could not find the required section.')
        sys.exit(2)
    except configparser.ParsingError:
        logging.critical(f'{error_prefix} Unable to parse the config file.')
        sys.exit(2)
    except KeyError:
        logging.critical(f'{error_prefix} Could not find the required options in config file.')
        sys.exit(2)

    logging.info(f'{info_prefix} Configuration file loaded.')
    return proxmox_api_host, proxmox_api_user, proxmox_api_pass, proxmox_api_ssl_v, balancing_method, \
        balanciness, ignore_nodes, ignore_vms, daemon, schedule


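# For reference, a minimal proxlb.conf covering the options read above could
# look like this (all values are illustrative, not an authoritative sample):
#
#   [proxmox]
#   api_host: proxmox01.example.com
#   api_user: root@pam
#   api_pass: secret
#   verify_ssl: 1
#
#   [balancing]
#   method: memory
#   balanciness: 10
#   ignore_nodes: node04
#   ignore_vms: vm-template*,backupvm01
#
#   [service]
#   daemon: 1
#   schedule: 24

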
def api_connect(proxmox_api_host, proxmox_api_user, proxmox_api_pass, proxmox_api_ssl_v):
    """ Connect and authenticate to the Proxmox remote API. """
    error_prefix = 'Error: [api-connection]:'
    warn_prefix = 'Warning: [api-connection]:'
    info_prefix = 'Info: [api-connection]:'
    proxmox_api_ssl_v = bool(int(proxmox_api_ssl_v))

    if not proxmox_api_ssl_v:
        requests.packages.urllib3.disable_warnings()
        logging.warning(f'{warn_prefix} API connection does not verify SSL certificate.')

    try:
        api_object = proxmoxer.ProxmoxAPI(proxmox_api_host, user=proxmox_api_user, password=proxmox_api_pass, verify_ssl=proxmox_api_ssl_v)
    except urllib3.exceptions.NameResolutionError:
        logging.critical(f'{error_prefix} Could not resolve the given host: {proxmox_api_host}.')
        sys.exit(2)
    except requests.exceptions.ConnectTimeout:
        logging.critical(f'{error_prefix} Connection timed out to host: {proxmox_api_host}.')
        sys.exit(2)
    except requests.exceptions.SSLError:
        logging.critical(f'{error_prefix} SSL certificate verification failed for host: {proxmox_api_host}.')
        sys.exit(2)

    logging.info(f'{info_prefix} API connection succeeded to host: {proxmox_api_host}.')
    return api_object


def get_node_statistics(api_object, ignore_nodes):
    """ Get statistics of cpu, memory and disk for each node in the cluster. """
    info_prefix = 'Info: [node-statistics]:'
    node_statistics = {}
    # ignore_nodes may be unset in the config; fall back to an empty list.
    ignore_nodes_list = ignore_nodes.split(',') if ignore_nodes else []

    for node in api_object.nodes.get():
        if node['status'] == 'online' and node['node'] not in ignore_nodes_list:
            node_statistics[node['node']] = {}
            node_statistics[node['node']]['cpu_total'] = node['maxcpu']
            node_statistics[node['node']]['cpu_used'] = node['cpu']
            node_statistics[node['node']]['cpu_free'] = int(node['maxcpu']) - int(node['cpu'])
            node_statistics[node['node']]['cpu_free_percent'] = int((node_statistics[node['node']]['cpu_free']) / int(node['maxcpu']) * 100)
            node_statistics[node['node']]['memory_total'] = node['maxmem']
            node_statistics[node['node']]['memory_used'] = node['mem']
            node_statistics[node['node']]['memory_free'] = int(node['maxmem']) - int(node['mem'])
            node_statistics[node['node']]['memory_free_percent'] = int((node_statistics[node['node']]['memory_free']) / int(node['maxmem']) * 100)
            node_statistics[node['node']]['disk_total'] = node['maxdisk']
            node_statistics[node['node']]['disk_used'] = node['disk']
            node_statistics[node['node']]['disk_free'] = int(node['maxdisk']) - int(node['disk'])
            node_statistics[node['node']]['disk_free_percent'] = int((node_statistics[node['node']]['disk_free']) / int(node['maxdisk']) * 100)
            logging.info(f'{info_prefix} Added node {node["node"]}.')

    logging.info(f'{info_prefix} Created node statistics.')
    return node_statistics


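# For reference, each node entry created above has the shape
#   {'cpu_total': ..., 'cpu_used': ..., 'cpu_free': ..., 'cpu_free_percent': ...,
#    'memory_total': ..., 'memory_used': ..., 'memory_free': ..., 'memory_free_percent': ...,
#    'disk_total': ..., 'disk_used': ..., 'disk_free': ..., 'disk_free_percent': ...}
# keyed by the node name, which is what the balancing calculations below rely on.

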
def get_vm_statistics(api_object, ignore_vms):
    """ Get statistics of cpu, memory and disk for each vm in the cluster. """
    info_prefix = 'Info: [vm-statistics]:'
    vm_statistics = {}
    # ignore_vms may be unset in the config; fall back to an empty list.
    ignore_vms_list = ignore_vms.split(',') if ignore_vms else []

    # Wildcard support: Initially validate if we need to honour
    # any wildcards within the vm_ignore list.
    vm_ignore_wildcard = __validate_ignore_vm_wildcard(ignore_vms)

    for node in api_object.nodes.get():
        for vm in api_object.nodes(node['node']).qemu.get():

            # Reset the group assignments for every VM so that tags from a
            # previously processed VM do not leak into an untagged one.
            group_include = None
            group_exclude = None
            vm_ignore = None

            # Get the VM tags from API.
            vm_tags = __get_vm_tags(api_object, node, vm['vmid'])
            if vm_tags is not None:
                group_include, group_exclude, vm_ignore = __get_proxlb_groups(vm_tags)

            # Get wildcard match for VMs to ignore if a wildcard pattern was
            # previously found. Wildcards may slow down the task when using
            # many patterns in the ignore list. Therefore, run this only if
            # a wildcard pattern was found. We also do not need to validate
            # this if the VM is already being ignored by a defined tag.
            if vm_ignore_wildcard and not vm_ignore:
                vm_ignore = __check_vm_name_wildcard_pattern(vm['name'], ignore_vms_list)

            if vm['status'] == 'running' and vm['name'] not in ignore_vms_list and not vm_ignore:
                vm_statistics[vm['name']] = {}
                vm_statistics[vm['name']]['group_include'] = group_include
                vm_statistics[vm['name']]['group_exclude'] = group_exclude
                vm_statistics[vm['name']]['cpu_total'] = vm['cpus']
                vm_statistics[vm['name']]['cpu_used'] = vm['cpu']
                vm_statistics[vm['name']]['memory_total'] = vm['maxmem']
                vm_statistics[vm['name']]['memory_used'] = vm['mem']
                vm_statistics[vm['name']]['disk_total'] = vm['maxdisk']
                vm_statistics[vm['name']]['disk_used'] = vm['disk']
                vm_statistics[vm['name']]['vmid'] = vm['vmid']
                vm_statistics[vm['name']]['node_parent'] = node['node']
                # Rebalancing node will be overwritten after calculations.
                # If the vm stays on the node, it will be removed at a
                # later time.
                vm_statistics[vm['name']]['node_rebalance'] = node['node']
                logging.info(f'{info_prefix} Added vm {vm["name"]}.')

    logging.info(f'{info_prefix} Created VM statistics.')
    return vm_statistics


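# Each VM entry carries both 'node_parent' (where the VM currently runs) and
# 'node_rebalance' (where it should go). They start out identical;
# balancing_calculations() later rewrites 'node_rebalance' and drops every VM
# whose two values still match, leaving only the actual migrations.

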
def __validate_ignore_vm_wildcard(ignore_vms):
    """ Validate if a wildcard is used for ignored VMs. """
    # Return an explicit bool and tolerate an unset ignore_vms option.
    return bool(ignore_vms) and '*' in ignore_vms


def __check_vm_name_wildcard_pattern(vm_name, ignore_vms_list):
    """ Validate if the VM name matches a wildcard pattern from the ignore list. """
    for ignore_vm in ignore_vms_list:
        if '*' in ignore_vm:
            if ignore_vm[:-1] in vm_name:
                return True


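# Note that the matching above is a substring test on the pattern with its
# trailing '*' stripped: the pattern 'test*' therefore matches 'test01' as
# well as 'mytest01'. Only a single trailing wildcard per entry is supported.

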
def __get_vm_tags(api_object, node, vmid):
    """ Get the tags of a VM for a given VMID. """
    info_prefix = 'Info: [api-get-vm-tags]:'

    vm_config = api_object.nodes(node['node']).qemu(vmid).config.get()
    logging.info(f'{info_prefix} Got VM tags from API.')
    return vm_config.get('tags', None)


def __get_proxlb_groups(vm_tags):
    """ Get ProxLB related include and exclude groups. """
    info_prefix = 'Info: [api-get-vm-include-exclude-tags]:'
    group_include = None
    group_exclude = None
    vm_ignore = None

    group_list = re.split(";", vm_tags)
    for group in group_list:

        if group.startswith('plb_include_'):
            logging.info(f'{info_prefix} Got PLB include group.')
            group_include = group

        if group.startswith('plb_exclude_'):
            logging.info(f'{info_prefix} Got PLB exclude group.')
            group_exclude = group

        if group.startswith('plb_ignore_vm'):
            logging.info(f'{info_prefix} Got PLB ignore group.')
            vm_ignore = True

    return group_include, group_exclude, vm_ignore


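# Example: a VM tagged 'plb_include_webfarm;plb_exclude_db' in Proxmox is
# grouped together with all other 'plb_include_webfarm' VMs and spread apart
# from the other 'plb_exclude_db' VMs, while a 'plb_ignore_vm' tag takes the
# VM out of rebalancing entirely. (The group names here are illustrative.)

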
def balancing_calculations(balancing_method, node_statistics, vm_statistics, balanciness):
    """ Calculate re-balancing of VMs on present nodes across the cluster. """
    info_prefix = 'Info: [rebalancing-calculator]:'
    balanciness = int(balanciness)
    rebalance = True
    processed_vms = []
    # Safety cap to guarantee that the loop below always terminates.
    emergency_counter = 0

    # Validate for a supported balancing method.
    __validate_balancing_method(balancing_method)

    # Rebalance VMs with the highest resource usage to a new
    # node until reaching the desired balanciness.
    while rebalance and emergency_counter < 10000:
        emergency_counter += 1
        rebalance = __validate_balanciness(balanciness, balancing_method, node_statistics)

        if rebalance:
            resource_highest_used_resources_vm, processed_vms = __get_most_used_resources_vm(balancing_method, vm_statistics, processed_vms)
            resource_highest_free_resources_node = __get_most_free_resources_node(balancing_method, node_statistics)
            node_statistics, vm_statistics = __update_resource_statistics(resource_highest_used_resources_vm, resource_highest_free_resources_node,
                                                                          vm_statistics, node_statistics, balancing_method)

    # Honour groupings for include and exclude groups for rebalancing VMs.
    node_statistics, vm_statistics = __get_vm_tags_include_groups(vm_statistics, node_statistics, balancing_method)
    node_statistics, vm_statistics = __get_vm_tags_exclude_groups(vm_statistics, node_statistics, balancing_method)

    # Remove VMs that are not being relocated.
    vms_to_remove = [vm_name for vm_name, vm_info in vm_statistics.items() if 'node_rebalance' in vm_info and vm_info['node_rebalance'] == vm_info.get('node_parent')]
    for vm_name in vms_to_remove:
        del vm_statistics[vm_name]

    logging.info(f'{info_prefix} Balancing calculations done.')
    return node_statistics, vm_statistics


def __validate_balancing_method(balancing_method):
    """ Validate for a valid and supported balancing method. """
    error_prefix = 'Error: [balancing-method-validation]:'
    info_prefix = 'Info: [balancing-method-validation]:'

    if balancing_method not in ['memory', 'disk', 'cpu']:
        logging.error(f'{error_prefix} Invalid balancing method: {balancing_method}')
        sys.exit(2)
    else:
        logging.info(f'{info_prefix} Valid balancing method: {balancing_method}')


def __validate_balanciness(balanciness, balancing_method, node_statistics):
    """ Validate the balanciness to decide whether further rebalancing is needed. """
    info_prefix = 'Info: [balanciness-validation]:'
    node_resource_free_percent_list = []

    for node_name, node_info in node_statistics.items():
        node_resource_free_percent_list.append(node_info[f'{balancing_method}_free_percent'])

    node_resource_free_percent_list_sorted = sorted(node_resource_free_percent_list)
    node_lowest_percent = node_resource_free_percent_list_sorted[0]
    node_highest_percent = node_resource_free_percent_list_sorted[-1]

    if (node_lowest_percent + balanciness) < node_highest_percent:
        logging.info(f'{info_prefix} Rebalancing for {balancing_method} is needed.')
        return True
    else:
        logging.info(f'{info_prefix} Rebalancing for {balancing_method} is not needed.')
        return False


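# Worked example: with a balanciness of 10 and memory balancing, a cluster
# whose fullest node has 45% memory free and whose emptiest node has 60%
# free keeps rebalancing (45 + 10 < 60), whereas a 45%/52% spread stops the
# loop because the gap lies within the allowed balanciness.

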
def __get_most_used_resources_vm(balancing_method, vm_statistics, processed_vms):
    """ Get and return the most used resources of a VM by the defined balancing method. """
    # The balancing method is pre-validated, so the metric key can be built
    # directly; already processed VMs are pushed to the bottom of the ranking.
    vm = max(vm_statistics.items(), key=lambda item: item[1][f'{balancing_method}_used'] if item[0] not in processed_vms else -float('inf'))
    processed_vms.append(vm[0])
    return vm, processed_vms


def __get_most_free_resources_node(balancing_method, node_statistics):
    """ Get and return the most free resources of a node by the defined balancing method. """
    return max(node_statistics.items(), key=lambda item: item[1][f'{balancing_method}_free'])


def __update_resource_statistics(resource_highest_used_resources_vm, resource_highest_free_resources_node, vm_statistics, node_statistics, balancing_method):
    """ Update VM and node resource statistics. """
    info_prefix = 'Info: [rebalancing-resource-statistics-update]:'

    if resource_highest_used_resources_vm[1]['node_parent'] != resource_highest_free_resources_node[0]:
        vm_name = resource_highest_used_resources_vm[0]
        vm_node_parent = resource_highest_used_resources_vm[1]['node_parent']
        vm_node_rebalance = resource_highest_free_resources_node[0]
        vm_resource_used = vm_statistics[vm_name][f'{balancing_method}_used']

        # Update dictionaries for new values
        # Assign new rebalance node to vm
        vm_statistics[vm_name]['node_rebalance'] = vm_node_rebalance

        # Recalculate values for nodes
        ## Add the freed up resources to the old parent node
        node_statistics[vm_node_parent][f'{balancing_method}_used'] = int(node_statistics[vm_node_parent][f'{balancing_method}_used']) - int(vm_resource_used)
        node_statistics[vm_node_parent][f'{balancing_method}_free'] = int(node_statistics[vm_node_parent][f'{balancing_method}_free']) + int(vm_resource_used)
        node_statistics[vm_node_parent][f'{balancing_method}_free_percent'] = int(int(node_statistics[vm_node_parent][f'{balancing_method}_free']) / int(node_statistics[vm_node_parent][f'{balancing_method}_total']) * 100)

        ## Subtract the newly allocated resources from the new rebalance node
        node_statistics[vm_node_rebalance][f'{balancing_method}_used'] = int(node_statistics[vm_node_rebalance][f'{balancing_method}_used']) + int(vm_resource_used)
        node_statistics[vm_node_rebalance][f'{balancing_method}_free'] = int(node_statistics[vm_node_rebalance][f'{balancing_method}_free']) - int(vm_resource_used)
        node_statistics[vm_node_rebalance][f'{balancing_method}_free_percent'] = int(int(node_statistics[vm_node_rebalance][f'{balancing_method}_free']) / int(node_statistics[vm_node_rebalance][f'{balancing_method}_total']) * 100)

    logging.info(f'{info_prefix} Updated VM and node statistics.')
    return node_statistics, vm_statistics


def __get_vm_tags_include_groups(vm_statistics, node_statistics, balancing_method):
    """ Get VM tags for include groups. """
    info_prefix = 'Info: [rebalancing-tags-group-include]:'
    tags_include_vms = {}
    processed_vm = []

    # Create groups of tags with their belonging VMs.
    for vm_name, vm_values in vm_statistics.items():
        if vm_values.get('group_include', None):
            if not tags_include_vms.get(vm_values['group_include'], None):
                tags_include_vms[vm_values['group_include']] = [vm_name]
            else:
                tags_include_vms[vm_values['group_include']] = tags_include_vms[vm_values['group_include']] + [vm_name]

    # Update the VMs to the corresponding node to their group assignments.
    for group, vm_names in tags_include_vms.items():
        # Skip groups that include only a single VM and move on to the
        # next group instead of returning early.
        if len(vm_names) < 2:
            logging.info(f'{info_prefix} Only one VM in group assignment: {group}.')
            continue

        vm_node_rebalance = False
        logging.info(f'{info_prefix} Create include groups of VM hosts.')
        for vm_name in vm_names:
            if vm_name not in processed_vm:
                if not vm_node_rebalance:
                    # The first VM of the group defines the target node for
                    # all further VMs within the same include group.
                    vm_node_rebalance = vm_statistics[vm_name]['node_rebalance']
                else:
                    _mocked_vm_object = (vm_name, vm_statistics[vm_name])
                    node_statistics, vm_statistics = __update_resource_statistics(_mocked_vm_object, [vm_node_rebalance],
                                                                                  vm_statistics, node_statistics, balancing_method)
                processed_vm.append(vm_name)

    return node_statistics, vm_statistics


def __get_vm_tags_exclude_groups(vm_statistics, node_statistics, balancing_method):
    """ Get VM tags for exclude groups. """
    info_prefix = 'Info: [rebalancing-tags-group-exclude]:'
    tags_exclude_vms = {}
    processed_vm = []

    # Create groups of tags with their belonging VMs.
    for vm_name, vm_values in vm_statistics.items():
        if vm_values.get('group_exclude', None):
            if not tags_exclude_vms.get(vm_values['group_exclude'], None):
                tags_exclude_vms[vm_values['group_exclude']] = [vm_name]
            else:
                tags_exclude_vms[vm_values['group_exclude']] = tags_exclude_vms[vm_values['group_exclude']] + [vm_name]

    # Update the VMs to the corresponding node to their group assignments.
    for group, vm_names in tags_exclude_vms.items():
        # Skip groups that include only a single VM and move on to the
        # next group instead of returning early.
        if len(vm_names) < 2:
            logging.info(f'{info_prefix} Only one VM in group assignment: {group}.')
            continue

        logging.info(f'{info_prefix} Create exclude groups of VM hosts.')
        for vm_name in vm_names:
            if vm_name not in processed_vm:
                # Get a random node and make sure that it is not by accident
                # the currently assigned one, so that VMs sharing an exclude
                # group get spread across different nodes.
                random_node = vm_statistics[vm_name]['node_parent']
                while random_node == vm_statistics[vm_name]['node_parent']:
                    random_node = random.choice(list(node_statistics.keys()))

                _mocked_vm_object = (vm_name, vm_statistics[vm_name])
                node_statistics, vm_statistics = __update_resource_statistics(_mocked_vm_object, [random_node],
                                                                              vm_statistics, node_statistics, balancing_method)
                processed_vm.append(vm_name)

    return node_statistics, vm_statistics


def run_vm_rebalancing(api_object, vm_statistics_rebalanced, app_args):
    """ Run rebalancing of vms to new nodes in cluster. """
    error_prefix = 'Error: [rebalancing-executor]:'
    info_prefix = 'Info: [rebalancing-executor]:'

    if not app_args.dry_run:
        logging.info(f'{info_prefix} Starting to rebalance vms to their new nodes.')
        for vm, value in vm_statistics_rebalanced.items():

            try:
                logging.info(f'{info_prefix} Rebalancing vm {vm} from node {value["node_parent"]} to node {value["node_rebalance"]}.')
                api_object.nodes(value['node_parent']).qemu(value['vmid']).migrate().post(target=value['node_rebalance'], online=1)
            except proxmoxer.core.ResourceException as error_resource:
                logging.critical(f'{error_prefix} {error_resource}')

        if app_args.json:
            logging.info(f'{info_prefix} Printing json output of VM statistics.')
            # json.dumps only builds the string; print it so the JSON
            # actually reaches stdout.
            print(json.dumps(vm_statistics_rebalanced))
    else:
        logging.info(f'{info_prefix} Starting dry-run to rebalance vms to their new nodes.')
        _vm_to_node_list = []
        _vm_to_node_list.append(['VM', 'Current Node', 'Rebalanced Node'])

        for vm_name, vm_values in vm_statistics_rebalanced.items():
            _vm_to_node_list.append([vm_name, vm_values['node_parent'], vm_values['node_rebalance']])

        if app_args.json:
            logging.info(f'{info_prefix} Printing json output of VM statistics.')
            print(json.dumps(vm_statistics_rebalanced))
        else:
            if len(vm_statistics_rebalanced) > 0:
                logging.info(f'{info_prefix} Printing cli output of VM rebalancing.')
                print_table_cli(_vm_to_node_list)
            else:
                logging.info(f'{info_prefix} No rebalancing needed according to the defined balanciness.')
                print('No rebalancing needed according to the defined balanciness.')


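# A dry-run (e.g. proxlb -c proxlb.conf -d) prints the planned movements as a
# right-aligned table; with hypothetical VM and node names it might look like:
#
#        VM   Current Node   Rebalanced Node
#     vm101         node01            node03
#     vm102         node02            node03

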
def print_table_cli(table):
    """ Pretty print a given table to the cli. """
    longest_cols = [
        (max([len(str(row[i])) for row in table]) + 3)
        for i in range(len(table[0]))
    ]

    row_format = "".join(["{:>" + str(longest_col) + "}" for longest_col in longest_cols])
    for row in table:
        print(row_format.format(*row))


def main():
    """ Run ProxLB for balancing VM workloads across a Proxmox cluster. """
    # Initialize ProxLB.
    initialize_logger('CRITICAL')
    app_args = initialize_args()
    config_path = initialize_config_path(app_args)
    pre_validations(config_path)

    # Parse global config
    proxmox_api_host, proxmox_api_user, proxmox_api_pass, proxmox_api_ssl_v, balancing_method, \
        balanciness, ignore_nodes, ignore_vms, daemon, schedule = initialize_config_options(config_path)

    while True:
        # API Authentication.
        api_object = api_connect(proxmox_api_host, proxmox_api_user, proxmox_api_pass, proxmox_api_ssl_v)

        # Get metrics & statistics for vms and nodes.
        node_statistics = get_node_statistics(api_object, ignore_nodes)
        vm_statistics = get_vm_statistics(api_object, ignore_vms)

        # Calculate rebalancing of vms.
        node_statistics_rebalanced, vm_statistics_rebalanced = balancing_calculations(balancing_method, node_statistics, vm_statistics, balanciness)

        # Rebalance vms to new nodes within the cluster.
        run_vm_rebalancing(api_object, vm_statistics_rebalanced, app_args)

        # Validate for any errors.
        post_validations()

        # Validate daemon service.
        validate_daemon(daemon, schedule)


if __name__ == '__main__':
    main()