mirror of
https://github.com/gyptazy/ProxLB.git
synced 2026-04-06 04:41:58 +02:00
Compare commits
142 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
4b8b73e468 | ||
|
|
a75729dd6a | ||
|
|
b8792a87af | ||
|
|
c1261a2d3c | ||
|
|
0035f57738 | ||
|
|
b372d361e7 | ||
|
|
1e096e1aae | ||
|
|
420d669236 | ||
|
|
24aa6aabc6 | ||
|
|
5a9a4af532 | ||
|
|
50f93e5f59 | ||
|
|
33784f60b4 | ||
|
|
9a261aa781 | ||
|
|
366d5bc264 | ||
|
|
96ffa086b1 | ||
|
|
db005c138e | ||
|
|
1168f545e5 | ||
|
|
cc663c0518 | ||
|
|
40de31bc3b | ||
|
|
5884d76ff4 | ||
|
|
7cc59eb6fc | ||
|
|
24b3b35640 | ||
|
|
f2b8829299 | ||
|
|
4b64a041cc | ||
|
|
bd1157127a | ||
|
|
be6e4bbfa0 | ||
|
|
25b631099c | ||
|
|
1d698c5688 | ||
|
|
40f848ad7f | ||
|
|
fd2725c878 | ||
|
|
34b1d72e40 | ||
|
|
ca7db26976 | ||
|
|
94552f9c9e | ||
|
|
32c67b9c96 | ||
|
|
89f337d8c3 | ||
|
|
8a724400b8 | ||
|
|
f96f1d0f64 | ||
|
|
15398712ee | ||
|
|
ddb9963062 | ||
|
|
f18a9f3d4c | ||
|
|
1402ba9732 | ||
|
|
af51f53221 | ||
|
|
bce2d640ef | ||
|
|
1bb1847e45 | ||
|
|
e9543db138 | ||
|
|
a8e8229787 | ||
|
|
d1c91c6f2a | ||
|
|
843691f8b4 | ||
|
|
c9f14946d1 | ||
|
|
77cd7b5388 | ||
|
|
55502f9bed | ||
|
|
f08b823cc4 | ||
|
|
f831d4044f | ||
|
|
e8d8d160a7 | ||
|
|
dbbd4c0ec8 | ||
|
|
fc9a0e2858 | ||
|
|
17eb43db94 | ||
|
|
06610e9b9d | ||
|
|
889b88fd6c | ||
|
|
c5ca3e13e0 | ||
|
|
c1c524f092 | ||
|
|
7ea7defa1f | ||
|
|
6147c0085b | ||
|
|
0b70a9c767 | ||
|
|
d6d22c4096 | ||
|
|
6da54c1255 | ||
|
|
b55b4ea7a0 | ||
|
|
51625fe09e | ||
|
|
f3b9d33c87 | ||
|
|
8e4326f77a | ||
|
|
3d642a7404 | ||
|
|
552364471d | ||
|
|
cf15866270 | ||
|
|
7d4def14b1 | ||
|
|
20ad9389d4 | ||
|
|
d73073a187 | ||
|
|
b307d556e5 | ||
|
|
17c4dc445e | ||
|
|
03ea29ae81 | ||
|
|
e22a27652c | ||
|
|
c3ae3e1f8c | ||
|
|
094a9b2ebb | ||
|
|
d8b1c74155 | ||
|
|
c8fad9605c | ||
|
|
e8d0c13f16 | ||
|
|
f781e74d3a | ||
|
|
3cbdb12741 | ||
|
|
a714ea8d64 | ||
|
|
d81d4380de | ||
|
|
31498da25a | ||
|
|
7f59f69eab | ||
|
|
200b7cd170 | ||
|
|
94df2fd1a6 | ||
|
|
8d8fd518fe | ||
|
|
37bb226cf0 | ||
|
|
48b8a07135 | ||
|
|
222beb360c | ||
|
|
f9b30d0af4 | ||
|
|
28f87e2907 | ||
|
|
7587e1beaf | ||
|
|
5542b9bc6c | ||
|
|
16c5ee4d74 | ||
|
|
21a73b71df | ||
|
|
d3c055cbad | ||
|
|
24b7d2c860 | ||
|
|
6e87e2d478 | ||
|
|
2593b87d3f | ||
|
|
6310262e97 | ||
|
|
38712e90a3 | ||
|
|
c2b2f62462 | ||
|
|
adde04639e | ||
|
|
a4b1f4af24 | ||
|
|
55c714a888 | ||
|
|
3cd631db20 | ||
|
|
d44da076cc | ||
|
|
95e8fc5737 | ||
|
|
50a9e91633 | ||
|
|
cca4c454dd | ||
|
|
17c9c98bbc | ||
|
|
486acad44f | ||
|
|
f73261e68c | ||
|
|
464644def8 | ||
|
|
93b7894a6f | ||
|
|
d53a6f695f | ||
|
|
029ec31ad9 | ||
|
|
045159eb8d | ||
|
|
3415e0ccec | ||
|
|
ab44d97c7c | ||
|
|
139bcf04f1 | ||
|
|
1420183be7 | ||
|
|
31572830e7 | ||
|
|
5c96fc49eb | ||
|
|
7ddb7ca205 | ||
|
|
8cc2d7188a | ||
|
|
4620bde999 | ||
|
|
45b35d88c4 | ||
|
|
200244bce1 | ||
|
|
fe715f203e | ||
|
|
959c3b5f8d | ||
|
|
ef8b97efc2 | ||
|
|
e4d40b460b | ||
|
|
39142780d5 |
2
.changelogs/1.0.3/51_add_storage_balancing.yml
Normal file
2
.changelogs/1.0.3/51_add_storage_balancing.yml
Normal file
@@ -0,0 +1,2 @@
|
||||
added:
|
||||
- Add storage balancing function. [#51]
|
||||
6
.changelogs/1.0.3/53_code_improvements.yml
Normal file
6
.changelogs/1.0.3/53_code_improvements.yml
Normal file
@@ -0,0 +1,6 @@
|
||||
added:
|
||||
- Add a convert function to cast all bool alike options from configparser to bools. [#53]
|
||||
- Add a config parser options for future features. [#53]
|
||||
- Add a config versio schema that must be supported by ProxLB. [#53]
|
||||
changed:
|
||||
- Improve the underlying code base for future implementations. [#53]
|
||||
@@ -0,0 +1,2 @@
|
||||
added:
|
||||
- Add feature to allow the API hosts being provided as a comma separated list. [#60]
|
||||
@@ -0,0 +1,2 @@
|
||||
fixed:
|
||||
- Improved the overall validation and error handling. [#64]
|
||||
2
.changelogs/1.0.3/67_fix_anti_affinity_rules.yml
Normal file
2
.changelogs/1.0.3/67_fix_anti_affinity_rules.yml
Normal file
@@ -0,0 +1,2 @@
|
||||
fixed:
|
||||
- Fix anti-affinity rules not evaluating a new and different node. [#67]
|
||||
2
.changelogs/1.0.3/68_adjust_logging_ha_services.yml
Normal file
2
.changelogs/1.0.3/68_adjust_logging_ha_services.yml
Normal file
@@ -0,0 +1,2 @@
|
||||
changed:
|
||||
- Provide a more reasonable output when HA services are not active in a Proxmox cluster. [#68]
|
||||
@@ -0,0 +1,2 @@
|
||||
fixed:
|
||||
- Fix handling of unset `ignore_nodes` and `ignore_vms` resulted in an attribute error. [#71]
|
||||
2
.changelogs/1.0.3/74_fix_documentation_master_only.yml
Normal file
2
.changelogs/1.0.3/74_fix_documentation_master_only.yml
Normal file
@@ -0,0 +1,2 @@
|
||||
fixed:
|
||||
- Fix documentation for the master_only parameter placed in the wrong config section. [#74]
|
||||
2
.changelogs/1.0.3/8_add_best_next_node_for_placement.yml
Normal file
2
.changelogs/1.0.3/8_add_best_next_node_for_placement.yml
Normal file
@@ -0,0 +1,2 @@
|
||||
added:
|
||||
- Add cli arg `-b` to return the next best node for next VM/CT placement. [#8]
|
||||
2
.changelogs/1.0.3/bug_fix_cluster_master_only.yml
Normal file
2
.changelogs/1.0.3/bug_fix_cluster_master_only.yml
Normal file
@@ -0,0 +1,2 @@
|
||||
fixed:
|
||||
- Fixed `master_only` function by inverting the condition.
|
||||
@@ -0,0 +1,4 @@
|
||||
fixed:
|
||||
- Fix bug in the `proxlb.conf` in the vm_balancing section.
|
||||
added:
|
||||
- Add doc how to add dedicated user for authentication. (by @Dulux-Oz)
|
||||
1
.changelogs/1.0.3/release_meta.yml
Normal file
1
.changelogs/1.0.3/release_meta.yml
Normal file
@@ -0,0 +1 @@
|
||||
date: 2024-09-12
|
||||
2
.changelogs/1.0.4/58_add_maintenance_mode.yml
Normal file
2
.changelogs/1.0.4/58_add_maintenance_mode.yml
Normal file
@@ -0,0 +1,2 @@
|
||||
added:
|
||||
- Add maintenance mode to evacuate a node and move workloads for other nodes in the cluster. [#58]
|
||||
2
.changelogs/1.0.4/75_fix_cpu_balancing.yml
Normal file
2
.changelogs/1.0.4/75_fix_cpu_balancing.yml
Normal file
@@ -0,0 +1,2 @@
|
||||
fixed:
|
||||
- Fix CPU balancing where calculations are done in float instead of int. (by @glitchvern) [#75]
|
||||
3
.changelogs/1.0.4/79_storage_balancing_whitelist.yml
Normal file
3
.changelogs/1.0.4/79_storage_balancing_whitelist.yml
Normal file
@@ -0,0 +1,3 @@
|
||||
changed:
|
||||
- Run storage balancing only on supported shared storages. [#79]
|
||||
- Run storage balancing only when needed to save time. [#79]
|
||||
2
.changelogs/1.0.4/81_documentation_infrastrucutre.yml
Normal file
2
.changelogs/1.0.4/81_documentation_infrastrucutre.yml
Normal file
@@ -0,0 +1,2 @@
|
||||
fixed:
|
||||
- Fix documentation for the underlying infrastructure. [#81]
|
||||
2
.changelogs/1.0.4/89_add_version_output.yml
Normal file
2
.changelogs/1.0.4/89_add_version_output.yml
Normal file
@@ -0,0 +1,2 @@
|
||||
added:
|
||||
- Add version output cli arg. [#89]
|
||||
2
.changelogs/1.0.4/91_make_api_timeout_configureable.yml
Normal file
2
.changelogs/1.0.4/91_make_api_timeout_configureable.yml
Normal file
@@ -0,0 +1,2 @@
|
||||
added:
|
||||
- Add feature to make API timeout configureable. [#91]
|
||||
1
.changelogs/1.0.4/release_meta.yml
Normal file
1
.changelogs/1.0.4/release_meta.yml
Normal file
@@ -0,0 +1 @@
|
||||
date: 2024-10-11
|
||||
2
.changelogs/1.0.5/104_docs_bool_adjustment.yml
Normal file
2
.changelogs/1.0.5/104_docs_bool_adjustment.yml
Normal file
@@ -0,0 +1,2 @@
|
||||
changed:
|
||||
- Change docs to make bool usage in configs more clear (by @gyptazy). [#104]
|
||||
@@ -0,0 +1,2 @@
|
||||
fixed:
|
||||
- Fix evaluation of maintenance mode where comparing list & string resulted in a crash (by @glitchvern). [#106]
|
||||
2
.changelogs/1.0.5/107_fix_offline_node_eval.yml
Normal file
2
.changelogs/1.0.5/107_fix_offline_node_eval.yml
Normal file
@@ -0,0 +1,2 @@
|
||||
fixed:
|
||||
- Fix node (and its objects) evaluation when not reachable, e.g., maintenance (by @gyptazy). [#107]
|
||||
2
.changelogs/1.0.5/113_fix_migration_from_local_disk.yml
Normal file
2
.changelogs/1.0.5/113_fix_migration_from_local_disk.yml
Normal file
@@ -0,0 +1,2 @@
|
||||
fixed:
|
||||
- Fix migration from local disks (by @greenlogles). [#113]
|
||||
2
.changelogs/1.0.5/98_fix_log_verbosity_opts.yml
Normal file
2
.changelogs/1.0.5/98_fix_log_verbosity_opts.yml
Normal file
@@ -0,0 +1,2 @@
|
||||
fixed:
|
||||
- Fix allowed values (add DEBUG, WARNING) for log verbosity (by @gyptazy). [#98]
|
||||
1
.changelogs/1.0.5/release_meta.yml
Normal file
1
.changelogs/1.0.5/release_meta.yml
Normal file
@@ -0,0 +1 @@
|
||||
date: 2024-10-30
|
||||
@@ -0,0 +1,2 @@
|
||||
fixed:
|
||||
- Fix that a scheduler time definition of 1 (int) gets wrongly interpreted as a bool (by @gyptazy). [#115]
|
||||
@@ -0,0 +1,2 @@
|
||||
fixed:
|
||||
- Fix maintenance mode when using cli arg and config mode by using the merged list (by @CartCaved). [#119]
|
||||
1
.changelogs/1.0.6/release_meta.yml
Normal file
1
.changelogs/1.0.6/release_meta.yml
Normal file
@@ -0,0 +1 @@
|
||||
date: 2024-12-24
|
||||
11
.changelogs/1.1.0/114_refactor_code_base.yml
Normal file
11
.changelogs/1.1.0/114_refactor_code_base.yml
Normal file
@@ -0,0 +1,11 @@
|
||||
fixed:
|
||||
- Refactored code base for ProxLB [#114]
|
||||
- Switched to `pycodestyle` for linting [#114]
|
||||
- Package building will be done within GitHub actions pipeline [#114]
|
||||
- ProxLB now only returns a warning when no guests for further balancing are not present (instead of quitting) [132#]
|
||||
- All nodes (according to the free resources) will be used now [#130]
|
||||
- Fixed logging outputs where highest/lowest were mixed-up [#129]
|
||||
- Stop balancing when movement would get worste (new force param to enfoce for affinity rules) [#128]
|
||||
- Added requested documentation regarding Proxmox HA groups [#127]
|
||||
- Rewrite of the whole affinity/anti-affinity rules evaluation and placement [#123]
|
||||
- Fixed the `ignore` parameter for nodes where the node and guests on the node will be untouched [#102]
|
||||
@@ -0,0 +1,2 @@
|
||||
feature:
|
||||
- Add Proxmox API authentication support. [#125]
|
||||
2
.changelogs/1.1.0/137_fix_systemd_unit_file.yml
Normal file
2
.changelogs/1.1.0/137_fix_systemd_unit_file.yml
Normal file
@@ -0,0 +1,2 @@
|
||||
fixed:
|
||||
- Fix the systemd unit file to start ProxLB after pveproxy (by @robertdahlem). [#137]
|
||||
1
.changelogs/1.1.0/release_meta.yml
Normal file
1
.changelogs/1.1.0/release_meta.yml
Normal file
@@ -0,0 +1 @@
|
||||
date: 2025-04-01
|
||||
2
.changelogs/1.1.1/163_fix_ignore_vm_tag.yml
Normal file
2
.changelogs/1.1.1/163_fix_ignore_vm_tag.yml
Normal file
@@ -0,0 +1,2 @@
|
||||
fixed:
|
||||
- Fix tag evluation for VMs for being ignored for further balancing [#163]
|
||||
2
.changelogs/1.1.1/165_improve_logging_servity.yml
Normal file
2
.changelogs/1.1.1/165_improve_logging_servity.yml
Normal file
@@ -0,0 +1,2 @@
|
||||
fixed:
|
||||
- Improve logging verbosity of messages that had a wrong servity [#165]
|
||||
2
.changelogs/1.1.1/168_add_more_flexible_schedules.yml
Normal file
2
.changelogs/1.1.1/168_add_more_flexible_schedules.yml
Normal file
@@ -0,0 +1,2 @@
|
||||
feature:
|
||||
- Add a more flexible way to define schedules in minutes or hours (by @gyptazy) [#168]
|
||||
@@ -0,0 +1,2 @@
|
||||
fixed:
|
||||
- Fix Python path for Docker entrypoint (by @crandler) [#170]
|
||||
@@ -0,0 +1,2 @@
|
||||
fixed:
|
||||
- Honor the value when balancing should not be performed and stop balancing [#174]
|
||||
@@ -0,0 +1,2 @@
|
||||
changed:
|
||||
- Change the default behaviour of the daemon mode to active [#176]
|
||||
@@ -0,0 +1,2 @@
|
||||
changed:
|
||||
- Change the default banalcing mode to used instead of assigned [#180]
|
||||
@@ -0,0 +1,2 @@
|
||||
feature:
|
||||
- Add validation for the minimum required permissions of a user in Proxmox [#184]
|
||||
@@ -0,0 +1,2 @@
|
||||
fix:
|
||||
- add handler to log messages with severity less than info to the screen when there is no systemd integration, for instance, inside a docker container (by @glitchvern) [#185]
|
||||
@@ -0,0 +1,2 @@
|
||||
fixed:
|
||||
- allow the use of minutes instead of hours and only accept hours or minutes in the format (by @glitchvern) [#187]
|
||||
@@ -0,0 +1,2 @@
|
||||
fixed:
|
||||
- Set cpu_used to the cpu usage, which is a percent, times the total number of cores to get a number where guest cpu_used can be added to nodes cpu_used and be meaningful (by @glitchvern) [#195]
|
||||
@@ -0,0 +1,2 @@
|
||||
fixed:
|
||||
- Remove hard coded memory usage from lowest usage node and use method and mode specified in configuration instead (by @glitchvern) [#197]
|
||||
2
.changelogs/1.1.1/200_requery_zero_guest_cpu_used.yml
Normal file
2
.changelogs/1.1.1/200_requery_zero_guest_cpu_used.yml
Normal file
@@ -0,0 +1,2 @@
|
||||
fixed:
|
||||
- Requery a guest if that running guest reports 0 cpu usage (by @glitchvern) [#200]
|
||||
@@ -0,0 +1,2 @@
|
||||
fixed:
|
||||
- Fix the guest type relationship in the logs when a migration job failed (by @gyptazy) [#204]
|
||||
@@ -0,0 +1,2 @@
|
||||
added:
|
||||
- Providing the API upstream error message when migration fails in debug mode (by @gyptazy) [#205]
|
||||
1
.changelogs/1.1.1/release_meta.yml
Normal file
1
.changelogs/1.1.1/release_meta.yml
Normal file
@@ -0,0 +1 @@
|
||||
date: 2025-04-20
|
||||
2
.changelogs/1.1.2/137_fix_systemd_unit_file.yml
Normal file
2
.changelogs/1.1.2/137_fix_systemd_unit_file.yml
Normal file
@@ -0,0 +1,2 @@
|
||||
fixed:
|
||||
- Fix systemd unit file to run after network on non PVE nodes (by @robertdahlem) [#137]
|
||||
@@ -0,0 +1,2 @@
|
||||
added:
|
||||
- Add a configurable retry mechanism when connecting to the Proxmox API (by @gyptazy) [#157]
|
||||
@@ -0,0 +1,2 @@
|
||||
added:
|
||||
- Add 1-to-1 relationships between guest and hypervisor node to ping a guest on a node (by @gyptazy) [#218]
|
||||
@@ -0,0 +1,2 @@
|
||||
fixed:
|
||||
- Force type cast cpu count of guests to int for some corner cases where a str got returned (by @gyptazy). [#222]
|
||||
1
.changelogs/1.1.2/release_meta.yml
Normal file
1
.changelogs/1.1.2/release_meta.yml
Normal file
@@ -0,0 +1 @@
|
||||
date: 2025-05-13
|
||||
3
.flake8
3
.flake8
@@ -1,3 +0,0 @@
|
||||
[flake8]
|
||||
per-file-ignores =
|
||||
proxlb: E501,E221,E266,E231,E127,E222,E128
|
||||
27
.github/ISSUE_TEMPLATE/bug_report.md
vendored
Normal file
27
.github/ISSUE_TEMPLATE/bug_report.md
vendored
Normal file
@@ -0,0 +1,27 @@
|
||||
---
|
||||
name: Bug report
|
||||
about: Create a bug report
|
||||
title: "`Bug`:"
|
||||
labels: bug, needs-analysis
|
||||
assignees: ''
|
||||
|
||||
---
|
||||
|
||||
## General
|
||||
<-- Describe the bug from a high level perspective. -->
|
||||
|
||||
## Weighting
|
||||
Score: <-- Define a scoring from 0-10 (10 highest, most urgent) -->
|
||||
|
||||
## Config
|
||||
<-- Attach the ProxLB configuration for further analysis. Please take car to NOT publish your API credentials! -->
|
||||
|
||||
## Log
|
||||
<-- Attach the ProxLB debug log for further analysis. Please take car to NOT publish your API credentials! -->
|
||||
|
||||
## Meta
|
||||
Please provide some more information about your setup. This includes where you obtained ProxLB (e.g., as a `.deb` file, from the repository or container image) and also which version you're running in which mode. You can obtain the used version from you image version, your local repository information or by running `proxlb -v`.
|
||||
|
||||
Version: <-- DEFINE_VERSION -->
|
||||
Installed from: <-- DEFINE_INSTALL_SOURCE -->
|
||||
Running as: <-- Container, local on Proxmox, local on all Proxmox, dedicated -->
|
||||
14
.github/ISSUE_TEMPLATE/feature_request.md
vendored
Normal file
14
.github/ISSUE_TEMPLATE/feature_request.md
vendored
Normal file
@@ -0,0 +1,14 @@
|
||||
---
|
||||
name: Feature request
|
||||
about: Create a new request for a missing feature
|
||||
title: "`Feature`: "
|
||||
labels: feature, needs-analysis
|
||||
assignees: ''
|
||||
|
||||
---
|
||||
|
||||
## General
|
||||
<-- Describe the feature idea from a high level perspective. -->
|
||||
|
||||
## Details
|
||||
<-- Provide some more details about the new feature request and provide examples. -->
|
||||
26
.github/workflows/02-create-package.yml
vendored
26
.github/workflows/02-create-package.yml
vendored
@@ -1,26 +0,0 @@
|
||||
name: Run basic pipeline on push
|
||||
on: [push]
|
||||
jobs:
|
||||
build:
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
matrix:
|
||||
python-version: ["3.8"]
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- name: Set up Python for ProxLB
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
- name: Install dependencies for ProxLB
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install pytest proxmoxer flake8
|
||||
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
|
||||
- name: Run Python linting
|
||||
run: |
|
||||
python3 -m flake8 proxlb
|
||||
- name: Create distro packages
|
||||
run: |
|
||||
cd packaging
|
||||
./01_package.sh
|
||||
21
.github/workflows/10-code-liniting.yml
vendored
Normal file
21
.github/workflows/10-code-liniting.yml
vendored
Normal file
@@ -0,0 +1,21 @@
|
||||
name: Code linting
|
||||
on: [push]
|
||||
jobs:
|
||||
build:
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
matrix:
|
||||
python-version: ["3.8"]
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- name: Setup dependencies for code linting
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
- name: Install additional dependencies for code linting
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get -y install python3-pycodestyle pycodestyle
|
||||
- name: Run code linting on ProxLB Python code
|
||||
run: |
|
||||
pycodestyle proxlb/*
|
||||
78
.github/workflows/20-pipeline-build-deb-package.yml
vendored
Normal file
78
.github/workflows/20-pipeline-build-deb-package.yml
vendored
Normal file
@@ -0,0 +1,78 @@
|
||||
name: "Build package: .deb"
|
||||
on: [push]
|
||||
jobs:
|
||||
lint-code-proxlb:
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
matrix:
|
||||
python-version: ["3.8"]
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- name: Setup dependencies for code linting
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
- name: Install additional dependencies for code linting
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get -y install python3-pycodestyle pycodestyle
|
||||
- name: Run code linting on ProxLB Python code
|
||||
run: |
|
||||
pycodestyle proxlb/* && \
|
||||
echo "OK: Code linting successfully performed on ProxLB code."
|
||||
|
||||
build-package-debian:
|
||||
needs: lint-code-proxlb
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Check out repository
|
||||
uses: actions/checkout@v3
|
||||
with:
|
||||
ref: ${{ github.ref }}
|
||||
|
||||
- name: Set up Docker with Debian image
|
||||
run: |
|
||||
docker pull debian:latest
|
||||
|
||||
- name: Build DEB package in Docker container
|
||||
run: |
|
||||
docker run --rm -v $(pwd):/workspace -w /workspace debian:latest bash -c "
|
||||
# Install dependencies
|
||||
apt-get update && \
|
||||
apt-get install -y python3 python3-setuptools debhelper dh-python python3-pip python3-stdeb python3-proxmoxer python3-requests python3-urllib3 devscripts python3-all && \
|
||||
# Build package using stdeb / setuptools
|
||||
# python3 setup.py --command-packages=stdeb.command bdist_deb && \
|
||||
# Build native package
|
||||
dpkg-buildpackage -us -uc && \
|
||||
mkdir package && \
|
||||
mv ../*.deb package/ && \
|
||||
echo 'OK: Debian package successfully created.'
|
||||
"
|
||||
|
||||
- name: Upload Debian package python3-proxlb as artifact
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: debian-package
|
||||
path: package/*.deb
|
||||
|
||||
integration-test-debian:
|
||||
needs: build-package-debian
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Download Debian package artifact
|
||||
uses: actions/download-artifact@v4
|
||||
with:
|
||||
name: debian-package
|
||||
path: package/
|
||||
|
||||
- name: Set up Docker with Debian image
|
||||
run: docker pull debian:latest
|
||||
|
||||
- name: Install and test Debian package in Docker container
|
||||
run: |
|
||||
docker run --rm -v $(pwd)/package:/package -w /package debian:latest bash -c "
|
||||
apt-get update && \
|
||||
apt-get install -y systemd && \
|
||||
apt-get install -y ./proxlb*.deb && \
|
||||
python3 -c 'import proxlb; print(\"OK: Debian package successfully installed.\")'
|
||||
"
|
||||
96
.github/workflows/20-pipeline-build-rpm-package.yml
vendored
Normal file
96
.github/workflows/20-pipeline-build-rpm-package.yml
vendored
Normal file
@@ -0,0 +1,96 @@
|
||||
name: "Build package: .rpm"
|
||||
on: [push]
|
||||
jobs:
|
||||
lint-code-proxlb:
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
matrix:
|
||||
python-version: ["3.8"]
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- name: Setup dependencies for code linting
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
- name: Install additional dependencies for code linting
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get -y install python3-pycodestyle pycodestyle
|
||||
- name: Run code linting on ProxLB Python code
|
||||
run: |
|
||||
pycodestyle proxlb/* && \
|
||||
echo "OK: Code linting successfully performed on ProxLB code."
|
||||
|
||||
build-package-rpm:
|
||||
needs: lint-code-proxlb
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Check out repository
|
||||
uses: actions/checkout@v3
|
||||
with:
|
||||
ref: 'development'
|
||||
|
||||
- name: Set up Docker with Debian image
|
||||
run: |
|
||||
docker pull debian:latest
|
||||
|
||||
- name: Build DEB package in Docker container
|
||||
run: |
|
||||
docker run --rm -v $(pwd):/workspace -w /workspace debian:latest bash -c "
|
||||
# Install dependencies
|
||||
apt-get update && \
|
||||
apt-get install -y python3 python3-setuptools rpm debhelper dh-python python3-pip python3-stdeb python3-proxmoxer python3-requests python3-urllib3 && \
|
||||
# Build package
|
||||
python3 setup.py --command-packages=stdeb.command bdist_rpm && \
|
||||
echo 'OK: RPM package successfully created.'
|
||||
"
|
||||
|
||||
- name: Upload RPM package python3-proxlb as artifact
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: rpm-package
|
||||
path: dist/*.rpm
|
||||
|
||||
# integration-test-rpm-rockylinux-9:
|
||||
# needs: build-package-rpm
|
||||
# runs-on: ubuntu-latest
|
||||
# steps:
|
||||
# - name: Download RPM package artifact
|
||||
# uses: actions/download-artifact@v4
|
||||
# with:
|
||||
# name: rpm-package
|
||||
# path: dist/
|
||||
|
||||
# - name: Set up Docker with RockyLinux 9 image
|
||||
# run: docker pull rockylinux:9
|
||||
|
||||
# - name: Install and test RPM package in Rocky Linux Docker container
|
||||
# run: |
|
||||
# docker run --rm -v $(pwd)/dist:/dist -w /dist rockylinux:9 bash -c "
|
||||
# # DNF does not handle wildcards well
|
||||
# rpm_file=\$(ls proxlb*.noarch.rpm) && \
|
||||
# dnf install -y \$rpm_file && \
|
||||
# python3 -c 'import proxlb; print(\"OK: RPM package successfully installed.\")'
|
||||
# "
|
||||
|
||||
# integration-test-rpm-rockylinux-8:
|
||||
# needs: build-package-rpm
|
||||
# runs-on: ubuntu-latest
|
||||
# steps:
|
||||
# - name: Download RPM package artifact
|
||||
# uses: actions/download-artifact@v4
|
||||
# with:
|
||||
# name: rpm-package
|
||||
# path: dist/
|
||||
|
||||
# - name: Set up Docker with RockyLinux 8 image
|
||||
# run: docker pull rockylinux:8
|
||||
|
||||
# - name: Install and test RPM package in Rocky Linux Docker container
|
||||
# run: |
|
||||
# docker run --rm -v $(pwd)/dist:/dist -w /dist rockylinux:8 bash -c "
|
||||
# # DNF does not handle wildcards well
|
||||
# rpm_file=\$(ls proxlb*.noarch.rpm) && \
|
||||
# dnf install -y \$rpm_file && \
|
||||
# python3 -c 'import proxlb; print(\"OK: RPM package successfully installed.\")'
|
||||
# "
|
||||
9
.gitignore
vendored
9
.gitignore
vendored
@@ -1,2 +1,7 @@
|
||||
packaging/changelog-fragments-creator/
|
||||
dev/
|
||||
__pycache__
|
||||
*.pyc
|
||||
.DS_Store
|
||||
build/
|
||||
dist/
|
||||
*.egg-info/
|
||||
proxlb_dev.yaml
|
||||
|
||||
138
CHANGELOG.md
138
CHANGELOG.md
@@ -5,35 +5,157 @@ All notable changes to this project will be documented in this file.
|
||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
||||
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||
|
||||
## [1.1.2] - 2025-05-13
|
||||
|
||||
### Added
|
||||
|
||||
- Add a configurable retry mechanism when connecting to the Proxmox API (by @gyptazy) [#157]
|
||||
- Add 1-to-1 relationships between guest and hypervisor node to ping a guest on a node (by @gyptazy) [#218]
|
||||
|
||||
### Fixed
|
||||
|
||||
- Force type cast cpu count of guests to int for some corner cases where a str got returned (by @gyptazy). [#222]
|
||||
- Fix systemd unit file to run after network on non PVE nodes (by @robertdahlem) [#137]
|
||||
|
||||
|
||||
## [1.1.1] - 2025-04-20
|
||||
|
||||
### Added
|
||||
|
||||
- Providing the API upstream error message when migration fails in debug mode (by @gyptazy) [#205]
|
||||
|
||||
### Changed
|
||||
|
||||
- Change the default behaviour of the daemon mode to active [#176]
|
||||
- Change the default banalcing mode to used instead of assigned [#180]
|
||||
|
||||
### Fixed
|
||||
|
||||
- Set cpu_used to the cpu usage, which is a percent, times the total number of cores to get a number where guest cpu_used can be added to nodes cpu_used and be meaningful (by @glitchvern) [#195]
|
||||
- Fix tag evluation for VMs for being ignored for further balancing [#163]
|
||||
- Honor the value when balancing should not be performed and stop balancing [#174]
|
||||
- allow the use of minutes instead of hours and only accept hours or minutes in the format (by @glitchvern) [#187]
|
||||
- Remove hard coded memory usage from lowest usage node and use method and mode specified in configuration instead (by @glitchvern) [#197]
|
||||
- Fix the guest type relationship in the logs when a migration job failed (by @gyptazy) [#204]
|
||||
- Requery a guest if that running guest reports 0 cpu usage (by @glitchvern) [#200]
|
||||
- Fix Python path for Docker entrypoint (by @crandler) [#170]
|
||||
- Improve logging verbosity of messages that had a wrong servity [#165]
|
||||
|
||||
|
||||
## [1.1.0] - 2025-04-01
|
||||
|
||||
### Fixed
|
||||
|
||||
- Refactored code base for ProxLB [#114]
|
||||
- Switched to `pycodestyle` for linting [#114]
|
||||
- Package building will be done within GitHub actions pipeline [#114]
|
||||
- ProxLB now only returns a warning when no guests for further balancing are not present (instead of quitting) [132#]
|
||||
- All nodes (according to the free resources) will be used now [#130]
|
||||
- Fixed logging outputs where highest/lowest were mixed-up [#129]
|
||||
- Stop balancing when movement would get worste (new force param to enfoce for affinity rules) [#128]
|
||||
- Added requested documentation regarding Proxmox HA groups [#127]
|
||||
- Rewrite of the whole affinity/anti-affinity rules evaluation and placement [#123]
|
||||
- Fixed the `ignore` parameter for nodes where the node and guests on the node will be untouched [#102]
|
||||
|
||||
|
||||
## [1.0.6] - 2024-12-24
|
||||
|
||||
### Fixed
|
||||
|
||||
- Fix maintenance mode when using cli arg and config mode by using the merged list (by @CartCaved). [#119]
|
||||
- Fix that a scheduler time definition of 1 (int) gets wrongly interpreted as a bool (by @gyptazy). [#115]
|
||||
|
||||
|
||||
## [1.0.5] - 2024-10-30
|
||||
|
||||
### Changed
|
||||
|
||||
- Change docs to make bool usage in configs more clear (by @gyptazy). [#104]
|
||||
|
||||
### Fixed
|
||||
|
||||
- Fix node (and its objects) evaluation when not reachable, e.g., maintenance (by @gyptazy). [#107]
|
||||
- Fix migration from local disks (by @greenlogles). [#113]
|
||||
- Fix evaluation of maintenance mode where comparing list & string resulted in a crash (by @glitchvern). [#106]
|
||||
- Fix allowed values (add DEBUG, WARNING) for log verbosity (by @gyptazy). [#98]
|
||||
|
||||
|
||||
## [1.0.4] - 2024-10-11
|
||||
|
||||
### Added
|
||||
|
||||
- Add maintenance mode to evacuate a node and move workloads for other nodes in the cluster. [#58]
|
||||
- Add feature to make API timeout configureable. [#91]
|
||||
- Add version output cli arg. [#89]
|
||||
|
||||
### Changed
|
||||
|
||||
- Run storage balancing only on supported shared storages. [#79]
|
||||
- Run storage balancing only when needed to save time. [#79]
|
||||
|
||||
### Fixed
|
||||
|
||||
- Fix CPU balancing where calculations are done in float instead of int. (by @glitchvern) [#75]
|
||||
- Fix documentation for the underlying infrastructure. [#81]
|
||||
|
||||
|
||||
## [1.0.3] - 2024-09-12
|
||||
|
||||
### Added
|
||||
|
||||
- Add cli arg `-b` to return the next best node for next VM/CT placement. [#8]
|
||||
- Add a convert function to cast all bool alike options from configparser to bools. [#53]
|
||||
- Add a config parser options for future features. [#53]
|
||||
- Add a config versio schema that must be supported by ProxLB. [#53]
|
||||
- Add feature to allow the API hosts being provided as a comma separated list. [#60]
|
||||
- Add doc how to add dedicated user for authentication. (by @Dulux-Oz)
|
||||
- Add storage balancing function. [#51]
|
||||
|
||||
### Changed
|
||||
|
||||
- Provide a more reasonable output when HA services are not active in a Proxmox cluster. [#68]
|
||||
- Improve the underlying code base for future implementations. [#53]
|
||||
|
||||
### Fixed
|
||||
|
||||
- Fix anti-affinity rules not evaluating a new and different node. [#67]
|
||||
- Fixed `master_only` function by inverting the condition.
|
||||
- Fix documentation for the master_only parameter placed in the wrong config section. [#74]
|
||||
- Fix bug in the `proxlb.conf` in the vm_balancing section.
|
||||
- Fix handling of unset `ignore_nodes` and `ignore_vms` resulted in an attribute error. [#71]
|
||||
- Improved the overall validation and error handling. [#64]
|
||||
|
||||
|
||||
## [1.0.2] - 2024-08-13
|
||||
|
||||
### Added
|
||||
|
||||
- Add option to run migration in parallel or sequentially. [#41]
|
||||
- Add option to run ProxLB only on the Proxmox's master node in the cluster (reg. HA feature). [#40]
|
||||
- Add option to run migrations in parallel or sequentially. [#41]
|
||||
|
||||
### Changed
|
||||
|
||||
- Fix daemon timer to use hours instead of minutes. [#45]
|
||||
|
||||
### Fixed
|
||||
|
||||
- Fix CMake packaging for Debian package to avoid overwriting the config file. [#49]
|
||||
- Fix wonkey code style.
|
||||
|
||||
|
||||
## [1.0.0] - 2024-08-01
|
||||
|
||||
### Added
|
||||
|
||||
- Add feature to prevent VMs from being relocated by defining a wildcard pattern. [#7]
|
||||
- Add feature to make log verbosity configurable [#17].
|
||||
- Add option_mode to rebalance by node's free resources in percent (instead of bytes). [#29]
|
||||
- Add option to rebalance by assigned VM resources to avoid over provisioning. [#16]
|
||||
- Add Docker/Podman support. [#10 by @daanbosch]
|
||||
- Add exclude grouping feature to rebalance VMs from being located together to new nodes. [#4]
|
||||
- Add feature to prevent VMs from being relocated by defining the 'plb_ignore_vm' tag. [#7]
|
||||
- Add feature to prevent VMs from being relocated by defining a wildcard pattern. [#7]
|
||||
- Add Docker/Podman support. [#10 by @daanbosch]
|
||||
- Add option to rebalance by assigned VM resources to avoid overprovisioning. [#16]
|
||||
- Add feature to make log verbosity configurable [#17].
|
||||
- Add dry-run support to see what kind of rebalancing would be done. [#6]
|
||||
- Add LXC/Container integration. [#27]
|
||||
- Add exclude grouping feature to rebalance VMs from being located together to new nodes. [#4]
|
||||
- Add include grouping feature to rebalance VMs bundled to new nodes. [#3]
|
||||
- Add option_mode to rebalance by node's free resources in percent (instead of bytes). [#29]
|
||||
|
||||
### Changed
|
||||
|
||||
|
||||
@@ -29,7 +29,7 @@ Before submitting a pull request, ensure that your changes successfully perform t
|
||||
|
||||
1. **Install pytest if you haven't already:**
|
||||
```sh
|
||||
pip install fake8
|
||||
pip install flake8
|
||||
```
|
||||
|
||||
2. **Run the linting:**
|
||||
@@ -116,6 +116,6 @@ By participating in this project, you agree to abide by our [Code of Conduct](CO
|
||||
|
||||
## Getting Help
|
||||
|
||||
If you need help or have any questions, feel free to reach out by creating an issue or by joining our [discussion forum](https://github.com/gyptazy/proxlb/discussions). You can also refer to our [documentation](https://github.com/gyptazy/ProxLB/tree/main/docs) for more information about the project or join our [chat room](https://matrix.to/#/#proxlb:gyptazy.ch) in Matrix.
|
||||
If you need help or have any questions, feel free to reach out by creating an issue or by joining our [discussion forum](https://github.com/gyptazy/proxlb/discussions). You can also refer to our [documentation](https://github.com/gyptazy/ProxLB/tree/main/docs) for more information about the project or join our [chat room](https://matrix.to/#/#proxlb:gyptazy.com) in Matrix.
|
||||
|
||||
Thank you for contributing to ProxLB! Together, we can enhance the efficiency and performance of Proxmox clusters.
|
||||
31
Dockerfile
31
Dockerfile
@@ -1,20 +1,16 @@
|
||||
# Use the official Debian 12 base image
|
||||
FROM debian:12
|
||||
# Use the latest Alpine image
|
||||
FROM alpine:latest
|
||||
|
||||
# Labels
|
||||
LABEL maintainer="gyptazy@gyptazy.ch"
|
||||
LABEL org.label-schema.schema-version="0.9"
|
||||
LABEL org.label-schema.description="ProxLB - Rebalance VM workloads across nodes in a Proxmox cluster."
|
||||
LABEL org.label-schema.url="https://github.com/gyptazy/ProxLB"
|
||||
LABEL maintainer="gyptazy@gyptazy.com"
|
||||
LABEL org.label-schema.name="ProxLB"
|
||||
LABEL org.label-schema.description="ProxLB - An advanced load balancer for Proxmox clusters."
|
||||
LABEL org.label-schema.vendor="gyptazy"
|
||||
LABEL org.label-schema.url="https://proxlb.de"
|
||||
LABEL org.label-schema.vcs-url="https://github.com/gyptazy/ProxLB"
|
||||
|
||||
# Set environment variables
|
||||
ENV DEBIAN_FRONTEND=noninteractive
|
||||
|
||||
# Install python3 and python3-venv
|
||||
RUN apt-get update && \
|
||||
apt-get install -y python3 python3-pip python3-venv && \
|
||||
apt-get clean && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
# Install Python3
|
||||
RUN apk add --no-cache python3 py3-pip
|
||||
|
||||
# Create a directory for the app
|
||||
WORKDIR /app
|
||||
@@ -22,14 +18,11 @@ WORKDIR /app
|
||||
# Copy the python program from the current directory to /app
|
||||
COPY proxlb /app/proxlb
|
||||
|
||||
# Create a virtual environment
|
||||
RUN python3 -m venv venv
|
||||
|
||||
# Copy requirements to the container
|
||||
COPY requirements.txt /app/requirements.txt
|
||||
|
||||
# Install dependencies in the virtual environment
|
||||
RUN . venv/bin/activate && pip install -r /app/requirements.txt
|
||||
RUN pip install --break-system-packages -r /app/requirements.txt
|
||||
|
||||
# Set the entry point to use the virtual environment's python
|
||||
ENTRYPOINT ["/app/venv/bin/python3", "/app/proxlb"]
|
||||
ENTRYPOINT ["/usr/bin/python3", "/app/proxlb/main.py"]
|
||||
|
||||
2
LICENSE
2
LICENSE
@@ -671,4 +671,4 @@ into proprietary programs. If your program is a subroutine library, you
|
||||
may consider it more useful to permit linking proprietary applications with
|
||||
the library. If this is what you want to do, use the GNU Lesser General
|
||||
Public License instead of this License. But first, please read
|
||||
<https://www.gnu.org/licenses/why-not-lgpl.html>.
|
||||
<https://www.gnu.org/licenses/why-not-lgpl.html>.
|
||||
550
README.md
550
README.md
@@ -1,225 +1,199 @@
|
||||
# ProxLB - (Re)Balance VM Workloads in Proxmox Clusters
|
||||
<img align="left" src="https://cdn.gyptazy.ch/images/Prox-LB-logo.jpg"/>
|
||||
<img align="left" src="https://cdn.gyptazy.com/images/Prox-LB-logo.jpg"/>
|
||||
<br>
|
||||
|
||||
<p float="center"><img src="https://img.shields.io/github/license/gyptazy/ProxLB"/><img src="https://img.shields.io/github/contributors/gyptazy/ProxLB"/><img src="https://img.shields.io/github/last-commit/gyptazy/ProxLB/main"/><img src="https://img.shields.io/github/issues-raw/gyptazy/ProxLB"/><img src="https://img.shields.io/github/issues-pr/gyptazy/ProxLB"/></p>
|
||||
|
||||
|
||||
## Table of Contents
|
||||
- [ProxLB - (Re)Balance VM Workloads in Proxmox Clusters](#proxlb---rebalance-vm-workloads-in-proxmox-clusters)
|
||||
- [Table of Contents](#table-of-contents)
|
||||
- [Introduction](#introduction)
|
||||
- [Video of Migration](#video-of-migration)
|
||||
- [Features](#features)
|
||||
- [How does it work?](#how-does-it-work)
|
||||
- [Usage](#usage)
|
||||
- [Dependencies](#dependencies)
|
||||
- [Options](#options)
|
||||
- [Parameters](#parameters)
|
||||
- [Balancing](#balancing)
|
||||
- [General](#general)
|
||||
- [By Used Memory of VMs/CTs](#by-used-memory-of-vmscts)
|
||||
- [By Assigned Memory of VMs/CTs](#by-assigned-memory-of-vmscts)
|
||||
- [Grouping](#grouping)
|
||||
- [Include (Stay Together)](#include-stay-together)
|
||||
- [Exclude (Stay Separate)](#exclude-stay-separate)
|
||||
- [Ignore VMs (Tag Style)](#ignore-vms-tag-style)
|
||||
- [Systemd](#systemd)
|
||||
- [Manual](#manual)
|
||||
- [Proxmox GUI Integration](#proxmox-gui-integration)
|
||||
- [Quick Start](#quick-start)
|
||||
- [Container Quick Start (Docker/Podman)](#container-quick-start-dockerpodman)
|
||||
- [Logging](#logging)
|
||||
- [Motivation](#motivation)
|
||||
- [References](#references)
|
||||
- [Downloads](#downloads)
|
||||
- [Packages](#packages)
|
||||
- [Repository](#repository)
|
||||
- [Container Images (Docker/Podman)](#container-images-dockerpodman)
|
||||
- [Misc](#misc)
|
||||
- [Bugs](#bugs)
|
||||
- [Contributing](#contributing)
|
||||
- [Support](#support)
|
||||
- [Author(s)](#authors)
|
||||
1. [Introduction](#introduction)
|
||||
2. [Features](#features)
|
||||
3. [How does it work?](#how-does-it-work)
|
||||
4. [Installation](#installation)
|
||||
1. [Requirements / Dependencies](#requirements--dependencies)
|
||||
2. [Debian Package](#debian-package)
|
||||
4. [Container / Docker](#container--docker)
|
||||
5. [Source](#source)
|
||||
5. [Usage / Configuration](#usage--configuration)
|
||||
1. [GUI Integration](#gui-integration)
|
||||
2. [Proxmox HA Integration](#proxmox-ha-integration)
|
||||
3. [Options](#options)
|
||||
6. [Affinity & Anti-Affinity Rules](#affinity--anti-affinity-rules)
|
||||
1. [Affinity Rules](#affinity-rules)
|
||||
2. [Anti-Affinity Rules](#anti-affinity-rules)
|
||||
3. [Ignore VMs](#ignore-vms)
|
||||
4. [Pin VMs to Hypervisor Nodes](#pin-vms-to-hypervisor-nodes)
|
||||
7. [Maintenance](#maintenance)
|
||||
8. [Misc](#misc)
|
||||
1. [Bugs](#bugs)
|
||||
2. [Contributing](#contributing)
|
||||
3. [Documentation](#documentation)
|
||||
4. [Support](#support)
|
||||
9. [Author(s)](#authors)
|
||||
|
||||
|
||||
## Introduction
|
||||
`ProxLB` (PLB) is an advanced tool designed to enhance the efficiency and performance of Proxmox clusters by optimizing the distribution of virtual machines (VMs) or Containers (CTs) across the cluster nodes by using the Proxmox API. ProxLB meticulously gathers and analyzes a comprehensive set of resource metrics from both the cluster nodes and the running VMs. These metrics include CPU usage, memory consumption, and disk utilization, specifically focusing on local disk resources.
|
||||
ProxLB is an advanced load balancing solution specifically designed for Proxmox clusters, addressing the absence of a Dynamic Resource Scheduler (DRS) that is familiar to VMware users. As a third-party solution, ProxLB enhances the management and efficiency of Proxmox clusters by intelligently distributing workloads across available nodes. Workloads can be balanced by different metrics, such as the guest's memory, CPU or disk usage, or by their assigned resources to avoid overprovisioning and to ensure resource availability.
|
||||
|
||||
PLB collects resource usage data from each node in the Proxmox cluster, including CPU, (local) disk and memory utilization. Additionally, it gathers resource usage statistics from all running VMs, ensuring a granular understanding of the cluster's workload distribution.
|
||||
One of the key advantages of ProxLB is that it is fully open-source and free, making it accessible for anyone to use, modify, and contribute to. This ensures transparency and fosters community-driven improvements. ProxLB supports filtering and ignoring specific nodes and guests through configuration files and API calls, providing administrators with the flexibility to tailor the load balancing behavior to their specific needs.
|
||||
|
||||
Intelligent rebalancing is a key feature of ProxLB where it re-balances VMs based on their memory, disk or CPU usage, ensuring that no node is overburdened while others remain underutilized. The rebalancing capabilities of PLB significantly enhance cluster performance and reliability. By ensuring that resources are evenly distributed, PLB helps prevent any single node from becoming a performance bottleneck, improving the reliability and stability of the cluster. Efficient rebalancing leads to better utilization of available resources, potentially reducing the need for additional hardware investments and lowering operational costs.
|
||||
A standout feature of ProxLB is its maintenance mode. When enabled, all guest workloads are automatically moved to other nodes within the cluster, ensuring that a node can be safely updated, rebooted, or undergo hardware maintenance without disrupting the overall cluster operation. Additionally, ProxLB supports both affinity and anti-affinity rules, allowing operators to group multiple guests to run together on the same node or ensure that certain guests do not run on the same node, depending on the cluster's node count. This feature is crucial for optimizing performance and maintaining high availability.
|
||||
|
||||
Automated rebalancing reduces the need for manual actions, allowing operators to focus on other critical tasks, thereby increasing operational efficiency.
|
||||
ProxLB can also return the best next node for guest placement, which can be integrated into CI/CD pipelines using tools like Ansible or Terraform. This capability streamlines the deployment process and ensures efficient resource utilization. Furthermore, ProxLB leverages the Proxmox API, including the entire ACL (Access Control List) system, for secure and efficient operation. Unlike some solutions, it does not require SSH access, enhancing security and simplifying configuration.
|
||||
|
||||
Overall, ProxLB significantly enhances resource management by intelligently distributing workloads, reducing downtime through its maintenance mode, and providing improved flexibility with affinity and anti-affinity rules. Its seamless integration with CI/CD tools and reliance on the Proxmox API make it a robust and secure solution for optimizing Proxmox cluster performance.
|
||||
|
||||
### Video of Migration
|
||||
<img src="https://cdn.gyptazy.ch/images/proxlb-rebalancing-demo.gif"/>
|
||||
<img src="https://cdn.gyptazy.com/images/proxlb-rebalancing-demo.gif"/>
|
||||
|
||||
## Features
|
||||
* Rebalance the cluster by:
|
||||
ProxLB's key features include automatic rebalancing of VMs and CTs across a Proxmox cluster based on memory, CPU, and local disk usage, as well as identifying optimal nodes for automation. It supports maintenance mode, affinity rules, and seamless Proxmox API integration with ACL support, offering flexible usage as a one-time operation, a daemon, or through the Proxmox Web GUI.
|
||||
|
||||
**Features**
|
||||
* Rebalance VMs/CTs in the cluster by:
|
||||
* Memory
|
||||
* Disk (only local storage)
|
||||
* CPU
|
||||
* Performing
|
||||
* Periodically
|
||||
* One-shot solution
|
||||
* Types
|
||||
* Rebalance only VMs
|
||||
* Rebalance only CTs
|
||||
* Rebalance all (VMs and CTs)
|
||||
* Filter
|
||||
* Exclude nodes
|
||||
* Exclude virtual machines
|
||||
* Grouping
|
||||
* Include groups (VMs that are rebalanced to nodes together)
|
||||
* Exclude groups (VMs that must run on different nodes)
|
||||
* Ignore groups (VMs that should be untouched)
|
||||
* Dry-run support
|
||||
* Human readable output in CLI
|
||||
* JSON output for further parsing
|
||||
* Migrate VM workloads away (e.g. maintenance preparation)
|
||||
* Get best nodes for further automation
|
||||
* Supported Guest Types
|
||||
* VMs
|
||||
* CTs
|
||||
* Maintenance Mode
|
||||
* Set node(s) into maintenance
|
||||
* Move all workloads to different nodes
|
||||
* Affinity / Anti-Affinity Rules
|
||||
* Fully based on Proxmox API
|
||||
* Fully integrated into the Proxmox ACL
|
||||
* No SSH required
|
||||
* Usage
|
||||
* One-Shot (one-shot)
|
||||
* Periodically (daemon)
|
||||
* Proxmox Web GUI Integration (optional)
|
||||
* One-Time
|
||||
* Daemon
|
||||
* Proxmox Web GUI Integration
|
||||
|
||||
## How does it work?
|
||||
ProxLB is a load-balancing system designed to optimize the distribution of virtual machines (VMs) and containers (CTs) across a cluster. It works by first gathering resource usage metrics from all nodes in the cluster through the Proxmox API. This includes detailed resource metrics for each VM and CT on every node. ProxLB then evaluates the difference between the maximum and minimum resource usage of the nodes, referred to as "Balanciness." If this difference exceeds a predefined threshold (which is configurable), the system initiates the rebalancing process.
|
||||
|
||||
Before starting any migrations, ProxLB validates that rebalancing actions are necessary and beneficial. Depending on the selected balancing mode — such as CPU, memory, or disk — it creates a balancing matrix. This matrix sorts the VMs by their maximum used or assigned resources, identifying the VM with the highest usage. ProxLB then places this VM on the node with the most free resources in the selected balancing type. This process runs recursively until the operator-defined Balanciness is achieved. Balancing can be defined for the used or max. assigned resources of VMs/CTs.
|
||||
|
||||
## Usage
|
||||
Running PLB is easy and it runs almost everywhere since it just depends on `Python3` and the `proxmoxer` library. Therefore, it can directly run on a Proxmox node, dedicated systems like Debian, RedHat, or even FreeBSD, as long as the API is reachable by the client running PLB.
|
||||
## Installation
|
||||
|
||||
### Dependencies
|
||||
* Python3
|
||||
* proxmoxer (Python module)
|
||||
### Requirements / Dependencies
|
||||
* Python3.x
|
||||
* proxmoxer
|
||||
* requests
|
||||
* urllib3
|
||||
* pyyaml
|
||||
|
||||
### Options
|
||||
The following options can be set in the `proxlb.conf` file:
|
||||
|
||||
| Option | Example | Description |
|
||||
|------|:------:|:------:|
|
||||
| api_host | hypervisor01.gyptazy.ch | Host or IP address of the remote Proxmox API. |
|
||||
| api_user | root@pam | Username for the API. |
|
||||
| api_pass | FooBar | Password for the API. |
|
||||
| verify_ssl | 1 | Validate SSL certificates (1) or ignore (0). (default: 1) |
|
||||
| method | memory | Defines the balancing method (default: memory) where you can use `memory`, `disk` or `cpu`. |
|
||||
| mode | used | Rebalance by `used` resources (efficiency) or `assigned` (avoid overprovisioning) resources. (default: used)|
|
||||
| mode_option | bytes | Rebalance by node's resources in `bytes` or `percent`. (default: bytes) |
|
||||
| type | vm | Rebalance only `vm` (virtual machines), `ct` (containers) or `all` (virtual machines & containers). (default: vm)|
|
||||
| balanciness | 10 | Value of the percentage of lowest and highest resource consumption on nodes may differ before rebalancing. (default: 10) |
|
||||
| parallel_migrations | 1 | Defines if migrations should be done in parallel or sequentially. (default: 1) |
|
||||
| ignore_nodes | dummynode01,dummynode02,test* | Defines a comma separated list of nodes to exclude. |
|
||||
| ignore_vms | testvm01,testvm02 | Defines a comma separated list of VMs to exclude. (`*` as suffix wildcard or tags are also supported) |
|
||||
| master_only | 0 | Defines if this should only be performed (1) on the cluster master node or not (0). (default: 0) |
|
||||
| daemon | 1 | Run as a daemon (1) or one-shot (0). (default: 1) |
|
||||
| schedule | 24 | Interval between rebalancing runs, in hours. (default: 24) |
|
||||
| log_verbosity | INFO | Defines the log level (default: CRITICAL) where you can use `INFO`, `WARN` or `CRITICAL` |
|
||||
|
||||
An example of the configuration file looks like:
|
||||
The dependencies can simply be installed with `pip` by running the following command:
|
||||
```
|
||||
[proxmox]
|
||||
api_host: hypervisor01.gyptazy.ch
|
||||
api_user: root@pam
|
||||
api_pass: FooBar
|
||||
verify_ssl: 1
|
||||
[balancing]
|
||||
method: memory
|
||||
mode: used
|
||||
type: vm
|
||||
# Balanciness defines how much difference may be
|
||||
# between the lowest & highest resource consumption
|
||||
# of nodes before rebalancing will be done.
|
||||
# Examples:
|
||||
# Rebalancing: node01: 41% memory consumption :: node02: 52% consumption
|
||||
# No rebalancing: node01: 43% memory consumption :: node02: 50% consumption
|
||||
balanciness: 10
|
||||
# Enable parallel migrations. If set to 0 it will wait for completed migrations
|
||||
# before starting next migration.
|
||||
parallel_migrations: 1
|
||||
ignore_nodes: dummynode01,dummynode02
|
||||
ignore_vms: testvm01,testvm02
|
||||
[service]
|
||||
# The master_only option might be useful if running ProxLB on all nodes in a cluster
|
||||
# but only a single one should do the balancing. The master node is obtained from the Proxmox
|
||||
# HA status.
|
||||
master_only: 0
|
||||
daemon: 1
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
### Parameters
|
||||
The following options and parameters are currently supported:
|
||||
*Note: Distribution packages, such like the provided `.deb` package will automatically resolve and install all required dependencies by using already packaged version from the distribution's repository. By using the Docker (container) image or Debian packages, you do not need to take any care of the requirements listed here.*
|
||||
|
||||
| Option | Long Option | Description | Default |
|
||||
|------|:------:|------:|------:|
|
||||
| -c | --config | Path to a config file. | /etc/proxlb/proxlb.conf (default) |
|
||||
| -d | --dry-run | Perform a dry-run without doing any actions. | Unset |
|
||||
| -j | --json | Return a JSON of the VM movement. | Unset |
|
||||
### Debian Package
|
||||
ProxLB is a powerful and flexible load balancer designed to work across various architectures, including `amd64`, `arm64`, `rv64` and many other ones that support Python. It runs independently of the underlying hardware, making it a versatile choice for different environments. This chapter covers the step-by-step process to install ProxLB on Debian-based systems, including Debian clones like Ubuntu.
|
||||
|
||||
### Balancing
|
||||
#### General
|
||||
In general, virtual machines and containers can be rebalanced and moved around nodes in the cluster. Often, this works without any downtime. However, this does **not** apply to containers: LXC based containers will be shut down, copied and started on the new node. Also note that live migrations usually work smoothly, but there are still several things to be considered. This is out of scope for ProxLB and applies in general to Proxmox and your cluster setup. You can find more details about this here: https://pve.proxmox.com/wiki/Migrate_to_Proxmox_VE.
|
||||
#### Quick-Start
|
||||
You can simply use this snippet to install the repository and to install ProxLB on your system.
|
||||
|
||||
#### By Used Memory of VMs/CTs
|
||||
By continuously monitoring the current resource usage of VMs, ProxLB intelligently reallocates workloads to prevent any single node from becoming overloaded. This approach ensures that resources are balanced efficiently, providing consistent and optimal performance across the entire cluster at all times. To activate this balancing mode, simply activate the following option in your ProxLB configuration:
|
||||
```
|
||||
mode: used
|
||||
```bash
|
||||
echo "deb https://repo.gyptazy.com/stable /" > /etc/apt/sources.list.d/proxlb.list
|
||||
wget -O /etc/apt/trusted.gpg.d/proxlb.asc https://repo.gyptazy.com/repository.gpg
|
||||
apt-get update && apt-get -y install proxlb
|
||||
cp /etc/proxlb/proxlb_example.yaml /etc/proxlb/proxlb.yaml
|
||||
# Adjust the config to your needs
|
||||
vi /etc/proxlb/proxlb.yaml
|
||||
systemctl start proxlb
|
||||
```
|
||||
|
||||
Afterwards, restart the service (if running in daemon mode) to activate this rebalancing mode.
|
||||
Afterwards, ProxLB is running in the background and balances your cluster by your defined balancing method (default: memory).
|
||||
|
||||
#### Details
|
||||
ProxLB provides two different repositories:
|
||||
* https://repo.gyptazy.com/stable (only stable release)
|
||||
* https://repo.gyptazy.com/testing (bleeding edge - not recommended)
|
||||
|
||||
The repository is signed and the GPG key can be found at:
|
||||
* https://repo.gyptazy.com/repository.gpg
|
||||
|
||||
You can also simply import it by running:
|
||||
|
||||
#### By Assigned Memory of VMs/CTs
|
||||
By ensuring that resources are always available for each VM, ProxLB prevents over-provisioning and maintains a balanced load across all nodes. This guarantees that users have consistent access to the resources they need. However, if the total assigned resources exceed the combined capacity of the cluster, ProxLB will issue a warning, indicating potential over-provisioning despite its best efforts to balance the load. To activate this balancing mode, simply activate the following option in your ProxLB configuration:
|
||||
```
|
||||
mode: assigned
|
||||
# KeyID: 17169F23F9F71A14AD49EDADDB51D3EB01824F4C
|
||||
# UID: gyptazy Solutions Repository <contact@gyptazy.com>
|
||||
# SHA256: 52c267e6f4ec799d40cdbdb29fa518533ac7942dab557fa4c217a76f90d6b0f3 repository.gpg
|
||||
|
||||
wget -O /etc/apt/trusted.gpg.d/proxlb.asc https://repo.gyptazy.com/repository.gpg
|
||||
```
|
||||
|
||||
Afterwards, restart the service (if running in daemon mode) to activate this rebalancing mode.
|
||||
*Note: The defined repositories `repo.gyptazy.com` and `repo.proxlb.de` are the same!*
|
||||
|
||||
### Grouping
|
||||
#### Include (Stay Together)
|
||||
<img align="left" src="https://cdn.gyptazy.ch/images/plb-rebalancing-include-balance-group.jpg"/> Access the Proxmox Web UI by opening your web browser and navigating to your Proxmox VE web interface, then log in with your credentials. Navigate to the VM you want to tag by selecting it from the left-hand navigation panel. Click on the "Options" tab to view the VM's options, then select "Edit" or "Add" (depending on whether you are editing an existing tag or adding a new one). In the tag field, enter plb_include_ followed by your unique identifier, for example, plb_include_group1. Save the changes to apply the tag to the VM. Repeat these steps for each VM that should be included in the group.
|
||||
#### Debian Packages (.deb files)
|
||||
If you do not want to use the repository you can also find the debian packages as a .deb file on gyptazy's CDN at:
|
||||
* https://cdn.gyptazy.com/files/os/debian/proxlb/
|
||||
|
||||
#### Exclude (Stay Separate)
|
||||
<img align="left" src="https://cdn.gyptazy.ch/images/plb-rebalancing-exclude-balance-group.jpg"/> Access the Proxmox Web UI by opening your web browser and navigating to your Proxmox VE web interface, then log in with your credentials. Navigate to the VM you want to tag by selecting it from the left-hand navigation panel. Click on the "Options" tab to view the VM's options, then select "Edit" or "Add" (depending on whether you are editing an existing tag or adding a new one). In the tag field, enter plb_exclude_ followed by your unique identifier, for example, plb_exclude_critical. Save the changes to apply the tag to the VM. Repeat these steps for each VM that should be excluded from being on the same node.
|
||||
Afterwards, you can simply install the package by running:
|
||||
```bash
|
||||
dpkg -i proxlb_*.deb
|
||||
cp /etc/proxlb/proxlb_example.yaml /etc/proxlb/proxlb.yaml
|
||||
# Adjust the config to your needs
|
||||
vi /etc/proxlb/proxlb.yaml
|
||||
systemctl start proxlb
|
||||
```
|
||||
|
||||
#### Ignore VMs (Tag Style)
|
||||
<img align="left" src="https://cdn.gyptazy.ch/images/plb-rebalancing-ignore-vm.jpg"/> In Proxmox, you can ensure that certain VMs are ignored during the rebalancing process by setting a specific tag within the Proxmox Web UI, rather than solely relying on configurations in the ProxLB config file. This can be achieved by adding the tag 'plb_ignore_vm' to the VM. Once this tag is applied, the VM will be excluded from any further rebalancing operations, simplifying the management process.
|
||||
### Container Images / Docker
|
||||
Using the ProxLB container images is straightforward and only requires you to mount the config file.
|
||||
|
||||
### Systemd
|
||||
When installing a Linux distribution (such as .deb or .rpm) file, this will be shipped with a systemd unit file. The default configuration file will be sourced from `/etc/proxlb/proxlb.conf`.
|
||||
```bash
|
||||
# Pull the image
|
||||
docker pull cr.gyptazy.com/proxlb/proxlb:latest
|
||||
# Download the config
|
||||
wget -O proxlb.yaml https://raw.githubusercontent.com/gyptazy/ProxLB/refs/heads/main/config/proxlb_example.yaml
|
||||
# Adjust the config to your needs
|
||||
vi proxlb.yaml
|
||||
# Start the ProxLB container image with the ProxLB config
|
||||
docker run -it --rm -v $(pwd)/proxlb.yaml:/etc/proxlb/proxlb.yaml proxlb
|
||||
```
|
||||
|
||||
| Unit Name | Options |
|
||||
*Note: ProxLB container images are officially only available at cr.proxlb.de and cr.gyptazy.com.*
|
||||
|
||||
#### Overview of Images
|
||||
| Version | Image |
|
||||
|------|:------:|
|
||||
| proxlb | start, stop, status, restart |
|
||||
| latest | cr.gyptazy.com/proxlb/proxlb:latest |
|
||||
| v1.1.2 | cr.gyptazy.com/proxlb/proxlb:v1.1.2 |
|
||||
| v1.1.1 | cr.gyptazy.com/proxlb/proxlb:v1.1.1 |
|
||||
| v1.1.0 | cr.gyptazy.com/proxlb/proxlb:v1.1.0 |
|
||||
| v1.0.6 | cr.gyptazy.com/proxlb/proxlb:v1.0.6 |
|
||||
| v1.0.5 | cr.gyptazy.com/proxlb/proxlb:v1.0.5 |
|
||||
| v1.0.4 | cr.gyptazy.com/proxlb/proxlb:v1.0.4 |
|
||||
| v1.0.3 | cr.gyptazy.com/proxlb/proxlb:v1.0.3 |
|
||||
| v1.0.2 | cr.gyptazy.com/proxlb/proxlb:v1.0.2 |
|
||||
| v1.0.0 | cr.gyptazy.com/proxlb/proxlb:v1.0.0 |
|
||||
| v0.9.9 | cr.gyptazy.com/proxlb/proxlb:v0.9.9 |
|
||||
|
||||
### Manual
|
||||
A manual installation is possible and also supports BSD based systems. The Proxmox Rebalancing Service relies mainly on two important files:
|
||||
* proxlb (Python Executable)
|
||||
* proxlb.conf (Config file)
|
||||
### Source
|
||||
ProxLB can also easily be used from the provided sources - for traditional systems but also as a Docker/Podman container image.
|
||||
|
||||
The executable must be able to read the config file, if no dedicated config file is given by the `-c` argument, PLB tries to read it from `/etc/proxlb/proxlb.conf`.
|
||||
|
||||
### Proxmox GUI Integration
|
||||
<img align="left" src="https://cdn.gyptazy.ch/images/proxlb-GUI-integration.jpg"/> PLB can also be used directly from the Proxmox Web UI by installing the optional `pve-proxmoxlb-service-ui` package, which depends on the `proxlb` package. For the Web UI integration, it must additionally be installed on the nodes of the cluster. Afterwards, a new menu item called `Rebalancing` is present in the HA chapter. This chapter provides two possibilities:
|
||||
* Rebalancing VM workloads
|
||||
* Migrate VM workloads away from a defined node (e.g. maintenance preparation)
|
||||
|
||||
### Quick Start
|
||||
The easiest way to get started is by using the ready-to-use packages that I provide on my CDN and to run it on a Linux Debian based system. This can also be one of the Proxmox nodes itself.
|
||||
|
||||
```
|
||||
wget https://cdn.gyptazy.ch/files/amd64/debian/proxlb/proxlb_1.0.2_amd64.deb
|
||||
dpkg -i proxlb_1.0.2_amd64.deb
|
||||
# Adjust your config
|
||||
vi /etc/proxlb/proxlb.conf
|
||||
systemctl restart proxlb
|
||||
systemctl status proxlb
|
||||
#### Traditional System
|
||||
Setting up and running ProxLB from the sources is simple and requires just a few commands. Ensure Python 3 and the Python dependencies are installed on your system, then run ProxLB using the following command:
|
||||
```bash
|
||||
git clone https://github.com/gyptazy/ProxLB.git
|
||||
cd ProxLB
|
||||
```
|
||||
|
||||
### Container Quick Start (Docker/Podman)
|
||||
Creating a container image of ProxLB is straightforward using the provided Dockerfile. The Dockerfile simplifies the process by automating the setup and configuration required to get ProxLB running in a container. Simply follow the steps in the Dockerfile to build the image, ensuring all dependencies and configurations are correctly applied. For those looking for an even quicker setup, a ready-to-use ProxLB container image is also available, eliminating the need for manual building and allowing for immediate deployment.
|
||||
Afterwards simply adjust the config file to your needs:
|
||||
```bash
|
||||
vi config/proxlb.yaml
|
||||
```
|
||||
|
||||
Start ProxLB by Python3 on the system:
|
||||
```bash
|
||||
python3 proxlb/main.py -c config/proxlb.yaml
|
||||
```
|
||||
|
||||
#### Container Image
|
||||
Creating a container image of ProxLB is straightforward using the provided Dockerfile. The Dockerfile simplifies the process by automating the setup and configuration required to get ProxLB running in an Alpine container. Simply follow the steps in the Dockerfile to build the image, ensuring all dependencies and configurations are correctly applied. For those looking for an even quicker setup, a ready-to-use ProxLB container image is also available, eliminating the need for manual building and allowing for immediate deployment.
|
||||
|
||||
```bash
|
||||
git clone https://github.com/gyptazy/ProxLB.git
|
||||
@@ -228,84 +202,196 @@ docker build -t proxlb .
|
||||
```
|
||||
|
||||
Afterwards simply adjust the config file to your needs:
|
||||
```
|
||||
vi /etc/proxlb/proxlb.conf
|
||||
```bash
|
||||
vi config/proxlb.yaml
|
||||
```
|
||||
|
||||
Finally, start the created container.
|
||||
```bash
|
||||
docker run -it --rm -v $(pwd)/proxlb.conf:/etc/proxlb/proxlb.conf proxlb
|
||||
docker run -it --rm -v $(pwd)/proxlb.yaml:/etc/proxlb/proxlb.yaml proxlb
|
||||
```
|
||||
|
||||
### Logging
|
||||
ProxLB uses the `SystemdHandler` for logging. You can find all your logs in your systemd unit log or in the `journalctl`. In default, ProxLB only logs critical events. However, for further understanding of the balancing it might be useful to change this to `INFO` or `DEBUG` which can simply be done in the [proxlb.conf](https://github.com/gyptazy/ProxLB/blob/main/proxlb.conf#L14) file by changing the `log_verbosity` parameter.
|
||||
## Usage / Configuration
|
||||
Running ProxLB is straightforward and versatile, as it only requires `Python3` and the `proxmoxer` library. This means ProxLB can be executed directly on a Proxmox node or on dedicated systems such as Debian, RedHat, or even FreeBSD, provided that the Proxmox API is accessible from the client running ProxLB. ProxLB can also run inside a container - Docker or LXC - the choice is simply up to you.
|
||||
|
||||
Available logging values:
|
||||
| Verbosity | Description |
|
||||
|------|:------:|
|
||||
| DEBUG | This option logs everything and is needed for debugging the code. |
|
||||
| INFO | This option provides insides behind the scenes. What/why has been something done and with which values. |
|
||||
| WARNING | This option provides only warning messages, which might be a problem in general but not for the application itself. |
|
||||
| CRITICAL | This option logs all critical events that will avoid running ProxLB. |
|
||||
### GUI Integration
|
||||
<img align="left" src="https://cdn.gyptazy.com/images/proxlb-GUI-integration.jpg"/> ProxLB can also be accessed through the Proxmox Web UI by installing the optional `pve-proxmoxlb-service-ui` package, which depends on the proxlb package. For full Web UI integration, this package must be installed on all nodes within the cluster. Once installed, a new menu item - `Rebalancing`, appears in the cluster level under the HA section. Once installed, it offers two key functionalities:
|
||||
* Rebalancing VM workloads
|
||||
* Migrate VM workloads away from a defined node (e.g. maintenance preparation)
|
||||
|
||||
## Motivation
|
||||
As a developer managing a cluster of virtual machines for my projects, I often encountered the challenge of resource imbalance. Nodes within the cluster would become unevenly loaded, with some nodes being overburdened while others remained underutilized. This imbalance led to inefficiencies, performance bottlenecks, and increased operational costs. Frustrated by the lack of an adequate solution to address this issue, I decided to develop the ProxLB (PLB) to ensure better resource distribution across my clusters.
|
||||
**Note:** This package is currently discontinued and will be readded at a later time. See also: [#44: How to install pve-proxmoxlb-service-ui package](https://github.com/gyptazy/ProxLB/issues/44).
|
||||
|
||||
My primary motivation for creating PLB stemmed from my work on my BoxyBSD project, where I consistently faced the difficulty of maintaining balanced nodes while running various VM workloads but also on my personal clusters. The absence of an efficient rebalancing mechanism made it challenging to achieve optimal performance and stability. Recognizing the necessity for a tool that could gather and analyze resource metrics from both the cluster nodes and the running VMs, I embarked on developing ProxLB.
|
||||
### Proxmox HA Integration
|
||||
Proxmox HA (High Availability) groups are designed to ensure that virtual machines (VMs) remain running within a Proxmox cluster. HA groups define specific rules for where VMs should be started or migrated in case of node failures, ensuring minimal downtime and automatic recovery.
|
||||
|
||||
PLB meticulously collects detailed resource usage data from each node in a Proxmox cluster, including CPU load, memory usage, and local disk space utilization. It also gathers comprehensive statistics from all running VMs, providing a granular understanding of the workload distribution. With this data, PLB intelligently redistributes VMs based on memory usage, local disk usage, and CPU usage. This ensures that no single node is overburdened, storage resources are evenly distributed, and the computational load is balanced, enhancing overall cluster performance.
|
||||
However, when used in conjunction with ProxLB, the built-in load balancer for Proxmox, conflicts can arise. ProxLB operates with its own logic for workload distribution, taking into account affinity and anti-affinity rules. While it effectively balances guest workloads, it may re-shift and redistribute VMs in a way that does not align with HA group constraints, potentially leading to unsuitable placements.
|
||||
|
||||
As an advocate of the open-source philosophy, I believe in the power of community and collaboration. By sharing solutions like PLB, I aim to contribute to the collective knowledge and tools available to developers facing similar challenges. Open source fosters innovation, transparency, and mutual support, enabling developers to build on each other's work and create better solutions together.
|
||||
Due to these conflicts, it is currently not recommended to use both HA groups and ProxLB simultaneously. The interaction between the two mechanisms can lead to unexpected behavior, where VMs might not adhere to HA group rules after being moved by ProxLB.
|
||||
|
||||
Developing PLB was driven by a desire to solve a real problem I faced in my projects. However, the spirit behind this effort was to provide a valuable resource to the community. By open-sourcing PLB, I hope to help other developers manage their clusters more efficiently, optimize their resource usage, and reduce operational costs. Sharing this solution aligns with the core principles of open source, where the goal is not only to solve individual problems but also to contribute to the broader ecosystem.
|
||||
A solution to improve compatibility between HA groups and ProxLB is under evaluation, aiming to ensure that both features can work together without disrupting VM placement strategies.
|
||||
|
||||
## References
|
||||
Here you can find some overviews of references for and about the ProxLB (PLB):
|
||||
See also: [#65: Host groups: Honour HA groups](https://github.com/gyptazy/ProxLB/issues/65).
|
||||
|
||||
| Description | Link |
|
||||
|------|:------:|
|
||||
| General introduction into ProxLB | https://gyptazy.ch/blog/proxlb-rebalancing-vm-workloads-across-nodes-in-proxmox-clusters/ |
|
||||
| Howto install and use ProxLB on Debian to rebalance vm workloads in a Proxmox cluster | https://gyptazy.ch/howtos/howto-install-and-use-proxlb-to-rebalance-vm-workloads-across-nodes-in-proxmox-clusters/ |
|
||||
### Options
|
||||
The following options can be set in the configuration file `proxlb.yaml`:
|
||||
|
||||
## Downloads
|
||||
ProxLB can be obtained in many different ways, depending on which use case you prefer. You can simply copy the code from GitHub, use created packages for Debian or RedHat based systems, use a Repository to keep ProxLB always up to date or simply use a Container image for Docker/Podman.
|
||||
|
||||
### Packages
|
||||
Ready to use packages can be found at:
|
||||
* https://cdn.gyptazy.ch/files/amd64/debian/proxlb/
|
||||
* https://cdn.gyptazy.ch/files/amd64/ubuntu/proxlb/
|
||||
* https://cdn.gyptazy.ch/files/amd64/redhat/proxlb/
|
||||
* https://cdn.gyptazy.ch/files/amd64/freebsd/proxlb/
|
||||
| Section | Option | Sub Option | Example | Type | Description |
|
||||
|---------|:------:|:----------:|:-------:|:----:|:-----------:|
|
||||
| `proxmox_api` | | | | | |
|
||||
| | hosts | | ['virt01.example.com', '10.10.10.10', 'fe01::bad:code::cafe'] | `List` | List of Proxmox nodes. Can be IPv4, IPv6 or mixed. |
|
||||
| | user | | root@pam | `Str` | Username for the API. |
|
||||
| | pass | | FooBar | `Str` | Password for the API. (Recommended: Use API token authorization!) |
|
||||
| | token_id | | proxlb | `Str` | Token ID of the user for the API. |
|
||||
| | token_secret | | 430e308f-1337-1337-beef-1337beefcafe | `Str` | Secret of the token ID for the API. |
|
||||
| | ssl_verification | | True | `Bool` | Validate SSL certificates (1) or ignore (0). [values: `1` (default), `0`] |
|
||||
| | timeout | | 10 | `Int` | Timeout for the Proxmox API in sec. |
|
||||
| | retries | | 1 | `Int` | How often a connection attempt to the defined API host should be performed. |
|
||||
| | wait_time | | 1 | `Int` | How many seconds should be waited before performing another connection attempt to the API host. |
|
||||
| `proxmox_cluster` | | | | | |
|
||||
| | maintenance_nodes | | ['virt66.example.com'] | `List` | A list of Proxmox nodes that are defined to be in a maintenance. |
|
||||
| | ignore_nodes | | [] | `List` | A list of Proxmox nodes that are defined to be ignored. |
|
||||
| | overprovisioning | | False | `Bool` | Avoids balancing when nodes would become overprovisioned. |
|
||||
| `balancing` | | | | | |
|
||||
| | enable | | True | `Bool` | Enables the guest balancing.|
|
||||
| | enforce_affinity | | True | `Bool` | Enforcing affinity/anti-affinity rules but balancing might become worse. |
|
||||
| | parallel | | False | `Bool` | If guests should be moved in parallel or sequentially.|
|
||||
| | live | | True | `Bool` | If guests should be moved live or shutdown.|
|
||||
| | with_local_disks | | True | `Bool` | If balancing of guests should include local disks.|
|
||||
| | balance_types | | ['vm', 'ct'] | `List` | Defines the types of guests that should be honored. [values: `vm`, `ct`]|
|
||||
| | max_job_validation | | 1800 | `Int` | How long a job validation may take in seconds. (default: 1800) |
|
||||
| | balanciness | | 10 | `Int` | The maximum delta of resource usage between node with highest and lowest usage. |
|
||||
| | method | | memory | `Str` | The balancing method that should be used. [values: `memory` (default), `cpu`, `disk`]|
|
||||
| | mode | | used | `Str` | The balancing mode that should be used. [values: `used` (default), `assigned`] |
|
||||
| `service` | | | | | |
|
||||
| | daemon | | True | `Bool` | If daemon mode should be activated. |
|
||||
| | `schedule` | | | `Dict` | Schedule config block for rebalancing. |
|
||||
| | | interval | 12 | `Int` | How often rebalancing should occur in daemon mode.|
|
||||
| | | format | hours | `Str` | Sets the time format. [values: `hours` (default), `minutes`]|
|
||||
| | log_level | | INFO | `Str` | Defines the default log level that should be logged. [values: `INFO` (default), `WARNING`, `CRITICAL`, `DEBUG`] |
|
||||
|
||||
|
||||
### Repository
|
||||
Debian based systems can also use the repository by adding the following line to their apt sources:
|
||||
|
||||
An example of the configuration file looks like:
|
||||
```
|
||||
deb https://repo.gyptazy.ch/ /
|
||||
proxmox_api:
|
||||
hosts: ['virt01.example.com', '10.10.10.10', 'fe01::bad:code::cafe']
|
||||
user: root@pam
|
||||
pass: crazyPassw0rd!
|
||||
# API Token method
|
||||
# token_id: proxlb
|
||||
# token_secret: 430e308f-1337-1337-beef-1337beefcafe
|
||||
ssl_verification: True
|
||||
timeout: 10
|
||||
# API Connection retries
|
||||
# retries: 1
|
||||
# wait_time: 1
|
||||
|
||||
proxmox_cluster:
|
||||
maintenance_nodes: ['virt66.example.com']
|
||||
ignore_nodes: []
|
||||
overprovisioning: True
|
||||
|
||||
balancing:
|
||||
enable: True
|
||||
enforce_affinity: False
|
||||
parallel: False
|
||||
live: True
|
||||
with_local_disks: True
|
||||
balance_types: ['vm', 'ct']
|
||||
max_job_validation: 1800
|
||||
balanciness: 5
|
||||
method: memory
|
||||
mode: used
|
||||
|
||||
service:
|
||||
daemon: True
|
||||
schedule:
|
||||
interval: 12
|
||||
format: hours
|
||||
log_level: INFO
|
||||
```
|
||||
|
||||
The Repository's GPG key can be found at: `https://repo.gyptazy.ch/repo/KEY.gpg`
|
||||
### Parameters
|
||||
The following options and parameters are currently supported:
|
||||
|
||||
You can also simply import it by running:
|
||||
| Option | Long Option | Description | Default |
|
||||
|------|:------:|------:|------:|
|
||||
| -c | --config | Path to a config file. | /etc/proxlb/proxlb.yaml (default) |
|
||||
| -d | --dry-run | Performs a dry-run without doing any actions. | False |
|
||||
| -j | --json | Returns a JSON of the VM movement. | False |
|
||||
| -b | --best-node | Returns the best next node for a VM/CT placement (useful for further usage with Terraform/Ansible). | False |
|
||||
| -v | --version | Returns the ProxLB version on stdout. | False |
|
||||
|
||||
## Affinity & Anti-Affinity Rules
|
||||
ProxLB provides an advanced mechanism to define affinity and anti-affinity rules, enabling precise control over virtual machine (VM) placement. These rules help manage resource distribution, improve high availability configurations, and optimize performance within a Proxmox Virtual Environment (PVE) cluster. By leveraging Proxmox’s integrated access management, ProxLB ensures that users can only define and manage rules for guests they have permission to access.
|
||||
|
||||
ProxLB implements affinity and anti-affinity rules through a tag-based system within the Proxmox web interface. Each guest (virtual machine or container) can be assigned specific tags, which then dictate its placement behavior. This method maintains a streamlined and secure approach to managing VM relationships while preserving Proxmox’s inherent permission model.
|
||||
|
||||
### Affinity Rules
|
||||
<img align="left" src="https://cdn.gyptazy.com/images/proxlb-affinity-rules.jpg"/> Affinity rules are used to group certain VMs together, ensuring that they run on the same host whenever possible. This can be beneficial for workloads requiring low-latency communication, such as clustered databases or application servers that frequently exchange data.
|
||||
|
||||
To define an affinity rule which keeps all guests assigned to this tag together on a node, users assign a tag with the prefix `plb_affinity_$TAG`:
|
||||
|
||||
#### Example for Screenshot
|
||||
```
|
||||
# KeyID: DEB76ADF7A0BAADB51792782FD6A7A70C11226AA
|
||||
# SHA256: 5e44fffa09c747886ee37cc6e9e7eaf37c6734443cc648eaf0a9241a89084383 KEY.gpg
|
||||
|
||||
wget -O /etc/apt/trusted.gpg.d/proxlb.asc https://repo.gyptazy.ch/repo/KEY.gpg
|
||||
plb_affinity_talos
|
||||
```
|
||||
|
||||
*Note: The defined repositories `repo.gyptazy.ch` and `repo.proxlb.de` are the same!*
|
||||
As a result, ProxLB will attempt to place all VMs with the `plb_affinity_talos` tag on the same host (see also the attached screenshot with the same node).
|
||||
|
||||
### Container Images (Docker/Podman)
|
||||
Container Images for Podman, Docker etc., can be found at:
|
||||
| Version | Image |
|
||||
|------|:------:|
|
||||
| latest | cr.gyptazy.ch/proxlb/proxlb:latest |
|
||||
| v1.0.2 | cr.gyptazy.ch/proxlb/proxlb:v1.0.2 |
|
||||
| v1.0.0 | cr.gyptazy.ch/proxlb/proxlb:v1.0.0 |
|
||||
| v0.9.9 | cr.gyptazy.ch/proxlb/proxlb:v0.9.9 |
|
||||
### Anti-Affinity Rules
|
||||
<img align="left" src="https://cdn.gyptazy.com/images/proxlb-anti-affinity-rules.jpg"/> Conversely, anti-affinity rules ensure that designated VMs do not run on the same physical host. This is particularly useful for high-availability setups, where redundancy is crucial. Ensuring that critical services are distributed across multiple hosts reduces the risk of a single point of failure.
|
||||
|
||||
To define an anti-affinity rule that ensures to not move systems within this group to the same node, users assign a tag with the prefix:
|
||||
|
||||
#### Example for Screenshot
|
||||
```
|
||||
plb_anti_affinity_ntp
|
||||
```
|
||||
|
||||
As a result, ProxLB will try to place the VMs with the `plb_anti_affinity_ntp` tag on different hosts (see also the attached screenshot with the different nodes).
|
||||
|
||||
**Note:** While this ensures that ProxLB tries to distribute these VMs across different physical hosts within the Proxmox cluster, this may not always work. If you have more guests attached to the group than nodes in the cluster, we still need to run them somewhere. If this case occurs, the node with the most free resources will be selected.
|
||||
|
||||
### Ignore VMs
|
||||
<img align="left" src="https://cdn.gyptazy.com/images/proxlb-ignore-vm-movement.jpg"/> Guests, such as VMs or CTs, can also be completely ignored. This means, they won't be affected by any migration (even when (anti-)affinity rules are enforced). To ensure a proper resource evaluation, these guests are still collected and evaluated but simply skipped for balancing actions. Another thing is the implementation. While ProxLB might have a very restricted configuration file including the file permissions, this file is only read- and writeable by the Proxmox administrators. However, we might have users and groups who want to define on their own that their systems shouldn't be moved. Therefore, these users can simply set a specific tag on the guest object - just like the (anti-)affinity rules.
|
||||
|
||||
To define a guest to be ignored from the balancing, users assign a tag with the prefix `plb_ignore_$TAG`:
|
||||
|
||||
#### Example for Screenshot
|
||||
```
|
||||
plb_ignore_dev
|
||||
```
|
||||
|
||||
As a result, ProxLB will not migrate this guest with the `plb_ignore_dev` tag to any other node.
|
||||
|
||||
**Note:** Ignored guests are really ignored. Even by enforcing affinity rules this guest will be ignored.
|
||||
|
||||
### Pin VMs to Specific Hypervisor Nodes
|
||||
<img align="left" src="https://cdn.gyptazy.com/images/proxlb-tag-node-pinning.jpg"/> Guests, such as VMs or CTs, can also be pinned to specific nodes in the cluster. This might be useful when running applications with some special licensing requirements that are only fulfilled on certain nodes. It might also be interesting when some physical hardware is attached to a node that is not available in general within the cluster.
|
||||
|
||||
To pin a guest to a specific cluster node, users assign a tag with the prefix `plb_pin_$nodename` to the desired guest:
|
||||
|
||||
#### Example for Screenshot
|
||||
```
|
||||
plb_pin_node03
|
||||
```
|
||||
|
||||
As a result, ProxLB will pin the guest `dev-vm01` to the node `node03`.
|
||||
|
||||
**Note:** The given node names from the tag are validated. This means, ProxLB validates whether the given node name is really part of the cluster. In case of a wrongly defined or unavailable node name it continues to use the regular processes to make sure the guest keeps running.
|
||||
|
||||
## Maintenance
|
||||
<img src="https://cdn.gyptazy.com/images/proxlb-rebalancing-demo.gif"/>
|
||||
|
||||
The `maintenance_nodes` option allows operators to designate one or more Proxmox nodes for maintenance mode. When a node is set to maintenance, no new guest workloads will be assigned to it, and all existing workloads will be migrated to other available nodes within the cluster. This process ensures that (anti)-affinity rules and resource availability are respected, preventing disruptions while maintaining optimal performance across the infrastructure.
|
||||
|
||||
### Adding / Removing Nodes from Maintenance
|
||||
Within the section `proxmox_cluster` you can define the key `maintenance_nodes` as a list object. Simply add/remove one or more nodes with their equal name in the cluster and restart the daemon.
|
||||
```
|
||||
proxmox_cluster:
|
||||
maintenance_nodes: ['virt66.example.com']
|
||||
```
|
||||
Afterwards, all guest objects will be moved to other nodes in the cluster by ensuring the best balancing.
|
||||
|
||||
## Misc
|
||||
### Bugs
|
||||
@@ -314,14 +400,22 @@ Bugs can be reported via the GitHub issue tracker [here](https://github.com/gypt
|
||||
### Contributing
|
||||
Feel free to add further documentation, to adjust already existing one or to contribute with code. Please take care about the style guide and naming conventions. You can find more in our [CONTRIBUTING.md](https://github.com/gyptazy/ProxLB/blob/main/CONTRIBUTING.md) file.
|
||||
|
||||
### Documentation
|
||||
You can also find additional and more detailed documentation within the [docs/](https://github.com/gyptazy/ProxLB/tree/main/docs) directory.
|
||||
|
||||
### Support
|
||||
If you need assistance or have any questions, we offer support through our dedicated [chat room](https://matrix.to/#/#proxlb:gyptazy.ch) in Matrix and on Reddit. Join our community for real-time help, advice, and discussions. Connect with us in our dedicated chat room for immediate support and live interaction with other users and developers. You can also visit our [Reddit community](https://www.reddit.com/r/Proxmox/comments/1e78ap3/introducing_proxlb_rebalance_your_vm_workloads/) to post your queries, share your experiences, and get support from fellow community members and moderators. You may also just open directly an issue [here](https://github.com/gyptazy/ProxLB/issues) on GitHub. We are here to help and ensure you have the best experience possible.
|
||||
If you need assistance or have any questions, we offer support through our dedicated [chat room](https://matrix.to/#/#proxlb:gyptazy.com) in Matrix or [Discord](https://discord.gg/JemGu7WbfQ). Join our community for real-time help, advice, and discussions. The Matrix and Discord rooms are bridged to ensure that the communication is not split - so simply feel free to join whichever fits you best!
|
||||
|
||||
Connect with us in our dedicated chat room for immediate support and live interaction with other users and developers. You can also visit our [GitHub Community](https://github.com/gyptazy/ProxLB/discussions/) to post your queries, share your experiences, and get support from fellow community members and moderators. You may also just open directly an issue [here](https://github.com/gyptazy/ProxLB/issues) on GitHub.
|
||||
|
||||
| Support Channel | Link |
|
||||
|------|:------:|
|
||||
| Matrix | [#proxlb:gyptazy.ch](https://matrix.to/#/#proxlb:gyptazy.ch) |
|
||||
| Reddit | [Reddit community](https://www.reddit.com/r/Proxmox/comments/1e78ap3/introducing_proxlb_rebalance_your_vm_workloads/) |
|
||||
| Matrix | [#proxlb:gyptazy.com](https://matrix.to/#/#proxlb:gyptazy.com) |
|
||||
| Discord | [Discord](https://discord.gg/JemGu7WbfQ) |
|
||||
| GitHub Community | [GitHub Community](https://github.com/gyptazy/ProxLB/discussions/)
|
||||
| GitHub | [ProxLB GitHub](https://github.com/gyptazy/ProxLB/issues) |
|
||||
|
||||
**Note:** Please always keep in mind that this is a one-man show project without any further help. This includes coding, testing, packaging and all the infrastructure around it to keep this project up and running.
|
||||
|
||||
### Author(s)
|
||||
* Florian Paul Azim Hoberg @gyptazy (https://gyptazy.ch)
|
||||
* Florian Paul Azim Hoberg @gyptazy (https://gyptazy.com)
|
||||
|
||||
36
config/proxlb_example.yaml
Normal file
36
config/proxlb_example.yaml
Normal file
@@ -0,0 +1,36 @@
|
||||
proxmox_api:
|
||||
hosts: ['virt01.example.com', '10.10.10.10', 'fe01::bad:code::cafe']
|
||||
user: root@pam
|
||||
pass: crazyPassw0rd!
|
||||
# API Token method
|
||||
# token_id: proxlb
|
||||
# token_secret: 430e308f-1337-1337-beef-1337beefcafe
|
||||
ssl_verification: True
|
||||
timeout: 10
|
||||
# API Connection retries
|
||||
# retries: 1
|
||||
# wait_time: 1
|
||||
|
||||
proxmox_cluster:
|
||||
maintenance_nodes: ['virt66.example.com']
|
||||
ignore_nodes: []
|
||||
overprovisioning: True
|
||||
|
||||
balancing:
|
||||
enable: True
|
||||
enforce_affinity: False
|
||||
parallel: False
|
||||
live: True
|
||||
with_local_disks: True
|
||||
balance_types: ['vm', 'ct']
|
||||
max_job_validation: 1800
|
||||
balanciness: 5
|
||||
method: memory
|
||||
mode: used
|
||||
|
||||
service:
|
||||
daemon: True
|
||||
schedule:
|
||||
interval: 12
|
||||
format: hours
|
||||
log_level: INFO
|
||||
30
debian/changelog
vendored
Normal file
30
debian/changelog
vendored
Normal file
@@ -0,0 +1,30 @@
|
||||
proxlb (1.1.2) stable; urgency=medium
|
||||
|
||||
* Add a configurable retry mechanism when connecting to the Proxmox API. (Closes: #157)
|
||||
* Add 1-to-1 relationships between guest and hypervisor node to pin a guest to a node. (Closes: #218)
|
||||
* Force type cast cpu count of guests to int for some corner cases where a str got returned. (Closes: #222)
|
||||
* Fix systemd unit file to run after network on non PVE nodes. (Closes #137)
|
||||
|
||||
-- Florian Paul Azim Hoberg <gyptazy@gyptazy.com> Mon, 13 May 2025 18:12:04 +0000
|
||||
|
||||
proxlb (1.1.1) stable; urgency=medium
|
||||
|
||||
* Fix tag evaluation for VMs being ignored for further balancing. (Closes: #163)
|
||||
* Improve logging verbosity of messages that had a wrong severity. (Closes: #165)
|
||||
* Providing the API upstream error message when migration fails in debug mode (Closes: #205)
|
||||
* Change the default behaviour of the daemon mode to active. (Closes: #176)
|
||||
* Change the default balancing mode to used instead of assigned. (Closes: #180)
|
||||
* Set cpu_used to the cpu usage, which is a percent, times the total number of cores to get a number where guest cpu_used can be added to nodes cpu_used and be meaningful. (Closes: #195)
|
||||
* Honor the value when balancing should not be performed and stop balancing. (Closes: #174)
|
||||
* Allow the use of minutes instead of hours and only accept hours or minutes in the format. (Closes: #187)
|
||||
* Remove hard coded memory usage from lowest usage node and use method and mode specified in configuration instead. (Closes: #197)
|
||||
* Fix the guest type relationship in the logs when a migration job failed. (Closes: #204)
|
||||
* Requery a guest if that running guest reports 0 cpu usage. (Closes: #200)
|
||||
|
||||
-- Florian Paul Azim Hoberg <gyptazy@gyptazy.com> Sat, 20 Apr 2025 20:55:02 +0000
|
||||
|
||||
proxlb (1.1.0) stable; urgency=medium
|
||||
|
||||
* Refactored code base of ProxLB. (Closes: #114)
|
||||
|
||||
-- Florian Paul Azim Hoberg <gyptazy@gyptazy.com> Mon, 17 Mar 2025 18:55:02 +0000
|
||||
12
debian/control
vendored
Normal file
12
debian/control
vendored
Normal file
@@ -0,0 +1,12 @@
|
||||
Source: proxlb
|
||||
Maintainer: Florian Paul Azim Hoberg <gyptazy@gyptazy.com>
|
||||
Section: admin
|
||||
Priority: optional
|
||||
Standards-Version: 4.5.0
|
||||
Build-Depends: debhelper-compat (= 13), dh-python, python3-all, python3-setuptools
|
||||
|
||||
Package: proxlb
|
||||
Architecture: all
|
||||
Depends: ${python3:Depends}, ${misc:Depends}, python3-requests, python3-urllib3, python3-proxmoxer, python3-yaml
|
||||
Description: A DRS alike Load Balancer for Proxmox Clusters
|
||||
An advanced DRS alike loadbalancer for Proxmox clusters that also supports maintenance modes and affinity/anti-affinity rules.
|
||||
2
debian/install
vendored
Normal file
2
debian/install
vendored
Normal file
@@ -0,0 +1,2 @@
|
||||
proxlb /usr/lib/python3/dist-packages/
|
||||
service/proxlb.service /lib/systemd/system/
|
||||
16
debian/postinst
vendored
Executable file
16
debian/postinst
vendored
Executable file
@@ -0,0 +1,16 @@
|
||||
#!/bin/bash
|
||||
set -e
|
||||
|
||||
#DEBHELPER#
|
||||
if [ "$1" = "configure" ]; then
|
||||
systemctl enable proxlb.service
|
||||
systemctl restart proxlb.service || true
|
||||
|
||||
# Create the 'plb' user if it does not exist
|
||||
if ! id "plb" &>/dev/null; then
|
||||
useradd --system --home /var/lib/proxlb --create-home --shell /usr/sbin/nologin --group nogroup plb
|
||||
echo "User 'plb' created."
|
||||
else
|
||||
echo "User 'plb' already exists, skipping creation."
|
||||
fi
|
||||
fi
|
||||
16
debian/prerm
vendored
Executable file
16
debian/prerm
vendored
Executable file
@@ -0,0 +1,16 @@
|
||||
#!/bin/bash
|
||||
set -e
|
||||
|
||||
#DEBHELPER#
|
||||
if [ "$1" = "remove" ]; then
|
||||
systemctl stop proxlb.service || true
|
||||
systemctl disable proxlb.service || true
|
||||
|
||||
# Remove the 'plb' user if it exists
|
||||
if id "plb" &>/dev/null; then
|
||||
userdel --remove plb
|
||||
echo "User 'plb' removed."
|
||||
else
|
||||
echo "User 'plb' does not exist, skipping removal."
|
||||
fi
|
||||
fi
|
||||
4
debian/rules
vendored
Normal file
4
debian/rules
vendored
Normal file
@@ -0,0 +1,4 @@
|
||||
#!/usr/bin/make -f
|
||||
%:
|
||||
dh $@ --with python3 --buildsystem=pybuild
|
||||
|
||||
1
debian/source/format
vendored
Normal file
1
debian/source/format
vendored
Normal file
@@ -0,0 +1 @@
|
||||
3.0 (native)
|
||||
@@ -1,32 +0,0 @@
|
||||
# Installation
|
||||
|
||||
## Packages
|
||||
The easiest way to get started is by using the ready-to-use packages that I provide on my CDN and to run it on a Linux Debian based system. This can also be one of the Proxmox nodes itself.
|
||||
|
||||
```
|
||||
wget https://cdn.gyptazy.ch/files/amd64/debian/proxlb/proxlb_0.9.9_amd64.deb
|
||||
dpkg -i proxlb_0.9.9_amd64.deb
|
||||
# Adjust your config
|
||||
vi /etc/proxlb/proxlb.conf
|
||||
systemctl restart proxlb
|
||||
systemctl status proxlb
|
||||
```
|
||||
|
||||
## Container (Docker/Podman)
|
||||
Creating a container image of ProxLB is straightforward using the provided Dockerfile. The Dockerfile simplifies the process by automating the setup and configuration required to get ProxLB running in a container. Simply follow the steps in the Dockerfile to build the image, ensuring all dependencies and configurations are correctly applied. For those looking for an even quicker setup, a ready-to-use ProxLB container image is also available, eliminating the need for manual building and allowing for immediate deployment.
|
||||
|
||||
```bash
|
||||
git clone https://github.com/gyptazy/ProxLB.git
|
||||
cd ProxLB
|
||||
docker build -t proxlb .
|
||||
```
|
||||
|
||||
Afterwards simply adjust the config file to your needs:
|
||||
```
|
||||
vi /etc/proxlb/proxlb.conf
|
||||
```
|
||||
|
||||
Finally, start the created container.
|
||||
```bash
|
||||
docker run -it --rm -v $(pwd)/proxlb.conf:/etc/proxlb/proxlb.conf proxlb
|
||||
```
|
||||
65
docs/01_requirements.md
Normal file
65
docs/01_requirements.md
Normal file
@@ -0,0 +1,65 @@
|
||||
# Table of Contents
|
||||
|
||||
- [Requirements](#requirements)
|
||||
- [Where To Run?](#where-to-run)
|
||||
|
||||
## Requirements
|
||||
ProxLB is a sophisticated load balancer designed to enhance the management and distribution of workloads within a Proxmox cluster. By fully utilizing the Proxmox API, ProxLB eliminates the need for additional SSH access, streamlining cluster management while maintaining robust security. This chapter outlines the general requirements necessary to deploy and operate ProxLB effectively.
|
||||
|
||||
### Proxmox Cluster Requirements
|
||||
To use ProxLB, you must have an existing Proxmox cluster consisting of at least two nodes. While traditional load balancers often struggle to manage minimal node configurations, ProxLB is optimized to provide efficient load distribution even in a two-node environment. The more nodes present in the cluster, the better ProxLB can optimize resource usage and manage workloads.
|
||||
|
||||
### ProxLB Package Requirements
|
||||
In addition to the previously mentioned requirements, ProxLB also requires you to meet the following ones:
|
||||
* Python3.x
|
||||
* proxmoxer
|
||||
* requests
|
||||
* urllib3
|
||||
* pyyaml
|
||||
|
||||
### Seamless API Integration
|
||||
ProxLB relies exclusively on the Proxmox API for all management tasks. This eliminates the need for direct SSH access, ensuring a cleaner and more secure interaction with the cluster. The API integration allows ProxLB to:
|
||||
|
||||
- Monitor cluster health and node resource utilization
|
||||
- Migrate virtual machines (VMs) and containers as needed
|
||||
- Manage storage utilization and distribution
|
||||
- Implement load balancing policies
|
||||
|
||||
### Authentication and Security Standards
|
||||
ProxLB fully supports Proxmox’s integrated user management system, providing robust authentication and access control. Key features include:
|
||||
|
||||
- **Multi-Factor Authentication (MFA):** Enhances security by requiring multiple verification methods.
|
||||
- **API Key Support:** ProxLB can utilize API keys for authentication instead of traditional username/password combinations, minimizing exposure to credentials.
|
||||
- **Role-Based Access Control (RBAC):** Ensures administrators have fine-grained control over user permissions.
|
||||
|
||||
### Flexible Storage Support
|
||||
ProxLB offers versatile storage management options, supporting both local and shared storage types. It efficiently balances storage workloads across the cluster using the following storage systems:
|
||||
|
||||
- **Local Storage:** Direct-attached storage on each node.
|
||||
- **Shared Storage:** Includes options like iSCSI, NVMeOF, and NFS for centralized storage solutions.
|
||||
- **Ceph:** Integrated support for Ceph distributed storage, providing high availability and fault tolerance.
|
||||
|
||||
### Network Infrastructure Requirements
|
||||
For optimal performance, ProxLB requires a reliable and high-speed network connection between the nodes in the cluster. Ensure that the network infrastructure meets the following criteria:
|
||||
|
||||
- **Low Latency:** Essential for real-time load balancing and VM migration.
|
||||
- **Sufficient Bandwidth:** Adequate to handle storage access, data replication, and migration traffic.
|
||||
- **Redundant Network Paths:** Recommended for increased fault tolerance and uptime.
|
||||
|
||||
### System Resource Allocation
|
||||
ProxLB itself requires minimal system resources to operate. However, for managing larger clusters or high workloads, ensure the node running ProxLB has adequate resources available:
|
||||
|
||||
- **CPU:** A modern multi-core processor.
|
||||
- **Memory:** At least 2 GB of RAM.
|
||||
- **Storage:** Minimal disk space for configuration files and logs.
|
||||
|
||||
|
||||
## Where To Run?
|
||||
ProxLB can run on pretty much anything and only requires network connectivity to any of the Proxmox hosts' APIs (usually on tcp/8006).
|
||||
|
||||
Therefore, you can simply run ProxLB on:
|
||||
* Bare-metal Systems
|
||||
* VMs (even inside the Proxmox cluster)
|
||||
* Docker/Podman Container
|
||||
* LXC Container
|
||||
* On a Proxmox node
|
||||
@@ -1,26 +0,0 @@
|
||||
# Configuration
|
||||
|
||||
## Balancing
|
||||
### By Used Memory of VMs
|
||||
By continuously monitoring the current resource usage of VMs, ProxLB intelligently reallocates workloads to prevent any single node from becoming overloaded. This approach ensures that resources are balanced efficiently, providing consistent and optimal performance across the entire cluster at all times. To activate this balancing mode, simply activate the following option in your ProxLB configuration:
|
||||
```
|
||||
mode: used
|
||||
```
|
||||
Afterwards, restart the service (if running in daemon mode) to activate this rebalancing mode.
|
||||
|
||||
### By Assigned Memory of VMs
|
||||
By ensuring that resources are always available for each VM, ProxLB prevents over-provisioning and maintains a balanced load across all nodes. This guarantees that users have consistent access to the resources they need. However, if the total assigned resources exceed the combined capacity of the cluster, ProxLB will issue a warning, indicating potential over-provisioning despite its best efforts to balance the load. To activate this balancing mode, simply activate the following option in your ProxLB configuration:
|
||||
```
|
||||
mode: assigned
|
||||
```
|
||||
Afterwards, restart the service (if running in daemon mode) to activate this rebalancing mode.
|
||||
|
||||
## Grouping
|
||||
### Include (Stay Together)
|
||||
<img align="left" src="https://cdn.gyptazy.ch/images/plb-rebalancing-include-balance-group.jpg"/> Access the Proxmox Web UI by opening your web browser and navigating to your Proxmox VE web interface, then log in with your credentials. Navigate to the VM you want to tag by selecting it from the left-hand navigation panel. Click on the "Options" tab to view the VM's options, then select "Edit" or "Add" (depending on whether you are editing an existing tag or adding a new one). In the tag field, enter plb_include_ followed by your unique identifier, for example, plb_include_group1. Save the changes to apply the tag to the VM. Repeat these steps for each VM that should be included in the group.
|
||||
|
||||
### Exclude (Stay Separate)
|
||||
<img align="left" src="https://cdn.gyptazy.ch/images/plb-rebalancing-exclude-balance-group.jpg"/> Access the Proxmox Web UI by opening your web browser and navigating to your Proxmox VE web interface, then log in with your credentials. Navigate to the VM you want to tag by selecting it from the left-hand navigation panel. Click on the "Options" tab to view the VM's options, then select "Edit" or "Add" (depending on whether you are editing an existing tag or adding a new one). In the tag field, enter plb_exclude_ followed by your unique identifier, for example, plb_exclude_critical. Save the changes to apply the tag to the VM. Repeat these steps for each VM that should be excluded from being on the same node.
|
||||
|
||||
### Ignore VMs (tag style)
|
||||
<img align="left" src="https://cdn.gyptazy.ch/images/plb-rebalancing-ignore-vm.jpg"/> In Proxmox, you can ensure that certain VMs are ignored during the rebalancing process by setting a specific tag within the Proxmox Web UI, rather than solely relying on configurations in the ProxLB config file. This can be achieved by adding the tag 'plb_ignore_vm' to the VM. Once this tag is applied, the VM will be excluded from any further rebalancing operations, simplifying the management process.
|
||||
164
docs/02_installation.md
Normal file
164
docs/02_installation.md
Normal file
@@ -0,0 +1,164 @@
|
||||
# Table of Contents
|
||||
|
||||
- [Installation](#installation)
|
||||
- [Requirements / Dependencies](#requirements--dependencies)
|
||||
- [Debian Package](#debian-package)
|
||||
- [Quick-Start](#quick-start)
|
||||
- [Details](#details)
|
||||
- [Debian Packages (.deb files)](#debian-packages-deb-files)
|
||||
- [RedHat Package](#redhat-package)
|
||||
- [Container Images / Docker](#container-images--docker)
|
||||
- [Overview of Images](#overview-of-images)
|
||||
- [Source](#source)
|
||||
- [Traditional System](#traditional-system)
|
||||
- [Container Image](#container-image)
|
||||
- [Upgrading](#upgrading)
|
||||
- [Upgrading from < 1.1.0](#upgrading-from--110)
|
||||
- [Upgrading from >= 1.1.0](#upgrading-from--110)
|
||||
|
||||
|
||||
## Installation
|
||||
### Requirements / Dependencies
|
||||
* Python3.x
|
||||
* proxmoxer
|
||||
* requests
|
||||
* urllib3
|
||||
* pyyaml
|
||||
|
||||
The dependencies can simply be installed with `pip` by running the following command:
|
||||
```
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
*Note: Distribution packages, such as the provided `.deb` package, will automatically resolve and install all required dependencies by using already packaged versions from the distribution's repository. By using the Docker (container) image or Debian packages, you do not need to take any care of the requirements listed here.*
|
||||
|
||||
### Debian Package
|
||||
ProxLB is a powerful and flexible load balancer designed to work across various architectures, including `amd64`, `arm64`, `rv64` and many other ones that support Python. It runs independently of the underlying hardware, making it a versatile choice for different environments. This chapter covers the step-by-step process to install ProxLB on Debian-based systems, including Debian clones like Ubuntu.
|
||||
|
||||
#### Quick-Start
|
||||
You can simply use this snippet to install the repository and to install ProxLB on your system.
|
||||
|
||||
```bash
|
||||
echo "deb https://repo.gyptazy.com/stable /" > /etc/apt/sources.list.d/proxlb.list
|
||||
wget -O /etc/apt/trusted.gpg.d/proxlb.asc https://repo.gyptazy.com/repository.gpg
|
||||
apt-get update && apt-get -y install proxlb
|
||||
cp /etc/proxlb/proxlb_example.yaml /etc/proxlb/proxlb.yaml
|
||||
# Adjust the config to your needs
|
||||
vi /etc/proxlb/proxlb.yaml
|
||||
systemctl start proxlb
|
||||
```
|
||||
|
||||
Afterwards, ProxLB is running in the background and balances your cluster by your defined balancing method (default: memory).
|
||||
|
||||
#### Details
|
||||
ProxLB provides two different repositories:
|
||||
* https://repo.gyptazy.com/stable (only stable release)
|
||||
* https://repo.gyptazy.com/testing (bleeding edge - not recommended)
|
||||
|
||||
The repository is signed and the GPG key can be found at:
|
||||
* https://repo.gyptazy.com/repository.gpg
|
||||
|
||||
You can also simply import it by running:
|
||||
|
||||
```
|
||||
# KeyID: 17169F23F9F71A14AD49EDADDB51D3EB01824F4C
|
||||
# UID: gyptazy Solutions Repository <contact@gyptazy.com>
|
||||
# SHA256: 52c267e6f4ec799d40cdbdb29fa518533ac7942dab557fa4c217a76f90d6b0f3 repository.gpg
|
||||
|
||||
wget -O /etc/apt/trusted.gpg.d/proxlb.asc https://repo.gyptazy.com/repository.gpg
|
||||
```
|
||||
|
||||
*Note: The defined repositories `repo.gyptazy.com` and `repo.proxlb.de` are the same!*
|
||||
|
||||
#### Debian Packages (.deb files)
|
||||
If you do not want to use the repository you can also find the debian packages as a .deb file on gyptazy's CDN at:
|
||||
* https://cdn.gyptazy.com/files/os/debian/proxlb/
|
||||
|
||||
Afterwards, you can simply install the package by running:
|
||||
```bash
|
||||
dpkg -i proxlb_*.deb
|
||||
cp /etc/proxlb/proxlb_example.yaml /etc/proxlb/proxlb.yaml
|
||||
# Adjust the config to your needs
|
||||
vi /etc/proxlb/proxlb.yaml
|
||||
systemctl start proxlb
|
||||
```
|
||||
|
||||
### RedHat Package
|
||||
There's currently no official support for RedHat based systems. However, there's a dummy .rpm package for such systems in the pipeline which can be found here:
|
||||
* https://github.com/gyptazy/ProxLB/actions/workflows/20-pipeline-build-rpm-package.yml
|
||||
|
||||
|
||||
### Container Images / Docker
|
||||
Using the ProxLB container images is straightforward and only requires you to mount the config file.
|
||||
|
||||
```bash
|
||||
# Pull the image
|
||||
docker pull cr.gyptazy.com/proxlb/proxlb:latest
|
||||
# Download the config
|
||||
wget -O proxlb.yaml https://raw.githubusercontent.com/gyptazy/ProxLB/refs/heads/main/config/proxlb_example.yaml
|
||||
# Adjust the config to your needs
|
||||
vi proxlb.yaml
|
||||
# Start the ProxLB container image with the ProxLB config
|
||||
docker run -it --rm -v $(pwd)/proxlb.yaml:/etc/proxlb/proxlb.yaml proxlb
|
||||
```
|
||||
|
||||
*Note: ProxLB container images are officially only available at cr.proxlb.de and cr.gyptazy.com.*
|
||||
|
||||
#### Overview of Images
|
||||
| Version | Image |
|
||||
|------|:------:|
|
||||
| latest | cr.gyptazy.com/proxlb/proxlb:latest |
|
||||
| v1.1.0 | cr.gyptazy.com/proxlb/proxlb:v1.1.0 |
|
||||
| v1.0.6 | cr.gyptazy.com/proxlb/proxlb:v1.0.6 |
|
||||
| v1.0.5 | cr.gyptazy.com/proxlb/proxlb:v1.0.5 |
|
||||
| v1.0.4 | cr.gyptazy.com/proxlb/proxlb:v1.0.4 |
|
||||
| v1.0.3 | cr.gyptazy.com/proxlb/proxlb:v1.0.3 |
|
||||
| v1.0.2 | cr.gyptazy.com/proxlb/proxlb:v1.0.2 |
|
||||
| v1.0.0 | cr.gyptazy.com/proxlb/proxlb:v1.0.0 |
|
||||
| v0.9.9 | cr.gyptazy.com/proxlb/proxlb:v0.9.9 |
|
||||
|
||||
### Source
|
||||
ProxLB can also easily be used from the provided sources - for traditional systems but also as a Docker/Podman container image.
|
||||
|
||||
#### Traditional System
|
||||
Setting up and running ProxLB from the sources is simple and requires just a few commands. Ensure Python 3 and the Python dependencies are installed on your system, then run ProxLB using the following command:
|
||||
```bash
|
||||
git clone https://github.com/gyptazy/ProxLB.git
|
||||
cd ProxLB
|
||||
```
|
||||
|
||||
Afterwards simply adjust the config file to your needs:
|
||||
```bash
|
||||
vi config/proxlb.yaml
|
||||
```
|
||||
|
||||
Start ProxLB by Python3 on the system:
|
||||
```bash
|
||||
python3 proxlb/main.py -c config/proxlb.yaml
|
||||
```
|
||||
|
||||
#### Container Image
|
||||
Creating a container image of ProxLB is straightforward using the provided Dockerfile. The Dockerfile simplifies the process by automating the setup and configuration required to get ProxLB running in an Alpine container. Simply follow the steps in the Dockerfile to build the image, ensuring all dependencies and configurations are correctly applied. For those looking for an even quicker setup, a ready-to-use ProxLB container image is also available, eliminating the need for manual building and allowing for immediate deployment.
|
||||
|
||||
```bash
|
||||
git clone https://github.com/gyptazy/ProxLB.git
|
||||
cd ProxLB
|
||||
docker build -t proxlb .
|
||||
```
|
||||
|
||||
Afterwards simply adjust the config file to your needs:
|
||||
```bash
|
||||
vi config/proxlb.yaml
|
||||
```
|
||||
|
||||
Finally, start the created container.
|
||||
```bash
|
||||
docker run -it --rm -v $(pwd)/proxlb.yaml:/etc/proxlb/proxlb.yaml proxlb
|
||||
```
|
||||
|
||||
## Upgrading
|
||||
### Upgrading from < 1.1.0
|
||||
Upgrading ProxLB is not supported due to a fundamental redesign introduced in version 1.1.x. With this update, ProxLB transitioned from a monolithic application to a pure Python-style project, embracing a more modular and flexible architecture. This shift aimed to improve maintainability and extensibility while keeping up with modern development practices. Additionally, ProxLB moved away from traditional ini-style configuration files and adopted YAML for configuration management. This change simplifies configuration handling, reduces the need for extensive validation, and ensures better type casting, ultimately providing a more streamlined and user-friendly experience.
|
||||
|
||||
### Upgrading from >= 1.1.0
|
||||
Upgrading within the current stable versions, starting from 1.1.0, will be possible in all supported ways.
|
||||
@@ -1,77 +0,0 @@
|
||||
## FAQ
|
||||
|
||||
### Could not import all dependencies
|
||||
ProxLB requires the Python library `proxmoxer`. This can simply be installed by the most
|
||||
system repositories. If you encounter this error message you simply need to install it.
|
||||
|
||||
|
||||
```
|
||||
# systemctl status proxlb
|
||||
x proxlb.service - Proxmox Rebalancing Service
|
||||
Loaded: loaded (/etc/systemd/system/proxlb.service; static)
|
||||
Active: failed (Result: exit-code) since Sat 2024-07-06 10:25:16 UTC; 1s ago
|
||||
Duration: 239ms
|
||||
Process: 7285 ExecStart=/usr/bin/proxlb -c /etc/proxlb/proxlb.conf (code=exited, status=2)
|
||||
Main PID: 7285 (code=exited, status=2)
|
||||
CPU: 129ms
|
||||
|
||||
Jul 06 10:25:16 build01 systemd[1]: Started proxlb.service - ProxLB.
|
||||
Jul 06 10:25:16 build01 proxlb[7285]: proxlb: Error: [python-imports]: Could not import all dependencies. Please install "proxmoxer".
|
||||
```
|
||||
|
||||
Debian/Ubuntu: apt-get install python3-proxmoxer
|
||||
If the package is not provided by your systems repository, you can also install it by running `pip3 install proxmoxer`.
|
||||
|
||||
### How does it work?
|
||||
ProxLB is a load-balancing system designed to optimize the distribution of virtual machines (VMs) and containers (CTs) across a cluster. It works by first gathering resource usage metrics from all nodes in the cluster through the Proxmox API. This includes detailed resource metrics for each VM and CT on every node. ProxLB then evaluates the difference between the maximum and minimum resource usage of the nodes, referred to as "Balanciness." If this difference exceeds a predefined threshold (which is configurable), the system initiates the rebalancing process.
|
||||
|
||||
Before starting any migrations, ProxLB validates that rebalancing actions are necessary and beneficial. Depending on the selected balancing mode — such as CPU, memory, or disk — it creates a balancing matrix. This matrix sorts the VMs by their maximum used or assigned resources, identifying the VM with the highest usage. ProxLB then places this VM on the node with the most free resources in the selected balancing type. This process runs recursively until the operator-defined Balanciness is achieved. Balancing can be defined for the used or max. assigned resources of VMs/CTs.
|
||||
|
||||
### Logging
|
||||
ProxLB uses the `SystemdHandler` for logging. You can find all your logs in your systemd unit log or in the `journalctl`. In default, ProxLB only logs critical events. However, for further understanding of the balancing it might be useful to change this to `INFO` or `DEBUG` which can simply be done in the [proxlb.conf](https://github.com/gyptazy/ProxLB/blob/main/proxlb.conf#L14) file by changing the `log_verbosity` parameter.
|
||||
|
||||
Available logging values:
|
||||
| Verbosity | Description |
|
||||
|------|:------:|
|
||||
| DEBUG | This option logs everything and is needed for debugging the code. |
|
||||
| INFO | This option provides insights into what happens behind the scenes: what has been done, why, and with which values. |
|
||||
| WARNING | This option provides only warning messages, which might be a problem in general but not for the application itself. |
|
||||
| CRITICAL | This option logs only critical events that prevent ProxLB from running. |
|
||||
|
||||
### Motivation
|
||||
As a developer managing a cluster of virtual machines for my projects, I often encountered the challenge of resource imbalance. Nodes within the cluster would become unevenly loaded, with some nodes being overburdened while others remained underutilized. This imbalance led to inefficiencies, performance bottlenecks, and increased operational costs. Frustrated by the lack of an adequate solution to address this issue, I decided to develop the ProxLB (PLB) to ensure better resource distribution across my clusters.
|
||||
|
||||
My primary motivation for creating PLB stemmed from my work on my BoxyBSD project, where I consistently faced the difficulty of maintaining balanced nodes while running various VM workloads but also on my personal clusters. The absence of an efficient rebalancing mechanism made it challenging to achieve optimal performance and stability. Recognizing the necessity for a tool that could gather and analyze resource metrics from both the cluster nodes and the running VMs, I embarked on developing ProxLB.
|
||||
|
||||
PLB meticulously collects detailed resource usage data from each node in a Proxmox cluster, including CPU load, memory usage, and local disk space utilization. It also gathers comprehensive statistics from all running VMs, providing a granular understanding of the workload distribution. With this data, PLB intelligently redistributes VMs based on memory usage, local disk usage, and CPU usage. This ensures that no single node is overburdened, storage resources are evenly distributed, and the computational load is balanced, enhancing overall cluster performance.
|
||||
|
||||
As an advocate of the open-source philosophy, I believe in the power of community and collaboration. By sharing solutions like PLB, I aim to contribute to the collective knowledge and tools available to developers facing similar challenges. Open source fosters innovation, transparency, and mutual support, enabling developers to build on each other's work and create better solutions together.
|
||||
|
||||
Developing PLB was driven by a desire to solve a real problem I faced in my projects. However, the spirit behind this effort was to provide a valuable resource to the community. By open-sourcing PLB, I hope to help other developers manage their clusters more efficiently, optimize their resource usage, and reduce operational costs. Sharing this solution aligns with the core principles of open source, where the goal is not only to solve individual problems but also to contribute to the broader ecosystem.
|
||||
|
||||
### Packages / Container Images
|
||||
Ready to use packages can be found at:
|
||||
* https://cdn.gyptazy.ch/files/amd64/debian/proxlb/
|
||||
* https://cdn.gyptazy.ch/files/amd64/ubuntu/proxlb/
|
||||
* https://cdn.gyptazy.ch/files/amd64/redhat/proxlb/
|
||||
* https://cdn.gyptazy.ch/files/amd64/freebsd/proxlb/
|
||||
|
||||
Container Images for Podman, Docker etc., can be found at:
|
||||
| Version | Image |
|
||||
|------|:------:|
|
||||
| latest | cr.gyptazy.ch/proxlb/proxlb:latest |
|
||||
|
||||
### Bugs
|
||||
Bugs can be reported via the GitHub issue tracker [here](https://github.com/gyptazy/ProxLB/issues). You may also report bugs via email or deliver PRs to fix them on your own. Therefore, you might also see the contributing chapter.
|
||||
|
||||
### Contributing
|
||||
Feel free to add further documentation, to adjust already existing one or to contribute with code. Please take care about the style guide and naming conventions. You can find more in our [CONTRIBUTING.md](https://github.com/gyptazy/ProxLB/blob/main/CONTRIBUTING.md) file.
|
||||
|
||||
### Support
|
||||
If you need assistance or have any questions, we offer support through our dedicated [chat room](https://matrix.to/#/#proxlb:gyptazy.ch) in Matrix and on Reddit. Join our community for real-time help, advice, and discussions. Connect with us in our dedicated chat room for immediate support and live interaction with other users and developers. You can also visit our [Reddit community](https://www.reddit.com/r/Proxmox/comments/1e78ap3/introducing_proxlb_rebalance_your_vm_workloads/) to post your queries, share your experiences, and get support from fellow community members and moderators. You may also just open directly an issue [here](https://github.com/gyptazy/ProxLB/issues) on GitHub. We are here to help and ensure you have the best experience possible.
|
||||
|
||||
| Support Channel | Link |
|
||||
|------|:------:|
|
||||
| Matrix | [#proxlb:gyptazy.ch](https://matrix.to/#/#proxlb:gyptazy.ch) |
|
||||
| Reddit | [Reddit community](https://www.reddit.com/r/Proxmox/comments/1e78ap3/introducing_proxlb_rebalance_your_vm_workloads/) |
|
||||
| GitHub | [ProxLB GitHub](https://github.com/gyptazy/ProxLB/issues) |
|
||||
210
docs/03_configuration.md
Normal file
210
docs/03_configuration.md
Normal file
@@ -0,0 +1,210 @@
|
||||
# Table of Contents
|
||||
|
||||
1. [Authentication / User Accounts / Permissions](#authentication--user-accounts--permissions)
|
||||
1. [Authentication](#authentication)
|
||||
2. [Creating a Dedicated User](#creating-a-dedicated-user)
|
||||
3. [Creating an API Token for a User](#creating-an-api-token-for-a-user)
|
||||
4. [Required Permissions for a User](#required-permissions-for-a-user)
|
||||
2. [Configuration](#configuration)
|
||||
1. [Affinity & Anti-Affinity Rules](#affinity--anti-affinity-rules)
|
||||
1. [Affinity Rules](#affinity-rules)
|
||||
2. [Anti-Affinity Rules](#anti-affinity-rules)
|
||||
3. [Affinity / Anti-Affinity Enforcing](#affinity--anti-affinity-enforcing)
|
||||
4. [Ignore VMs](#ignore-vms)
|
||||
5. [Pin VMs to Hypervisor Nodes](#pin-vms-to-hypervisor-nodes)
|
||||
2. [API Loadbalancing](#api-loadbalancing)
|
||||
3. [Ignore Host-Nodes or Guests](#ignore-host-nodes-or-guests)
|
||||
4. [IPv6 Support](#ipv6-support)
|
||||
5. [Logging / Log-Level](#logging--log-level)
|
||||
6. [Parallel Migrations](#parallel-migrations)
|
||||
7. [Run as a Systemd-Service](#run-as-a-systemd-service)
|
||||
8. [SSL Self-Signed Certificates](#ssl-self-signed-certificates)
|
||||
|
||||
## Authentication / User Accounts / Permissions
|
||||
### Authentication
|
||||
ProxLB supports the traditional username and password authentication method, which is familiar to many users. This method requires users to provide their credentials (username and password) to gain access to the Proxmox system. While this method is straightforward and easy to implement, it has several security limitations. Username and password combinations can be vulnerable to brute force attacks, where an attacker systematically attempts various combinations until the correct one is found. If a user's credentials are compromised through phishing, malware, or other means, the attacker can gain unauthorized access to the system. Additionally, traditional authentication does not provide granular control over permissions and access levels, potentially exposing sensitive operations to unauthorized users.
|
||||
|
||||
To enhance security, ProxLB supports API token authentication. API tokens are unique identifiers that are used to authenticate API requests. They offer several advantages over traditional username and password authentication. API tokens are more secure as they are typically long, random strings that are difficult to guess. They can be revoked and regenerated as needed, reducing the risk of unauthorized access. API tokens can be associated with specific user accounts that have only the required permissions, ensuring that users only have access to the resources and operations they need. Furthermore, API tokens can be used for automated scripts and applications, facilitating seamless integration with other systems and services.
|
||||
|
||||
When Multi-Factor Authentication (MFA) or Two-Factor Authentication (2FA) is enabled in the Proxmox cluster, the system enforces the use of API tokens for authentication. This is because traditional username and password authentication is not considered secure enough in conjunction with MFA/2FA. To ensure the highest level of security when using API tokens, follow these best practices: Use dedicated user accounts for API tokens, each with only the necessary permissions. This limits the potential impact of a compromised token. Ensure that API tokens are long, random, and unique. Avoid using easily guessable patterns or sequences. Periodically regenerate and replace API tokens to minimize the risk of long-term exposure. Store API tokens securely, using environment variables or secure vaults. Avoid hardcoding tokens in source code or configuration files. Regularly monitor and audit the usage of API tokens to detect any suspicious activity or unauthorized access.
|
||||
|
||||
### Creating a Dedicated User
|
||||
It is advisable to avoid using the default root@pam user for balancing tasks in ProxLB. Instead, creating a dedicated user account is recommended and can be done easily. You can create a new user through the GUI, API, or CLI. While the detailed roles required for balancing are outlined in the next chapter, you can also use the following CLI commands to create a user with the necessary roles to manage Virtual Machines (VMs) and Containers (CTs):
|
||||
|
||||
```
|
||||
pveum role add proxlb --privs Datastore.Audit,Sys.Audit,VM.Audit,VM.Migrate
|
||||
pveum user add proxlb@pve --password <password>
|
||||
pveum acl modify / --roles proxlb --users proxlb@pve
|
||||
```
|
||||
|
||||
*Note: The user management can also be done on the WebUI without invoking the CLI.*
|
||||
|
||||
### Creating an API Token for a User
|
||||
Create an API token for user proxlb@pve with token ID proxlb and deactivated privilege separation:
|
||||
```
|
||||
pveum user token add proxlb@pve proxlb --privsep 0
|
||||
```
|
||||
|
||||
Afterwards, you get the token secret returned. You can now add those entries to your ProxLB config. Make sure, that you also keep the `user` parameter, next to the new token parameters.
|
||||
|
||||
> [!IMPORTANT]
|
||||
> The parameter `pass` then needs to be **absent**! You should also take care about the privilege and authentication mechanism behind Proxmox. You might want or even might not want to use privilege separation and this is up to your personal needs and use case.
|
||||
|
||||
| Proxmox API | ProxLB Config | Example |
|
||||
|---|---|---|
|
||||
| User | [user](https://github.com/gyptazy/ProxLB/blob/main/config/proxlb_example.yaml#L3) | proxlb@pve |
|
||||
| Token ID | [token_id](https://github.com/gyptazy/ProxLB/blob/main/config/proxlb_example.yaml#L6) | proxlb |
|
||||
| Token Secret | [token_secret](https://github.com/gyptazy/ProxLB/blob/main/config/proxlb_example.yaml#L7) | 430e308f-1337-1337-beef-1337beefcafe |
|
||||
|
||||
*Note: The API token configuration can also be done on the WebUI without invoking the CLI.*
|
||||
|
||||
### Required Permissions for a User
|
||||
To ensure that ProxLB operates effectively and securely, it is essential to assign the appropriate permissions to the user accounts responsible for managing the load balancing tasks. The following permissions are the minimum required for a user to perform essential ProxLB operations:
|
||||
|
||||
* `Datastore.Audit`: Grants the ability to audit and view datastore information.
|
||||
* `Sys.Audit`: Allows the user to audit and view system information.
|
||||
* `VM.Audit`: Enables the user to audit and view virtual machine details.
|
||||
* `VM.Migrate`: Provides the permission to migrate virtual machines.
|
||||
|
||||
Assigning these permissions ensures that the user can access necessary information and perform critical operations related to load balancing without granting excessive privileges. This practice helps maintain a secure and efficient ProxLB environment.
|
||||
|
||||
## Configuration
|
||||
|
||||
### Affinity & Anti-Affinity Rules
|
||||
ProxLB provides an advanced mechanism to define affinity and anti-affinity rules, enabling precise control over virtual machine (VM) placement. These rules help manage resource distribution, improve high availability configurations, and optimize performance within a Proxmox Virtual Environment (PVE) cluster. By leveraging Proxmox’s integrated access management, ProxLB ensures that users can only define and manage rules for guests they have permission to access.
|
||||
|
||||
ProxLB implements affinity and anti-affinity rules through a tag-based system within the Proxmox web interface. Each guest (virtual machine or container) can be assigned specific tags, which then dictate its placement behavior. This method maintains a streamlined and secure approach to managing VM relationships while preserving Proxmox’s inherent permission model.
|
||||
|
||||
#### Affinity Rules
|
||||
<img align="left" src="https://cdn.gyptazy.com/images/proxlb-affinity-rules.jpg"/> Affinity rules are used to group certain VMs together, ensuring that they run on the same host whenever possible. This can be beneficial for workloads requiring low-latency communication, such as clustered databases or application servers that frequently exchange data.
|
||||
|
||||
To define an affinity rule which keeps all guests assigned to this tag together on a node, users assign a tag with the prefix `plb_affinity_$TAG`:
|
||||
|
||||
##### Example for Screenshot
|
||||
```
|
||||
plb_affinity_talos
|
||||
```
|
||||
|
||||
As a result, ProxLB will attempt to place all VMs with the `plb_affinity_talos` tag on the same host (see also the attached screenshot with the same node).
|
||||
|
||||
#### Anti-Affinity Rules
|
||||
<img align="left" src="https://cdn.gyptazy.com/images/proxlb-anti-affinity-rules.jpg"/> Conversely, anti-affinity rules ensure that designated VMs do not run on the same physical host. This is particularly useful for high-availability setups, where redundancy is crucial. Ensuring that critical services are distributed across multiple hosts reduces the risk of a single point of failure.
|
||||
|
||||
To define an anti-affinity rule that ensures to not move systems within this group to the same node, users assign a tag with the prefix:
|
||||
|
||||
##### Example for Screenshot
|
||||
```
|
||||
plb_anti_affinity_ntp
|
||||
```
|
||||
|
||||
As a result, ProxLB will try to place the VMs with the `plb_anti_affinity_ntp` tag on different hosts (see also the attached screenshot with the different nodes).
|
||||
|
||||
**Note:** While this ensures that ProxLB tries to distribute these VMs across different physical hosts within the Proxmox cluster, this may not always work. If more guests are attached to the group than there are nodes in the cluster, the remaining guests still need to run somewhere. If this case occurs, the node with the most free resources will be selected.
|
||||
|
||||
### Affinity / Anti-Affinity Enforcing
|
||||
When a cluster is already balanced and does not require further adjustments, enabling the enforce_affinity parameter ensures that affinity and anti-affinity rules are still respected. This parameter prioritizes the placement of guest objects according to these rules, even if it leads to slight resource imbalances or increased migration overhead. Regularly reviewing and updating these rules, along with monitoring cluster performance, helps maintain optimal performance and reliability. By carefully managing these aspects, you can create a cluster environment that meets your specific needs and maintains a good balance of resources.
|
||||
|
||||
```
|
||||
balancing:
|
||||
enforce_affinity: True
|
||||
```
|
||||
|
||||
*Note: This may have impacts to the cluster. Depending on the created group matrix, the result may also be an unbalanced cluster.*
|
||||
|
||||
### Ignore VMs / CTs
|
||||
<img align="left" src="https://cdn.gyptazy.com/images/proxlb-ignore-vm-movement.jpg"/> Guests, such as VMs or CTs, can also be completely ignored. This means, they won't be affected by any migration (even when (anti-)affinity rules are enforced). To ensure a proper resource evaluation, these guests are still collected and evaluated but simply skipped for balancing actions. Another thing is the implementation. While ProxLB might have a very restricted configuration file including the file permissions, this file is only read- and writeable by the Proxmox administrators. However, we might have users and groups who want to define on their own that their systems shouldn't be moved. Therefore, these users can simply set a specific tag on the guest object - just like the (anti-)affinity rules.
|
||||
|
||||
To define a guest to be ignored from the balancing, users assign a tag with the prefix `plb_ignore_$TAG`:
|
||||
|
||||
#### Example for Screenshot
|
||||
```
|
||||
plb_ignore_dev
|
||||
```
|
||||
|
||||
As a result, ProxLB will not migrate this guest with the `plb_ignore_dev` tag to any other node.
|
||||
|
||||
**Note:** Ignored guests are really ignored. Even by enforcing affinity rules this guest will be ignored.
|
||||
|
||||
### Pin VMs to Specific Hypervisor Nodes
|
||||
<img align="left" src="https://cdn.gyptazy.com/images/proxlb-tag-node-pinning.jpg"/> Guests, such as VMs or CTs, can also be pinned to specific nodes in the cluster. This might be useful when running applications with some special licensing requirements that are only fulfilled on certain nodes. It might also be interesting when some physical hardware is attached to a node that is not available in general within the cluster.
|
||||
|
||||
To pin a guest to a specific cluster node, users assign a tag with the prefix `plb_pin_$nodename` to the desired guest:
|
||||
|
||||
#### Example for Screenshot
|
||||
```
|
||||
plb_pin_node03
|
||||
```
|
||||
|
||||
As a result, ProxLB will pin the guest `dev-vm01` to the node `virt03`.
|
||||
|
||||
**Note:** The given node names from the tag are validated. This means, ProxLB validates whether the given node name is really part of the cluster. In case of a wrongly defined or unavailable node name, it continues to use the regular processes to make sure the guest keeps running.
|
||||
|
||||
### API Loadbalancing
|
||||
ProxLB supports API loadbalancing, where one or more host objects can be defined as a list. This ensures that you can operate ProxLB without further changes even when one or more nodes are offline or in maintenance. When defining multiple hosts, the first reachable one will be picked.
|
||||
|
||||
```
|
||||
proxmox_api:
|
||||
hosts: ['virt01.example.com', '10.10.10.10', 'fe01::bad:code::cafe']
|
||||
```
|
||||
|
||||
### Ignore Host-Nodes or Guests
|
||||
In managing a Proxmox environment, it's often necessary to exclude certain host nodes and guests from various operations. For host nodes, this exclusion can be achieved by specifying them in the ignore_nodes parameter within the proxmox_api chapter, effectively preventing any automated processes from interacting with these nodes. Guests, on the other hand, can be ignored by assigning them a specific tag that starts with or is equal to plb_ignore, ensuring they are omitted from any automated tasks or monitoring. By implementing these configurations, administrators can fine-tune their Proxmox management to focus only on relevant nodes and guests, optimizing operational efficiency and resource allocation.
|
||||
|
||||
```
|
||||
proxmox_cluster:
|
||||
ignore_nodes: ['node01', 'node02']
|
||||
```
|
||||
|
||||
### IPv6 Support
|
||||
Yes, ProxLB fully supports IPv6.
|
||||
|
||||
### Logging / Log-Level
|
||||
ProxLB supports systemd for seamless service management on Linux distributions. To enable this, create a proxLB.service file in /etc/systemd/system/ from `service/proxlb.service` within this repository.
|
||||
|
||||
On systems without systemd, such as FreeBSD and macOS, ProxLB runs with similar configurations but logs to stdout and stderr. The logging level and verbosity can be set in the `service` section of the configuration file:
|
||||
|
||||
```
|
||||
service:
|
||||
log_level: DEBUG
|
||||
```
|
||||
|
||||
ProxLB only supports the following log levels:
|
||||
* INFO
|
||||
* WARNING
|
||||
* CRITICAL
|
||||
* DEBUG
|
||||
|
||||
### Parallel Migrations
|
||||
By default, parallel migrations are deactivated. This means, that a guest object gets migrated and the migration job is being watched until the VM or CT got moved to a new node. However, this may take a lot of time and many environments are fast enough to handle the IO load for multiple guest objects. However, there are always corner cases and this depends on your setup. Parallel migrations can be enabled by setting `parallel` to `True` within the `balancing` chapter:
|
||||
|
||||
```
|
||||
balancing:
|
||||
parallel: False
|
||||
```
|
||||
|
||||
### Run as a Systemd-Service
|
||||
The proxlb systemd unit orchestrates the ProxLB application. ProxLB can be used either as a one-shot solution or run periodically, depending on the configuration specified in the daemon chapter of its configuration file.
|
||||
|
||||
```
|
||||
service:
|
||||
daemon: False
|
||||
schedule:
|
||||
interval: 12
|
||||
format: hours
|
||||
```
|
||||
|
||||
In this configuration:
|
||||
* `daemon`: False indicates that the ProxLB application is not running as a daemon and will execute as a one-shot solution.
|
||||
* `schedule`: 12 defines the interval for the schedule, specifying how often rebalancing should be done if running as a daemon.
|
||||
* `format`: Defines the given format of schedule where you can choose between `hours` or `minutes`.
|
||||
|
||||
### SSL Self-Signed Certificates
|
||||
If you are using self-signed SSL certificates or invalid certificates in general and do not want to deal with additional trust levels, you may also disable the SSL validation. This may mostly be helpful for dev- & test labs.
|
||||
|
||||
SSL certificate validation can be disabled in the `proxmox_api` section in the config file by setting:
|
||||
```
|
||||
proxmox_api:
|
||||
ssl_verification: False
|
||||
```
|
||||
|
||||
*Note: Disabling SSL certificate validation is not recommended.*
|
||||
24
docs/99-faq.md
Normal file
24
docs/99-faq.md
Normal file
@@ -0,0 +1,24 @@
|
||||
## Table of Contents
|
||||
|
||||
1. [GUI Integration](#gui-integration)
|
||||
- [How to install pve-proxmoxlb-service-ui package](https://github.com/gyptazy/ProxLB/issues/44)
|
||||
2. [Proxmox HA Integration](#proxmox-ha-integration)
|
||||
- [Host groups: Honour HA groups](https://github.com/gyptazy/ProxLB/issues/65)
|
||||
|
||||
### GUI Integration
|
||||
<img align="left" src="https://cdn.gyptazy.com/images/proxlb-GUI-integration.jpg"/> ProxLB can also be accessed through the Proxmox Web UI by installing the optional `pve-proxmoxlb-service-ui` package, which depends on the proxlb package. For full Web UI integration, this package must be installed on all nodes within the cluster. Once installed, a new menu item - `Rebalancing` - appears at the cluster level under the HA section, offering two key functionalities:
|
||||
* Rebalancing VM workloads
|
||||
* Migrate VM workloads away from a defined node (e.g. maintenance preparation)
|
||||
|
||||
**Note:** This package is currently discontinued and will be readded at a later time. See also: [#44: How to install pve-proxmoxlb-service-ui package](https://github.com/gyptazy/ProxLB/issues/44).
|
||||
|
||||
### Proxmox HA Integration
|
||||
Proxmox HA (High Availability) groups are designed to ensure that virtual machines (VMs) remain running within a Proxmox cluster. HA groups define specific rules for where VMs should be started or migrated in case of node failures, ensuring minimal downtime and automatic recovery.
|
||||
|
||||
However, when used in conjunction with ProxLB, the built-in load balancer for Proxmox, conflicts can arise. ProxLB operates with its own logic for workload distribution, taking into account affinity and anti-affinity rules. While it effectively balances guest workloads, it may re-shift and redistribute VMs in a way that does not align with HA group constraints, potentially leading to unsuitable placements.
|
||||
|
||||
Due to these conflicts, it is currently not recommended to use both HA groups and ProxLB simultaneously. The interaction between the two mechanisms can lead to unexpected behavior, where VMs might not adhere to HA group rules after being moved by ProxLB.
|
||||
|
||||
A solution to improve compatibility between HA groups and ProxLB is under evaluation, aiming to ensure that both features can work together without disrupting VM placement strategies.
|
||||
|
||||
See also: [#65: Host groups: Honour HA groups](https://github.com/gyptazy/ProxLB/issues/65).
|
||||
6
misc/01-replace-version.sh
Normal file
6
misc/01-replace-version.sh
Normal file
@@ -0,0 +1,6 @@
|
||||
#!/usr/bin/env bash
# Stamp the release version into the Python sources of ProxLB.
VERSION="1.1.2b"

# Update the module-level __version__ constant.
sed -i "s/^__version__ = .*/__version__ = \"$VERSION\"/" "proxlb/utils/version.py"
# Update the version argument passed to setup() in setup.py.
sed -i "s/version=\"[0-9]*\.[0-9]*\.[0-9]*\"/version=\"$VERSION\"/" setup.py
# Bug fix: message previously read "sucessfully".
echo "OK: Versions have been successfully set to $VERSION"
|
||||
4
misc/02-create-changelog.sh
Normal file
4
misc/02-create-changelog.sh
Normal file
@@ -0,0 +1,4 @@
|
||||
#!/usr/bin/env bash
# Generate CHANGELOG.md from the changelog fragments in .changelogs/
# using gyptazy's changelog-fragments-creator tool (cloned fresh each run).
git clone https://github.com/gyptazy/changelog-fragments-creator.git
./changelog-fragments-creator/changelog-creator -f .changelogs/ -o CHANGELOG.md
echo "Created changelog file"
|
||||
@@ -1,16 +0,0 @@
|
||||
#!/bin/bash
|
||||
sudo apt-get install rpm cmake git make python3-yaml
|
||||
|
||||
git clone https://github.com/gyptazy/changelog-fragments-creator.git
|
||||
./changelog-fragments-creator/changelog-creator -f ../.changelogs/ -o ../CHANGELOG.md
|
||||
mkdir packages
|
||||
mkdir build
|
||||
cd build
|
||||
cmake ..
|
||||
cpack -G DEB .
|
||||
cpack -G RPM .
|
||||
cp *.deb ../packages
|
||||
cp *.rpm ../packages
|
||||
cd ..
|
||||
rm -rf build
|
||||
echo "Packages created. Packages can be found in directory: packages"
|
||||
@@ -1,4 +0,0 @@
|
||||
#!/bin/bash
|
||||
git clone https://github.com/gyptazy/changelog-fragments-creator.git
|
||||
./changelog-fragments-creator/changelog-creator -f ../.changelogs/ -o ../CHANGELOG.md
|
||||
echo "Created changelog file"
|
||||
@@ -1,40 +0,0 @@
|
||||
cmake_minimum_required(VERSION 3.16)
|
||||
project(proxmox-rebalancing-service VERSION 1.0.2)
|
||||
|
||||
install(PROGRAMS ../proxlb DESTINATION /bin)
|
||||
install(FILES ../proxlb.conf DESTINATION /etc/proxlb)
|
||||
install(FILES proxlb.service DESTINATION /etc/systemd/system)
|
||||
|
||||
# General
|
||||
set(CPACK_PACKAGE_NAME "proxlb")
|
||||
set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/../LICENSE")
|
||||
set(CPACK_RESOURCE_FILE_README "${CMAKE_CURRENT_SOURCE_DIR}/../README.md")
|
||||
set(CPACK_DEBIAN_PACKAGE_MAINTAINER "Florian Paul Azim <gyptazy> Hoberg <gyptazy@gyptazy.ch>")
|
||||
set(CPACK_PACKAGE_CONTACT "Florian Paul Azim Hoberg <gyptazy@gyptazy.ch>")
|
||||
set(CPACK_PACKAGE_VENDOR "gyptazy")
|
||||
|
||||
# RPM packaging
|
||||
set(CPACK_PACKAGE_VERSION ${CMAKE_PROJECT_VERSION})
|
||||
set(CPACK_GENERATOR "RPM")
|
||||
set(CPACK_RPM_PACKAGE_ARCHITECTURE "amd64")
|
||||
set(CPACK_RPM_PACKAGE_SUMMARY "ProxLB - Rebalance VM workloads across nodes in Proxmox clusters.")
|
||||
set(CPACK_RPM_PACKAGE_DESCRIPTION "ProxLB - Rebalance VM workloads across nodes in Proxmox clusters.")
|
||||
set(CPACK_RPM_CHANGELOG_FILE "${CMAKE_CURRENT_SOURCE_DIR}/changelog_redhat")
|
||||
set(CPACK_PACKAGE_RELEASE 1)
|
||||
set(CPACK_RPM_PACKAGE_LICENSE "GPL 3.0")
|
||||
set(CPACK_RPM_PACKAGE_REQUIRES "python >= 3.2.0")
|
||||
|
||||
# DEB packaging
|
||||
set(CPACK_DEBIAN_FILE_NAME DEB-DEFAULT)
|
||||
set(CPACK_DEBIAN_PACKAGE_ARCHITECTURE "amd64")
|
||||
set(CPACK_DEBIAN_PACKAGE_SUMMARY "ProxLB - Rebalance VM workloads across nodes in Proxmox clusters.")
|
||||
set(CPACK_DEBIAN_PACKAGE_DESCRIPTION "ProxLB - Rebalance VM workloads across nodes in Proxmox clusters.")
|
||||
set(CPACK_DEBIAN_PACKAGE_CONTROL_EXTRA "${CMAKE_CURRENT_SOURCE_DIR}/changelog_debian")
|
||||
set(CPACK_DEBIAN_PACKAGE_DEPENDS "python3, python3-proxmoxer")
|
||||
set(CPACK_DEBIAN_PACKAGE_LICENSE "GPL 3.0")
|
||||
|
||||
# Install
|
||||
set(CPACK_PACKAGING_INSTALL_PREFIX ${CMAKE_INSTALL_PREFIX})
|
||||
set(CPACK_DEBIAN_PACKAGE_CONTROL_EXTRA "${CMAKE_CURRENT_SOURCE_DIR}/postinst;${CMAKE_CURRENT_SOURCE_DIR}/conffiles")
|
||||
set(CPACK_RPM_POST_INSTALL_SCRIPT_FILE "${CMAKE_CURRENT_SOURCE_DIR}/postinst")
|
||||
include(CPack)
|
||||
@@ -1,14 +0,0 @@
|
||||
## Build packages
|
||||
Building the packages requires cmake, deb and rpm.
|
||||
For building packages, simly run the following commands:
|
||||
|
||||
```
|
||||
mkdir build
|
||||
cd build
|
||||
cmake ..
|
||||
cpack -G RPM .
|
||||
cpack -G DEB .
|
||||
```
|
||||
|
||||
When running on Debian/Ubuntu you can directly call `01_package.sh`
|
||||
to create your own packages.
|
||||
@@ -1,21 +0,0 @@
|
||||
proxlb (1.0.2) unstable; urgency=low
|
||||
|
||||
* Add option to run migration in parallel or sequentially.
|
||||
* Add option to run ProxLB only on a Proxmox cluster master (req. HA feature).
|
||||
* Fix daemon timer to use hours instead of minutes.
|
||||
* Fix CMake packaging for Debian package to avoid overwriting the config file.
|
||||
* Fix some wonkey code styles.
|
||||
|
||||
-- Florian Paul Azim Hoberg <gyptazy@gyptazy.ch> Tue, 13 Aug 2024 17:28:14 +0200
|
||||
|
||||
proxlb (1.0.0) unstable; urgency=low
|
||||
|
||||
* Initial release of ProxLB.
|
||||
|
||||
-- Florian Paul Azim Hoberg <gyptazy@gyptazy.ch> Thu, 01 Aug 2024 17:04:12 +0200
|
||||
|
||||
proxlb (0.9.0) unstable; urgency=low
|
||||
|
||||
* Initial development release of ProxLB as a tech preview.
|
||||
|
||||
-- Florian Paul Azim Hoberg <gyptazy@gyptazy.ch> Sun, 07 Jul 2024 05:38:41 +0200
|
||||
@@ -1,11 +0,0 @@
|
||||
* Tue Aug 13 2024 Florian Paul Azim Hoberg <gyptazy@gyptazy.ch>
|
||||
- Add option to run migration in parallel or sequentially.
|
||||
- Add option to run ProxLB only on a Proxmox cluster master (req. HA feature).
|
||||
- Fixed daemon timer to use hours instead of minutes.
|
||||
- Fixed some wonkey code styles.
|
||||
|
||||
* Thu Aug 01 2024 Florian Paul Azim Hoberg <gyptazy@gyptazy.ch>
|
||||
- Initial release of ProxLB.
|
||||
|
||||
* Sun Jul 07 2024 Florian Paul Azim Hoberg <gyptazy@gyptazy.ch>
|
||||
- Initial development release of ProxLB as a tech preview.
|
||||
@@ -1 +0,0 @@
|
||||
/etc/proxlb/proxlb.conf
|
||||
@@ -1,5 +0,0 @@
|
||||
#!/bin/bash
|
||||
useradd -m plb
|
||||
chown plb:plb /etc/proxlb/proxlb.conf
|
||||
chmod 600 /etc/proxlb/proxlb.conf
|
||||
systemctl daemon-reload
|
||||
@@ -1,6 +0,0 @@
|
||||
[Unit]
|
||||
Description=ProxLB - Rebalance VM workloads
|
||||
|
||||
[Service]
|
||||
ExecStart=/usr/bin/proxlb -c /etc/proxlb/proxlb.conf
|
||||
User=plb
|
||||
933
proxlb
933
proxlb
@@ -1,933 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
# ProxLB
|
||||
# ProxLB (re)balances VM workloads across nodes in Proxmox clusters.
|
||||
# ProxLB obtains current metrics from all nodes within the cluster for
|
||||
# further auto balancing by memory, disk or cpu and rebalances the VMs
|
||||
# over all available nodes in a cluster by having an equal resource usage.
|
||||
# Copyright (C) 2024 Florian Paul Azim Hoberg @gyptazy <gyptazy@gyptazy.ch>
|
||||
#
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
import argparse
|
||||
import configparser
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
try:
|
||||
import proxmoxer
|
||||
_imports = True
|
||||
except ImportError:
|
||||
_imports = False
|
||||
import random
|
||||
import re
|
||||
import requests
|
||||
import socket
|
||||
import sys
|
||||
import time
|
||||
import urllib3
|
||||
|
||||
|
||||
# Constants
|
||||
__appname__ = "ProxLB"
|
||||
__version__ = "1.0.2"
|
||||
__author__ = "Florian Paul Azim Hoberg <gyptazy@gyptazy.ch> @gyptazy"
|
||||
__errors__ = False
|
||||
|
||||
|
||||
# Classes
|
||||
## Logging class
|
||||
class SystemdHandler(logging.Handler):
    """Logging handler that emits systemd-journal style records to a stream.

    Each record is prefixed with an sd-daemon priority tag (e.g. ``<6>`` for
    INFO) so that systemd-journald can classify the message severity when
    ProxLB runs as a systemd service.
    """
    # Map Python log levels to sd-daemon priority prefixes ("<N> appname: ").
    PREFIX = {
        logging.CRITICAL: "<2> " + __appname__ + ": ",
        logging.ERROR: "<3> " + __appname__ + ": ",
        logging.WARNING: "<4> " + __appname__ + ": ",
        logging.INFO: "<6> " + __appname__ + ": ",
        logging.DEBUG: "<7> " + __appname__ + ": ",
        # Bug fix: this prefix was "<7 " (missing closing ">"), which would
        # have produced a malformed priority tag for NOTSET records.
        logging.NOTSET: "<7> " + __appname__ + ": ",
    }

    def __init__(self, stream=sys.stdout):
        """Initialize the handler writing to *stream* (default: stdout)."""
        self.stream = stream
        logging.Handler.__init__(self)

    def emit(self, record):
        """Format *record*, prepend its priority prefix and write it out."""
        try:
            msg = self.PREFIX[record.levelno] + self.format(record) + "\n"
            self.stream.write(msg)
            self.stream.flush()
        except Exception:
            # Delegate to the stock logging error handling (never raise from emit).
            self.handleError(record)
|
||||
|
||||
|
||||
# Functions
|
||||
def initialize_logger(log_level, update_log_verbosity=False):
    """Initialize or reconfigure the ProxLB root logger.

    Args:
        log_level: Logging level to apply to the root logger.
        update_log_verbosity: When True, only adjust the verbosity of the
            already-initialized logger instead of attaching a new handler.
    """
    info_prefix = 'Info: [logger]:'

    logger = logging.getLogger()
    logger.setLevel(log_level)

    if update_log_verbosity:
        logging.info(f'{info_prefix} Logger verbosity got updated to: {log_level}.')
    else:
        # First-time setup: attach the systemd-aware stream handler.
        logger.addHandler(SystemdHandler())
        logging.info(f'{info_prefix} Logger got initialized.')
|
||||
|
||||
|
||||
def pre_validations(config_path):
    """Run all pre-flight sanity checks before ProxLB starts balancing.

    Verifies that the required Python dependencies could be imported and
    that the given configuration file exists.

    Args:
        config_path: Path to the ProxLB configuration file to validate.
    """
    info_prefix = 'Info: [pre-validations]:'

    # Each helper terminates the program itself (sys.exit) on failure.
    __validate_imports()
    __validate_config_file(config_path)

    logging.info(f'{info_prefix} All pre-validations done.')
|
||||
|
||||
|
||||
def post_validations():
    """Report the overall outcome of a balancing run.

    Evaluates the module-level ``__errors__`` flag and logs either a success
    message or a critical warning asking the operator to investigate.
    """
    error_prefix = 'Error: [post-validations]:'
    info_prefix = 'Info: [post-validations]:'

    if not __errors__:
        logging.info(f'{info_prefix} All post-validations succeeded.')
    else:
        logging.critical(f'{error_prefix} Not all post-validations succeeded. Please validate!')
|
||||
|
||||
|
||||
def validate_daemon(daemon, schedule):
    """Sleep until the next run when daemonized, otherwise terminate ProxLB.

    Args:
        daemon: Int-like flag (e.g. "0"/"1") from the config file that
            decides whether ProxLB keeps running as a daemon.
        schedule: Interval in hours to wait before the next balancing run.
    """
    info_prefix = 'Info: [daemon]:'

    run_as_daemon = bool(int(daemon))
    if not run_as_daemon:
        logging.info(f'{info_prefix} Not running in daemon mode. Quitting.')
        sys.exit(0)

    logging.info(f'{info_prefix} Running in daemon mode. Next run in {schedule} hours.')
    # Schedule is given in hours; convert to seconds for time.sleep.
    time.sleep(int(schedule) * 60 * 60)
|
||||
|
||||
|
||||
def __validate_imports():
    """Terminate ProxLB when the optional third-party imports failed.

    Relies on the module-level ``_imports`` flag which is set at import time
    depending on whether ``proxmoxer`` could be loaded.
    """
    error_prefix = 'Error: [python-imports]:'
    info_prefix = 'Info: [python-imports]:'

    if _imports:
        logging.info(f'{info_prefix} All required dependencies were imported.')
        return

    logging.critical(f'{error_prefix} Could not import all dependencies. Please install "proxmoxer".')
    sys.exit(2)
|
||||
|
||||
|
||||
def __validate_config_file(config_path):
    """Terminate ProxLB when the given configuration file does not exist.

    Args:
        config_path: Path to the ProxLB configuration file.
    """
    error_prefix = 'Error: [config]:'
    info_prefix = 'Info: [config]:'

    if os.path.isfile(config_path):
        logging.info(f'{info_prefix} Configuration file loaded from: {config_path}.')
        return

    logging.critical(f'{error_prefix} Could not find config file in: {config_path}.')
    sys.exit(2)
|
||||
|
||||
|
||||
def initialize_args():
    """Parse and return the command line arguments for ProxLB."""
    parser = argparse.ArgumentParser(description='ProxLB')
    parser.add_argument('-c', '--config', type=str, help='Path to config file.', required=False)
    parser.add_argument('-d', '--dry-run', help='Perform a dry-run without doing any actions.', action='store_true', required=False)
    parser.add_argument('-j', '--json', help='Return a JSON of the VM movement.', action='store_true', required=False)
    return parser.parse_args()
|
||||
|
||||
|
||||
def initialize_config_path(app_args):
    """Return the path to the ProxLB configuration file.

    Uses the path given on the command line when present and falls back to
    the packaged default location otherwise.

    Args:
        app_args: Parsed argparse namespace with a ``config`` attribute.

    Returns:
        The configuration file path as a string.
    """
    info_prefix = 'Info: [config]:'

    if app_args.config is not None:
        config_path = app_args.config
        logging.info(f'{info_prefix} Using config file: {config_path}.')
    else:
        config_path = '/etc/proxlb/proxlb.conf'
        logging.info(f'{info_prefix} No config file provided. Falling back to: {config_path}.')
    return config_path
|
||||
|
||||
|
||||
def initialize_config_options(config_path):
    """ Read configuration from given config file for ProxLB.

    Returns a 16-tuple with the Proxmox API credentials, all balancing
    related options and the service runtime options. Exits with return
    code 2 when the file cannot be parsed or mandatory options are
    missing.
    """
    error_prefix = 'Error: [config]:'
    info_prefix = 'Info: [config]:'

    try:
        config = configparser.ConfigParser()
        config.read(config_path)

        # Proxmox API credentials (mandatory).
        proxmox_section = config['proxmox']
        proxmox_api_host = proxmox_section['api_host']
        proxmox_api_user = proxmox_section['api_user']
        proxmox_api_pass = proxmox_section['api_pass']
        proxmox_api_ssl_v = proxmox_section['verify_ssl']

        # Balancing options (all optional, with defaults).
        balancing_section = config['balancing']
        balancing_method = balancing_section.get('method', 'memory')
        balancing_mode = balancing_section.get('mode', 'used')
        balancing_mode_option = balancing_section.get('mode_option', 'bytes')
        balancing_type = balancing_section.get('type', 'vm')
        balanciness = balancing_section.get('balanciness', 10)
        parallel_migrations = balancing_section.get('parallel_migrations', 1)
        ignore_nodes = balancing_section.get('ignore_nodes', None)
        ignore_vms = balancing_section.get('ignore_vms', None)

        # Service options (all optional, with defaults).
        service_section = config['service']
        master_only = service_section.get('master_only', 0)
        daemon = service_section.get('daemon', 1)
        schedule = service_section.get('schedule', 24)
        log_verbosity = service_section.get('log_verbosity', 'CRITICAL')
    except configparser.NoSectionError:
        logging.critical(f'{error_prefix} Could not find the required section.')
        sys.exit(2)
    except configparser.ParsingError:
        logging.critical(f'{error_prefix} Unable to parse the config file.')
        sys.exit(2)
    except KeyError:
        logging.critical(f'{error_prefix} Could not find the required options in config file.')
        sys.exit(2)

    logging.info(f'{info_prefix} Configuration file loaded.')
    return proxmox_api_host, proxmox_api_user, proxmox_api_pass, proxmox_api_ssl_v, balancing_method, balancing_mode, balancing_mode_option, \
        balancing_type, balanciness, parallel_migrations, ignore_nodes, ignore_vms, master_only, daemon, schedule, log_verbosity
|
||||
|
||||
|
||||
def api_connect(proxmox_api_host, proxmox_api_user, proxmox_api_pass, proxmox_api_ssl_v):
    """ Connect and authenticate to the Proxmox remote API.

    Args:
        proxmox_api_host: Hostname or IP address of the Proxmox API endpoint.
        proxmox_api_user: API user name (e.g. root@pam).
        proxmox_api_pass: Password of the API user.
        proxmox_api_ssl_v: SSL verification flag as "0"/"1" (string or int).

    Returns:
        An authenticated proxmoxer.ProxmoxAPI object.

    Exits with return code 2 on DNS resolution, timeout or SSL errors.
    """
    error_prefix = 'Error: [api-connection]:'
    warn_prefix = 'Warning: [api-connection]:'
    info_prefix = 'Info: [api-connection]:'
    # The config file delivers the flag as a string; normalize to bool.
    proxmox_api_ssl_v = bool(int(proxmox_api_ssl_v))

    if not proxmox_api_ssl_v:
        # Silence urllib3's InsecureRequestWarning for self-signed certs.
        requests.packages.urllib3.disable_warnings()
        logging.warning(f'{warn_prefix} API connection does not verify SSL certificate.')

    try:
        api_object = proxmoxer.ProxmoxAPI(proxmox_api_host, user=proxmox_api_user, password=proxmox_api_pass, verify_ssl=proxmox_api_ssl_v)
    except urllib3.exceptions.NameResolutionError:
        logging.critical(f'{error_prefix} Could not resolve the given host: {proxmox_api_host}.')
        sys.exit(2)
    except requests.exceptions.ConnectTimeout:
        logging.critical(f'{error_prefix} Connection time out to host: {proxmox_api_host}.')
        sys.exit(2)
    except requests.exceptions.SSLError:
        logging.critical(f'{error_prefix} SSL certificate verification failed for host: {proxmox_api_host}.')
        sys.exit(2)

    logging.info(f'{info_prefix} API connection succeeded to host: {proxmox_api_host}.')
    return api_object
|
||||
|
||||
|
||||
def execute_rebalancing_only_by_master(api_object, master_only):
    """ Validate if balancing should only be done by the cluster master. Afterwards, validate if this node is the cluster master.

    Args:
        api_object: Authenticated Proxmox API object.
        master_only: Flag ("0"/"1", string or int) whether only the
            cluster master may perform rebalancing.

    Returns:
        Tuple of (is_cluster_master, master_only), both booleans.
    """
    info_prefix = 'Info: [only-on-master-executor]:'
    # The config file delivers the flag as a string; normalize it once
    # (previously the same bool(int(...)) conversion ran a second time
    # in the condition below).
    master_only = bool(int(master_only))

    if master_only:
        logging.info(f'{info_prefix} Master only rebalancing is defined. Starting validation.')
        cluster_master_node = get_cluster_master(api_object)
        cluster_master = validate_cluster_master(cluster_master_node)
        return cluster_master, master_only
    else:
        logging.info(f'{info_prefix} No master only rebalancing is defined. Skipping validation.')
        return False, master_only
|
||||
|
||||
|
||||
def get_cluster_master(api_object):
    """ Get the current master of the Proxmox cluster.

    Args:
        api_object: Authenticated Proxmox API object.

    Returns:
        The name of the current HA cluster master node.

    Exits with return code 2 on API errors or when no master could be
    determined (e.g. HA is not configured on the cluster).
    """
    error_prefix = 'Error: [cluster-master-getter]:'
    info_prefix = 'Info: [cluster-master-getter]:'

    try:
        ha_status_object = api_object.cluster().ha().status().manager_status().get()
        # 'manager_status' may be absent when HA is not set up; fall back
        # to an empty dict instead of crashing with an AttributeError on
        # None.get(). (Also parses the response only once - the original
        # repeated the same dict lookup after the try block.)
        cluster_master = (ha_status_object.get('manager_status') or {}).get('master_node')
        logging.info(f'{info_prefix} Master node: {cluster_master}')
    except urllib3.exceptions.NameResolutionError:
        logging.critical(f'{error_prefix} Could not resolve the API.')
        sys.exit(2)
    except requests.exceptions.ConnectTimeout:
        logging.critical(f'{error_prefix} Connection time out to API.')
        sys.exit(2)
    except requests.exceptions.SSLError:
        logging.critical(f'{error_prefix} SSL certificate verification failed for API.')
        sys.exit(2)

    if cluster_master:
        return cluster_master
    else:
        logging.critical(f'{error_prefix} Could not obtain cluster master. Please check your configuration - stopping.')
        sys.exit(2)
|
||||
|
||||
|
||||
def validate_cluster_master(cluster_master):
    """ Validate if the current execution node is the cluster master. """
    info_prefix = 'Info: [cluster-master-validator]:'

    local_hostname = socket.gethostname()
    logging.info(f'{info_prefix} Node executor hostname is: {local_hostname}')

    if local_hostname == cluster_master:
        return True

    logging.info(f'{info_prefix} {local_hostname} is not the cluster master ({cluster_master}).')
    return False
|
||||
|
||||
|
||||
def get_node_statistics(api_object, ignore_nodes):
    """ Get statistics of cpu, memory and disk for each node in the cluster.

    Args:
        api_object: Authenticated Proxmox API object.
        ignore_nodes: Comma separated string of node names to skip, or None.

    Returns:
        Dict keyed by node name holding total/assigned/used/free values and
        percentages for cpu, memory and disk, plus *_last_run fields that
        the balanciness validation updates between recursion runs.
    """
    info_prefix = 'Info: [node-statistics]:'
    node_statistics = {}
    # The config default for ignore_nodes is None; guard the split to
    # avoid an AttributeError when the option is not set.
    ignore_nodes_list = ignore_nodes.split(',') if ignore_nodes else []

    for node in api_object.nodes.get():
        # Only online nodes that are not explicitly ignored take part.
        if node['status'] == 'online' and node['node'] not in ignore_nodes_list:
            stats = {}
            stats['cpu_total'] = node['maxcpu']
            stats['cpu_assigned'] = node['cpu']
            stats['cpu_assigned_percent'] = int(stats['cpu_assigned'] / int(stats['cpu_total']) * 100)
            stats['cpu_assigned_percent_last_run'] = 0
            stats['cpu_used'] = 0
            stats['cpu_free'] = int(node['maxcpu']) - int(node['cpu'])
            stats['cpu_free_percent'] = int(stats['cpu_free'] / int(node['maxcpu']) * 100)
            stats['cpu_free_percent_last_run'] = 0
            stats['memory_total'] = node['maxmem']
            stats['memory_assigned'] = 0
            stats['memory_assigned_percent'] = int(stats['memory_assigned'] / int(stats['memory_total']) * 100)
            stats['memory_assigned_percent_last_run'] = 0
            stats['memory_used'] = node['mem']
            stats['memory_free'] = int(node['maxmem']) - int(node['mem'])
            stats['memory_free_percent'] = int(stats['memory_free'] / int(node['maxmem']) * 100)
            stats['memory_free_percent_last_run'] = 0
            stats['disk_total'] = node['maxdisk']
            stats['disk_assigned'] = 0
            stats['disk_assigned_percent'] = int(stats['disk_assigned'] / int(stats['disk_total']) * 100)
            stats['disk_assigned_percent_last_run'] = 0
            stats['disk_used'] = node['disk']
            stats['disk_free'] = int(node['maxdisk']) - int(node['disk'])
            stats['disk_free_percent'] = int(stats['disk_free'] / int(node['maxdisk']) * 100)
            stats['disk_free_percent_last_run'] = 0
            node_statistics[node['node']] = stats
            logging.info(f'{info_prefix} Added node {node["node"]}.')

    logging.info(f'{info_prefix} Created node statistics.')
    return node_statistics
|
||||
|
||||
|
||||
def get_vm_statistics(api_object, ignore_vms, balancing_type):
    """ Get statistics of cpu, memory and disk for each vm in the cluster.

    Args:
        api_object: Authenticated Proxmox API object.
        ignore_vms: Comma separated string of guest names to skip
            (supports a trailing '*' wildcard), or None.
        balancing_type: 'vm', 'ct' or 'all' - which guest types to collect.

    Returns:
        Dict keyed by guest name with resource usage, ProxLB group
        assignments, vmid, parent node and the initial rebalance target.
    """
    info_prefix = 'Info: [vm-statistics]:'
    warn_prefix = 'Warn: [vm-statistics]:'
    vm_statistics = {}
    # The config default for ignore_vms is None; guard to avoid an
    # AttributeError on None.split().
    ignore_vms = ignore_vms or ''
    ignore_vms_list = ignore_vms.split(',')

    # Wildcard support: Initially validate if we need to honour
    # any wildcards within the vm_ignore list.
    vm_ignore_wildcard = __validate_ignore_vm_wildcard(ignore_vms)

    for node in api_object.nodes.get():

        # Add all virtual machines if type is vm or all.
        if balancing_type == 'vm' or balancing_type == 'all':
            for vm in api_object.nodes(node['node']).qemu.get():
                _add_guest_statistics(api_object, node, vm, 'vm', vm_statistics, ignore_vms_list, vm_ignore_wildcard, info_prefix)

        # Add all containers if type is ct or all.
        if balancing_type == 'ct' or balancing_type == 'all':
            for vm in api_object.nodes(node['node']).lxc.get():
                logging.warning(f'{warn_prefix} Rebalancing on LXC containers (CT) always requires them to shut down.')
                logging.warning(f'{warn_prefix} {vm["name"]} is from type CT and cannot be live migrated!')
                _add_guest_statistics(api_object, node, vm, 'ct', vm_statistics, ignore_vms_list, vm_ignore_wildcard, info_prefix)

    logging.info(f'{info_prefix} Created VM statistics.')
    return vm_statistics


def _add_guest_statistics(api_object, node, vm, guest_type, vm_statistics, ignore_vms_list, vm_ignore_wildcard, info_prefix):
    """ Add a single running VM/CT entry to vm_statistics unless ignored.

    Shared helper for the previously duplicated qemu/lxc loops in
    get_vm_statistics().
    """
    # Reset the group assignments for every guest. (Bugfix: previously the
    # values leaked over from the last tagged guest to untagged guests.)
    group_include = None
    group_exclude = None
    vm_ignore = None

    # Get the VM tags from API.
    vm_tags = __get_vm_tags(api_object, node, vm['vmid'], guest_type)
    if vm_tags is not None:
        group_include, group_exclude, vm_ignore = __get_proxlb_groups(vm_tags)

    # Get wildcard match for VMs to ignore if a wildcard pattern was
    # previously found. Wildcards may slow down the task when using
    # many patterns in the ignore list. Therefore, run this only if
    # a wildcard pattern was found. We also do not need to validate
    # this if the VM is already being ignored by a defined tag.
    if vm_ignore_wildcard and not vm_ignore:
        vm_ignore = __check_vm_name_wildcard_pattern(vm['name'], ignore_vms_list)

    if vm['status'] == 'running' and vm['name'] not in ignore_vms_list and not vm_ignore:
        vm_statistics[vm['name']] = {
            'group_include': group_include,
            'group_exclude': group_exclude,
            'cpu_total': vm['cpus'],
            'cpu_used': vm['cpu'],
            'memory_total': vm['maxmem'],
            'memory_used': vm['mem'],
            'disk_total': vm['maxdisk'],
            'disk_used': vm['disk'],
            'vmid': vm['vmid'],
            'node_parent': node['node'],
            'type': guest_type,
            # Rebalancing node will be overwritten after calculations.
            # If the vm stays on the node, it will be removed at a
            # later time.
            'node_rebalance': node['node'],
        }
        logging.info(f'{info_prefix} Added vm {vm["name"]}.')
|
||||
|
||||
|
||||
def update_node_statistics(node_statistics, vm_statistics):
    """ Update node statistics by VMs statistics.

    Adds each guest's assigned cpu/memory/disk totals to its parent node
    and recalculates the assignment percentages. Warns when a node is
    overprovisioned (more than 99% assigned).

    Args:
        node_statistics: Dict from get_node_statistics() (mutated in place).
        vm_statistics: Dict from get_vm_statistics().

    Returns:
        The updated node_statistics dict.
    """
    info_prefix = 'Info: [node-update-statistics]:'
    warn_prefix = 'Warning: [node-update-statistics]:'

    for vm, vm_value in vm_statistics.items():
        # Hoist the repeated parent-node lookup.
        node = node_statistics[vm_value['node_parent']]
        node['cpu_assigned'] = node['cpu_assigned'] + int(vm_value['cpu_total'])
        node['cpu_assigned_percent'] = (node['cpu_assigned'] / node['cpu_total']) * 100
        node['memory_assigned'] = node['memory_assigned'] + int(vm_value['memory_total'])
        node['memory_assigned_percent'] = (node['memory_assigned'] / node['memory_total']) * 100
        node['disk_assigned'] = node['disk_assigned'] + int(vm_value['disk_total'])
        node['disk_assigned_percent'] = (node['disk_assigned'] / node['disk_total']) * 100

        if node['cpu_assigned_percent'] > 99:
            logging.warning(f'{warn_prefix} Node {vm_value["node_parent"]} is overprovisioned for CPU by {int(node["cpu_assigned_percent"])}%.')

        if node['memory_assigned_percent'] > 99:
            logging.warning(f'{warn_prefix} Node {vm_value["node_parent"]} is overprovisioned for memory by {int(node["memory_assigned_percent"])}%.')

        if node['disk_assigned_percent'] > 99:
            logging.warning(f'{warn_prefix} Node {vm_value["node_parent"]} is overprovisioned for disk by {int(node["disk_assigned_percent"])}%.')

    logging.info(f'{info_prefix} Updated node resource assignments by all VMs.')
    # Bugfix: the original logged the literal string 'node_statistics'
    # instead of the dict's contents.
    logging.debug(node_statistics)
    return node_statistics
|
||||
|
||||
|
||||
def __validate_ignore_vm_wildcard(ignore_vms):
    """ Validate if a wildcard is used for ignored VMs.

    Args:
        ignore_vms: Comma separated ignore list string.

    Returns:
        True when the list contains a '*' wildcard, False otherwise
        (the original implicitly returned None in the False case).
    """
    return '*' in ignore_vms
|
||||
|
||||
|
||||
def __check_vm_name_wildcard_pattern(vm_name, ignore_vms_list):
    """ Validate if the VM name is in the ignore list pattern included.

    Only a trailing '*' is supported: the part before the final character
    is matched as a substring of the VM name (a mid-string '*' is not
    expanded - assumption carried over from the original; confirm if
    richer patterns are needed).

    Returns:
        True on a wildcard match, False otherwise (the original
        implicitly returned None in the False case).
    """
    for ignore_vm in ignore_vms_list:
        if '*' in ignore_vm and ignore_vm[:-1] in vm_name:
            return True
    return False
|
||||
|
||||
|
||||
def __get_vm_tags(api_object, node, vmid, balancing_type):
    """ Get tags for a VM/CT for a given VMID. """
    info_prefix = 'Info: [api-get-vm-tags]:'

    # Query the endpoint that matches the guest type.
    if balancing_type == 'vm':
        guest_endpoint = api_object.nodes(node['node']).qemu(vmid)
    if balancing_type == 'ct':
        guest_endpoint = api_object.nodes(node['node']).lxc(vmid)

    vm_config = guest_endpoint.config.get()
    logging.info(f'{info_prefix} Got VM/CT tag from API.')
    return vm_config.get('tags', None)
|
||||
|
||||
|
||||
def __get_proxlb_groups(vm_tags):
    """ Get ProxLB related include and exclude groups.

    Args:
        vm_tags: Semicolon separated tag string from the guest config.

    Returns:
        Tuple of (group_include, group_exclude, vm_ignore): the matching
        'plb_include_*' / 'plb_exclude_*' tag names (or None) and True
        when a 'plb_ignore_vm' tag is present (otherwise None).
    """
    info_prefix = 'Info: [api-get-vm-include-exclude-tags]:'
    group_include = None
    group_exclude = None
    vm_ignore = None

    group_list = re.split(";", vm_tags)
    for group in group_list:

        if group.startswith('plb_include_'):
            logging.info(f'{info_prefix} Got PLB include group.')
            group_include = group

        if group.startswith('plb_exclude_'):
            # Bugfix: this log line previously claimed 'include group'.
            logging.info(f'{info_prefix} Got PLB exclude group.')
            group_exclude = group

        if group.startswith('plb_ignore_vm'):
            logging.info(f'{info_prefix} Got PLB ignore group.')
            vm_ignore = True

    return group_include, group_exclude, vm_ignore
|
||||
|
||||
|
||||
def balancing_calculations(balancing_method, balancing_mode, balancing_mode_option, node_statistics, vm_statistics, balanciness, rebalance, processed_vms):
    """ Calculate re-balancing of VMs on present nodes across the cluster.

    Recursively moves the most loaded guest to the most free node until
    the node spread is within `balanciness`, then applies include/exclude
    group constraints and finally drops guests that would stay on their
    current node.

    Args:
        balancing_method: 'memory', 'disk' or 'cpu'.
        balancing_mode: 'used' or 'assigned'.
        balancing_mode_option: 'bytes' or 'percent'.
        node_statistics: Node statistics dict (mutated in place).
        vm_statistics: Guest statistics dict (mutated in place).
        balanciness: Max allowed percentage spread between nodes.
        rebalance: Rebalance flag threaded through the recursion.
        processed_vms: Names of guests already relocated in this run.

    Returns:
        Tuple of (node_statistics, vm_statistics) with 'node_rebalance'
        set on each guest that should be migrated.
    """
    info_prefix = 'Info: [rebalancing-calculator]:'

    # Validate for a supported balancing method, mode and if rebalancing is required.
    __validate_balancing_method(balancing_method)
    __validate_balancing_mode(balancing_mode)
    __validate_vm_statistics(vm_statistics)
    rebalance = __validate_balanciness(balanciness, balancing_method, balancing_mode, node_statistics)

    if rebalance:
        # Get most used/assigned resources of the VM and the most free or less allocated node.
        resources_vm_most_used, processed_vms = __get_most_used_resources_vm(balancing_method, balancing_mode, vm_statistics, processed_vms)
        resources_node_most_free = __get_most_free_resources_node(balancing_method, balancing_mode, balancing_mode_option, node_statistics)

        # Update resource statistics for VMs and nodes.
        node_statistics, vm_statistics = __update_resource_statistics(resources_vm_most_used, resources_node_most_free,
                                                                      vm_statistics, node_statistics, balancing_method, balancing_mode)

        # Start recursion until we do not have any needs to rebalance anymore.
        # NOTE: the recursive return value is deliberately discarded -
        # correctness relies on both dicts being mutated in place by
        # __update_resource_statistics.
        balancing_calculations(balancing_method, balancing_mode, balancing_mode_option, node_statistics, vm_statistics, balanciness, rebalance, processed_vms)

    # Honour groupings for include and exclude groups for rebalancing VMs.
    node_statistics, vm_statistics = __get_vm_tags_include_groups(vm_statistics, node_statistics, balancing_method, balancing_mode)
    node_statistics, vm_statistics = __get_vm_tags_exclude_groups(vm_statistics, node_statistics, balancing_method, balancing_mode)

    # Remove VMs that are not being relocated.
    vms_to_remove = [vm_name for vm_name, vm_info in vm_statistics.items() if 'node_rebalance' in vm_info and vm_info['node_rebalance'] == vm_info.get('node_parent')]
    for vm_name in vms_to_remove:
        del vm_statistics[vm_name]

    logging.info(f'{info_prefix} Balancing calculations done.')
    return node_statistics, vm_statistics
|
||||
|
||||
|
||||
def __validate_balancing_method(balancing_method):
    """ Validate for valid and supported balancing method. """
    error_prefix = 'Error: [balancing-method-validation]:'
    info_prefix = 'Info: [balancing-method-validation]:'

    supported_methods = ('memory', 'disk', 'cpu')
    if balancing_method in supported_methods:
        logging.info(f'{info_prefix} Valid balancing method: {balancing_method}')
    else:
        logging.error(f'{error_prefix} Invalid balancing method: {balancing_method}')
        sys.exit(2)
|
||||
|
||||
|
||||
def __validate_balancing_mode(balancing_mode):
    """ Validate for valid and supported balancing mode.

    Exits with return code 2 for anything but 'used' or 'assigned'.
    """
    error_prefix = 'Error: [balancing-mode-validation]:'
    info_prefix = 'Info: [balancing-mode-validation]:'

    if balancing_mode not in ['used', 'assigned']:
        # Bugfix: both log messages previously said 'method' instead of 'mode'.
        logging.error(f'{error_prefix} Invalid balancing mode: {balancing_mode}')
        sys.exit(2)
    else:
        logging.info(f'{info_prefix} Valid balancing mode: {balancing_mode}')
|
||||
|
||||
|
||||
def __validate_vm_statistics(vm_statistics):
    """ Validate for at least a single object of type CT/VM to rebalance.

    Exits with return code 1 when no guests were collected at all.
    """
    error_prefix = 'Error: [balancing-vm-stats-validation]:'

    # An empty dict means there is nothing to balance - bail out early.
    if not vm_statistics:
        logging.error(f'{error_prefix} Not a single CT/VM found in cluster.')
        sys.exit(1)
|
||||
|
||||
|
||||
def __validate_balanciness(balanciness, balancing_method, balancing_mode, node_statistics):
    """ Validate for balanciness to ensure further rebalancing is needed.

    Compares each node's current percentage value against the value saved
    in the previous recursion run; when no node changed at all, or when the
    spread between the fullest and emptiest node is within `balanciness`,
    no further rebalancing is required.

    Args:
        balanciness: Max allowed percentage spread between nodes.
        balancing_method: 'memory', 'disk' or 'cpu'.
        balancing_mode: 'used' or 'assigned'.
        node_statistics: Node statistics dict (mutated: *_last_run and
            *_percent_match fields are updated).

    Returns:
        True when another rebalancing recursion is needed, False otherwise.
    """
    info_prefix = 'Info: [balanciness-validation]:'
    node_resource_percent_list = []
    node_assigned_percent_match = []

    # Remap balancing mode to get the related values from nodes dict.
    # NOTE(review): node_resource_selector stays unbound for any other
    # mode value; this relies on __validate_balancing_mode() running first.
    if balancing_mode == 'used':
        node_resource_selector = 'free'
    if balancing_mode == 'assigned':
        node_resource_selector = 'assigned'

    for node_name, node_info in node_statistics.items():

        # Save information of nodes from current run to compare them in the next recursion.
        if node_statistics[node_name][f'{balancing_method}_{node_resource_selector}_percent_last_run'] == node_statistics[node_name][f'{balancing_method}_{node_resource_selector}_percent']:
            node_statistics[node_name][f'{balancing_method}_{node_resource_selector}_percent_match'] = True
        else:
            node_statistics[node_name][f'{balancing_method}_{node_resource_selector}_percent_match'] = False
        # Update value to the current value of the recursion run.
        node_statistics[node_name][f'{balancing_method}_{node_resource_selector}_percent_last_run'] = node_statistics[node_name][f'{balancing_method}_{node_resource_selector}_percent']

        # If all node resources are unchanged, the recursion can be left.
        # NOTE(review): this inner scan re-appends every node's match flag
        # on each outer iteration, so node_assigned_percent_match holds
        # duplicates; missing keys default to False, which keeps the early
        # exit from firing before all nodes were visited at least once.
        for key, value in node_statistics.items():
            node_assigned_percent_match.append(value.get(f'{balancing_method}_{node_resource_selector}_percent_match', False))

        if False not in node_assigned_percent_match:
            return False

        # Add node information to resource list.
        node_resource_percent_list.append(int(node_info[f'{balancing_method}_{node_resource_selector}_percent']))
        logging.debug(f'{info_prefix} Node: {node_name} with values: {node_info}')

    # Create a sorted list of the delta + balanciness between the node resources.
    node_resource_percent_list_sorted = sorted(node_resource_percent_list)
    node_lowest_percent = node_resource_percent_list_sorted[0]
    node_highest_percent = node_resource_percent_list_sorted[-1]

    # Validate if the recursion should be proceeded for further rebalancing.
    if (int(node_lowest_percent) + int(balanciness)) < int(node_highest_percent):
        logging.info(f'{info_prefix} Rebalancing for {balancing_method} is needed. Highest usage: {int(node_highest_percent)}% | Lowest usage: {int(node_lowest_percent)}%.')
        return True
    else:
        logging.info(f'{info_prefix} Rebalancing for {balancing_method} is not needed. Highest usage: {int(node_highest_percent)}% | Lowest usage: {int(node_lowest_percent)}%.')
        return False
|
||||
|
||||
|
||||
def __get_most_used_resources_vm(balancing_method, balancing_mode, vm_statistics, processed_vms):
    """ Get and return the most used resources of a VM by the defined balancing method. """
    info_prefix = 'Info: [get-most-used-resources-vm]:'

    # Remap balancing mode to get the related values from nodes dict.
    if balancing_mode == 'used':
        vm_resource_selector = 'used'
    if balancing_mode == 'assigned':
        vm_resource_selector = 'total'

    metric_key = f'{balancing_method}_{vm_resource_selector}'

    def _rank(entry):
        # Guests handled in a previous run sink to the bottom of the ranking.
        name, values = entry
        if name in processed_vms:
            return -float('inf')
        return values[metric_key]

    vm = max(vm_statistics.items(), key=_rank)
    processed_vms.append(vm[0])

    logging.info(f'{info_prefix} {vm}')
    return vm, processed_vms
|
||||
|
||||
|
||||
def __get_most_free_resources_node(balancing_method, balancing_mode, balancing_mode_option, node_statistics):
    """ Get and return the most free resources of a node by the defined balancing method.

    Args:
        balancing_method: 'memory', 'disk' or 'cpu'.
        balancing_mode: 'used' or 'assigned'.
        balancing_mode_option: 'bytes' or 'percent' (only relevant for 'used').
        node_statistics: Node statistics dict.

    Returns:
        (node_name, node_info) tuple of the selected target node.
    """
    info_prefix = 'Info: [get-most-free-resources-nodes]:'

    # Return the node information based on the balancing mode.
    # NOTE(review): 'node' stays unbound if an unexpected mode/option
    # combination slips through the upstream validation.
    if balancing_mode == 'used' and balancing_mode_option == 'bytes':
        node = max(node_statistics.items(), key=lambda item: item[1][f'{balancing_method}_free'])
    if balancing_mode == 'used' and balancing_mode_option == 'percent':
        node = max(node_statistics.items(), key=lambda item: item[1][f'{balancing_method}_free_percent'])
    if balancing_mode == 'assigned':
        # NOTE(review): 'x > 0 or x < 100' is always true, so this guard
        # never excludes any node - presumably 'and' was intended; confirm
        # the intended bounds before changing the operator.
        node = min(node_statistics.items(), key=lambda item: item[1][f'{balancing_method}_assigned'] if item[1][f'{balancing_method}_assigned_percent'] > 0 or item[1][f'{balancing_method}_assigned_percent'] < 100 else -float('inf'))

    logging.info(f'{info_prefix} {node}')
    return node
|
||||
|
||||
|
||||
def __update_resource_statistics(resource_highest_used_resources_vm, resource_highest_free_resources_node, vm_statistics, node_statistics, balancing_method, balancing_mode):
    """ Update VM and node resource statistics.

    Sets the guest's new 'node_rebalance' target and transfers its used and
    assigned resources from the old parent node to the target node. Both
    dicts are mutated in place and also returned.

    Args:
        resource_highest_used_resources_vm: (vm_name, vm_info) tuple of the guest to move.
        resource_highest_free_resources_node: Sequence whose first element is the target node name.
        vm_statistics: Guest statistics dict (mutated).
        node_statistics: Node statistics dict (mutated).
        balancing_method: 'memory', 'disk' or 'cpu' - selects which keys are updated.
        balancing_mode: 'used' or 'assigned' (not referenced inside this function).

    Returns:
        Tuple of (node_statistics, vm_statistics).
    """
    info_prefix = 'Info: [rebalancing-resource-statistics-update]:'

    # Only act when the guest would actually change its node.
    if resource_highest_used_resources_vm[1]['node_parent'] != resource_highest_free_resources_node[0]:
        vm_name = resource_highest_used_resources_vm[0]
        vm_node_parent = resource_highest_used_resources_vm[1]['node_parent']
        vm_node_rebalance = resource_highest_free_resources_node[0]
        vm_resource_used = vm_statistics[resource_highest_used_resources_vm[0]][f'{balancing_method}_used']
        vm_resource_total = vm_statistics[resource_highest_used_resources_vm[0]][f'{balancing_method}_total']

        # Update dictionaries for new values
        # Assign new rebalance node to vm
        vm_statistics[vm_name]['node_rebalance'] = vm_node_rebalance

        logging.info(f'Moving {vm_name} from {vm_node_parent} to {vm_node_rebalance}')

        # Recalculate values for nodes
        ## Add freed resources to old parent node
        node_statistics[vm_node_parent][f'{balancing_method}_used'] = int(node_statistics[vm_node_parent][f'{balancing_method}_used']) - int(vm_resource_used)
        node_statistics[vm_node_parent][f'{balancing_method}_free'] = int(node_statistics[vm_node_parent][f'{balancing_method}_free']) + int(vm_resource_used)
        node_statistics[vm_node_parent][f'{balancing_method}_free_percent'] = int(int(node_statistics[vm_node_parent][f'{balancing_method}_free']) / int(node_statistics[vm_node_parent][f'{balancing_method}_total']) * 100)
        node_statistics[vm_node_parent][f'{balancing_method}_assigned'] = int(node_statistics[vm_node_parent][f'{balancing_method}_assigned']) - int(vm_resource_total)
        node_statistics[vm_node_parent][f'{balancing_method}_assigned_percent'] = int(int(node_statistics[vm_node_parent][f'{balancing_method}_assigned']) / int(node_statistics[vm_node_parent][f'{balancing_method}_total']) * 100)

        ## Removed newly allocated resources to new rebalanced node
        node_statistics[vm_node_rebalance][f'{balancing_method}_used'] = int(node_statistics[vm_node_rebalance][f'{balancing_method}_used']) + int(vm_resource_used)
        node_statistics[vm_node_rebalance][f'{balancing_method}_free'] = int(node_statistics[vm_node_rebalance][f'{balancing_method}_free']) - int(vm_resource_used)
        node_statistics[vm_node_rebalance][f'{balancing_method}_free_percent'] = int(int(node_statistics[vm_node_rebalance][f'{balancing_method}_free']) / int(node_statistics[vm_node_rebalance][f'{balancing_method}_total']) * 100)
        node_statistics[vm_node_rebalance][f'{balancing_method}_assigned'] = int(node_statistics[vm_node_rebalance][f'{balancing_method}_assigned']) + int(vm_resource_total)
        node_statistics[vm_node_rebalance][f'{balancing_method}_assigned_percent'] = int(int(node_statistics[vm_node_rebalance][f'{balancing_method}_assigned']) / int(node_statistics[vm_node_rebalance][f'{balancing_method}_total']) * 100)

    logging.info(f'{info_prefix} Updated VM and node statistics.')
    return node_statistics, vm_statistics
|
||||
|
||||
|
||||
def __get_vm_tags_include_groups(vm_statistics, node_statistics, balancing_method, balancing_mode):
    """ Get VMs tags for include groups.

    Guests sharing a 'plb_include_*' tag are pinned to the same rebalance
    node: the first group member defines the target and all further
    members are moved there via __update_resource_statistics().

    Returns:
        Tuple of (node_statistics, vm_statistics).
    """
    info_prefix = 'Info: [rebalancing-tags-group-include]:'
    tags_include_vms = {}
    processed_vm = []

    # Create groups of tags with belongings hosts.
    for vm_name, vm_values in vm_statistics.items():
        if vm_values.get('group_include', None):
            if not tags_include_vms.get(vm_values['group_include'], None):
                tags_include_vms[vm_values['group_include']] = [vm_name]
            else:
                tags_include_vms[vm_values['group_include']] = tags_include_vms[vm_values['group_include']] + [vm_name]

    # Update the VMs to the corresponding node to their group assignments.
    for group, vm_names in tags_include_vms.items():
        # Do not take care of tags that have only a single host included.
        if len(vm_names) < 2:
            logging.info(f'{info_prefix} Only one host in group assignment.')
            # Bugfix: this previously returned immediately and silently
            # skipped all remaining groups; continue with the next group.
            continue

        vm_node_rebalance = False
        logging.info(f'{info_prefix} Create include groups of VM hosts.')
        for vm_name in vm_names:
            if vm_name not in processed_vm:
                if not vm_node_rebalance:
                    # The first group member defines the common target node.
                    vm_node_rebalance = vm_statistics[vm_name]['node_rebalance']
                else:
                    _mocked_vm_object = (vm_name, vm_statistics[vm_name])
                    node_statistics, vm_statistics = __update_resource_statistics(_mocked_vm_object, [vm_node_rebalance], vm_statistics, node_statistics, balancing_method, balancing_mode)
            processed_vm.append(vm_name)

    return node_statistics, vm_statistics
|
||||
|
||||
|
||||
def __get_vm_tags_exclude_groups(vm_statistics, node_statistics, balancing_method, balancing_mode):
    """ Get VMs tags for exclude groups.

    Groups guests by their 'group_exclude' tag (anti-affinity) and spreads the
    members of each group across the cluster: the first member stays on its
    node, every further member is moved to a random node different from its
    current one.

    Args:
        vm_statistics: Mapping of VM name -> metadata (reads 'group_exclude'
            and 'node_parent').
        node_statistics: Mapping of node name -> resource statistics.
        balancing_method: Resource used for balancing (e.g. memory).
        balancing_mode: Mode used for balancing (e.g. used/assigned).

    Returns:
        Tuple of (node_statistics, vm_statistics) with updated assignments.
    """
    info_prefix = 'Info: [rebalancing-tags-group-exclude]:'
    tags_exclude_vms = {}
    processed_vm = []

    # Create groups of tags with belonging guests.
    # Bugfix: previously read the 'group_include' key (copy-paste error), so
    # exclude groups were built from include tags.
    for vm_name, vm_values in vm_statistics.items():
        group = vm_values.get('group_exclude', None)
        if group:
            tags_exclude_vms.setdefault(group, []).append(vm_name)

    # Update the VMs to the corresponding node to their group assignments.
    for group, vm_names in tags_exclude_vms.items():
        # Do not take care of tags that have only a single host included.
        if len(vm_names) < 2:
            logging.info(f'{info_prefix} Only one host in group assignment.')
            # Bugfix: 'continue' instead of 'return' so that a single-member
            # group no longer aborts the processing of all remaining groups.
            continue

        anchor_placed = False
        logging.info(f'{info_prefix} Create exclude groups of VM hosts.')
        for vm_name in vm_names:
            if vm_name in processed_vm:
                continue
            if not anchor_placed:
                # The first guest of the group stays on its current node and
                # acts as the anchor of the anti-affinity group.
                anchor_placed = True
            else:
                # Bugfix: previously the relocation branch was unreachable
                # (the loop flag was never set), so no guest was ever moved.
                # Guard against a single-node cluster to avoid an endless loop.
                if len(node_statistics) < 2:
                    logging.info(f'{info_prefix} Only one host in group assignment.')
                    processed_vm.append(vm_name)
                    continue
                # Get a random node and make sure that it is not by accident
                # the currently assigned one.
                random_node = vm_statistics[vm_name]['node_parent']
                while random_node == vm_statistics[vm_name]['node_parent']:
                    random_node = random.choice(list(node_statistics.keys()))
                _mocked_vm_object = (vm_name, vm_statistics[vm_name])
                node_statistics, vm_statistics = __update_resource_statistics(_mocked_vm_object, [random_node], vm_statistics, node_statistics, balancing_method, balancing_mode)
            processed_vm.append(vm_name)

    return node_statistics, vm_statistics
|
||||
|
||||
|
||||
def __wait_job_finalized(api_object, node_name, job_id, counter):
    """ Wait for a job to be finalized.

    Polls the task status of the given job on the given node every 5 seconds
    until its status is no longer 'running', giving up after 300 polls.

    Args:
        api_object: Authenticated proxmoxer API object.
        node_name: Name of the node that owns the task.
        job_id: The Proxmox task/UPID to wait for.
        counter: Current poll attempt (callers start with 1).
    """
    error_prefix = 'Error: [job-status-getter]:'
    info_prefix = 'Info: [job-status-getter]:'

    # Bugfix: an iterative loop replaces the original recursion, which never
    # stopped at the retry limit (only logged), grew the call stack and logged
    # the success message on every recursion level even while still running.
    while True:
        logging.info(f'{info_prefix} Getting job status for job {job_id}.')
        task = api_object.nodes(node_name).tasks(job_id).status().get()
        logging.info(f'{info_prefix} {task}')

        if task['status'] != 'running':
            break

        logging.info(f'{info_prefix} Validating job {job_id} for the {counter} run.')

        # Do not run for infinity and fail when reaching the limit.
        if counter >= 300:
            logging.critical(f'{error_prefix} The job {job_id} on node {node_name} did not finished in time for migration.')
            return

        time.sleep(5)
        counter = counter + 1
        logging.info(f'{info_prefix} Revalidating job {job_id} in a next run.')

    logging.info(f'{info_prefix} Job {job_id} for migration from {node_name} terminiated succesfully.')
|
||||
|
||||
|
||||
def __run_vm_rebalancing(api_object, vm_statistics_rebalanced, app_args, parallel_migrations):
    """ Run & execute the VM rebalancing via API.

    Starts one migration job per rebalanced guest (live migration for VMs,
    restart migration for CTs). In sequential mode each job is awaited before
    the next one is started. Does nothing in dry-run mode.

    Args:
        api_object: Authenticated proxmoxer API object.
        vm_statistics_rebalanced: Mapping of guest name -> metadata (reads
            'type', 'vmid', 'node_parent', 'node_rebalance').
        app_args: Parsed CLI arguments (uses .dry_run).
        parallel_migrations: Truthy/'1' to fire migrations without waiting.
    """
    error_prefix = 'Error: [rebalancing-executor]:'
    info_prefix = 'Info: [rebalancing-executor]:'

    if len(vm_statistics_rebalanced) > 0 and not app_args.dry_run:
        for vm, value in vm_statistics_rebalanced.items():

            # Bugfix: initialize job_id so a failed migration call no longer
            # raises a NameError when reaching the wait logic below.
            job_id = None

            try:
                # Migrate type VM (live migration).
                if value['type'] == 'vm':
                    logging.info(f'{info_prefix} Rebalancing VM {vm} from node {value["node_parent"]} to node {value["node_rebalance"]}.')
                    job_id = api_object.nodes(value['node_parent']).qemu(value['vmid']).migrate().post(target=value['node_rebalance'],online=1)

                # Migrate type CT (requires restart of container).
                if value['type'] == 'ct':
                    logging.info(f'{info_prefix} Rebalancing CT {vm} from node {value["node_parent"]} to node {value["node_rebalance"]}.')
                    job_id = api_object.nodes(value['node_parent']).lxc(value['vmid']).migrate().post(target=value['node_rebalance'],restart=1)

            except proxmoxer.core.ResourceException as error_resource:
                logging.critical(f'{error_prefix} {error_resource}')

            # Nothing to wait for when the migration could not be started.
            if job_id is None:
                continue

            # Wait for migration to be finished unless running parallel migrations.
            if not bool(int(parallel_migrations)):
                logging.info(f'{info_prefix} Rebalancing will be performed sequentially.')
                __wait_job_finalized(api_object, value['node_parent'], job_id, counter=1)
            else:
                logging.info(f'{info_prefix} Rebalancing will be performed parallely.')

    else:
        logging.info(f'{info_prefix} No rebalancing needed.')
|
||||
|
||||
|
||||
def __create_json_output(vm_statistics_rebalanced, app_args):
    """ Create a machine parsable json output of VM rebalance statistics.

    Prints the rebalance mapping as JSON to stdout when the --json CLI
    flag is set; otherwise does nothing.
    """
    info_prefix = 'Info: [json-output-generator]:'

    # Early exit keeps the happy path flat.
    if not app_args.json:
        return

    logging.info(f'{info_prefix} Printing json output of VM statistics.')
    print(json.dumps(vm_statistics_rebalanced))
|
||||
|
||||
|
||||
def __create_cli_output(vm_statistics_rebalanced, app_args):
    """ Create output for CLI when running in dry-run mode.

    Builds a table of all planned guest relocations (guest, current node,
    target node, guest type) and hands it to the table printer; only logs
    when nothing needs to be rebalanced.
    """
    info_prefix_dry_run = 'Info: [cli-output-generator-dry-run]:'
    info_prefix_run = 'Info: [cli-output-generator]:'

    # Select the log prefix and announce the run mode.
    if app_args.dry_run:
        info_prefix = info_prefix_dry_run
        logging.info(f'{info_prefix} Starting dry-run to rebalance vms to their new nodes.')
    else:
        info_prefix = info_prefix_run
        logging.info(f'{info_prefix} Start rebalancing vms to their new nodes.')

    # Header row first, then one row per rebalanced guest.
    vm_to_node_list = [['VM', 'Current Node', 'Rebalanced Node', 'VM Type']]
    vm_to_node_list += [
        [guest_name, guest_meta['node_parent'], guest_meta['node_rebalance'], guest_meta['type']]
        for guest_name, guest_meta in vm_statistics_rebalanced.items()
    ]

    if vm_statistics_rebalanced:
        logging.info(f'{info_prefix} Printing cli output of VM rebalancing.')
        __print_table_cli(vm_to_node_list, app_args.dry_run)
    else:
        logging.info(f'{info_prefix} No rebalancing needed.')
|
||||
|
||||
|
||||
def __print_table_cli(table, dry_run=False):
    """ Pretty print a given table to the cli.

    Right-aligns every column to its widest cell (plus padding). Each row is
    always logged at info level; in dry-run mode rows are additionally
    printed to stdout.
    """
    info_prefix_dry_run = 'Info: [cli-output-generator-table-dryn-run]:'
    info_prefix_run = 'Info: [cli-output-generator-table]:'
    # The prefix only depends on dry_run, so it can be chosen once up front.
    info_prefix = info_prefix_dry_run if dry_run else info_prefix_run

    # Width of each column: widest cell in that column plus three spaces.
    column_widths = [
        max(len(str(row[col])) for row in table) + 3
        for col in range(len(table[0]))
    ]

    # One right-aligned format slot per column.
    row_format = "".join("{:>" + str(width) + "}" for width in column_widths)

    for row in table:
        # Print CLI output when running in dry-run mode to make the user's life easier.
        if dry_run:
            print(row_format.format(*row))
        # Log all items in info mode.
        logging.info(f'{info_prefix} {row_format.format(*row)}')
|
||||
|
||||
|
||||
def run_vm_rebalancing(api_object, vm_statistics_rebalanced, app_args, parallel_migrations):
    """ Run rebalancing of vms to new nodes in cluster.

    Thin orchestration wrapper: executes the migrations first, then emits the
    optional JSON report and the CLI/log report of the calculated relocations.

    Args:
        api_object: Authenticated proxmoxer API object.
        vm_statistics_rebalanced: Mapping of guest name -> rebalance metadata.
        app_args: Parsed CLI arguments (dry_run and json flags are consumed
            by the helpers).
        parallel_migrations: Truthy/'1' to fire migrations without waiting.
    """
    # Order matters: migrations run (or are skipped in dry-run mode) before
    # any reporting happens.
    __run_vm_rebalancing(api_object, vm_statistics_rebalanced, app_args, parallel_migrations)
    __create_json_output(vm_statistics_rebalanced, app_args)
    __create_cli_output(vm_statistics_rebalanced, app_args)
|
||||
|
||||
|
||||
def main():
    """ Run ProxLB for balancing VM workloads across a Proxmox cluster. """
    # Bootstrap with the most restrictive log level; it is replaced below once
    # the user-defined verbosity is known from the config file.
    initialize_logger('CRITICAL')
    app_args = initialize_args()
    config_path = initialize_config_path(app_args)
    pre_validations(config_path)

    # Parse global config.
    # NOTE(review): the unpacking order must match the return order of
    # initialize_config_options exactly -- do not reorder these names.
    proxmox_api_host, proxmox_api_user, proxmox_api_pass, proxmox_api_ssl_v, balancing_method, balancing_mode, balancing_mode_option, balancing_type, \
        balanciness, parallel_migrations, ignore_nodes, ignore_vms, master_only, daemon, schedule, log_verbosity = initialize_config_options(config_path)

    # Overwrite logging handler with user defined log verbosity.
    initialize_logger(log_verbosity, update_log_verbosity=True)

    while True:
        # API Authentication.
        api_object = api_connect(proxmox_api_host, proxmox_api_user, proxmox_api_pass, proxmox_api_ssl_v)

        # Get master node of cluster and ensure that ProxLB is only performed on the
        # cluster master node to avoid ongoing rebalancing.
        cluster_master, master_only = execute_rebalancing_only_by_master(api_object, master_only)

        # Validate daemon service and skip following tasks when not being the cluster master.
        if not cluster_master and master_only:
            validate_daemon(daemon, schedule)
            continue

        # Get metric & statistics for vms and nodes.
        node_statistics = get_node_statistics(api_object, ignore_nodes)
        vm_statistics = get_vm_statistics(api_object, ignore_vms, balancing_type)
        node_statistics = update_node_statistics(node_statistics, vm_statistics)

        # Calculate rebalancing of vms.
        node_statistics_rebalanced, vm_statistics_rebalanced = balancing_calculations(balancing_method, balancing_mode, balancing_mode_option,
            node_statistics, vm_statistics, balanciness, rebalance=False, processed_vms=[])

        # Rebalance vms to new nodes within the cluster.
        run_vm_rebalancing(api_object, vm_statistics_rebalanced, app_args, parallel_migrations)

        # Validate for any errors.
        post_validations()

        # Validate daemon service. NOTE(review): presumably this sleeps per the
        # configured schedule in daemon mode and exits otherwise -- confirm in
        # validate_daemon.
        validate_daemon(daemon, schedule)
|
||||
14
proxlb.conf
14
proxlb.conf
@@ -1,14 +0,0 @@
|
||||
[proxmox]
|
||||
api_host: hypervisor01.gyptazy.ch
|
||||
api_user: root@pam
|
||||
api_pass: FooBar
|
||||
verify_ssl: 1
|
||||
[balancing]
|
||||
method: memory
|
||||
mode: used
|
||||
ignore_nodes: dummynode01,dummynode02
|
||||
ignore_vms: testvm01,testvm02
|
||||
[service]
|
||||
daemon: 1
|
||||
schedule: 24
|
||||
log_verbosity: CRITICAL
|
||||
87
proxlb/main.py
Normal file
87
proxlb/main.py
Normal file
@@ -0,0 +1,87 @@
|
||||
"""
|
||||
ProxLB is a load balancing tool for Proxmox Virtual Environment (PVE) clusters.
|
||||
It connects to the Proxmox API, retrieves information about nodes, guests, and groups,
|
||||
and performs calculations to determine the optimal distribution of resources across the
|
||||
cluster. The tool supports daemon mode for continuous operation and can log metrics and
|
||||
perform balancing actions based on the configuration provided. It also includes a CLI
|
||||
parser for handling command-line arguments and a custom logger for systemd integration.
|
||||
"""
|
||||
|
||||
__author__ = "Florian Paul Azim Hoberg <gyptazy>"
|
||||
__copyright__ = "Copyright (C) 2025 Florian Paul Azim Hoberg (@gyptazy)"
|
||||
__license__ = "GPL-3.0"
|
||||
|
||||
|
||||
import logging
|
||||
from utils.logger import SystemdLogger
|
||||
from utils.cli_parser import CliParser
|
||||
from utils.config_parser import ConfigParser
|
||||
from utils.proxmox_api import ProxmoxApi
|
||||
from models.nodes import Nodes
|
||||
from models.guests import Guests
|
||||
from models.groups import Groups
|
||||
from models.calculations import Calculations
|
||||
from models.balancing import Balancing
|
||||
from utils.helper import Helper
|
||||
|
||||
|
||||
def main():
    """
    ProxLB main function.

    Bootstraps logging, CLI parsing and config parsing, connects to the
    Proxmox API and then loops: collect cluster objects, calculate the
    balancing and -- unless running in dry-run mode or balancing is
    disabled -- execute the migrations via the Proxmox API.
    """
    # Initialize logging handler
    logger = SystemdLogger(level=logging.INFO)

    # Parses arguments passed from the CLI
    cli_parser = CliParser()
    cli_args = cli_parser.parse_args()
    Helper.get_version(cli_args.version)

    # Parse ProxLB config file
    config_parser = ConfigParser(cli_args.config)
    proxlb_config = config_parser.get_config()

    # Update log level from config and fallback to INFO if not defined
    logger.set_log_level(proxlb_config.get('service', {}).get('log_level', 'INFO'))

    # Connect to Proxmox API & create API object
    proxmox_api = ProxmoxApi(proxlb_config)

    # Overwrite password after creating the API object so it cannot leak
    # through logs or the JSON output of the merged data structure.
    proxlb_config["proxmox_api"]["pass"] = "********"

    while True:
        # Get all required objects from the Proxmox cluster
        meta = {"meta": proxlb_config}
        nodes = Nodes.get_nodes(proxmox_api, proxlb_config)
        guests = Guests.get_guests(proxmox_api, nodes, meta)
        groups = Groups.get_groups(guests, nodes)

        # Merge obtained objects from the Proxmox cluster for further usage
        proxlb_data = {**meta, **nodes, **guests, **groups}
        Helper.log_node_metrics(proxlb_data)

        # Update the initial node resource assignments
        # by the previously created groups.
        Calculations.set_node_assignments(proxlb_data)
        Calculations.get_most_free_node(proxlb_data, cli_args.best_node)
        Calculations.relocate_guests_on_maintenance_nodes(proxlb_data)
        Calculations.get_balanciness(proxlb_data)
        Calculations.relocate_guests(proxlb_data)
        Helper.log_node_metrics(proxlb_data, init=False)

        # Perform balancing actions via Proxmox API.
        # Bugfix: the previous condition ('not dry_run OR not enable')
        # executed the balancing precisely when it was disabled, including
        # during dry-runs. Balancing must only run when NOT in dry-run mode
        # AND balancing is enabled (missing key defaults to enabled, which
        # preserves the original non-dry-run behavior).
        if not cli_args.dry_run and proxlb_data["meta"]["balancing"].get("enable", True):
            Balancing(proxmox_api, proxlb_data)

        # Validate if the JSON output should be
        # printed to stdout
        Helper.print_json(proxlb_data, cli_args.json)
        # Validate daemon mode
        Helper.get_daemon_mode(proxlb_config)

    # NOTE(review): unreachable because of the infinite loop above; kept for
    # parity with the original source.
    logger.debug("Finished: __main__")
|
||||
0
proxlb/models/__init__.py
Normal file
0
proxlb/models/__init__.py
Normal file
201
proxlb/models/balancing.py
Normal file
201
proxlb/models/balancing.py
Normal file
@@ -0,0 +1,201 @@
|
||||
"""
|
||||
The Balancing class is responsible for processing workloads on Proxmox clusters.
|
||||
It processes the previously generated data (held in proxlb_data) and moves guests
|
||||
and other supported types across Proxmox clusters based on the defined values by an operator.
|
||||
"""
|
||||
|
||||
|
||||
__author__ = "Florian Paul Azim Hoberg <gyptazy>"
|
||||
__copyright__ = "Copyright (C) 2025 Florian Paul Azim Hoberg (@gyptazy)"
|
||||
__license__ = "GPL-3.0"
|
||||
|
||||
|
||||
import proxmoxer
|
||||
import time
|
||||
from utils.logger import SystemdLogger
|
||||
from typing import Dict, Any
|
||||
|
||||
logger = SystemdLogger()
|
||||
|
||||
|
||||
class Balancing:
    """
    Executes the calculated balancing actions on a Proxmox cluster.

    Iterates over all guests in the previously computed proxlb_data structure
    and migrates every guest whose target node differs from its current node,
    unless the guest is marked as ignored. VMs are (optionally live) migrated,
    CTs are restart-migrated. Unless parallel migrations are enabled, each
    migration job is polled until it finishes.

    Methods:
        __init__(proxmox_api, proxlb_data):
            Performs all pending guest migrations immediately.

        exec_rebalancing_vm(proxmox_api, proxlb_data, guest_name):
            Migrates a single VM to its target node.

        exec_rebalancing_ct(proxmox_api, proxlb_data, guest_name):
            Migrates a single CT (with restart) to its target node.

        get_rebalancing_job_status(proxmox_api, proxlb_data, guest_name,
                                   guest_current_node, job_id, retry_counter=1):
            Waits for a migration job to finish; returns True on success.
    """

    def __init__(self, proxmox_api: Any, proxlb_data: Dict[str, Any]):
        """
        Initializes the Balancing class with the provided ProxLB data and
        immediately initiates the rebalancing process for all guests.

        Args:
            proxmox_api: The Proxmox API client instance.
            proxlb_data (dict): The data required for balancing VMs and CTs.
        """
        for guest_name, guest_meta in proxlb_data["guests"].items():

            # Guests already placed on their target node need no action.
            if guest_meta["node_current"] == guest_meta["node_target"]:
                logger.debug(f"Balancing: Guest {guest_name} is already on the target node {guest_meta['node_target']} and will not be rebalanced.")
                continue

            # Guests flagged as ignored are never touched.
            if guest_meta["ignore"]:
                logger.debug(f"Balancing: Guest {guest_name} is ignored and will not be rebalanced.")
                continue

            # Dispatch by guest type.
            if guest_meta["type"] == "vm":
                self.exec_rebalancing_vm(proxmox_api, proxlb_data, guest_name)
            elif guest_meta["type"] == "ct":
                self.exec_rebalancing_ct(proxmox_api, proxlb_data, guest_name)
            else:
                # Just in case we get a new type of guest in the future.
                logger.critical(f"Balancing: Got unexpected guest type: {guest_meta['type']}. Cannot proceed guest: {guest_meta['name']}.")

    def exec_rebalancing_vm(self, proxmox_api: Any, proxlb_data: Dict[str, Any], guest_name: str) -> None:
        """
        Executes the rebalancing of a virtual machine (VM) to a new node
        within the cluster and waits for the migration job (see
        get_rebalancing_job_status). API errors are logged, not raised.

        Args:
            proxmox_api: The Proxmox API client instance.
            proxlb_data (dict): The ProxLB balancing data.
            guest_name (str): The name of the guest VM to be migrated.

        Returns:
            None
        """
        logger.debug("Starting: exec_rebalancing_vm.")
        guest_id = proxlb_data["guests"][guest_name]["id"]
        guest_node_current = proxlb_data["guests"][guest_name]["node_current"]
        guest_node_target = proxlb_data["guests"][guest_name]["node_target"]

        # Live migration is the default and can be disabled by the operator.
        online_migration = 1 if proxlb_data["meta"]["balancing"].get("live", True) else 0
        # Migrating local disks is the default and can be disabled by the operator.
        with_local_disks = 1 if proxlb_data["meta"]["balancing"].get("with_local_disks", True) else 0

        migration_options = {
            # Bugfix: 'target' was previously the set literal
            # {guest_node_target}, which handed a Python set instead of the
            # node name to the Proxmox API.
            'target': guest_node_target,
            'online': online_migration,
            'with-local-disks': with_local_disks
        }

        try:
            logger.debug(f"Balancing: Starting to migrate guest {guest_name} of type VM.")
            job_id = proxmox_api.nodes(guest_node_current).qemu(guest_id).migrate().post(**migration_options)
            self.get_rebalancing_job_status(proxmox_api, proxlb_data, guest_name, guest_node_current, job_id)
        except proxmoxer.core.ResourceException as proxmox_api_error:
            logger.critical(f"Balancing: Failed to migrate guest {guest_name} of type VM due to some Proxmox errors. Please check if resource is locked or similar.")
            logger.debug(f"Balancing: Failed to migrate guest {guest_name} of type VM due to some Proxmox errors: {proxmox_api_error}")
        logger.debug("Finished: exec_rebalancing_vm.")

    def exec_rebalancing_ct(self, proxmox_api: Any, proxlb_data: Dict[str, Any], guest_name: str) -> None:
        """
        Executes the rebalancing of a container (CT) to a new node within the
        cluster (containers require a restart migration) and waits for the
        migration job. API errors are logged, not raised.

        Args:
            proxmox_api: The Proxmox API client instance.
            proxlb_data (dict): The ProxLB balancing data.
            guest_name (str): The name of the guest CT to be migrated.

        Returns:
            None
        """
        logger.debug("Starting: exec_rebalancing_ct.")
        guest_id = proxlb_data["guests"][guest_name]["id"]
        guest_node_current = proxlb_data["guests"][guest_name]["node_current"]
        guest_node_target = proxlb_data["guests"][guest_name]["node_target"]

        try:
            logger.debug(f"Balancing: Starting to migrate guest {guest_name} of type CT.")
            job_id = proxmox_api.nodes(guest_node_current).lxc(guest_id).migrate().post(target=guest_node_target, restart=1)
            self.get_rebalancing_job_status(proxmox_api, proxlb_data, guest_name, guest_node_current, job_id)
        except proxmoxer.core.ResourceException as proxmox_api_error:
            logger.critical(f"Balancing: Failed to migrate guest {guest_name} of type CT due to some Proxmox errors. Please check if resource is locked or similar.")
            logger.debug(f"Balancing: Failed to migrate guest {guest_name} of type CT due to some Proxmox errors: {proxmox_api_error}")
        logger.debug("Finished: exec_rebalancing_ct.")

    def get_rebalancing_job_status(self, proxmox_api: Any, proxlb_data: Dict[str, Any], guest_name: str, guest_current_node: str, job_id: int, retry_counter: int = 1) -> bool:
        """
        Monitors the status of a rebalancing job on a Proxmox node until it
        completes or the retry limit is reached. In parallel mode no waiting
        is performed at all. Otherwise the job is polled every 10 seconds up
        to the configured 'max_job_validation' retries (default: 1800).

        Args:
            proxmox_api: The Proxmox API client instance.
            proxlb_data (dict): The ProxLB configuration data.
            guest_name (str): The name of the guest being rebalanced.
            guest_current_node (str): The node the guest is migrating away from.
            job_id: The ID (UPID) of the rebalancing job to monitor.
            retry_counter (int, optional): The current retry count. Defaults to 1.

        Returns:
            bool: True if the job completed successfully (or no waiting was
            required in parallel mode), False otherwise.
        """
        logger.debug("Starting: get_rebalancing_job_status.")

        # Parallel migrations can take a huge time and create a higher load;
        # if not defined by an operator we use sequential mode by default.
        if proxlb_data["meta"]["balancing"].get("parallel", False):
            # Bugfix: the original fell through with an unbound 'job' variable
            # in parallel mode; simply report success without waiting.
            logger.debug("Finished: get_rebalancing_job_status.")
            return True

        max_retries = proxlb_data["meta"]["balancing"].get("max_job_validation", 1800)

        # Bugfix: an iterative loop replaces the original recursion, which
        # could exceed Python's recursion limit for long migrations and
        # dropped the return value of the recursive call.
        while retry_counter < max_retries:
            job = proxmox_api.nodes(guest_current_node).tasks(job_id).status().get()

            # Validate job output for errors when finished.
            if job["status"] == "stopped":
                if job["exitstatus"] == "OK":
                    logger.debug(f"Balancing: Job ID {job_id} (guest: {guest_name}) was successfully.")
                    logger.debug("Finished: get_rebalancing_job_status.")
                    return True
                logger.critical(f"Balancing: Job ID {job_id} (guest: {guest_name}) went into an error! Please check manually.")
                logger.debug("Finished: get_rebalancing_job_status.")
                return False

            # Do not hammer the API while watching the job status.
            time.sleep(10)
            retry_counter += 1
            logger.debug(f"Balancing: Job ID {job_id} (guest: {guest_name}) for migration is still running... (Run: {retry_counter})")

        logger.warning(f"Balancing: Job ID {job_id} (guest: {guest_name}) for migration took too long. Please check manually.")
        logger.debug("Finished: get_rebalancing_job_status.")
        return False
||||
378
proxlb/models/calculations.py
Normal file
378
proxlb/models/calculations.py
Normal file
@@ -0,0 +1,378 @@
|
||||
"""
|
||||
The Calculations class is responsible for handling the balancing of virtual machines (VMs)
|
||||
and containers (CTs) across all available nodes in a Proxmox cluster. It provides methods
|
||||
to calculate the optimal distribution of VMs and CTs based on the provided data.
|
||||
"""
|
||||
|
||||
|
||||
__author__ = "Florian Paul Azim Hoberg <gyptazy>"
|
||||
__copyright__ = "Copyright (C) 2025 Florian Paul Azim Hoberg (@gyptazy)"
|
||||
__license__ = "GPL-3.0"
|
||||
|
||||
|
||||
import sys
|
||||
from typing import Dict, Any
|
||||
from utils.logger import SystemdLogger
|
||||
|
||||
logger = SystemdLogger()
|
||||
|
||||
|
||||
class Calculations:
|
||||
"""
|
||||
The calculation class is responsible for handling the balancing of virtual machines (VMs)
|
||||
and containers (CTs) across all available nodes in a Proxmox cluster. It provides methods
|
||||
to calculate the optimal distribution of VMs and CTs based on the provided data.
|
||||
|
||||
Methods:
|
||||
__init__(proxlb_data: Dict[str, Any]):
|
||||
Initializes the Calculation class with the provided ProxLB data.
|
||||
|
||||
set_node_assignments(proxlb_data: Dict[str, Any]) -> Dict[str, Any]:
|
||||
Sets the assigned resources of the nodes based on the current assigned
|
||||
guest resources by their created groups as an initial base.
|
||||
|
||||
get_balanciness(proxlb_data: Dict[str, Any]) -> Dict[str, Any]:
|
||||
Gets the balanciness for further actions where the highest and lowest
|
||||
usage or assignments of Proxmox nodes are compared.
|
||||
|
||||
get_most_free_node(proxlb_data: Dict[str, Any], return_node: bool = False) -> Dict[str, Any]:
|
||||
Gets the name of the Proxmox node in the cluster with the most free resources based on
|
||||
the user-defined method (e.g., memory) and mode (e.g., used).
|
||||
|
||||
relocate_guests_on_maintenance_nodes(proxlb_data: Dict[str, Any]):
|
||||
Relocates guests that are currently on nodes marked for maintenance to
|
||||
nodes with the most available resources.
|
||||
|
||||
relocate_guests(proxlb_data: Dict[str, Any]):
|
||||
Relocates guests within the provided data structure to ensure affinity groups are
|
||||
placed on nodes with the most free resources.
|
||||
|
||||
val_anti_affinity(proxlb_data: Dict[str, Any], guest_name: str):
|
||||
Validates and assigns nodes to guests based on anti-affinity rules.
|
||||
|
||||
update_node_resources(proxlb_data):
|
||||
Updates the resource allocation and usage statistics for nodes when a guest
|
||||
is moved from one node to another.
|
||||
"""
|
||||
|
||||
def __init__(self, proxlb_data: Dict[str, Any]):
|
||||
"""
|
||||
Initializes the Calculation class with the provided ProxLB data.
|
||||
|
||||
Args:
|
||||
proxlb_data (Dict[str, Any]): The data required for balancing VMs and CTs.
|
||||
"""
|
||||
|
||||
@staticmethod
|
||||
def set_node_assignments(proxlb_data: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""
|
||||
Set the assigned resources of the nodes based on the current assigned
|
||||
guest resources by their created groups as an initial base.
|
||||
|
||||
Args:
|
||||
proxlb_data (Dict[str, Any]): The data holding all current statistics.
|
||||
|
||||
Returns:
|
||||
Dict[str, Any]: Updated ProxLB data of nodes section with updated node assigned values.
|
||||
"""
|
||||
logger.debug("Starting: set_node_assignments.")
|
||||
for group_name, group_meta in proxlb_data["groups"]["affinity"].items():
|
||||
|
||||
for guest_name in group_meta["guests"]:
|
||||
guest_node_current = proxlb_data["guests"][guest_name]["node_current"]
|
||||
# Update Hardware assignments
|
||||
# Update assigned values for the current node
|
||||
logger.debug(f"set_node_assignment of guest {guest_name} on node {guest_node_current} with cpu_total: {proxlb_data['guests'][guest_name]['cpu_total']}, memory_total: {proxlb_data['guests'][guest_name]['memory_total']}, disk_total: {proxlb_data['guests'][guest_name]['disk_total']}.")
|
||||
proxlb_data["nodes"][guest_node_current]["cpu_assigned"] += proxlb_data["guests"][guest_name]["cpu_total"]
|
||||
proxlb_data["nodes"][guest_node_current]["memory_assigned"] += proxlb_data["guests"][guest_name]["memory_total"]
|
||||
proxlb_data["nodes"][guest_node_current]["disk_assigned"] += proxlb_data["guests"][guest_name]["disk_total"]
|
||||
# Update assigned percentage values for the current node
|
||||
proxlb_data["nodes"][guest_node_current]["cpu_assigned_percent"] = proxlb_data["nodes"][guest_node_current]["cpu_assigned"] / proxlb_data["nodes"][guest_node_current]["cpu_total"] * 100
|
||||
proxlb_data["nodes"][guest_node_current]["memory_assigned_percent"] = proxlb_data["nodes"][guest_node_current]["memory_assigned"] / proxlb_data["nodes"][guest_node_current]["memory_total"] * 100
|
||||
proxlb_data["nodes"][guest_node_current]["disk_assigned_percent"] = proxlb_data["nodes"][guest_node_current]["disk_assigned"] / proxlb_data["nodes"][guest_node_current]["disk_total"] * 100
|
||||
|
||||
logger.debug("Finished: set_node_assignments.")
|
||||
|
||||
@staticmethod
|
||||
def get_balanciness(proxlb_data: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""
|
||||
Get the blanaciness for further actions where the highest and lowest
|
||||
usage or assignments of Proxmox nodes are compared. Based on the users
|
||||
provided balanciness delta the balancing will be performed.
|
||||
|
||||
Args:
|
||||
proxlb_data (Dict[str, Any]): The data holding all content of all objects.
|
||||
Returns:
|
||||
Dict[str, Any]: Updated meta data section of the balanciness action defined
|
||||
as a bool.
|
||||
"""
|
||||
logger.debug("Starting: get_balanciness.")
|
||||
proxlb_data["meta"]["balancing"]["balance"] = False
|
||||
|
||||
if len(proxlb_data["groups"]) > 0:
|
||||
method = proxlb_data["meta"]["balancing"].get("method", "memory")
|
||||
mode = proxlb_data["meta"]["balancing"].get("mode", "used")
|
||||
balanciness = proxlb_data["meta"]["balancing"].get("balanciness", 10)
|
||||
method_value = [node_meta[f"{method}_{mode}_percent"] for node_meta in proxlb_data["nodes"].values()]
|
||||
method_value_highest = max(method_value)
|
||||
method_value_lowest = min(method_value)
|
||||
|
||||
if method_value_highest - method_value_lowest > balanciness:
|
||||
proxlb_data["meta"]["balancing"]["balance"] = True
|
||||
logger.debug(f"Guest balancing is required. Highest value: {method_value_highest}, lowest value: {method_value_lowest} balanced by {method} and {mode}.")
|
||||
else:
|
||||
logger.debug(f"Guest balancing is ok. Highest value: {method_value_highest}, lowest value: {method_value_lowest} balanced by {method} and {mode}.")
|
||||
|
||||
else:
|
||||
logger.warning("No guests for balancing found.")
|
||||
|
||||
logger.debug("Finished: get_balanciness.")
|
||||
|
||||
@staticmethod
def get_most_free_node(proxlb_data: Dict[str, Any], return_node: bool = False) -> Dict[str, Any]:
    """
    Determine the Proxmox node with the most free resources in the cluster based
    on the user defined method (e.g.: memory) and mode (e.g.: used).

    Args:
        proxlb_data (Dict[str, Any]): The data holding all content of all objects.
        return_node (bool): The indicator to simply return the best node for further
                            assignments.

    Returns:
        Dict[str, Any]: Updated meta data section of the node with the most free resources that should
                        be used for the next balancing action.
    """
    logger.debug("Starting: get_most_free_node.")
    balancing_meta = proxlb_data["meta"]["balancing"]
    balancing_meta["balance_next_node"] = ""

    # Nodes flagged as being in 'maintenance' must never receive guests.
    candidate_nodes = [node for node in proxlb_data["nodes"].values() if not node["maintenance"]]
    method = balancing_meta.get("method", "memory")
    mode = balancing_meta.get("mode", "used")
    metric_key = f"{method}_{mode}_percent"

    # The best node is the candidate with the lowest usage for the chosen metric.
    best_node = min(candidate_nodes, key=lambda node: node[metric_key])
    balancing_meta["balance_reason"] = 'resources'
    balancing_meta["balance_next_node"] = best_node["name"]

    # If executed to simply get the best node for further usage, we return
    # the best node on stdout and gracefully exit here.
    if return_node:
        print(best_node["name"])
        sys.exit(0)

    logger.debug("Finished: get_most_free_node.")
|
||||
|
||||
@staticmethod
def relocate_guests_on_maintenance_nodes(proxlb_data: Dict[str, Any]):
    """
    Relocates guests that are currently on nodes marked for maintenance to
    nodes with the most available resources.

    This function iterates over all guests on maintenance nodes and attempts
    to relocate them to nodes with the most free resources that are not in
    maintenance mode. It updates the node resources accordingly and logs
    warnings if the balancing may not be perfect due to the maintenance
    status of the original node.

    Args:
        proxlb_data (Dict[str, Any]): The data holding all content of all objects.

    Returns:
        None
    """
    # Bugfix: the Starting/Finished debug messages previously referenced
    # 'get_most_free_node' (copy-paste) instead of this function's name.
    logger.debug("Starting: relocate_guests_on_maintenance_nodes.")
    proxlb_data["meta"]["balancing"]["balance_next_guest"] = ""

    for guest_name in proxlb_data["groups"]["maintenance"]:
        # Update the node with the most free resources which is
        # not in a maintenance and account the guest's resources on it.
        proxlb_data["meta"]["balancing"]["balance_next_guest"] = guest_name
        Calculations.get_most_free_node(proxlb_data)
        Calculations.update_node_resources(proxlb_data)
        logger.warning(f"Warning: Balancing may not be perfect because guest {guest_name} was located on a node which is in maintenance mode.")

    logger.debug("Finished: relocate_guests_on_maintenance_nodes.")
|
||||
|
||||
@staticmethod
def relocate_guests(proxlb_data: Dict[str, Any]):
    """
    Relocates guests within the provided data structure to ensure affinity groups are
    placed on nodes with the most free resources.

    Balancing runs when either the balanciness threshold was exceeded or affinity
    enforcement is requested. For each affinity group the node with the most free
    resources is determined once and every guest of the group is migrated to it.
    Node resources are updated accordingly.

    Args:
        proxlb_data (Dict[str, Any]): The data holding all content of all objects.

    Returns:
        None
    """
    logger.debug("Starting: relocate_guests.")
    balancing_meta = proxlb_data["meta"]["balancing"]
    enforce_affinity = balancing_meta.get("enforce_affinity", False)

    if balancing_meta["balance"] or enforce_affinity:
        if balancing_meta.get("balance", False):
            logger.debug("Balancing of guests will be performed. Reason: balanciness")
        if enforce_affinity:
            logger.debug("Balancing of guests will be performed. Reason: enforce affinity balancing")

        for group_name, group_meta in proxlb_data["groups"]["affinity"].items():
            # We get initially the node with the most free resources and then
            # migrate all guests within the group to that node to ensure the
            # affinity.
            Calculations.get_most_free_node(proxlb_data)

            for guest_name in group_meta["guests"]:
                balancing_meta["balance_next_guest"] = guest_name
                Calculations.val_anti_affinity(proxlb_data, guest_name)
                Calculations.val_node_relationship(proxlb_data, guest_name)
                Calculations.update_node_resources(proxlb_data)

    logger.debug("Finished: relocate_guests.")
|
||||
|
||||
@staticmethod
def val_anti_affinity(proxlb_data: Dict[str, Any], guest_name: str):
    """
    Validates and assigns nodes to guests based on anti-affinity rules.

    This function iterates over all defined anti-affinity groups in the provided
    `proxlb_data` and checks if the specified `guest_name` is included in any of
    these groups. If the guest is included and has not been processed yet, it
    attempts to assign an unused and non-maintenance node to the guest, ensuring
    that the anti-affinity rules are respected.

    Parameters:
        proxlb_data (Dict[str, Any]): The data holding all content of all objects.
        guest_name (str): The name of the guest to be validated and assigned a node.

    Returns:
        None
    """
    logger.debug("Starting: val_anti_affinity.")
    # Start by iterating over all defined anti-affinity groups
    for group_name in proxlb_data["groups"]["anti_affinity"].keys():

        # Validate if the provided guest is included in the anti-affinity group
        if guest_name in proxlb_data["groups"]["anti_affinity"][group_name]['guests'] and not proxlb_data["guests"][guest_name]["processed"]:
            logger.debug(f"Anti-Affinity: Guest: {guest_name} is included in anti-affinity group: {group_name}.")

            # Iterate over all available nodes
            for node_name in proxlb_data["nodes"].keys():

                # Only select node if it was not used before and is not in a
                # maintenance mode. Afterwards, add it to the list of already
                # used nodes for the current anti-affinity group
                if node_name not in proxlb_data["groups"]["anti_affinity"][group_name]["used_nodes"]:

                    if not proxlb_data["nodes"][node_name]["maintenance"]:
                        # If the node has not been used yet, we assign this node to the guest
                        proxlb_data["meta"]["balancing"]["balance_next_node"] = node_name
                        proxlb_data["groups"]["anti_affinity"][group_name]["used_nodes"].append(node_name)
                        logger.debug(f"Node: {node_name} marked as used for anti-affinity group: {group_name} with guest {guest_name}")
                        break

                else:
                    # NOTE(review): this fires once per already-used node even when a
                    # free node is found later in the iteration — confirm intended.
                    # Bugfix: the message previously contained a duplicated colon ("group::").
                    logger.critical(f"Node: {node_name} already got used for anti-affinity group: {group_name}. (Tried for guest: {guest_name})")

        else:
            logger.debug(f"Guest: {guest_name} is not included in anti-affinity group: {group_name}. Skipping.")

    logger.debug("Finished: val_anti_affinity.")
|
||||
|
||||
@staticmethod
def val_node_relationship(proxlb_data: Dict[str, Any], guest_name: str):
    """
    Validates and assigns guests to nodes based on defined relationships based on tags.

    Marks the guest as processed. When the guest carries a node relationship, the
    named node is validated against the cluster inventory and — if known — set as
    the next balancing target; unknown node names only produce a warning.

    Parameters:
        proxlb_data (Dict[str, Any]): The data holding all content of all objects.
        guest_name (str): The name of the guest to be validated and assigned a node.

    Returns:
        None
    """
    logger.debug("Starting: val_node_relationship.")
    guest_meta = proxlb_data["guests"][guest_name]
    guest_meta["processed"] = True
    pinned_node = guest_meta["node_relationship"]

    if not pinned_node:
        logger.info(f"Guest '{guest_name}' does not have any specific node relationships.")
    else:
        logger.info(f"Guest '{guest_name}' has a specific relationship defined to node: {pinned_node}. Pinning to node.")

        # Validate if the specified node name is really part of the cluster
        if pinned_node in proxlb_data["nodes"].keys():
            logger.info(f"Guest '{guest_name}' has a specific relationship defined to node: {pinned_node} is a known hypervisor node in the cluster.")
            # Pin the guest to the specified hypervisor node.
            proxlb_data["meta"]["balancing"]["balance_next_node"] = pinned_node
        else:
            logger.warning(f"Guest '{guest_name}' has a specific relationship defined to node: {pinned_node} but this node name is not known in the cluster!")

    logger.debug("Finished: val_node_relationship.")
|
||||
|
||||
@staticmethod
def update_node_resources(proxlb_data: Dict[str, Any]):
    """
    Updates the resource allocation and usage statistics for nodes when a guest
    is moved from one node to another.

    Parameters:
        proxlb_data (dict): A dictionary containing information about the nodes and
                            guests, including their resource allocations and usage.

    The function performs the following steps:
    1. Retrieves the guest name, current node, and target node from the provided data.
    2. Adds the guest's assigned and used resources (cpu, memory, disk) to the target
       node and refreshes the derived percentage values.
    3. Subtracts the guest's assigned and used resources from the current node and
       refreshes the derived percentage values.
       (Bugfix note: the comments here previously mislabelled this section as the
       target node although the code subtracts from the current node.)
    4. Records the target node on the guest and logs the movement.
    """
    logger.debug("Starting: update_node_resources.")
    guest_name = proxlb_data["meta"]["balancing"]["balance_next_guest"]
    node_current = proxlb_data["guests"][guest_name]["node_current"]
    node_target = proxlb_data["meta"]["balancing"]["balance_next_node"]
    guest = proxlb_data["guests"][guest_name]

    def _shift_guest_resources(node_name: str, sign: int) -> None:
        # Account the guest's resources on (sign=+1) or off (sign=-1) the given
        # node and recompute the assigned/used percentages for every resource.
        node = proxlb_data["nodes"][node_name]
        for resource in ("cpu", "memory", "disk"):
            node[f"{resource}_assigned"] += sign * guest[f"{resource}_total"]
            node[f"{resource}_assigned_percent"] = node[f"{resource}_assigned"] / node[f"{resource}_total"] * 100
            node[f"{resource}_used"] += sign * guest[f"{resource}_used"]
            node[f"{resource}_used_percent"] = node[f"{resource}_used"] / node[f"{resource}_total"] * 100

    # Update resources for the target node by the moved guest resources.
    _shift_guest_resources(node_target, 1)
    # Release the moved guest resources from the current node.
    _shift_guest_resources(node_current, -1)

    # Assign guest to the new target node
    proxlb_data["guests"][guest_name]["node_target"] = node_target
    logger.debug(f"Set guest {guest_name} from node {node_current} to node {node_target}.")

    logger.debug("Finished: update_node_resources.")
|
||||
124
proxlb/models/groups.py
Normal file
124
proxlb/models/groups.py
Normal file
@@ -0,0 +1,124 @@
|
||||
"""
|
||||
The Groups class is responsible for handling the correlations between the guests
|
||||
and their groups, such as affinity and anti-affinity groups. It ensures proper balancing
|
||||
by grouping guests and evaluating them for further balancing. The class provides methods
|
||||
to initialize with ProxLB data and to generate groups based on guest and node data.
|
||||
"""
|
||||
|
||||
__author__ = "Florian Paul Azim Hoberg <gyptazy>"
|
||||
__copyright__ = "Copyright (C) 2025 Florian Paul Azim Hoberg (@gyptazy)"
|
||||
__license__ = "GPL-3.0"
|
||||
|
||||
|
||||
from typing import Dict, Any
|
||||
from utils.logger import SystemdLogger
|
||||
from utils.helper import Helper
|
||||
|
||||
logger = SystemdLogger()
|
||||
|
||||
|
||||
class Groups:
    """
    The groups class is responsible for handling the correlations between the guests
    and their groups like affinity and anti-affinity groups. To ensure a proper balancing
    guests will be grouped and then evaluated for further balancing.

    Methods:
        __init__(proxlb_data: Dict[str, Any]):
            Initializes the Groups class.

        get_groups(guests: Dict[str, Any], nodes: Dict[str, Any]) -> Dict[str, Any]:
            Generates and returns a dictionary of affinity and anti-affinity groups
            based on the provided data.
    """

    def __init__(self, proxlb_data: Dict[str, Any]):
        """
        Initializes the Groups class with the provided ProxLB data.

        Args:
            proxlb_data (Dict[str, Any]): The data required for balancing VMs and CTs.
        """

    @staticmethod
    def get_groups(guests: Dict[str, Any], nodes: Dict[str, Any]) -> Dict[str, Any]:
        """
        Generates and returns a dictionary of affinity and anti-affinity groups based on the provided data.

        Args:
            guests (Dict[str, Any]): A dictionary containing the guest data.
            nodes (Dict[str, Any]): A dictionary containing the nodes data.

        Returns:
            Dict[str, Any]: A dictionary containing the created groups that includes:
                            * Affinity groups (or a random and unique group)
                            * Anti-affinity groups
                            * A list of guests that are currently placed on a node which
                              is defined to be in maintenance.
        """
        logger.debug("Starting: get_groups.")
        groups = {'groups': {'affinity': {}, 'anti_affinity': {}, 'maintenance': []}}

        for guest_name, guest_meta in guests["guests"].items():
            # Create affinity grouping
            # Use an affinity group if available for the guest
            if len(guest_meta["affinity_groups"]) > 0:
                for affinity_group in guest_meta["affinity_groups"]:
                    group_name = affinity_group
                    logger.debug(f'Affinity group {affinity_group} for {guest_name} will be used.')
            else:
                # Generate a random unique group name for the guest if
                # the guest does not belong to any affinity group
                random_group = Helper.get_uuid_string()
                group_name = random_group
                logger.debug(f'Random uniq group {random_group} for {guest_name} will be used.')

            if not groups["groups"]["affinity"].get(group_name, False):
                # Create group template with initial guest meta information.
                # Bugfix: memory_used and disk_used were previously seeded with
                # the guest's cpu_used value (copy-paste error).
                groups["groups"]["affinity"][group_name] = {
                    "guests": [guest_name],
                    "counter": 1,
                    # Create groups resource template by the guests resources
                    "cpu_total": guest_meta["cpu_total"],
                    "cpu_used": guest_meta["cpu_used"],
                    "memory_total": guest_meta["memory_total"],
                    "memory_used": guest_meta["memory_used"],
                    "disk_total": guest_meta["disk_total"],
                    "disk_used": guest_meta["disk_used"],
                }
            else:
                # Update group templates by guest meta information.
                # Bugfix: memory_used and disk_used were previously incremented
                # by the guest's cpu_used value (copy-paste error).
                group = groups["groups"]["affinity"][group_name]
                group["guests"].append(guest_name)
                group["counter"] += 1
                # Update group resources by guest resources
                group["cpu_total"] += guest_meta["cpu_total"]
                group["cpu_used"] += guest_meta["cpu_used"]
                group["memory_total"] += guest_meta["memory_total"]
                group["memory_used"] += guest_meta["memory_used"]
                group["disk_total"] += guest_meta["disk_total"]
                group["disk_used"] += guest_meta["disk_used"]

            # Create anti-affinity grouping
            if len(guest_meta["anti_affinity_groups"]) > 0:
                for anti_affinity_group in guest_meta["anti_affinity_groups"]:
                    anti_affinity_group_name = anti_affinity_group
                    logger.debug(f'Anti-affinity group {anti_affinity_group_name} for {guest_name} will be used.')

                if not groups["groups"]["anti_affinity"].get(anti_affinity_group_name, False):
                    groups["groups"]["anti_affinity"][anti_affinity_group_name] = {
                        "guests": [guest_name],
                        "counter": 1,
                        "used_nodes": [],
                    }
                else:
                    groups["groups"]["anti_affinity"][anti_affinity_group_name]["guests"].append(guest_name)
                    groups["groups"]["anti_affinity"][anti_affinity_group_name]["counter"] += 1

            # Create grouping of guests that are currently located on nodes that are
            # marked as in maintenance and must be migrated
            if nodes["nodes"][guest_meta["node_current"]]["maintenance"]:
                logger.debug(f'{guest_name} will be migrated to another node because the underlying node {guest_meta["node_current"]} is defined to be in maintenance.')
                groups["groups"]["maintenance"].append(guest_name)

        logger.debug("Finished: get_groups.")
        return groups
|
||||
129
proxlb/models/guests.py
Normal file
129
proxlb/models/guests.py
Normal file
@@ -0,0 +1,129 @@
|
||||
"""
|
||||
The Guests class retrieves all running guests on the Proxmox cluster across all available nodes.
|
||||
It handles both VM and CT guest types, collecting their resource metrics.
|
||||
"""
|
||||
|
||||
__author__ = "Florian Paul Azim Hoberg <gyptazy>"
|
||||
__copyright__ = "Copyright (C) 2025 Florian Paul Azim Hoberg (@gyptazy)"
|
||||
__license__ = "GPL-3.0"
|
||||
|
||||
|
||||
from typing import Dict, Any
|
||||
from utils.logger import SystemdLogger
|
||||
from models.tags import Tags
|
||||
import time
|
||||
|
||||
logger = SystemdLogger()
|
||||
|
||||
|
||||
class Guests:
    """
    The Guests class retrieves all running guests on the Proxmox cluster across all available nodes.
    It handles both VM and CT guest types, collecting their resource metrics.

    Methods:
        __init__:
            Initializes the Guests class.

        get_guests(proxmox_api: Any, nodes: Dict[str, Any], meta: Dict[str, Any]) -> Dict[str, Any]:
            Retrieves metrics for all running guests (both VMs and CTs) across all nodes in the Proxmox cluster.
            It collects resource metrics such as CPU, memory, and disk usage, as well as tags and affinity/anti-affinity groups.
    """
    def __init__(self):
        """
        Initializes the Guests class with the provided ProxLB data.
        """

    @staticmethod
    def get_guests(proxmox_api: Any, nodes: Dict[str, Any], meta: Dict[str, Any]) -> Dict[str, Any]:
        """
        Get metrics of all guests in a Proxmox cluster.

        This method retrieves metrics for all running guests (both VMs and CTs) across all nodes in the Proxmox cluster.
        It iterates over each node and collects resource metrics for each running guest, including CPU, memory, and disk usage.
        Additionally, it retrieves tags and affinity/anti-affinity groups for each guest.

        Args:
            proxmox_api (Any): The Proxmox API client instance.
            nodes (Dict[str, Any]): A dictionary containing information about the nodes in the Proxmox cluster.
            meta (Dict[str, Any]): The meta data section; the balancing method is read from
                meta["meta"]["balancing"]["method"] to decide whether to wait for CPU metrics.

        Returns:
            Dict[str, Any]: A dictionary containing metrics and information for all running guests.
        """
        logger.debug("Starting: get_guests.")
        guests = {"guests": {}}

        # Guest objects are always only in the scope of a node.
        # Therefore, we need to iterate over all nodes to get all guests.
        for node in nodes['nodes'].keys():

            # VM objects: Iterate over all VMs on the current node by the qemu API object.
            # Unlike the nodes we need to keep them even when being ignored to create proper
            # resource metrics for rebalancing to ensure that we do not overprovisiong the node.
            for guest in proxmox_api.nodes(node).qemu.get():
                if guest['status'] == 'running':

                    # If the balancing method is set to cpu, we need to wait for the guest to report
                    # cpu usage. This is important for the balancing process to ensure that we do not
                    # wait for a guest for an infinite time.
                    if meta["meta"]["balancing"]["method"] == "cpu":
                        retry_counter = 0
                        # NOTE(review): the loop below rebinds `guest` to the status.current
                        # response; assumes that payload exposes the same keys (name, cpus,
                        # maxmem, disk, ...) as the list entry — TODO confirm against the
                        # Proxmox API before relying on it.
                        while guest['cpu'] == 0 and retry_counter < 10:
                            guest = proxmox_api.nodes(node).qemu(guest['vmid']).status.current.get()
                            logger.debug(f"Guest {guest['name']} (type VM) is reporting {guest['cpu']} cpu usage on retry {retry_counter}.")
                            retry_counter += 1
                            time.sleep(1)

                    guests['guests'][guest['name']] = {}
                    guests['guests'][guest['name']]['name'] = guest['name']
                    guests['guests'][guest['name']]['cpu_total'] = int(guest['cpus'])
                    # cpu usage is scaled by the number of vCPUs for VMs
                    guests['guests'][guest['name']]['cpu_used'] = guest['cpu'] * guest['cpus']
                    guests['guests'][guest['name']]['memory_total'] = guest['maxmem']
                    guests['guests'][guest['name']]['memory_used'] = guest['mem']
                    guests['guests'][guest['name']]['disk_total'] = guest['maxdisk']
                    guests['guests'][guest['name']]['disk_used'] = guest['disk']
                    guests['guests'][guest['name']]['id'] = guest['vmid']
                    guests['guests'][guest['name']]['node_current'] = node
                    # node_target starts as the current node; balancing may change it later
                    guests['guests'][guest['name']]['node_target'] = node
                    guests['guests'][guest['name']]['processed'] = False
                    guests['guests'][guest['name']]['tags'] = Tags.get_tags_from_guests(proxmox_api, node, guest['vmid'], 'vm')
                    guests['guests'][guest['name']]['affinity_groups'] = Tags.get_affinity_groups(guests['guests'][guest['name']]['tags'])
                    guests['guests'][guest['name']]['anti_affinity_groups'] = Tags.get_anti_affinity_groups(guests['guests'][guest['name']]['tags'])
                    guests['guests'][guest['name']]['ignore'] = Tags.get_ignore(guests['guests'][guest['name']]['tags'])
                    guests['guests'][guest['name']]['node_relationship'] = Tags.get_node_relationship(guests['guests'][guest['name']]['tags'])
                    guests['guests'][guest['name']]['type'] = 'vm'

                    logger.debug(f"Resources of Guest {guest['name']} (type VM) added: {guests['guests'][guest['name']]}")
                else:
                    logger.debug(f'Metric for VM {guest["name"]} ignored because VM is not running.')

            # CT objects: Iterate over all VMs on the current node by the lxc API object.
            # Unlike the nodes we need to keep them even when being ignored to create proper
            # resource metrics for rebalancing to ensure that we do not overprovisiong the node.
            for guest in proxmox_api.nodes(node).lxc.get():
                if guest['status'] == 'running':
                    guests['guests'][guest['name']] = {}
                    guests['guests'][guest['name']]['name'] = guest['name']
                    guests['guests'][guest['name']]['cpu_total'] = int(guest['cpus'])
                    # NOTE(review): unlike the VM branch, cpu_used is NOT multiplied by
                    # the number of vCPUs here — confirm whether this is intended for CTs.
                    guests['guests'][guest['name']]['cpu_used'] = guest['cpu']
                    guests['guests'][guest['name']]['memory_total'] = guest['maxmem']
                    guests['guests'][guest['name']]['memory_used'] = guest['mem']
                    guests['guests'][guest['name']]['disk_total'] = guest['maxdisk']
                    guests['guests'][guest['name']]['disk_used'] = guest['disk']
                    guests['guests'][guest['name']]['id'] = guest['vmid']
                    guests['guests'][guest['name']]['node_current'] = node
                    # node_target starts as the current node; balancing may change it later
                    guests['guests'][guest['name']]['node_target'] = node
                    guests['guests'][guest['name']]['processed'] = False
                    guests['guests'][guest['name']]['tags'] = Tags.get_tags_from_guests(proxmox_api, node, guest['vmid'], 'ct')
                    guests['guests'][guest['name']]['affinity_groups'] = Tags.get_affinity_groups(guests['guests'][guest['name']]['tags'])
                    guests['guests'][guest['name']]['anti_affinity_groups'] = Tags.get_anti_affinity_groups(guests['guests'][guest['name']]['tags'])
                    guests['guests'][guest['name']]['ignore'] = Tags.get_ignore(guests['guests'][guest['name']]['tags'])
                    guests['guests'][guest['name']]['node_relationship'] = Tags.get_node_relationship(guests['guests'][guest['name']]['tags'])
                    guests['guests'][guest['name']]['type'] = 'ct'

                    logger.debug(f"Resources of Guest {guest['name']} (type CT) added: {guests['guests'][guest['name']]}")
                else:
                    logger.debug(f'Metric for CT {guest["name"]} ignored because CT is not running.')

        logger.debug("Finished: get_guests.")
        return guests
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user