mirror of
https://github.com/gyptazy/ProxLB.git
synced 2026-04-06 04:41:58 +02:00
Compare commits
87 Commits
feature/au
...
v1.1.7
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
a8a154abde | ||
|
|
554a3eaf72 | ||
|
|
0b35987403 | ||
|
|
d93048db69 | ||
|
|
2aba7dbe23 | ||
|
|
ba388dfd7c | ||
|
|
5aa8257d40 | ||
|
|
99fefe20bf | ||
|
|
b9fb3a60e1 | ||
|
|
88b3288eb7 | ||
|
|
fa0113f112 | ||
|
|
0039ae9093 | ||
|
|
e3bbf31fdd | ||
|
|
bf393c6bbf | ||
|
|
7e5b72cfc7 | ||
|
|
0ba76f80f3 | ||
|
|
b48ff9d677 | ||
|
|
b5c11af474 | ||
|
|
af2992747d | ||
|
|
fb8dc40c16 | ||
|
|
34f1de8367 | ||
|
|
0e992e99de | ||
|
|
f5d073dc02 | ||
|
|
70ba1f2dfc | ||
|
|
c9855f1991 | ||
|
|
9bd29158b9 | ||
|
|
1ff0c5d96e | ||
|
|
3eb4038723 | ||
|
|
47e7dd3c56 | ||
|
|
bb8cf9033d | ||
|
|
756b4efcbd | ||
|
|
8630333e4b | ||
|
|
7bd9a9b038 | ||
|
|
16651351de | ||
|
|
63805f1f50 | ||
|
|
c0ff1b5273 | ||
|
|
07f8596fc5 | ||
|
|
affbe433f9 | ||
|
|
7bda22e754 | ||
|
|
253dcf8eb9 | ||
|
|
6212d23268 | ||
|
|
cf8c06393f | ||
|
|
5c23fd3433 | ||
|
|
0fb732fc8c | ||
|
|
f36d96c72a | ||
|
|
9cc03717ef | ||
|
|
4848887ccc | ||
|
|
04476feeaf | ||
|
|
b3765bf0ae | ||
|
|
806b728a14 | ||
|
|
2c34ec91b1 | ||
|
|
08b746a53b | ||
|
|
615e2f5608 | ||
|
|
fa1e1ad8a3 | ||
|
|
c78def3919 | ||
|
|
54c53b9860 | ||
|
|
1fe8f703cc | ||
|
|
7ba806abf7 | ||
|
|
6b2e120739 | ||
|
|
e4103df326 | ||
|
|
f2acd4efa6 | ||
|
|
f4ed8d9928 | ||
|
|
ba74254b93 | ||
|
|
792a0f3820 | ||
|
|
b766041c4c | ||
|
|
a31e41f839 | ||
|
|
7cb5a31b89 | ||
|
|
617d0a3ae3 | ||
|
|
db3a3b77fc | ||
|
|
5a9643275a | ||
|
|
60d1e333aa | ||
|
|
96dc435cf6 | ||
|
|
263b08b53a | ||
|
|
89102d517e | ||
|
|
845af4abc8 | ||
|
|
3e02403598 | ||
|
|
0b0d569877 | ||
|
|
1cbda2e2f9 | ||
|
|
b6febf1933 | ||
|
|
53a6d2a459 | ||
|
|
6c82ce010b | ||
|
|
4b8b73e468 | ||
|
|
a75729dd6a | ||
|
|
b8792a87af | ||
|
|
c1261a2d3c | ||
|
|
0035f57738 | ||
|
|
b372d361e7 |
@@ -0,0 +1,2 @@
|
||||
fixed:
|
||||
- Force type cast cpu count of guests to int for some corner cases where a str got returned (by @gyptazy). [#222]
|
||||
@@ -1 +1 @@
|
||||
date: TBD
|
||||
date: 2025-05-13
|
||||
|
||||
2
.changelogs/1.1.3/189_add_reload_function.yml
Normal file
2
.changelogs/1.1.3/189_add_reload_function.yml
Normal file
@@ -0,0 +1,2 @@
|
||||
added:
|
||||
- Add reload (SIGHUP) function to ProxLB to reload the configuration (by @gyptazy). [#189]
|
||||
@@ -0,0 +1,2 @@
|
||||
fixed:
|
||||
- Align maintenance mode with Proxmox HA maintenance mode (by @gyptazy). [#232]
|
||||
@@ -0,0 +1,2 @@
|
||||
added:
|
||||
- Add optional wait time parameter to delay execution until the service takes action (by @gyptazy). #239
|
||||
@@ -0,0 +1,2 @@
|
||||
added:
|
||||
- Make the amount of parallel migrations configurable (by @gyptazy). [#241]
|
||||
@@ -0,0 +1,2 @@
|
||||
changed:
|
||||
- Use the average CPU consumption of a guest within the last 60 minutes instead of the current CPU usage (by @philslab-ninja & @gyptazy). [#94]
|
||||
1
.changelogs/1.1.3/release_meta.yml
Normal file
1
.changelogs/1.1.3/release_meta.yml
Normal file
@@ -0,0 +1 @@
|
||||
date: 2025-06-19
|
||||
@@ -0,0 +1,2 @@
|
||||
added:
|
||||
- Allow pinning of guests to a group of nodes (@gyptazy). [#245]
|
||||
@@ -0,0 +1,2 @@
|
||||
fixed:
|
||||
- Fixed an issue where balancing was performed in combination of deactivated balancing and dry-run mode (@gyptazy). [#248]
|
||||
2
.changelogs/1.1.4/255_fix_loglevels.yml
Normal file
2
.changelogs/1.1.4/255_fix_loglevels.yml
Normal file
@@ -0,0 +1,2 @@
|
||||
fixed:
|
||||
- Modified log levels to make output lighter at INFO level (@pmarasse) [#255]
|
||||
1
.changelogs/1.1.4/release_meta.yml
Normal file
1
.changelogs/1.1.4/release_meta.yml
Normal file
@@ -0,0 +1 @@
|
||||
date: 2025-06-27
|
||||
2
.changelogs/1.1.5/260_allow_custom_api_ports.yml
Normal file
2
.changelogs/1.1.5/260_allow_custom_api_ports.yml
Normal file
@@ -0,0 +1,2 @@
|
||||
added:
|
||||
- Allow custom API ports instead of fixed tcp/8006 (@gyptazy). [#260]
|
||||
1
.changelogs/1.1.5/release_meta.yml
Normal file
1
.changelogs/1.1.5/release_meta.yml
Normal file
@@ -0,0 +1 @@
|
||||
date: 2025-07-14
|
||||
2
.changelogs/1.1.6/268_fix_balancing_type_eval.yml
Normal file
2
.changelogs/1.1.6/268_fix_balancing_type_eval.yml
Normal file
@@ -0,0 +1,2 @@
|
||||
fixed:
|
||||
- Fix balancing evaluation of guest types (e.g., VM or CT) (@gyptazy). [#268]
|
||||
2
.changelogs/1.1.6/290_validate_user_token_syntax.yml
Normal file
2
.changelogs/1.1.6/290_validate_user_token_syntax.yml
Normal file
@@ -0,0 +1,2 @@
|
||||
added:
|
||||
- Add validation for provided API user token id to avoid confusions (@gyptazy). [#291]
|
||||
@@ -0,0 +1,2 @@
|
||||
fixed:
|
||||
- Fix stacktrace output when validating permissions on non existing users in Proxmox (@gyptazy). [#291]
|
||||
@@ -0,0 +1,3 @@
|
||||
fixed:
|
||||
- Fix Overprovisioning first node if anti_affinity_group has only one member (@MiBUl-eu). [#295]
|
||||
|
||||
@@ -0,0 +1,3 @@
|
||||
fixed:
|
||||
- Validate for node presence when pinning guests to avoid crashing (@gyptazy). [#296]
|
||||
|
||||
1
.changelogs/1.1.6/release_meta.yml
Normal file
1
.changelogs/1.1.6/release_meta.yml
Normal file
@@ -0,0 +1 @@
|
||||
date: 2025-09-04
|
||||
2
.changelogs/1.1.7/304_add_graceful_shutdown_sigint.yml
Normal file
2
.changelogs/1.1.7/304_add_graceful_shutdown_sigint.yml
Normal file
@@ -0,0 +1,2 @@
|
||||
added:
|
||||
- Add graceful shutdown for SIGINT (e.g., CTRL + C abort). (@gyptazy). [#304]
|
||||
@@ -0,0 +1,2 @@
|
||||
added:
|
||||
- Add conntrack state aware migrations of VMs (@gyptazy). [#305]
|
||||
2
.changelogs/1.1.7/308_fix_only_validate_valid_jobids.yml
Normal file
2
.changelogs/1.1.7/308_fix_only_validate_valid_jobids.yml
Normal file
@@ -0,0 +1,2 @@
|
||||
fixed:
|
||||
- Fix crash when validating absent migration job ids. (@gyptazy). [#308]
|
||||
@@ -0,0 +1,2 @@
|
||||
fixed:
|
||||
- Fix guest object names are not being evaluated in debug log. (@gyptazy). [#310]
|
||||
1
.changelogs/1.1.7/release_meta.yml
Normal file
1
.changelogs/1.1.7/release_meta.yml
Normal file
@@ -0,0 +1 @@
|
||||
date: 2025-09-19
|
||||
@@ -1,2 +0,0 @@
|
||||
added:
|
||||
- Add power management feature for cluster nodes (by @gyptazy) [#141]
|
||||
@@ -1 +0,0 @@
|
||||
date: TBD
|
||||
26
.github/workflows/30-pipeline-build-container-amd64.yml
vendored
Normal file
26
.github/workflows/30-pipeline-build-container-amd64.yml
vendored
Normal file
@@ -0,0 +1,26 @@
|
||||
name: "Build Container Image: AMD64"
|
||||
on: [push]
|
||||
jobs:
|
||||
build:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v4
|
||||
- name: Set up QEMU
|
||||
uses: docker/setup-qemu-action@v3
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
- name: Build amd64 image and save as tar
|
||||
run: |
|
||||
docker buildx build \
|
||||
--platform linux/amd64 \
|
||||
--load \
|
||||
-t proxlb-image:amd64 \
|
||||
.
|
||||
|
||||
docker save proxlb-image:amd64 -o proxlb_image_amd64.tar
|
||||
- name: Upload Docker image artifact
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: proxlb-image-amd64
|
||||
path: proxlb_image_amd64.tar
|
||||
26
.github/workflows/30-pipeline-build-container-arm64.yml
vendored
Normal file
26
.github/workflows/30-pipeline-build-container-arm64.yml
vendored
Normal file
@@ -0,0 +1,26 @@
|
||||
name: "Build Container Image: ARM64"
|
||||
on: [push]
|
||||
jobs:
|
||||
build:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v4
|
||||
- name: Set up QEMU
|
||||
uses: docker/setup-qemu-action@v3
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
- name: Build arm64 image and save as tar
|
||||
run: |
|
||||
docker buildx build \
|
||||
--platform linux/arm64 \
|
||||
--load \
|
||||
-t proxlb-image:arm64 \
|
||||
.
|
||||
|
||||
docker save proxlb-image:arm64 -o proxlb_image_arm64.tar
|
||||
- name: Upload Docker image artifact
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: proxlb-image-arm64
|
||||
path: proxlb_image_arm64.tar
|
||||
23
.github/workflows/30-pipeline-build-container-multi-arch.yml
vendored
Normal file
23
.github/workflows/30-pipeline-build-container-multi-arch.yml
vendored
Normal file
@@ -0,0 +1,23 @@
|
||||
name: "Build Container Image: Multiarch"
|
||||
on: [push]
|
||||
jobs:
|
||||
build:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v4
|
||||
- name: Set up QEMU
|
||||
uses: docker/setup-qemu-action@v3
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
- name: Build multi-arch image and save as tar
|
||||
run: |
|
||||
docker buildx build \
|
||||
--platform linux/amd64,linux/arm64 \
|
||||
--output type=tar,dest=proxlb_image_multiarch.tar \
|
||||
.
|
||||
- name: Upload Docker image artifact
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: proxlb-image-multiarch
|
||||
path: proxlb_image_multiarch.tar
|
||||
81
CHANGELOG.md
81
CHANGELOG.md
@@ -5,6 +5,87 @@ All notable changes to this project will be documented in this file.
|
||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
||||
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||
|
||||
|
||||
## [1.1.7] - 2025-09-19
|
||||
|
||||
### Added
|
||||
|
||||
- Add conntrack state aware migrations of VMs (@gyptazy). [#305]
|
||||
- Add graceful shutdown for SIGINT (e.g., CTRL + C abort). (@gyptazy). [#304]
|
||||
|
||||
### Fixed
|
||||
|
||||
- Fix crash when validating absent migration job ids. (@gyptazy). [#308]
|
||||
- Fix guest object names are not being evaluated in debug log. (@gyptazy). [#310]
|
||||
|
||||
## [1.1.6.1] - 2025-09-04
|
||||
|
||||
### Fixed
|
||||
|
||||
- Validate for node presence when pinning VMs to avoid crashing (@gyptazy). [#296]
|
||||
|
||||
## [1.1.6] - 2025-09-04
|
||||
|
||||
### Added
|
||||
|
||||
- Add validation for provided API user token id to avoid confusions (@gyptazy). [#291]
|
||||
|
||||
### Fixed
|
||||
|
||||
- Fix stacktrace output when validating permissions on non existing users in Proxmox (@gyptazy). [#291]
|
||||
- Fix Overprovisioning first node if anti_affinity_group has only one member (@MiBUl-eu). [#295]
|
||||
- Validate for node presence when pinning guests to avoid crashing (@gyptazy). [#296]
|
||||
- Fix balancing evaluation of guest types (e.g., VM or CT) (@gyptazy). [#268]
|
||||
|
||||
## [1.1.5] - 2025-07-14
|
||||
|
||||
### Added
|
||||
|
||||
- Allow custom API ports instead of fixed tcp/8006 (@gyptazy). [#260]
|
||||
|
||||
|
||||
## [1.1.4] - 2025-06-27
|
||||
|
||||
### Added
|
||||
|
||||
- Allow pinning of guests to a group of nodes (@gyptazy). [#245]
|
||||
|
||||
### Fixed
|
||||
|
||||
- Modified log levels to make output lighter at INFO level (@pmarasse) [#255]
|
||||
- Fixed an issue where balancing was performed in combination of deactivated balancing and dry-run mode (@gyptazy). [#248]
|
||||
|
||||
|
||||
## [1.1.3] - 2025-06-19
|
||||
|
||||
### Added
|
||||
|
||||
- Add reload (SIGHUP) function to ProxLB to reload the configuration (by @gyptazy). [#189]
|
||||
- Add optional wait time parameter to delay execution until the service takes action (by @gyptazy). [#239]
|
||||
- Make the amount of parallel migrations configurable (by @gyptazy). [#241]
|
||||
|
||||
### Changed
|
||||
|
||||
- Use the average CPU consumption of a guest within the last 60 minutes instead of the current CPU usage (by @philslab-ninja & @gyptazy). [#94]
|
||||
|
||||
### Fixed
|
||||
|
||||
- Align maintenance mode with Proxmox HA maintenance mode (by @gyptazy). [#232]
|
||||
|
||||
|
||||
## [1.1.2] - 2025-05-13
|
||||
|
||||
### Added
|
||||
|
||||
- Add a configurable retry mechanism when connecting to the Proxmox API (by @gyptazy) [#157]
|
||||
- Add 1-to-1 relationships between guest and hypervisor node to ping a guest on a node (by @gyptazy) [#218]
|
||||
|
||||
### Fixed
|
||||
|
||||
- Force type cast cpu count of guests to int for some corner cases where a str got returned (by @gyptazy). [#222]
|
||||
- Fix systemd unit file to run after network on non PVE nodes (by @robertdahlem) [#137]
|
||||
|
||||
|
||||
## [1.1.1] - 2025-04-20
|
||||
|
||||
### Added
|
||||
|
||||
88
README.md
88
README.md
@@ -1,5 +1,5 @@
|
||||
# ProxLB - (Re)Balance VM Workloads in Proxmox Clusters
|
||||
<img align="left" src="https://cdn.gyptazy.com/images/Prox-LB-logo.jpg"/>
|
||||
<img align="left" src="https://cdn.gyptazy.com/img/ProxLB.jpg"/>
|
||||
<br>
|
||||
|
||||
<p float="center"><img src="https://img.shields.io/github/license/gyptazy/ProxLB"/><img src="https://img.shields.io/github/contributors/gyptazy/ProxLB"/><img src="https://img.shields.io/github/last-commit/gyptazy/ProxLB/main"/><img src="https://img.shields.io/github/issues-raw/gyptazy/ProxLB"/><img src="https://img.shields.io/github/issues-pr/gyptazy/ProxLB"/></p>
|
||||
@@ -46,29 +46,28 @@ Overall, ProxLB significantly enhances resource management by intelligently dist
|
||||
<img src="https://cdn.gyptazy.com/images/proxlb-rebalancing-demo.gif"/>
|
||||
|
||||
## Features
|
||||
ProxLB's key features are by enabling automatic rebalancing of VMs and CTs across a Proxmox cluster based on memory, CPU, and local disk usage while identifying optimal nodes for automation. It supports maintenance mode, affinity rules, and seamless Proxmox API integration with ACL support, offering flexible usage as a one-time operation, a daemon, or through the Proxmox Web GUI. In addition, ProxLB also supports additional enterprise alike features like power managements for nodes (often also known as DPM) where nodes can be turned on/off on demand when workloads are higher/lower than usual. Also the automated security-patching of nodes within the cluster (known as ASPM) may help to reduce the manual work from cluster admins, where nodes will install patches, move guests across the cluster, reboot and then reblance the cluster again.
|
||||
ProxLB's key features are by enabling automatic rebalancing of VMs and CTs across a Proxmox cluster based on memory, CPU, and local disk usage while identifying optimal nodes for automation. It supports maintenance mode, affinity rules, and seamless Proxmox API integration with ACL support, offering flexible usage as a one-time operation, a daemon, or through the Proxmox Web GUI.
|
||||
|
||||
**Features**
|
||||
* Re-Balancing (DRS)
|
||||
* Supporting VMs & CTs
|
||||
* Balancing by:
|
||||
* CPU
|
||||
* Memory
|
||||
* Disk
|
||||
* Affinity / Anti-Affinity Rules
|
||||
* Affinity: Groups guests together
|
||||
* Anti-Affinity: Ensuring guests run on different nodes
|
||||
* Best node evaluation
|
||||
* Get the best node for guest placement (e.g., CI/CD)
|
||||
* Rebalance VMs/CTs in the cluster by:
|
||||
* Memory
|
||||
* Disk (only local storage)
|
||||
* CPU
|
||||
* Get best nodes for further automation
|
||||
* Supported Guest Types
|
||||
* VMs
|
||||
* CTs
|
||||
* Maintenance Mode
|
||||
* Evacuating a single or multiple nodes
|
||||
* Node Power Management (DPM)
|
||||
* Auto Node Security-Patch-Management (ASPM)
|
||||
* Set node(s) into maintenance
|
||||
* Move all workloads to different nodes
|
||||
* Affinity / Anti-Affinity Rules
|
||||
* Fully based on Proxmox API
|
||||
* Utilizing the Proxmox User Authentications
|
||||
* Supporting API tokens
|
||||
* No SSH or Agents required
|
||||
* Can run everywhere
|
||||
* Fully integrated into the Proxmox ACL
|
||||
* No SSH required
|
||||
* Usage
|
||||
* One-Time
|
||||
* Daemon
|
||||
* Proxmox Web GUI Integration
|
||||
|
||||
## How does it work?
|
||||
ProxLB is a load-balancing system designed to optimize the distribution of virtual machines (VMs) and containers (CTs) across a cluster. It works by first gathering resource usage metrics from all nodes in the cluster through the Proxmox API. This includes detailed resource metrics for each VM and CT on every node. ProxLB then evaluates the difference between the maximum and minimum resource usage of the nodes, referred to as "Balanciness." If this difference exceeds a predefined threshold (which is configurable), the system initiates the rebalancing process.
|
||||
@@ -78,6 +77,10 @@ Before starting any migrations, ProxLB validates that rebalancing actions are ne
|
||||
## Installation
|
||||
|
||||
### Requirements / Dependencies
|
||||
* Proxmox
|
||||
* Proxmox 7.x
|
||||
* Proxmox 8.x
|
||||
* Proxmox 9.x
|
||||
* Python3.x
|
||||
* proxmoxer
|
||||
* requests
|
||||
@@ -131,7 +134,7 @@ wget -O /etc/apt/trusted.gpg.d/proxlb.asc https://repo.gyptazy.com/repository.gp
|
||||
|
||||
#### Debian Packages (.deb files)
|
||||
If you do not want to use the repository you can also find the debian packages as a .deb file on gyptazy's CDN at:
|
||||
* https://cdn.gyptazy.com/files/os/debian/proxlb/
|
||||
* https://cdn.gyptazy.com/debian/
|
||||
|
||||
Afterwards, you can simply install the package by running:
|
||||
```bash
|
||||
@@ -162,6 +165,13 @@ docker run -it --rm -v $(pwd)/proxlb.yaml:/etc/proxlb/proxlb.yaml proxlb
|
||||
| Version | Image |
|
||||
|------|:------:|
|
||||
| latest | cr.gyptazy.com/proxlb/proxlb:latest |
|
||||
| v1.1.7 | cr.gyptazy.com/proxlb/proxlb:v1.1.7 |
|
||||
| v1.1.6.1 | cr.gyptazy.com/proxlb/proxlb:v1.1.6.1 |
|
||||
| v1.1.6 | cr.gyptazy.com/proxlb/proxlb:v1.1.6 |
|
||||
| v1.1.5 | cr.gyptazy.com/proxlb/proxlb:v1.1.5 |
|
||||
| v1.1.4 | cr.gyptazy.com/proxlb/proxlb:v1.1.4 |
|
||||
| v1.1.3 | cr.gyptazy.com/proxlb/proxlb:v1.1.3 |
|
||||
| v1.1.2 | cr.gyptazy.com/proxlb/proxlb:v1.1.2 |
|
||||
| v1.1.1 | cr.gyptazy.com/proxlb/proxlb:v1.1.1 |
|
||||
| v1.1.0 | cr.gyptazy.com/proxlb/proxlb:v1.1.0 |
|
||||
| v1.0.6 | cr.gyptazy.com/proxlb/proxlb:v1.0.6 |
|
||||
@@ -238,7 +248,7 @@ The following options can be set in the configuration file `proxlb.yaml`:
|
||||
| Section | Option | Sub Option | Example | Type | Description |
|
||||
|---------|:------:|:----------:|:-------:|:----:|:-----------:|
|
||||
| `proxmox_api` | | | | | |
|
||||
| | hosts | | ['virt01.example.com', '10.10.10.10', 'fe01::bad:code::cafe'] | `List` | List of Proxmox nodes. Can be IPv4, IPv6 or mixed. |
|
||||
| | hosts | | ['virt01.example.com', '10.10.10.10', 'fe01:bad:code::cafe', 'virt01.example.com:443', '[fc00::1]', '[fc00::1]:443', 'fc00::1:8006'] | `List` | List of Proxmox nodes. Can be IPv4, IPv6 or mixed. You can specify custom ports. In case of IPv6 without brackets the port is considered after the last colon |
|
||||
| | user | | root@pam | `Str` | Username for the API. |
|
||||
| | pass | | FooBar | `Str` | Password for the API. (Recommended: Use API token authorization!) |
|
||||
| | token_id | | proxlb | `Str` | Token ID of the user for the API. |
|
||||
@@ -248,38 +258,38 @@ The following options can be set in the configuration file `proxlb.yaml`:
|
||||
| | retries | | 1 | `Int` | How often a connection attempt to the defined API host should be performed. |
|
||||
| | wait_time | | 1 | `Int` | How many seconds should be waited before performing another connection attempt to the API host. |
|
||||
| `proxmox_cluster` | | | | | |
|
||||
| | maintenance_nodes | | ['virt66.example.com'] | `List` | A list of Proxmox nodes that are defined to be in a maintenance. |
|
||||
| | maintenance_nodes | | ['virt66.example.com'] | `List` | A list of Proxmox nodes that are defined to be in a maintenance. (must be the same node names as used within the cluster) |
|
||||
| | ignore_nodes | | [] | `List` | A list of Proxmox nodes that are defined to be ignored. |
|
||||
| | overprovisioning | | False | `Bool` | Avoids balancing when nodes would become overprovisioned. |
|
||||
| `balancing` | | | | | |
|
||||
| | enable | | True | `Bool` | Enables the guest balancing.|
|
||||
| | enforce_affinity | | True | `Bool` | Enforcing affinity/anti-affinity rules but balancing might become worse. |
|
||||
| | parallel | | False | `Bool` | If guests should be moved in parallel or sequentially.|
|
||||
| | parallel_jobs | | 5 | `Int` | The amount of parallel jobs when migrating guests. (default: `5`)|
|
||||
| | live | | True | `Bool` | If guests should be moved live or shutdown.|
|
||||
| | with_local_disks | | True | `Bool` | If balancing of guests should include local disks.|
|
||||
| | with_conntrack_state | | True | `Bool` | If balancing of guests should including the conntrack state.|
|
||||
| | balance_types | | ['vm', 'ct'] | `List` | Defines the types of guests that should be honored. [values: `vm`, `ct`]|
|
||||
| | max_job_validation | | 1800 | `Int` | How long a job validation may take in seconds. (default: 1800) |
|
||||
| | balanciness | | 10 | `Int` | The maximum delta of resource usage between node with highest and lowest usage. |
|
||||
| | method | | memory | `Str` | The balancing method that should be used. [values: `memory` (default), `cpu`, `disk`]|
|
||||
| | mode | | used | `Str` | The balancing mode that should be used. [values: `used` (default), `assigned`] |
|
||||
| `dpm` | | | | | |
|
||||
| | enable | | True | `Bool` | Enables the Dynamic Power Management functions.|
|
||||
| | method | | memory | `Str` | The balancing method that should be used. [values: `memory` (default), `cpu`, `disk`]|
|
||||
| | mode | | static | `Str` | The balancing mode that should be used. [values: `static` (default), `auto`] |
|
||||
| | cluster_min_free_resources | | 60 | `Int` | Representing the minimum required free resources in percent within the cluster. [values: `60`% (default)] |
|
||||
| | cluster_min_nodes | | 3 | `Int` | The minimum of required nodes that should remain in a cluster. [values: `3` (default)] |
|
||||
| `service` | | | | | |
|
||||
| | daemon | | True | `Bool` | If daemon mode should be activated. |
|
||||
| | `schedule` | | | `Dict` | Schedule config block for rebalancing. |
|
||||
| | | interval | 12 | `Int` | How often rebalancing should occur in daemon mode.|
|
||||
| | | format | hours | `Str` | Sets the time format. [values: `hours` (default), `minutes`]|
|
||||
| | `delay` | | | `Dict` | Schedule config block for an optional delay until the service starts. |
|
||||
| | | enable | False | `Bool` | If a delay time should be validated.|
|
||||
| | | time | 1 | `Int` | Delay time until the service starts after the initial execution.|
|
||||
| | | format | hours | `Str` | Sets the time format. [values: `hours` (default), `minutes`]|
|
||||
| | log_level | | INFO | `Str` | Defines the default log level that should be logged. [values: `INFO` (default), `WARNING`, `CRITICAL`, `DEBUG`] |
|
||||
|
||||
|
||||
An example of the configuration file looks like:
|
||||
```
|
||||
proxmox_api:
|
||||
hosts: ['virt01.example.com', '10.10.10.10', 'fe01::bad:code::cafe']
|
||||
hosts: ['virt01.example.com', '10.10.10.10', 'fe01:bad:code::cafe']
|
||||
user: root@pam
|
||||
pass: crazyPassw0rd!
|
||||
# API Token method
|
||||
@@ -302,26 +312,22 @@ balancing:
|
||||
parallel: False
|
||||
live: True
|
||||
with_local_disks: True
|
||||
with_conntrack_state: True
|
||||
balance_types: ['vm', 'ct']
|
||||
max_job_validation: 1800
|
||||
balanciness: 5
|
||||
method: memory
|
||||
mode: used
|
||||
|
||||
dpm:
|
||||
# DPM requires you to define the WOL (Wake-on-Lan)
|
||||
# MAC address for each node in Proxmox.
|
||||
enable: True
|
||||
method: memory
|
||||
mode: static
|
||||
cluster_min_free_resources: 60
|
||||
cluster_min_nodes: 1
|
||||
|
||||
service:
|
||||
daemon: True
|
||||
schedule:
|
||||
interval: 12
|
||||
format: hours
|
||||
delay:
|
||||
enable: False
|
||||
time: 1
|
||||
format: hours
|
||||
log_level: INFO
|
||||
```
|
||||
|
||||
@@ -382,7 +388,7 @@ As a result, ProxLB will not migrate this guest with the `plb_ignore_dev` tag to
|
||||
**Note:** Ignored guests are really ignored. Even by enforcing affinity rules this guest will be ignored.
|
||||
|
||||
### Pin VMs to Specific Hypervisor Nodes
|
||||
<img align="left" src="https://cdn.gyptazy.com/images/proxlb-tag-node-pinning.jpg"/> Guests, such as VMs or CTs, can also be pinned to specific nodes in the cluster. This might be usefull when running applications with some special licensing requirements that are only fulfilled on certain nodes. It might also be interesting, when some physical hardware is attached to a node, that is not available in general within the cluster.
|
||||
<img align="left" src="https://cdn.gyptazy.com/images/proxlb-tag-node-pinning.jpg"/> Guests, such as VMs or CTs, can also be pinned to specific (and multiple) nodes in the cluster. This might be useful when running applications with some special licensing requirements that are only fulfilled on certain nodes. It might also be interesting, when some physical hardware is attached to a node, that is not available in general within the cluster.
|
||||
|
||||
To pin a guest to a specific cluster node, users assign a tag with the prefix `plb_pin_$nodename` to the desired guest:
|
||||
|
||||
@@ -393,6 +399,8 @@ plb_pin_node03
|
||||
|
||||
As a result, ProxLB will pin the guest `dev-vm01` to the node `virt03`.
|
||||
|
||||
You can also repeat this step multiple times for different node names to create a potential group of allowed hosts where the guest may be served. In this case, ProxLB takes the node with the lowest used resources according to the defined balancing values from this group.
|
||||
|
||||
**Note:** The given node names from the tag are validated. This means, ProxLB validates whether the given node name is really part of the cluster. In case of a wrongly defined or unavailable node name it continues to use the regular processes to make sure the guest keeps running.
|
||||
|
||||
## Maintenance
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
proxmox_api:
|
||||
hosts: ['virt01.example.com', '10.10.10.10', 'fe01::bad:code::cafe']
|
||||
hosts: ['virt01.example.com', '10.10.10.10', 'fe01:bad:code::cafe']
|
||||
user: root@pam
|
||||
pass: crazyPassw0rd!
|
||||
# API Token method
|
||||
@@ -20,24 +20,25 @@ balancing:
|
||||
enable: True
|
||||
enforce_affinity: False
|
||||
parallel: False
|
||||
# If running parallel job, you can define
|
||||
# the amount of parallel jobs (default: 5)
|
||||
parallel_jobs: 1
|
||||
live: True
|
||||
with_local_disks: True
|
||||
with_conntrack_state: True
|
||||
balance_types: ['vm', 'ct']
|
||||
max_job_validation: 1800
|
||||
balanciness: 5
|
||||
method: memory
|
||||
mode: used
|
||||
|
||||
dpm:
|
||||
enable: True
|
||||
method: memory
|
||||
mode: static
|
||||
cluster_min_free_resources: 60
|
||||
cluster_min_nodes: 1
|
||||
|
||||
service:
|
||||
daemon: True
|
||||
schedule:
|
||||
interval: 12
|
||||
format: hours
|
||||
delay:
|
||||
enable: False
|
||||
time: 1
|
||||
format: hours
|
||||
log_level: INFO
|
||||
|
||||
59
debian/changelog
vendored
59
debian/changelog
vendored
@@ -1,8 +1,61 @@
|
||||
proxlb (1.1.2~b1) stable; urgency=medium
|
||||
proxlb (1.1.7) stable; urgency=medium
|
||||
|
||||
* Auto-created 1.1.2 beta 1 release.
|
||||
* Add conntrack state aware migrations of VMs. (Closes: #305)
|
||||
* Add graceful shutdown for SIGINT command. (Closes: #304)
|
||||
* Fix crash when validating absent migration job ids. (Closes: #308)
|
||||
* Fix guest object names are not being evaluated in debug log. (Closes: #310)
|
||||
* Note: Have a great Dutch Proxmox Day 2025!
|
||||
|
||||
-- Florian Paul Azim Hoberg <gyptazy@gyptazy.com> Mon, 17 Mar 2025 18:55:02 +0000
|
||||
-- Florian Paul Azim Hoberg <gyptazy@gyptazy.com> Thu, 04 Sep 2025 19:23:51 +0000
|
||||
|
||||
proxlb (1.1.6.1) stable; urgency=medium
|
||||
|
||||
* Validate for node presence when pinning VMs to avoid crashing. (Closes: #296)
|
||||
|
||||
-- Florian Paul Azim Hoberg <gyptazy@gyptazy.com> Thu, 04 Sep 2025 19:23:51 +0000
|
||||
|
||||
proxlb (1.1.6) stable; urgency=medium
|
||||
|
||||
* Add validation for provided API user token id to avoid confusions. (Closes: #291)
|
||||
* Fix stacktrace output when validating permissions on non existing users in Proxmox. (Closes: #291)
|
||||
* Fix Overprovisioning first node if anti_affinity_group has only one member. (Closes: #295)
|
||||
* Validate for node presence when pinning guests to avoid crashing. (Closes: #296)
|
||||
* Fix balancing evaluation of guest types (e.g., VM or CT). (Closes: #268)
|
||||
|
||||
-- Florian Paul Azim Hoberg <gyptazy@gyptazy.com> Thu, 04 Sep 2025 05:12:19 +0000
|
||||
|
||||
proxlb (1.1.5) stable; urgency=medium
|
||||
|
||||
* Allow custom API ports instead of fixed tcp/8006. (Closes: #260)
|
||||
|
||||
-- Florian Paul Azim Hoberg <gyptazy@gyptazy.com> Mon, 14 Jul 2025 11:07:34 +0000
|
||||
|
||||
proxlb (1.1.4) stable; urgency=medium
|
||||
|
||||
* Allow pinning of guests to a group of nodes. (Closes: #245)
|
||||
* Modified log levels to make output lighter at INFO level. (Closes: #255)
|
||||
* Fixed an issue where balancing was performed in combination of deactivated balancing and dry-run mode. (Closes: #248)
|
||||
|
||||
-- Florian Paul Azim Hoberg <gyptazy@gyptazy.com> Fri, 27 Jun 2025 16:22:58 +0000
|
||||
|
||||
proxlb (1.1.3) stable; urgency=medium
|
||||
|
||||
* Add reload (SIGHUP) function to ProxLB to reload the configuration. (Closes: #189)
|
||||
* Add optional wait time parameter to delay execution until the service takes action. (Closes: #239)
|
||||
* Make the amount of parallel migrations configurable. (Closes: #241)
|
||||
* Use the average CPU consumption of a guest within the last 60 minutes instead of the current CPU usage. (Closes: #94)
|
||||
* Align maintenance mode with Proxmox HA maintenance mode. (Closes: #232)
|
||||
|
||||
-- Florian Paul Azim Hoberg <gyptazy@gyptazy.com> Thu, 19 Jun 2025 09:10:43 +0000
|
||||
|
||||
proxlb (1.1.2) stable; urgency=medium
|
||||
|
||||
* Add a configurable retry mechanism when connecting to the Proxmox API. (Closes: #157)
|
||||
* Add 1-to-1 relationships between guest and hypervisor node to ping a guest on a node. (Closes #218)
|
||||
* Force type cast cpu count of guests to int for some corner cases where a str got returned. (Closes: #222)
|
||||
* Fix systemd unit file to run after network on non PVE nodes. (Closes #137)
|
||||
|
||||
-- Florian Paul Azim Hoberg <gyptazy@gyptazy.com> Mon, 13 May 2025 18:12:04 +0000
|
||||
|
||||
proxlb (1.1.1) stable; urgency=medium
|
||||
|
||||
|
||||
@@ -19,7 +19,7 @@
|
||||
6. [Parallel Migrations](#parallel-migrations)
|
||||
7. [Run as a Systemd-Service](#run-as-a-systemd-service)
|
||||
8. [SSL Self-Signed Certificates](#ssl-self-signed-certificates)
|
||||
9. [Dynamic Power Management (DPM)](#dynamic-power-management)
|
||||
9. [Node Maintenances](#node-maintenances)
|
||||
|
||||
## Authentication / User Accounts / Permissions
|
||||
### Authentication
|
||||
@@ -138,14 +138,20 @@ plb_pin_node03
|
||||
|
||||
As a result, ProxLB will pin the guest `dev-vm01` to the node `virt03`.
|
||||
|
||||
You can also repeat this step multiple times for different node names to create a potential group of allowed hosts where the guest may be served on. In this case, ProxLB takes the node with the lowest used resources according to the defined balancing values from this group.
|
||||
|
||||
**Note:** The given node names from the tag are validated. This means, ProxLB validates if the given node name is really part of the cluster. In case of a wrongly defined or unavailable node name it continues to use the regular processes to make sure the guest keeps running.
|
||||
|
||||
### API Loadbalancing
|
||||
ProxLB supports API loadbalancing, where one or more host objects can be defined as a list. This ensures that you can even operate ProxLB without further changes when one or more nodes are offline or in a maintenance. When defining multiple hosts, the first reachable one will be picked.
|
||||
ProxLB supports API loadbalancing, where one or more host objects can be defined as a list. This ensures that you can even operate ProxLB without further changes when one or more nodes are offline or in a maintenance. When defining multiple hosts, the first reachable one will be picked. You can specify custom ports in the list. There are 4 ways of defining hosts with ports:
|
||||
1. Hostname of IPv4 without port (in this case the default 8006 will be used)
|
||||
2. Hostname or IPv4 with port
|
||||
3. IPv6 in brackets with optional port
|
||||
4. IPv6 without brackets, in this case the port is assumed after last colon
|
||||
|
||||
```
|
||||
proxmox_api:
|
||||
hosts: ['virt01.example.com', '10.10.10.10', 'fe01::bad:code::cafe']
|
||||
hosts: ['virt01.example.com', '10.10.10.10', 'fe01::bad:code::cafe', 'virt01.example.com:443', '[fc00::1]', '[fc00::1]:443', 'fc00::1:8006']
|
||||
```
|
||||
|
||||
### Ignore Host-Nodes or Guests
|
||||
@@ -210,32 +216,23 @@ proxmox_api:
|
||||
|
||||
*Note: Disabling SSL certificate validation is not recommended.*
|
||||
|
||||
### Dynamic Power Management (DPM)
|
||||
<img align="left" src="https://cdn.gyptazy.com/images/proxlb-proxmox-node-wakeonlan-wol-mac-dpm.jpg"/> Configuring Dynamic Power Management (DPM) in ProxLB within a Proxmox cluster involves a few critical steps to ensure proper operation. The first consideration is that any node intended for automatic shutdown and startup must support Wake-on-LAN (WOL). This is essential because DPM relies on the ability to power nodes back on remotely. For this to work, the ProxLB instance must be able to reach the target node’s MAC address directly over the network.
|
||||
### Node Maintenances
|
||||
To exclude specific nodes from receiving any new workloads during the balancing process, the `maintenance_nodes` configuration option can be used. This option allows administrators to define a list of nodes that are currently undergoing maintenance or should otherwise not be used for running virtual machines or containers.
|
||||
|
||||
To make this possible, you must configure the correct MAC address for WOL within the Proxmox web interface. This is done by selecting the node, going to the “System” section, then “Options,” and finally setting the “MAC address for Wake-on-LAN.” Alternatively, this value can also be submitted using the Proxmox API. Without this MAC address in place, ProxLB will not allow the node to be shut down. This restriction is in place to prevent nodes from being turned off without a way to bring them back online, which could lead to service disruption. By ensuring that each node has a valid WOL MAC address configured, DPM can operate safely and effectively, allowing ProxLB to manage the cluster’s power consumption dynamically.
|
||||
```yaml
|
||||
maintenance_nodes:
|
||||
- virt66.example.com
|
||||
```
|
||||
|
||||
#### Requirements
|
||||
Using the powermanagement feature within clusters comes along with several requirements:
|
||||
* ProxLB needs to reach the WOL-Mac address of the node (plain network)
|
||||
* WOL must be enabled of the node in general (BIOS/UEFI)
|
||||
* The related WOL network interface must be defined
|
||||
* The related WOL network interface MAC address must be defined in Proxmox for the node
|
||||
which can also be written as:
|
||||
|
||||
#### Options
|
||||
| Section | Option | Sub Option | Example | Type | Description |
|
||||
|---------|:------:|:----------:|:-------:|:----:|:-----------:|
|
||||
| `dpm` | | | | | |
|
||||
| | enable | | True | `Bool` | Enables the Dynamic Power Management functions.|
|
||||
| | method | | memory | `Str` | The balancing method that should be used. [values: `memory` (default), `cpu`, `disk`]|
|
||||
| | mode | | static | `Str` | The balancing mode that should be used. [values: `static` (default), `auto`] |
|
||||
| | cluster_min_free_resources | | 60 | `Int` | Representing the minimum required free resources in percent within the cluster. [values: `60`% (default)] |
|
||||
| | cluster_min_nodes | | 3 | `Int` | The minimum of required nodes that should remain in a cluster. [values: `3` (default)] |
|
||||
```yaml
|
||||
maintenance_nodes: ['virt66.example.com']
|
||||
```
|
||||
|
||||
#### DPM Modes
|
||||
##### Static
|
||||
Static mode in DPM lets you set a fixed number of nodes that should always stay powered on in a Proxmox cluster. This is important to keep the cluster working properly, since you need at least three nodes to maintain quorum. The system won’t let you go below that limit to avoid breaking cluster functionality.
|
||||
The maintenance_nodes key must be defined as a list, even if it only includes a single node. Each entry in the list must exactly match the node name as it is known within the Proxmox VE cluster. Do not use IP addresses, alternative DNS names, or aliases—only the actual cluster node names are valid. Once a node is marked as being in maintenance mode:
|
||||
|
||||
Besides the minimum number of active nodes, you can also define a baseline for how many free resources—like CPU or RAM—should always be available when the virtual machines are running. If the available resources drop below that level, ProxLB will try to power on more nodes, as long as they're available and can be started. On the other hand, if the cluster has more than enough resources, ProxLB will begin to shut down nodes again, but only until the free resource threshold is reached.
|
||||
* No new workloads will be balanced or migrated onto it.
|
||||
* Any existing workloads currently running on the node will be migrated away in accordance with the configured balancing strategies, assuming resources on other nodes allow.
|
||||
|
||||
This mode gives you a more stable setup by always keeping a minimum number of nodes ready while still adjusting the rest of the cluster based on resource usage, but in a controlled and predictable way.
|
||||
This feature is particularly useful during planned maintenance, upgrades, or troubleshooting, ensuring that services continue to run with minimal disruption while the specified node is being worked on.
|
||||
6
helm/proxlb/Chart.yaml
Normal file
6
helm/proxlb/Chart.yaml
Normal file
@@ -0,0 +1,6 @@
|
||||
apiVersion: v3
|
||||
name: proxlb
|
||||
description: A Helm chart for self-hosted ProxLB
|
||||
type: application
|
||||
version: "1.1.7"
|
||||
appVersion: "v1.1.7"
|
||||
13
helm/proxlb/templates/_helpers.yaml
Normal file
13
helm/proxlb/templates/_helpers.yaml
Normal file
@@ -0,0 +1,13 @@
|
||||
{{- define "proxlb.fullname" -}}
|
||||
{{- printf "%s-%s" .Release.Name .Chart.Name | trunc 63 | trimSuffix "-" -}}
|
||||
{{- end -}}
|
||||
|
||||
{{ define "proxlb.labels" }}
|
||||
app.kubernetes.io/name: {{ .Release.Name }}
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
app.kubernetes.io/version: {{ .Chart.AppVersion }}
|
||||
app.kubernetes.io/component: proxlb
|
||||
{{- if .Values.labels }}
|
||||
{{ toYaml .Values.labels }}
|
||||
{{- end }}
|
||||
{{ end }}
|
||||
11
helm/proxlb/templates/configmap.yaml
Normal file
11
helm/proxlb/templates/configmap.yaml
Normal file
@@ -0,0 +1,11 @@
|
||||
{{- if .Values.configmap.create }}
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: proxlb-config
|
||||
labels:
|
||||
{{- include "proxlb.labels" . | nindent 4 }}
|
||||
data:
|
||||
proxlb.yaml: |
|
||||
{{ toYaml .Values.configmap.config | indent 4 }}
|
||||
{{ end }}
|
||||
44
helm/proxlb/templates/deployment.yaml
Normal file
44
helm/proxlb/templates/deployment.yaml
Normal file
@@ -0,0 +1,44 @@
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: {{ .Release.Name }}
|
||||
labels:
|
||||
{{- include "proxlb.labels" . | nindent 4 }}
|
||||
spec:
|
||||
replicas: 1 # Number of replicas cannot be more than 1
|
||||
selector:
|
||||
matchLabels:
|
||||
{{- include "proxlb.labels" . | nindent 6 }}
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
{{- include "proxlb.labels" . | nindent 8 }}
|
||||
spec:
|
||||
{{- with .Values.image.imagePullSecrets }}
|
||||
imagePullSecrets:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
# not interacting with the k8s cluster
|
||||
automountServiceAccountToken: False
|
||||
containers:
|
||||
- name: proxlb
|
||||
image: "{{ .Values.image.registry }}/{{ .Values.image.repository }}:{{ .Values.image.tag }}"
|
||||
imagePullPolicy: {{ .Values.image.pullPolicy }}
|
||||
args:
|
||||
{{- if .Values.extraArgs.dryRun }}
|
||||
- --dry-run
|
||||
{{- end }}
|
||||
volumeMounts:
|
||||
- name: config
|
||||
mountPath: /etc/proxlb/proxlb.yaml
|
||||
subPath: proxlb.yaml
|
||||
{{ if .Values.resources }}
|
||||
resources:
|
||||
{{ with .Values.resources }}
|
||||
{{ toYaml . | nindent 10 }}
|
||||
{{ end }}
|
||||
{{ end }}
|
||||
volumes:
|
||||
- name: config
|
||||
configMap:
|
||||
name: proxlb-config
|
||||
61
helm/proxlb/values.yaml
Normal file
61
helm/proxlb/values.yaml
Normal file
@@ -0,0 +1,61 @@
|
||||
image:
|
||||
registry: cr.gyptazy.com
|
||||
repository: proxlb/proxlb
|
||||
tag: v1.1.7
|
||||
pullPolicy: IfNotPresent
|
||||
imagePullSecrets: [ ]
|
||||
|
||||
resources:
|
||||
limits:
|
||||
cpu: "1000m"
|
||||
memory: "2Gi"
|
||||
requests:
|
||||
cpu: "100m"
|
||||
memory: "100Mi"
|
||||
|
||||
labels: {}
|
||||
|
||||
extraArgs:
|
||||
dryRun: false
|
||||
|
||||
configmap:
|
||||
create: true
|
||||
config:
|
||||
proxmox_api:
|
||||
hosts: []
|
||||
#Can be either a user or a token
|
||||
# user: ""
|
||||
# pass: ""
|
||||
# token_id: ""
|
||||
# token_secret: ""
|
||||
ssl_verification: True
|
||||
timeout: 10
|
||||
proxmox_cluster:
|
||||
maintenance_nodes: [ ]
|
||||
ignore_nodes: [ ]
|
||||
overprovisioning: True
|
||||
balancing:
|
||||
enable: True
|
||||
enforce_affinity: False
|
||||
parallel: False
|
||||
# If running parallel job, you can define
|
||||
# the amount of prallel jobs (default: 5)
|
||||
parallel_jobs: 1
|
||||
live: True
|
||||
with_local_disks: True
|
||||
with_conntrack_state: True
|
||||
balance_types: [ 'vm', 'ct' ]
|
||||
max_job_validation: 1800
|
||||
balanciness: 5
|
||||
method: memory
|
||||
mode: used
|
||||
service:
|
||||
daemon: True
|
||||
schedule:
|
||||
interval: 12
|
||||
format: "hours"
|
||||
delay:
|
||||
enable: False
|
||||
time: 1
|
||||
format: "hours"
|
||||
log_level: INFO
|
||||
@@ -1,6 +1,12 @@
|
||||
#!/usr/bin/env bash
|
||||
VERSION="1.1.2b"
|
||||
VERSION="1.1.7"
|
||||
|
||||
# ProxLB
|
||||
sed -i "s/^__version__ = .*/__version__ = \"$VERSION\"/" "proxlb/utils/version.py"
|
||||
sed -i "s/version=\"[0-9]*\.[0-9]*\.[0-9]*\"/version=\"$VERSION\"/" setup.py
|
||||
|
||||
# Helm Chart
|
||||
sed -i "s/^version: .*/version: \"$VERSION\"/" helm/proxlb/Chart.yaml
|
||||
sed -i "s/^appVersion: .*/appVersion: \"v$VERSION\"/" helm/proxlb/Chart.yaml
|
||||
|
||||
echo "OK: Versions have been sucessfully set to $VERSION"
|
||||
|
||||
@@ -13,11 +13,11 @@ __license__ = "GPL-3.0"
|
||||
|
||||
|
||||
import logging
|
||||
import signal
|
||||
from utils.logger import SystemdLogger
|
||||
from utils.cli_parser import CliParser
|
||||
from utils.config_parser import ConfigParser
|
||||
from utils.proxmox_api import ProxmoxApi
|
||||
from models.dpm import DPM
|
||||
from models.nodes import Nodes
|
||||
from models.guests import Guests
|
||||
from models.groups import Groups
|
||||
@@ -33,6 +33,10 @@ def main():
|
||||
# Initialize logging handler
|
||||
logger = SystemdLogger(level=logging.INFO)
|
||||
|
||||
# Initialize handlers
|
||||
signal.signal(signal.SIGHUP, Helper.handler_sighup)
|
||||
signal.signal(signal.SIGINT, Helper.handler_sigint)
|
||||
|
||||
# Parses arguments passed from the CLI
|
||||
cli_parser = CliParser()
|
||||
cli_args = cli_parser.parse_args()
|
||||
@@ -45,6 +49,9 @@ def main():
|
||||
# Update log level from config and fallback to INFO if not defined
|
||||
logger.set_log_level(proxlb_config.get('service', {}).get('log_level', 'INFO'))
|
||||
|
||||
# Validate of an optional service delay
|
||||
Helper.get_service_delay(proxlb_config)
|
||||
|
||||
# Connect to Proxmox API & create API object
|
||||
proxmox_api = ProxmoxApi(proxlb_config)
|
||||
|
||||
@@ -52,19 +59,25 @@ def main():
|
||||
proxlb_config["proxmox_api"]["pass"] = "********"
|
||||
|
||||
while True:
|
||||
|
||||
# Validate if reload signal was sent during runtime
|
||||
# and reload the ProxLB configuration and adjust log level
|
||||
if Helper.proxlb_reload:
|
||||
logger.info("Reloading ProxLB configuration.")
|
||||
proxlb_config = config_parser.get_config()
|
||||
logger.set_log_level(proxlb_config.get('service', {}).get('log_level', 'INFO'))
|
||||
Helper.proxlb_reload = False
|
||||
|
||||
# Get all required objects from the Proxmox cluster
|
||||
meta = {"meta": proxlb_config}
|
||||
nodes, cluster = Nodes.get_nodes(proxmox_api, proxlb_config)
|
||||
nodes = Nodes.get_nodes(proxmox_api, proxlb_config)
|
||||
guests = Guests.get_guests(proxmox_api, nodes, meta)
|
||||
groups = Groups.get_groups(guests, nodes)
|
||||
|
||||
# Merge obtained objects from the Proxmox cluster for further usage
|
||||
proxlb_data = {**meta, **cluster, **nodes, **guests, **groups}
|
||||
proxlb_data = {**meta, **nodes, **guests, **groups}
|
||||
Helper.log_node_metrics(proxlb_data)
|
||||
|
||||
# Evaluate the dynamic power management for nodes in the clustet
|
||||
DPM(proxlb_data)
|
||||
|
||||
# Update the initial node resource assignments
|
||||
# by the previously created groups.
|
||||
Calculations.set_node_assignments(proxlb_data)
|
||||
@@ -74,13 +87,10 @@ def main():
|
||||
Calculations.relocate_guests(proxlb_data)
|
||||
Helper.log_node_metrics(proxlb_data, init=False)
|
||||
|
||||
# Perform balancing
|
||||
if not cli_args.dry_run or not proxlb_data["meta"]["balancing"].get("enable", False):
|
||||
Balancing(proxmox_api, proxlb_data)
|
||||
|
||||
# Perform DPM
|
||||
if not cli_args.dry_run:
|
||||
DPM.dpm_shutdown_nodes(proxmox_api, proxlb_data)
|
||||
# Perform balancing actions via Proxmox API
|
||||
if proxlb_data["meta"]["balancing"].get("enable", False):
|
||||
if not cli_args.dry_run:
|
||||
Balancing(proxmox_api, proxlb_data)
|
||||
|
||||
# Validate if the JSON output should be
|
||||
# printed to stdout
|
||||
|
||||
@@ -12,6 +12,7 @@ __license__ = "GPL-3.0"
|
||||
|
||||
import proxmoxer
|
||||
import time
|
||||
from itertools import islice
|
||||
from utils.logger import SystemdLogger
|
||||
from typing import Dict, Any
|
||||
|
||||
@@ -48,34 +49,81 @@ class Balancing:
|
||||
Initializes the Balancing class with the provided ProxLB data.
|
||||
|
||||
Args:
|
||||
proxlb_data (dict): The data required for balancing VMs and CTs.
|
||||
proxmox_api (object): The Proxmox API client instance used to interact with the Proxmox cluster.
|
||||
proxlb_data (dict): A dictionary containing data related to the ProxLB load balancing configuration.
|
||||
"""
|
||||
for guest_name, guest_meta in proxlb_data["guests"].items():
|
||||
def chunk_dict(data, size):
|
||||
"""
|
||||
Splits a dictionary into chunks of a specified size.
|
||||
Args:
|
||||
data (dict): The dictionary to be split into chunks.
|
||||
size (int): The size of each chunk.
|
||||
Yields:
|
||||
dict: A chunk of the original dictionary with the specified size.
|
||||
"""
|
||||
logger.debug("Starting: chunk_dict.")
|
||||
it = iter(data.items())
|
||||
for chunk in range(0, len(data), size):
|
||||
yield dict(islice(it, size))
|
||||
|
||||
# Check if the guest's target is not the same as the current node
|
||||
if guest_meta["node_current"] != guest_meta["node_target"]:
|
||||
# Check if the guest is not ignored and perform the balancing
|
||||
# operation based on the guest type
|
||||
if not guest_meta["ignore"]:
|
||||
guest_id = guest_meta["id"]
|
||||
guest_node_current = guest_meta["node_current"]
|
||||
guest_node_target = guest_meta["node_target"]
|
||||
# Validate if balancing should be performed in parallel or sequentially.
|
||||
# If parallel balancing is enabled, set the number of parallel jobs.
|
||||
parallel_jobs = proxlb_data["meta"]["balancing"].get("parallel_jobs", 5)
|
||||
if not proxlb_data["meta"]["balancing"].get("parallel", False):
|
||||
parallel_jobs = 1
|
||||
logger.debug("Balancing: Parallel balancing is disabled. Running sequentially.")
|
||||
else:
|
||||
logger.debug(f"Balancing: Parallel balancing is enabled. Running with {parallel_jobs} parallel jobs.")
|
||||
|
||||
# VM Balancing
|
||||
if guest_meta["type"] == "vm":
|
||||
self.exec_rebalancing_vm(proxmox_api, proxlb_data, guest_name)
|
||||
for chunk in chunk_dict(proxlb_data["guests"], parallel_jobs):
|
||||
jobs_to_wait = []
|
||||
|
||||
# CT Balancing
|
||||
elif guest_meta["type"] == "ct":
|
||||
self.exec_rebalancing_ct(proxmox_api, proxlb_data, guest_name)
|
||||
for guest_name, guest_meta in chunk.items():
|
||||
|
||||
# Check if the guest's target is not the same as the current node
|
||||
if guest_meta["node_current"] != guest_meta["node_target"]:
|
||||
|
||||
# Check if the guest is not ignored and perform the balancing
|
||||
# operation based on the guest type
|
||||
if not guest_meta["ignore"]:
|
||||
job_id = None
|
||||
|
||||
# VM Balancing
|
||||
if guest_meta["type"] == "vm":
|
||||
if 'vm' in proxlb_data["meta"]["balancing"].get("balance_types", []):
|
||||
logger.debug(f"Balancing: Balancing for guest {guest_name} of type VM started.")
|
||||
job_id = self.exec_rebalancing_vm(proxmox_api, proxlb_data, guest_name)
|
||||
else:
|
||||
logger.debug(
|
||||
f"Balancing: Balancing for guest {guest_name} will not be performed. "
|
||||
"Guest is of type VM which is not included in allowed balancing types.")
|
||||
|
||||
# CT Balancing
|
||||
elif guest_meta["type"] == "ct":
|
||||
if 'ct' in proxlb_data["meta"]["balancing"].get("balance_types", []):
|
||||
logger.debug(f"Balancing: Balancing for guest {guest_name} of type CT started.")
|
||||
job_id = self.exec_rebalancing_ct(proxmox_api, proxlb_data, guest_name)
|
||||
else:
|
||||
logger.debug(
|
||||
f"Balancing: Balancing for guest {guest_name} will not be performed. "
|
||||
"Guest is of type CT which is not included in allowed balancing types.")
|
||||
|
||||
# Just in case we get a new type of guest in the future
|
||||
else:
|
||||
logger.critical(f"Balancing: Got unexpected guest type: {guest_meta['type']}. Cannot proceed guest: {guest_meta['name']}.")
|
||||
|
||||
if job_id:
|
||||
jobs_to_wait.append((guest_name, guest_meta["node_current"], job_id))
|
||||
|
||||
# Just in case we get a new type of guest in the future
|
||||
else:
|
||||
logger.critical(f"Balancing: Got unexpected guest type: {guest_meta['type']}. Cannot proceed guest: {guest_meta['name']}.")
|
||||
logger.debug(f"Balancing: Guest {guest_name} is ignored and will not be rebalanced.")
|
||||
else:
|
||||
logger.debug(f"Balancing: Guest {guest_name} is ignored and will not be rebalanced.")
|
||||
else:
|
||||
logger.debug(f"Balancing: Guest {guest_name} is already on the target node {guest_meta['node_target']} and will not be rebalanced.")
|
||||
logger.debug(f"Balancing: Guest {guest_name} is already on the target node {guest_meta['node_target']} and will not be rebalanced.")
|
||||
|
||||
# Wait for all jobs in the current chunk to complete
|
||||
for guest_name, node, job_id in jobs_to_wait:
|
||||
if job_id:
|
||||
self.get_rebalancing_job_status(proxmox_api, proxlb_data, guest_name, node, job_id)
|
||||
|
||||
def exec_rebalancing_vm(self, proxmox_api: any, proxlb_data: Dict[str, Any], guest_name: str) -> None:
|
||||
"""
|
||||
@@ -96,6 +144,7 @@ class Balancing:
|
||||
guest_id = proxlb_data["guests"][guest_name]["id"]
|
||||
guest_node_current = proxlb_data["guests"][guest_name]["node_current"]
|
||||
guest_node_target = proxlb_data["guests"][guest_name]["node_target"]
|
||||
job_id = None
|
||||
|
||||
if proxlb_data["meta"]["balancing"].get("live", True):
|
||||
online_migration = 1
|
||||
@@ -107,20 +156,27 @@ class Balancing:
|
||||
else:
|
||||
with_local_disks = 0
|
||||
|
||||
if proxlb_data["meta"]["balancing"].get("with_conntrack_state", True):
|
||||
with_conntrack_state = 1
|
||||
else:
|
||||
with_conntrack_state = 0
|
||||
|
||||
migration_options = {
|
||||
'target': {guest_node_target},
|
||||
'target': guest_node_target,
|
||||
'online': online_migration,
|
||||
'with-local-disks': with_local_disks
|
||||
'with-local-disks': with_local_disks,
|
||||
'with-conntrack-state': with_conntrack_state,
|
||||
}
|
||||
|
||||
try:
|
||||
logger.debug(f"Balancing: Starting to migrate guest {guest_name} of type VM.")
|
||||
logger.info(f"Balancing: Starting to migrate VM guest {guest_name} from {guest_node_current} to {guest_node_target}.")
|
||||
job_id = proxmox_api.nodes(guest_node_current).qemu(guest_id).migrate().post(**migration_options)
|
||||
self.get_rebalancing_job_status(proxmox_api, proxlb_data, guest_name, guest_node_current, job_id)
|
||||
except proxmoxer.core.ResourceException as proxmox_api_error:
|
||||
logger.critical(f"Balancing: Failed to migrate guest {guest_name} of type VM due to some Proxmox errors. Please check if resource is locked or similar.")
|
||||
logger.debug(f"Balancing: Failed to migrate guest {guest_name} of type VM due to some Proxmox errors: {proxmox_api_error}")
|
||||
|
||||
logger.debug("Finished: exec_rebalancing_vm.")
|
||||
return job_id
|
||||
|
||||
def exec_rebalancing_ct(self, proxmox_api: any, proxlb_data: Dict[str, Any], guest_name: str) -> None:
|
||||
"""
|
||||
@@ -141,15 +197,17 @@ class Balancing:
|
||||
guest_id = proxlb_data["guests"][guest_name]["id"]
|
||||
guest_node_current = proxlb_data["guests"][guest_name]["node_current"]
|
||||
guest_node_target = proxlb_data["guests"][guest_name]["node_target"]
|
||||
job_id = None
|
||||
|
||||
try:
|
||||
logger.debug(f"Balancing: Starting to migrate guest {guest_name} of type CT.")
|
||||
logger.info(f"Balancing: Starting to migrate CT guest {guest_name} from {guest_node_current} to {guest_node_target}.")
|
||||
job_id = proxmox_api.nodes(guest_node_current).lxc(guest_id).migrate().post(target=guest_node_target, restart=1)
|
||||
self.get_rebalancing_job_status(proxmox_api, proxlb_data, guest_name, guest_node_current, job_id)
|
||||
except proxmoxer.core.ResourceException as proxmox_api_error:
|
||||
logger.critical(f"Balancing: Failed to migrate guest {guest_name} of type CT due to some Proxmox errors. Please check if resource is locked or similar.")
|
||||
logger.debug(f"Balancing: Failed to migrate guest {guest_name} of type CT due to some Proxmox errors: {proxmox_api_error}")
|
||||
|
||||
logger.debug("Finished: exec_rebalancing_ct.")
|
||||
return job_id
|
||||
|
||||
def get_rebalancing_job_status(self, proxmox_api: any, proxlb_data: Dict[str, Any], guest_name: str, guest_current_node: str, job_id: int, retry_counter: int = 1) -> bool:
|
||||
"""
|
||||
@@ -167,35 +225,32 @@ class Balancing:
|
||||
bool: True if the job completed successfully, False otherwise.
|
||||
"""
|
||||
logger.debug("Starting: get_rebalancing_job_status.")
|
||||
# Parallel migrations can take a huge time and create a higher load, if not defined by an
|
||||
# operator we will use a sequential mode by default
|
||||
if not proxlb_data["meta"]["balancing"].get("parallel", False):
|
||||
job = proxmox_api.nodes(guest_current_node).tasks(job_id).status().get()
|
||||
job = proxmox_api.nodes(guest_current_node).tasks(job_id).status().get()
|
||||
|
||||
# Watch job id until it finalizes
|
||||
if job["status"] == "running":
|
||||
# Do not hammer the API while
|
||||
# watching the job status
|
||||
time.sleep(10)
|
||||
retry_counter += 1
|
||||
# Watch job id until it finalizes
|
||||
if job["status"] == "running":
|
||||
# Do not hammer the API while
|
||||
# watching the job status
|
||||
time.sleep(10)
|
||||
retry_counter += 1
|
||||
|
||||
# Run recursion until we hit the soft-limit of maximum migration time for a guest
|
||||
if retry_counter < proxlb_data["meta"]["balancing"].get("max_job_validation", 1800):
|
||||
logger.debug(f"Balancing: Job ID {job_id} (guest: {guest_name}) for migration is still running... (Run: {retry_counter})")
|
||||
self.get_rebalancing_job_status(proxmox_api, proxlb_data, guest_name, guest_current_node, job_id, retry_counter)
|
||||
else:
|
||||
logger.warning(f"Balancing: Job ID {job_id} (guest: {guest_name}) for migration took too long. Please check manually.")
|
||||
logger.debug("Finished: get_rebalancing_job_status.")
|
||||
return False
|
||||
# Run recursion until we hit the soft-limit of maximum migration time for a guest
|
||||
if retry_counter < proxlb_data["meta"]["balancing"].get("max_job_validation", 1800):
|
||||
logger.debug(f"Balancing: Job ID {job_id} (guest: {guest_name}) for migration is still running... (Run: {retry_counter})")
|
||||
self.get_rebalancing_job_status(proxmox_api, proxlb_data, guest_name, guest_current_node, job_id, retry_counter)
|
||||
else:
|
||||
logger.warning(f"Balancing: Job ID {job_id} (guest: {guest_name}) for migration took too long. Please check manually.")
|
||||
logger.debug("Finished: get_rebalancing_job_status.")
|
||||
return False
|
||||
|
||||
# Validate job output for errors when finished
|
||||
if job["status"] == "stopped":
|
||||
# Validate job output for errors when finished
|
||||
if job["status"] == "stopped":
|
||||
|
||||
if job["exitstatus"] == "OK":
|
||||
logger.debug(f"Balancing: Job ID {job_id} (guest: {guest_name}) was successfully.")
|
||||
logger.debug("Finished: get_rebalancing_job_status.")
|
||||
return True
|
||||
else:
|
||||
logger.critical(f"Balancing: Job ID {job_id} (guest: {guest_name}) went into an error! Please check manually.")
|
||||
logger.debug("Finished: get_rebalancing_job_status.")
|
||||
return False
|
||||
if job["exitstatus"] == "OK":
|
||||
logger.debug(f"Balancing: Job ID {job_id} (guest: {guest_name}) was successfully.")
|
||||
logger.debug("Finished: get_rebalancing_job_status.")
|
||||
return True
|
||||
else:
|
||||
logger.critical(f"Balancing: Job ID {job_id} (guest: {guest_name}) went into an error! Please check manually.")
|
||||
logger.debug("Finished: get_rebalancing_job_status.")
|
||||
return False
|
||||
|
||||
@@ -82,6 +82,7 @@ class Calculations:
|
||||
guest_node_current = proxlb_data["guests"][guest_name]["node_current"]
|
||||
# Update Hardware assignments
|
||||
# Update assigned values for the current node
|
||||
logger.debug(f"set_node_assignment of guest {guest_name} on node {guest_node_current} with cpu_total: {proxlb_data['guests'][guest_name]['cpu_total']}, memory_total: {proxlb_data['guests'][guest_name]['memory_total']}, disk_total: {proxlb_data['guests'][guest_name]['disk_total']}.")
|
||||
proxlb_data["nodes"][guest_node_current]["cpu_assigned"] += proxlb_data["guests"][guest_name]["cpu_total"]
|
||||
proxlb_data["nodes"][guest_node_current]["memory_assigned"] += proxlb_data["guests"][guest_name]["memory_total"]
|
||||
proxlb_data["nodes"][guest_node_current]["disk_assigned"] += proxlb_data["guests"][guest_name]["disk_total"]
|
||||
@@ -128,7 +129,7 @@ class Calculations:
|
||||
logger.debug("Finished: get_balanciness.")
|
||||
|
||||
@staticmethod
|
||||
def get_most_free_node(proxlb_data: Dict[str, Any], return_node: bool = False) -> Dict[str, Any]:
|
||||
def get_most_free_node(proxlb_data: Dict[str, Any], return_node: bool = False, guest_node_relation_list: list = []) -> Dict[str, Any]:
|
||||
"""
|
||||
Get the name of the Proxmox node in the cluster with the most free resources based on
|
||||
the user defined method (e.g.: memory) and mode (e.g.: used).
|
||||
@@ -137,6 +138,8 @@ class Calculations:
|
||||
proxlb_data (Dict[str, Any]): The data holding all content of all objects.
|
||||
return_node (bool): The indicator to simply return the best node for further
|
||||
assignments.
|
||||
guest_node_relation_list (list): A list of nodes that have a tag on the given
|
||||
guest relationship for pinning.
|
||||
|
||||
Returns:
|
||||
Dict[str, Any]: Updated meta data section of the node with the most free resources that should
|
||||
@@ -145,8 +148,15 @@ class Calculations:
|
||||
logger.debug("Starting: get_most_free_node.")
|
||||
proxlb_data["meta"]["balancing"]["balance_next_node"] = ""
|
||||
|
||||
# Do not include nodes that are marked in 'maintenance'
|
||||
# Filter and exclude nodes that are in maintenance mode
|
||||
filtered_nodes = [node for node in proxlb_data["nodes"].values() if not node["maintenance"]]
|
||||
|
||||
# Filter and include nodes that given by a relationship between guest and node. This is only
|
||||
# used if the guest has a relationship to a node defined by "pin" tags.
|
||||
if len(guest_node_relation_list) > 0:
|
||||
filtered_nodes = [node for node in proxlb_data["nodes"].values() if node["name"] in guest_node_relation_list]
|
||||
|
||||
# Filter by the defined methods and modes for balancing
|
||||
method = proxlb_data["meta"]["balancing"].get("method", "memory")
|
||||
mode = proxlb_data["meta"]["balancing"].get("mode", "used")
|
||||
lowest_usage_node = min(filtered_nodes, key=lambda x: x[f"{method}_{mode}_percent"])
|
||||
@@ -162,7 +172,7 @@ class Calculations:
|
||||
logger.debug("Finished: get_most_free_node.")
|
||||
|
||||
@staticmethod
|
||||
def relocate_guests_on_maintenance_nodes(proxlb_data: Dict[str, Any]) -> None:
|
||||
def relocate_guests_on_maintenance_nodes(proxlb_data: Dict[str, Any]):
|
||||
"""
|
||||
Relocates guests that are currently on nodes marked for maintenance to
|
||||
nodes with the most available resources.
|
||||
@@ -192,7 +202,7 @@ class Calculations:
|
||||
logger.debug("Finished: get_most_free_node.")
|
||||
|
||||
@staticmethod
|
||||
def relocate_guests(proxlb_data: Dict[str, Any]) -> None:
|
||||
def relocate_guests(proxlb_data: Dict[str, Any]):
|
||||
"""
|
||||
Relocates guests within the provided data structure to ensure affinity groups are
|
||||
placed on nodes with the most free resources.
|
||||
@@ -225,13 +235,13 @@ class Calculations:
|
||||
for guest_name in proxlb_data["groups"]["affinity"][group_name]["guests"]:
|
||||
proxlb_data["meta"]["balancing"]["balance_next_guest"] = guest_name
|
||||
Calculations.val_anti_affinity(proxlb_data, guest_name)
|
||||
Calculations.val_node_relationship(proxlb_data, guest_name)
|
||||
Calculations.val_node_relationships(proxlb_data, guest_name)
|
||||
Calculations.update_node_resources(proxlb_data)
|
||||
|
||||
logger.debug("Finished: relocate_guests.")
|
||||
|
||||
@staticmethod
|
||||
def val_anti_affinity(proxlb_data: Dict[str, Any], guest_name: str) -> None:
|
||||
def val_anti_affinity(proxlb_data: Dict[str, Any], guest_name: str):
|
||||
"""
|
||||
Validates and assigns nodes to guests based on anti-affinity rules.
|
||||
|
||||
@@ -256,23 +266,28 @@ class Calculations:
|
||||
if guest_name in proxlb_data["groups"]["anti_affinity"][group_name]['guests'] and not proxlb_data["guests"][guest_name]["processed"]:
|
||||
logger.debug(f"Anti-Affinity: Guest: {guest_name} is included in anti-affinity group: {group_name}.")
|
||||
|
||||
# Iterate over all available nodes
|
||||
for node_name in proxlb_data["nodes"].keys():
|
||||
# Check if the group has only one member. If so skip new guest node assignment.
|
||||
if proxlb_data["groups"]["anti_affinity"][group_name]["counter"] > 1:
|
||||
logger.debug(f"Anti-Affinity: Group has more than 1 member.")
|
||||
# Iterate over all available nodes
|
||||
for node_name in proxlb_data["nodes"].keys():
|
||||
|
||||
# Only select node if it was not used before and is not in a
|
||||
# maintenance mode. Afterwards, add it to the list of already
|
||||
# used nodes for the current anti-affinity group
|
||||
if node_name not in proxlb_data["groups"]["anti_affinity"][group_name]["used_nodes"]:
|
||||
# Only select node if it was not used before and is not in a
|
||||
# maintenance mode. Afterwards, add it to the list of already
|
||||
# used nodes for the current anti-affinity group
|
||||
if node_name not in proxlb_data["groups"]["anti_affinity"][group_name]["used_nodes"]:
|
||||
|
||||
if not proxlb_data["nodes"][node_name]["maintenance"]:
|
||||
# If the node has not been used yet, we assign this node to the guest
|
||||
proxlb_data["meta"]["balancing"]["balance_next_node"] = node_name
|
||||
proxlb_data["groups"]["anti_affinity"][group_name]["used_nodes"].append(node_name)
|
||||
logger.debug(f"Node: {node_name} marked as used for anti-affinity group: {group_name} with guest {guest_name}")
|
||||
break
|
||||
if not proxlb_data["nodes"][node_name]["maintenance"]:
|
||||
# If the node has not been used yet, we assign this node to the guest
|
||||
proxlb_data["meta"]["balancing"]["balance_next_node"] = node_name
|
||||
proxlb_data["groups"]["anti_affinity"][group_name]["used_nodes"].append(node_name)
|
||||
logger.debug(f"Node: {node_name} marked as used for anti-affinity group: {group_name} with guest {guest_name}")
|
||||
break
|
||||
|
||||
else:
|
||||
logger.critical(f"Node: {node_name} already got used for anti-affinity group:: {group_name}. (Tried for guest: {guest_name})")
|
||||
else:
|
||||
logger.critical(f"Node: {node_name} already got used for anti-affinity group:: {group_name}. (Tried for guest: {guest_name})")
|
||||
else:
|
||||
logger.debug(f"Anti-Affinity: Group has less than 2 members. Skipping node calculation for the group.")
|
||||
|
||||
else:
|
||||
logger.debug(f"Guest: {guest_name} is not included in anti-affinity group: {group_name}. Skipping.")
|
||||
@@ -280,7 +295,7 @@ class Calculations:
|
||||
logger.debug("Finished: val_anti_affinity.")
|
||||
|
||||
@staticmethod
|
||||
def val_node_relationship(proxlb_data: Dict[str, Any], guest_name: str) -> None:
|
||||
def val_node_relationships(proxlb_data: Dict[str, Any], guest_name: str):
|
||||
"""
|
||||
Validates and assigns guests to nodes based on defined relationships based on tags.
|
||||
|
||||
@@ -291,27 +306,29 @@ class Calculations:
|
||||
Returns:
|
||||
None
|
||||
"""
|
||||
logger.debug("Starting: val_node_relationship.")
|
||||
logger.debug("Starting: val_node_relationships.")
|
||||
proxlb_data["guests"][guest_name]["processed"] = True
|
||||
|
||||
if proxlb_data["guests"][guest_name]["node_relationship"]:
|
||||
logger.info(f"Guest '{guest_name}' has a specific relationship defined to node: {proxlb_data['guests'][guest_name]['node_relationship']}. Pinning to node.")
|
||||
if len(proxlb_data["guests"][guest_name]["node_relationships"]) > 0:
|
||||
logger.debug(f"Guest '{guest_name}' has relationships defined to node(s): {','.join(proxlb_data['guests'][guest_name]['node_relationships'])}. Pinning to node.")
|
||||
|
||||
# Get the node with the most free resources of the group
|
||||
guest_node_relation_list = proxlb_data["guests"][guest_name]["node_relationships"]
|
||||
Calculations.get_most_free_node(proxlb_data, False, guest_node_relation_list)
|
||||
|
||||
# Validate if the specified node name is really part of the cluster
|
||||
if proxlb_data['guests'][guest_name]['node_relationship'] in proxlb_data["nodes"].keys():
|
||||
logger.info(f"Guest '{guest_name}' has a specific relationship defined to node: {proxlb_data['guests'][guest_name]['node_relationship']} is a known hypervisor node in the cluster.")
|
||||
# Pin the guest to the specified hypervisor node.
|
||||
proxlb_data["meta"]["balancing"]["balance_next_node"] = proxlb_data['guests'][guest_name]['node_relationship']
|
||||
if proxlb_data["meta"]["balancing"]["balance_next_node"] in proxlb_data["nodes"].keys():
|
||||
logger.debug(f"Guest '{guest_name}' has a specific relationship defined to node: {proxlb_data['meta']['balancing']['balance_next_node']} is a known hypervisor node in the cluster.")
|
||||
else:
|
||||
logger.warning(f"Guest '{guest_name}' has a specific relationship defined to node: {proxlb_data['guests'][guest_name]['node_relationship']} but this node name is not known in the cluster!")
|
||||
logger.warning(f"Guest '{guest_name}' has a specific relationship defined to node: {proxlb_data['meta']['balancing']['balance_next_node']} but this node name is not known in the cluster!")
|
||||
|
||||
else:
|
||||
logger.info(f"Guest '{guest_name}' does not have any specific node relationships.")
|
||||
logger.debug(f"Guest '{guest_name}' does not have any specific node relationships.")
|
||||
|
||||
logger.debug("Finished: val_node_relationship.")
|
||||
logger.debug("Finished: val_node_relationships.")
|
||||
|
||||
@staticmethod
|
||||
def update_node_resources(proxlb_data: Dict[str, Any]) -> None:
|
||||
def update_node_resources(proxlb_data):
|
||||
"""
|
||||
Updates the resource allocation and usage statistics for nodes when a guest
|
||||
is moved from one node to another.
|
||||
@@ -375,68 +392,3 @@ class Calculations:
|
||||
logger.debug(f"Set guest {guest_name} from node {node_current} to node {node_target}.")
|
||||
|
||||
logger.debug("Finished: update_node_resources.")
|
||||
|
||||
@staticmethod
|
||||
def update_cluster_resources(proxlb_data: Dict[str, Any], node: str, action: str) -> None:
|
||||
"""
|
||||
Updates the cluster resource statistics based on the specified action and node.
|
||||
|
||||
This method modifies the cluster-level resource data (such as CPU, memory, disk usage,
|
||||
and node counts) based on the action performed ('add' or 'remove') for the specified node.
|
||||
It calculates the updated statistics after adding or removing a node and logs the results.
|
||||
|
||||
Parameters:
|
||||
proxlb_data (Dict[str, Any]): The data representing the current state of the cluster,
|
||||
including node-level statistics for CPU, memory, and disk.
|
||||
node (str): The identifier of the node whose resources are being added or removed from the cluster.
|
||||
action (str): The action to perform, either 'add' or 'remove'. 'add' will include the node's
|
||||
resources in the cluster, while 'remove' will exclude the node's resources.
|
||||
|
||||
Returns:
|
||||
None: The function modifies the `proxlb_data` dictionary in place to update the cluster resources.
|
||||
"""
|
||||
logger.debug("Starting: update_cluster_resources.")
|
||||
logger.debug(f"DPM: Updating cluster statistics by online node {node}. Action: {action}")
|
||||
logger.debug(f"DPM: update_cluster_resources - Before {action}: {proxlb_data['cluster']['memory_free_percent']}")
|
||||
|
||||
if action == "add":
|
||||
proxlb_data["cluster"]["node_count"] = proxlb_data["cluster"].get("node_count", 0) + 1
|
||||
proxlb_data["cluster"]["cpu_total"] = proxlb_data["cluster"].get("cpu_total", 0) + proxlb_data["nodes"][node]["cpu_total"]
|
||||
proxlb_data["cluster"]["cpu_used"] = proxlb_data["cluster"].get("cpu_used", 0) + proxlb_data["nodes"][node]["cpu_used"]
|
||||
proxlb_data["cluster"]["cpu_free"] = proxlb_data["cluster"].get("cpu_free", 0) + proxlb_data["nodes"][node]["cpu_free"]
|
||||
proxlb_data["cluster"]["cpu_free_percent"] = proxlb_data["cluster"].get("cpu_free", 0) / proxlb_data["cluster"].get("cpu_total", 0) * 100
|
||||
proxlb_data["cluster"]["cpu_used_percent"] = proxlb_data["cluster"].get("cpu_used", 0) / proxlb_data["cluster"].get("cpu_total", 0) * 100
|
||||
proxlb_data["cluster"]["memory_total"] = proxlb_data["cluster"].get("memory_total", 0) + proxlb_data["nodes"][node]["memory_total"]
|
||||
proxlb_data["cluster"]["memory_used"] = proxlb_data["cluster"].get("memory_used", 0) + proxlb_data["nodes"][node]["memory_used"]
|
||||
proxlb_data["cluster"]["memory_free"] = proxlb_data["cluster"].get("memory_free", 0) + proxlb_data["nodes"][node]["memory_free"]
|
||||
proxlb_data["cluster"]["memory_free_percent"] = proxlb_data["cluster"].get("memory_free", 0) / proxlb_data["cluster"].get("memory_total", 0) * 100
|
||||
proxlb_data["cluster"]["memory_used_percent"] = proxlb_data["cluster"].get("memory_used", 0) / proxlb_data["cluster"].get("memory_total", 0) * 100
|
||||
proxlb_data["cluster"]["disk_total"] = proxlb_data["cluster"].get("disk_total", 0) + proxlb_data["nodes"][node]["disk_total"]
|
||||
proxlb_data["cluster"]["disk_used"] = proxlb_data["cluster"].get("disk_used", 0) + proxlb_data["nodes"][node]["disk_used"]
|
||||
proxlb_data["cluster"]["disk_free"] = proxlb_data["cluster"].get("disk_free", 0) + proxlb_data["nodes"][node]["disk_free"]
|
||||
proxlb_data["cluster"]["disk_free_percent"] = proxlb_data["cluster"].get("disk_free", 0) / proxlb_data["cluster"].get("disk_total", 0) * 100
|
||||
proxlb_data["cluster"]["disk_used_percent"] = proxlb_data["cluster"].get("disk_used", 0) / proxlb_data["cluster"].get("disk_total", 0) * 100
|
||||
proxlb_data["cluster"]["node_count_available"] = proxlb_data["cluster"].get("node_count_available", 0) + 1
|
||||
proxlb_data["cluster"]["node_count_overall"] = proxlb_data["cluster"].get("node_count_overall", 0) + 1
|
||||
|
||||
if action == "remove":
|
||||
proxlb_data["cluster"]["node_count"] = proxlb_data["cluster"].get("node_count", 0) - 1
|
||||
proxlb_data["cluster"]["cpu_total"] = proxlb_data["cluster"].get("cpu_total", 0) - proxlb_data["nodes"][node]["cpu_total"]
|
||||
proxlb_data["cluster"]["cpu_used"] = proxlb_data["cluster"].get("cpu_used", 0) - proxlb_data["nodes"][node]["cpu_used"]
|
||||
proxlb_data["cluster"]["cpu_free"] = proxlb_data["cluster"].get("cpu_free", 0) - proxlb_data["nodes"][node]["cpu_free"]
|
||||
proxlb_data["cluster"]["cpu_free_percent"] = proxlb_data["cluster"].get("cpu_free", 0) / proxlb_data["cluster"].get("cpu_total", 0) * 100
|
||||
proxlb_data["cluster"]["cpu_used_percent"] = proxlb_data["cluster"].get("cpu_used", 0) / proxlb_data["cluster"].get("cpu_total", 0) * 100
|
||||
proxlb_data["cluster"]["memory_total"] = proxlb_data["cluster"].get("memory_total", 0) - proxlb_data["nodes"][node]["memory_total"]
|
||||
proxlb_data["cluster"]["memory_used"] = proxlb_data["cluster"].get("memory_used") - proxlb_data["nodes"][node]["memory_used"]
|
||||
proxlb_data["cluster"]["memory_free"] = proxlb_data["cluster"].get("memory_free") - proxlb_data["nodes"][node]["memory_free"]
|
||||
proxlb_data["cluster"]["memory_free_percent"] = proxlb_data["cluster"].get("memory_free") / proxlb_data["cluster"].get("memory_total", 0) * 100
|
||||
proxlb_data["cluster"]["memory_used_percent"] = proxlb_data["cluster"].get("memory_used") / proxlb_data["cluster"].get("memory_total", 0) * 100
|
||||
proxlb_data["cluster"]["disk_total"] = proxlb_data["cluster"].get("disk_total", 0) - proxlb_data["nodes"][node]["disk_total"]
|
||||
proxlb_data["cluster"]["disk_used"] = proxlb_data["cluster"].get("disk_used", 0) - proxlb_data["nodes"][node]["disk_used"]
|
||||
proxlb_data["cluster"]["disk_free"] = proxlb_data["cluster"].get("disk_free", 0) - proxlb_data["nodes"][node]["disk_free"]
|
||||
proxlb_data["cluster"]["disk_free_percent"] = proxlb_data["cluster"].get("disk_free", 0) / proxlb_data["cluster"].get("disk_total", 0) * 100
|
||||
proxlb_data["cluster"]["disk_used_percent"] = proxlb_data["cluster"].get("disk_used", 0) / proxlb_data["cluster"].get("disk_total", 0) * 100
|
||||
proxlb_data["cluster"]["node_count_available"] = proxlb_data["cluster"].get("node_count_available", 0) - 1
|
||||
|
||||
logger.debug(f"DPM: update_cluster_resources - After {action}: {proxlb_data['cluster']['memory_free_percent']}")
|
||||
logger.debug("Finished: update_cluster_resources.")
|
||||
|
||||
@@ -1,255 +0,0 @@
|
||||
"""
|
||||
The DPM (Dynamic Power Management) class is responsible for the dynamic management
|
||||
of nodes within a Proxmox cluster, optimizing resource utilization by controlling
|
||||
node power states based on specified schedules and conditions.
|
||||
|
||||
This class provides functionality for:
|
||||
- Tracking and validating schedules for dynamic power management.
|
||||
- Shutting down nodes that are underutilized or not needed.
|
||||
- Starting up nodes using Wake-on-LAN (WOL) based on certain conditions.
|
||||
- Ensuring that nodes are properly flagged for maintenance and startup/shutdown actions.
|
||||
|
||||
The DPM class can operate in different modes, such as static and automatic,
|
||||
to either perform predefined actions or dynamically adjust based on real-time resource usage.
|
||||
"""
|
||||
|
||||
__author__ = "Florian Paul Azim Hoberg <gyptazy>"
|
||||
__copyright__ = "Copyright (C) 2025 Florian Paul Azim Hoberg (@gyptazy)"
|
||||
__license__ = "GPL-3.0"
|
||||
|
||||
|
||||
import proxmoxer
|
||||
from typing import Dict, Any
|
||||
from models.calculations import Calculations
|
||||
from utils.logger import SystemdLogger
|
||||
|
||||
logger = SystemdLogger()
|
||||
|
||||
|
||||
class DPM:
|
||||
"""
|
||||
The DPM (Dynamic Power Management) class is responsible for the dynamic management
|
||||
of nodes within a Proxmox cluster, optimizing resource utilization by controlling
|
||||
node power states based on specified schedules and conditions.
|
||||
|
||||
This class provides functionality for:
|
||||
- Tracking and validating schedules for dynamic power management.
|
||||
- Shutting down nodes that are underutilized or not needed.
|
||||
- Starting up nodes using Wake-on-LAN (WOL) based on certain conditions.
|
||||
- Ensuring that nodes are properly flagged for maintenance and startup/shutdown actions.
|
||||
|
||||
The DPM class can operate in different modes, such as static and automatic,
|
||||
to either perform predefined actions or dynamically adjust based on real-time resource usage.
|
||||
|
||||
Attributes:
|
||||
None directly defined for the class; instead, all actions are based on input data
|
||||
and interactions with the Proxmox API and other helper functions.
|
||||
|
||||
Methods:
|
||||
__init__(proxlb_data: Dict[str, Any]):
|
||||
Initializes the DPM class, checking whether DPM is enabled and operating in the
|
||||
appropriate mode (static or auto).
|
||||
|
||||
dpm_static(proxlb_data: Dict[str, Any]) -> None:
|
||||
Evaluates the cluster's resource availability and performs static power management
|
||||
actions by removing nodes that are not required.
|
||||
|
||||
dpm_shutdown_nodes(proxmox_api, proxlb_data) -> None:
|
||||
Shuts down nodes flagged for DPM shutdown by using the Proxmox API, ensuring
|
||||
that Wake-on-LAN (WOL) is available for proper node recovery.
|
||||
|
||||
dpm_startup_nodes(proxmox_api, proxlb_data) -> None:
|
||||
Powers on nodes that are flagged for startup and are not in maintenance mode,
|
||||
leveraging Wake-on-LAN (WOL) functionality.
|
||||
|
||||
dpm_validate_wol_mac(proxmox_api, node) -> None:
|
||||
Validates and retrieves the Wake-on-LAN (WOL) MAC address for a given node,
|
||||
ensuring that a valid address is set for powering on the node remotely.
|
||||
"""
|
||||
|
||||
def __init__(self, proxlb_data: Dict[str, Any]):
|
||||
"""
|
||||
Initializes the DPM class with the provided ProxLB data.
|
||||
|
||||
Args:
|
||||
proxlb_data (dict): The data required for balancing VMs and CTs.
|
||||
"""
|
||||
logger.debug("Starting: dpm class.")
|
||||
|
||||
if proxlb_data["meta"].get("dpm", {}).get("enable", False):
|
||||
logger.debug("DPM function is enabled.")
|
||||
mode = proxlb_data["meta"].get("dpm", {}).get("mode", None)
|
||||
|
||||
if mode == "static":
|
||||
self.dpm_static(proxlb_data)
|
||||
|
||||
if mode == "auto":
|
||||
self.dpm_auto(proxlb_data)
|
||||
|
||||
else:
|
||||
logger.debug("DPM function is not enabled.")
|
||||
|
||||
logger.debug("Finished: dpm class.")
|
||||
|
||||
def dpm_static(self, proxlb_data: Dict[str, Any]) -> None:
|
||||
"""
|
||||
Evaluates and performs static Distributed Power Management (DPM) actions based on current cluster state.
|
||||
|
||||
This method monitors cluster resource availability and attempts to reduce the number of active nodes
|
||||
when sufficient free resources are available. It ensures a minimum number of nodes remains active
|
||||
and prioritizes shutting down nodes with the least utilized resources to minimize impact. Nodes selected
|
||||
for shutdown are marked for maintenance and flagged for DPM shutdown.
|
||||
|
||||
Parameters:
|
||||
proxlb_data (Dict[str, Any]): A dictionary containing metadata, cluster status, and node-level information
|
||||
including resource utilization, configuration settings, and DPM thresholds.
|
||||
|
||||
Returns:
|
||||
None: Modifies the input dictionary in-place to reflect updated cluster state and node flags.
|
||||
"""
|
||||
logger.debug("Starting: dpm_static.")
|
||||
|
||||
method = proxlb_data["meta"].get("dpm", {}).get("method", "memory")
|
||||
cluster_nodes_overall = proxlb_data["cluster"]["node_count_overall"]
|
||||
cluster_nodes_available = proxlb_data["cluster"]["node_count_available"]
|
||||
cluster_free_resources_percent = int(proxlb_data["cluster"][f"{method}_free_percent"])
|
||||
cluster_free_resources_req_min = proxlb_data["meta"].get("dpm", {}).get("cluster_min_free_resources", 0)
|
||||
cluster_mind_nodes = proxlb_data["meta"].get("dpm", {}).get("cluster_min_nodes", 3)
|
||||
logger.debug(f"DPM: Cluster Nodes: {cluster_nodes_overall} | Nodes available: {cluster_nodes_available} | Nodes offline: {cluster_nodes_overall - cluster_nodes_available}")
|
||||
|
||||
# Only proceed removing nodes if the cluster has enough resources
|
||||
while cluster_free_resources_percent > cluster_free_resources_req_min:
|
||||
logger.debug(f"DPM: More free resources {cluster_free_resources_percent}% available than required: {cluster_free_resources_req_min}%. DPM evaluation starting...")
|
||||
|
||||
# Ensure that we have at least a defined minimum of nodes left
|
||||
if cluster_nodes_available > cluster_mind_nodes:
|
||||
logger.debug(f"DPM: A minimum of {cluster_mind_nodes} nodes is required. {cluster_nodes_available} are available. Proceeding...")
|
||||
|
||||
# Get the node with the fewest used resources to keep migrations low
|
||||
Calculations.get_most_free_node(proxlb_data, False)
|
||||
dpm_node = proxlb_data["meta"]["balancing"]["balance_next_node"]
|
||||
|
||||
# Perform cluster calculation for evaluating how many nodes can safely leave
|
||||
# the cluster. Further object calculations are being processed afterwards by
|
||||
# the calculation class
|
||||
logger.debug(f"DPM: Removing node {dpm_node} from cluster. Node will be turned off later.")
|
||||
Calculations.update_cluster_resources(proxlb_data, dpm_node, "remove")
|
||||
cluster_free_resources_percent = int(proxlb_data["cluster"][f"{method}_free_percent"])
|
||||
logger.debug(f"DPM: Free cluster resources changed to: {int(proxlb_data['cluster'][f'{method}_free_percent'])}%.")
|
||||
|
||||
# Set node to maintenance and DPM shutdown
|
||||
proxlb_data["nodes"][dpm_node]["maintenance"] = True
|
||||
proxlb_data["nodes"][dpm_node]["dpm_shutdown"] = True
|
||||
else:
|
||||
logger.warning(f"DPM: A minimum of {cluster_mind_nodes} nodes is required. {cluster_nodes_available} are available. Cannot proceed!")
|
||||
|
||||
logger.debug(f"DPM: Not enough free resources {cluster_free_resources_percent}% available than required: {cluster_free_resources_req_min}%. DPM evaluation stopped.")
|
||||
logger.debug("Finished: dpm_static.")
|
||||
return proxlb_data
|
||||
|
||||
@staticmethod
|
||||
def dpm_shutdown_nodes(proxmox_api, proxlb_data: Dict[str, Any]) -> None:
|
||||
"""
|
||||
Shuts down cluster nodes that are marked for maintenance and flagged for DPM shutdown.
|
||||
|
||||
This method iterates through the cluster nodes in the provided data and attempts to
|
||||
power off any node that has both the 'maintenance' and 'dpm_shutdown' flags set.
|
||||
It communicates with the Proxmox API to issue shutdown commands and logs any failures.
|
||||
|
||||
Parameters:
|
||||
proxmox_api: An instance of the Proxmox API client used to issue node shutdown commands.
|
||||
proxlb_data: A dictionary containing node status information, including flags for
|
||||
maintenance and DPM shutdown readiness.
|
||||
|
||||
Returns:
|
||||
None: Performs shutdown operations and logs outcomes; modifies no data directly.
|
||||
"""
|
||||
logger.debug("Starting: dpm_shutdown_nodes.")
|
||||
for node, node_info in proxlb_data["nodes"].items():
|
||||
|
||||
if node_info["maintenance"] and node_info["dpm_shutdown"]:
|
||||
logger.debug(f"DPM: Node: {node} is flagged as maintenance mode and to be powered off.")
|
||||
|
||||
# Ensure that the node has a valid WOL MAC defined. If not
|
||||
# we would be unable to power on that system again
|
||||
valid_wol_mac = DPM.dpm_validate_wol_mac(proxmox_api, node)
|
||||
|
||||
if valid_wol_mac:
|
||||
try:
|
||||
logger.debug(f"DPM: Shutting down node: {node}.")
|
||||
job_id = proxmox_api.nodes(node).status.post(command="shutdown")
|
||||
except proxmoxer.core.ResourceException as proxmox_api_error:
|
||||
logger.critical(f"DPM: Error while powering off node {node}. Please check job-id: {job_id}")
|
||||
logger.debug(f"DPM: Error while powering off node {node}. Please check job-id: {job_id}")
|
||||
else:
|
||||
logger.critical(f"DPM: Node {node} cannot be powered off due to missing WOL MAC. Please define a valid WOL MAC for this node.")
|
||||
|
||||
logger.debug("Finished: dpm_shutdown_nodes.")
|
||||
|
||||
@staticmethod
|
||||
def dpm_startup_nodes(proxmox_api, proxlb_data: Dict[str, Any]) -> None:
|
||||
"""
|
||||
Starts uo cluster nodes that are marked for DPM start up.
|
||||
|
||||
This method iterates through the cluster nodes in the provided data and attempts to
|
||||
power on any node that is not flagged as 'maintenance' but flagged as 'dpm_startup'.
|
||||
It communicates with the Proxmox API to issue poweron commands and logs any failures.
|
||||
|
||||
Parameters:
|
||||
proxmox_api: An instance of the Proxmox API client used to issue node startup commands.
|
||||
proxlb_data: A dictionary containing node status information, including flags for
|
||||
maintenance and DPM shutdown readiness.
|
||||
|
||||
Returns:
|
||||
None: Performs poweron operations and logs outcomes; modifies no data directly.
|
||||
"""
|
||||
logger.debug("Starting: dpm_startup_nodes.")
|
||||
for node, node_info in proxlb_data["nodes"].items():
|
||||
|
||||
if not node_info["maintenance"]:
|
||||
logger.debug(f"DPM: Node: {node} is not in maintenance mode.")
|
||||
|
||||
if node_info["dpm_startup"]:
|
||||
logger.debug(f"DPM: Node: {node} is flagged as to be started.")
|
||||
|
||||
try:
|
||||
logger.debug(f"DPM: Powering on node: {node}.")
|
||||
# Important: This requires Proxmox Operators to define the
|
||||
# WOL address for each node within the Proxmox webinterface
|
||||
job_id = proxmox_api.nodes().wakeonlan.post(node=node)
|
||||
except proxmoxer.core.ResourceException as proxmox_api_error:
|
||||
logger.critical(f"DPM: Error while powering on node {node}. Please check job-id: {job_id}")
|
||||
logger.debug(f"DPM: Error while powering on node {node}. Please check job-id: {job_id}")
|
||||
|
||||
logger.debug("Finished: dpm_startup_nodes.")
|
||||
|
||||
@staticmethod
|
||||
def dpm_validate_wol_mac(proxmox_api, node: Dict[str, Any]) -> str:
|
||||
"""
|
||||
Retrieves and validates the Wake-on-LAN (WOL) MAC address for a specified node.
|
||||
|
||||
This method fetches the MAC address configured for Wake-on-LAN (WOL) from the Proxmox API.
|
||||
If the MAC address is found, it is logged. In case of failure to retrieve the address,
|
||||
a critical log is generated indicating the absence of a WOL MAC address for the node.
|
||||
|
||||
Parameters:
|
||||
proxmox_api: An instance of the Proxmox API client used to query node configurations.
|
||||
node: The identifier (name or ID) of the node for which the WOL MAC address is to be validated.
|
||||
|
||||
Returns:
|
||||
node_wol_mac_address: The WOL MAC address for the specified node if found, otherwise `None`.
|
||||
"""
|
||||
logger.debug("Starting: dpm_validate_wol_mac.")
|
||||
|
||||
try:
|
||||
logger.debug(f"DPM: Getting WOL MAC address for node {node} from API.")
|
||||
node_wol_mac_address = proxmox_api.nodes(node).config.get(property="wakeonlan")
|
||||
node_wol_mac_address = node_wol_mac_address.get("wakeonlan")
|
||||
logger.debug(f"DPM: Node {node} has MAC address: {node_wol_mac_address} for WOL.")
|
||||
except proxmoxer.core.ResourceException as proxmox_api_error:
|
||||
logger.debug(f"DPM: Failed to get WOL MAC address for node {node} from API.")
|
||||
node_wol_mac_address = None
|
||||
logger.critical(f"DPM: Node {node} has no MAC address defined for WOL.")
|
||||
|
||||
logger.debug("Finished: dpm_validate_wol_mac.")
|
||||
return node_wol_mac_address
|
||||
@@ -63,21 +63,10 @@ class Guests:
|
||||
for guest in proxmox_api.nodes(node).qemu.get():
|
||||
if guest['status'] == 'running':
|
||||
|
||||
# If the balancing method is set to cpu, we need to wait for the guest to report
|
||||
# cpu usage. This is important for the balancing process to ensure that we do not
|
||||
# wait for a guest for an infinite time.
|
||||
if meta["meta"]["balancing"]["method"] == "cpu":
|
||||
retry_counter = 0
|
||||
while guest['cpu'] == 0 and retry_counter < 10:
|
||||
guest = proxmox_api.nodes(node).qemu(guest['vmid']).status.current.get()
|
||||
logger.debug(f"Guest {guest['name']} (type VM) is reporting {guest['cpu']} cpu usage on retry {retry_counter}.")
|
||||
retry_counter += 1
|
||||
time.sleep(1)
|
||||
|
||||
guests['guests'][guest['name']] = {}
|
||||
guests['guests'][guest['name']]['name'] = guest['name']
|
||||
guests['guests'][guest['name']]['cpu_total'] = guest['cpus']
|
||||
guests['guests'][guest['name']]['cpu_used'] = guest['cpu'] * guest['cpus']
|
||||
guests['guests'][guest['name']]['cpu_total'] = int(guest['cpus'])
|
||||
guests['guests'][guest['name']]['cpu_used'] = Guests.get_guest_cpu_usage(proxmox_api, node, guest['vmid'], guest['name'])
|
||||
guests['guests'][guest['name']]['memory_total'] = guest['maxmem']
|
||||
guests['guests'][guest['name']]['memory_used'] = guest['mem']
|
||||
guests['guests'][guest['name']]['disk_total'] = guest['maxdisk']
|
||||
@@ -90,8 +79,10 @@ class Guests:
|
||||
guests['guests'][guest['name']]['affinity_groups'] = Tags.get_affinity_groups(guests['guests'][guest['name']]['tags'])
|
||||
guests['guests'][guest['name']]['anti_affinity_groups'] = Tags.get_anti_affinity_groups(guests['guests'][guest['name']]['tags'])
|
||||
guests['guests'][guest['name']]['ignore'] = Tags.get_ignore(guests['guests'][guest['name']]['tags'])
|
||||
guests['guests'][guest['name']]['node_relationship'] = Tags.get_node_relationship(guests['guests'][guest['name']]['tags'])
|
||||
guests['guests'][guest['name']]['node_relationships'] = Tags.get_node_relationships(guests['guests'][guest['name']]['tags'], nodes)
|
||||
guests['guests'][guest['name']]['type'] = 'vm'
|
||||
|
||||
logger.debug(f"Resources of Guest {guest['name']} (type VM) added: {guests['guests'][guest['name']]}")
|
||||
else:
|
||||
logger.debug(f'Metric for VM {guest["name"]} ignored because VM is not running.')
|
||||
|
||||
@@ -102,8 +93,8 @@ class Guests:
|
||||
if guest['status'] == 'running':
|
||||
guests['guests'][guest['name']] = {}
|
||||
guests['guests'][guest['name']]['name'] = guest['name']
|
||||
guests['guests'][guest['name']]['cpu_total'] = guest['cpus']
|
||||
guests['guests'][guest['name']]['cpu_used'] = guest['cpu']
|
||||
guests['guests'][guest['name']]['cpu_total'] = int(guest['cpus'])
|
||||
guests['guests'][guest['name']]['cpu_used'] = Guests.get_guest_cpu_usage(proxmox_api, node, guest['vmid'], guest['name'])
|
||||
guests['guests'][guest['name']]['memory_total'] = guest['maxmem']
|
||||
guests['guests'][guest['name']]['memory_used'] = guest['mem']
|
||||
guests['guests'][guest['name']]['disk_total'] = guest['maxdisk']
|
||||
@@ -116,10 +107,47 @@ class Guests:
|
||||
guests['guests'][guest['name']]['affinity_groups'] = Tags.get_affinity_groups(guests['guests'][guest['name']]['tags'])
|
||||
guests['guests'][guest['name']]['anti_affinity_groups'] = Tags.get_anti_affinity_groups(guests['guests'][guest['name']]['tags'])
|
||||
guests['guests'][guest['name']]['ignore'] = Tags.get_ignore(guests['guests'][guest['name']]['tags'])
|
||||
guests['guests'][guest['name']]['node_relationship'] = Tags.get_node_relationship(guests['guests'][guest['name']]['tags'])
|
||||
guests['guests'][guest['name']]['node_relationships'] = Tags.get_node_relationships(guests['guests'][guest['name']]['tags'], nodes)
|
||||
guests['guests'][guest['name']]['type'] = 'ct'
|
||||
|
||||
logger.debug(f"Resources of Guest {guest['name']} (type CT) added: {guests['guests'][guest['name']]}")
|
||||
else:
|
||||
logger.debug(f'Metric for CT {guest["name"]} ignored because CT is not running.')
|
||||
|
||||
logger.debug("Finished: get_guests.")
|
||||
return guests
|
||||
|
||||
@staticmethod
|
||||
def get_guest_cpu_usage(proxmox_api, node_name: str, vm_id: int, vm_name: str) -> float:
|
||||
"""
|
||||
Retrieve the average CPU usage of a guest instance (VM/CT) over the past hour.
|
||||
|
||||
This method queries the Proxmox VE API for RRD (Round-Robin Database) data
|
||||
related to CPU usage of a specific guest instance and calculates the average CPU usage
|
||||
over the last hour using the "AVERAGE" consolidation function.
|
||||
|
||||
Args:
|
||||
proxmox_api: An instance of the Proxmox API client.
|
||||
node_name (str): The name of the Proxmox node hosting the VM.
|
||||
vm_id (int): The unique identifier of the guest instance (VM/CT).
|
||||
vm_name (str): The name of the guest instance (VM/CT).
|
||||
|
||||
Returns:
|
||||
float: The average CPU usage as a fraction (0.0 to 1.0) over the past hour.
|
||||
Returns 0.0 if no data is available.
|
||||
"""
|
||||
logger.debug("Finished: get_guest_cpu_usage.")
|
||||
time.sleep(0.1)
|
||||
|
||||
try:
|
||||
logger.debug(f"Getting RRD dara for guest: {vm_name}.")
|
||||
guest_data_rrd = proxmox_api.nodes(node_name).qemu(vm_id).rrddata.get(timeframe="hour", cf="AVERAGE")
|
||||
except Exception:
|
||||
logger.error(f"Failed to retrieve RRD data for guest: {vm_name} (ID: {vm_id}) on node: {node_name}. Using 0.0 as CPU usage.")
|
||||
logger.debug("Finished: get_guest_cpu_usage.")
|
||||
return 0.0
|
||||
|
||||
cpu_usage = sum(entry.get("cpu", 0.0) for entry in guest_data_rrd) / len(guest_data_rrd)
|
||||
logger.debug(f"CPU RRD data for guest: {vm_name}: {cpu_usage}")
|
||||
logger.debug("Finished: get_guest_cpu_usage.")
|
||||
return cpu_usage
|
||||
|
||||
@@ -54,7 +54,6 @@ class Nodes:
|
||||
"""
|
||||
logger.debug("Starting: get_nodes.")
|
||||
nodes = {"nodes": {}}
|
||||
cluster = {"cluster": {}}
|
||||
|
||||
for node in proxmox_api.nodes.get():
|
||||
# Ignoring a node results into ignoring all placed guests on the ignored node!
|
||||
@@ -62,8 +61,6 @@ class Nodes:
|
||||
nodes["nodes"][node["node"]] = {}
|
||||
nodes["nodes"][node["node"]]["name"] = node["node"]
|
||||
nodes["nodes"][node["node"]]["maintenance"] = False
|
||||
nodes["nodes"][node["node"]]["dpm_shutdown"] = False
|
||||
nodes["nodes"][node["node"]]["dpm_startup"] = False
|
||||
nodes["nodes"][node["node"]]["cpu_total"] = node["maxcpu"]
|
||||
nodes["nodes"][node["node"]]["cpu_assigned"] = 0
|
||||
nodes["nodes"][node["node"]]["cpu_used"] = node["cpu"] * node["maxcpu"]
|
||||
@@ -87,41 +84,14 @@ class Nodes:
|
||||
nodes["nodes"][node["node"]]["disk_used_percent"] = nodes["nodes"][node["node"]]["disk_used"] / node["maxdisk"] * 100
|
||||
|
||||
# Evaluate if node should be set to maintenance mode
|
||||
if Nodes.set_node_maintenance(proxlb_config, node["node"]):
|
||||
if Nodes.set_node_maintenance(proxmox_api, proxlb_config, node["node"]):
|
||||
nodes["nodes"][node["node"]]["maintenance"] = True
|
||||
|
||||
# Generate the intial cluster statistics within the same loop to avoid a further one.
|
||||
logger.debug(f"Updating cluster statistics by online node {node['node']}.")
|
||||
cluster["cluster"]["node_count"] = cluster["cluster"].get("node_count", 0) + 1
|
||||
cluster["cluster"]["cpu_total"] = cluster["cluster"].get("cpu_total", 0) + nodes["nodes"][node["node"]]["cpu_total"]
|
||||
cluster["cluster"]["cpu_used"] = cluster["cluster"].get("cpu_used", 0) + nodes["nodes"][node["node"]]["cpu_used"]
|
||||
cluster["cluster"]["cpu_free"] = cluster["cluster"].get("cpu_free", 0) + nodes["nodes"][node["node"]]["cpu_free"]
|
||||
cluster["cluster"]["cpu_free_percent"] = cluster["cluster"].get("cpu_free", 0) / cluster["cluster"].get("cpu_total", 0) * 100
|
||||
cluster["cluster"]["cpu_used_percent"] = cluster["cluster"].get("cpu_used", 0) / cluster["cluster"].get("cpu_total", 0) * 100
|
||||
cluster["cluster"]["memory_total"] = cluster["cluster"].get("memory_total", 0) + nodes["nodes"][node["node"]]["memory_total"]
|
||||
cluster["cluster"]["memory_used"] = cluster["cluster"].get("memory_used", 0) + nodes["nodes"][node["node"]]["memory_used"]
|
||||
cluster["cluster"]["memory_free"] = cluster["cluster"].get("memory_free", 0) + nodes["nodes"][node["node"]]["memory_free"]
|
||||
cluster["cluster"]["memory_free_percent"] = cluster["cluster"].get("memory_free", 0) / cluster["cluster"].get("memory_total", 0) * 100
|
||||
cluster["cluster"]["memory_used_percent"] = cluster["cluster"].get("memory_used", 0) / cluster["cluster"].get("memory_total", 0) * 100
|
||||
cluster["cluster"]["disk_total"] = cluster["cluster"].get("disk_total", 0) + nodes["nodes"][node["node"]]["disk_total"]
|
||||
cluster["cluster"]["disk_used"] = cluster["cluster"].get("disk_used", 0) + nodes["nodes"][node["node"]]["disk_used"]
|
||||
cluster["cluster"]["disk_free"] = cluster["cluster"].get("disk_free", 0) + nodes["nodes"][node["node"]]["disk_free"]
|
||||
cluster["cluster"]["disk_free_percent"] = cluster["cluster"].get("disk_free", 0) / cluster["cluster"].get("disk_total", 0) * 100
|
||||
cluster["cluster"]["disk_used_percent"] = cluster["cluster"].get("disk_used", 0) / cluster["cluster"].get("disk_total", 0) * 100
|
||||
|
||||
cluster["cluster"]["node_count_available"] = cluster["cluster"].get("node_count_available", 0) + 1
|
||||
cluster["cluster"]["node_count_overall"] = cluster["cluster"].get("node_count_overall", 0) + 1
|
||||
|
||||
# Update the cluster statistics by offline nodes to have the overall count of nodes in the cluster
|
||||
else:
|
||||
logger.debug(f"Updating cluster statistics by offline node {node['node']}.")
|
||||
cluster["cluster"]["node_count_overall"] = cluster["cluster"].get("node_count_overall", 0) + 1
|
||||
|
||||
logger.debug("Finished: get_nodes.")
|
||||
return nodes, cluster
|
||||
return nodes
|
||||
|
||||
@staticmethod
|
||||
def set_node_maintenance(proxlb_config: Dict[str, Any], node_name: str) -> Dict[str, Any]:
|
||||
def set_node_maintenance(proxmox_api, proxlb_config: Dict[str, Any], node_name: str) -> Dict[str, Any]:
|
||||
"""
|
||||
Set nodes to maintenance mode based on the provided configuration.
|
||||
|
||||
@@ -129,6 +99,7 @@ class Nodes:
|
||||
based on the configuration provided in proxlb_config.
|
||||
|
||||
Args:
|
||||
proxmox_api (any): The Proxmox API client instance.
|
||||
proxlb_config (Dict[str, Any]): A dictionary containing the ProxLB configuration, including maintenance nodes.
|
||||
node_name: (str): The current node name within the outer iteration.
|
||||
|
||||
@@ -137,11 +108,24 @@ class Nodes:
|
||||
"""
|
||||
logger.debug("Starting: set_node_maintenance.")
|
||||
|
||||
# Evaluate maintenance mode by config
|
||||
if proxlb_config.get("proxmox_cluster", None).get("maintenance_nodes", None) is not None:
|
||||
if len(proxlb_config.get("proxmox_cluster", {}).get("maintenance_nodes", [])) > 0:
|
||||
if node_name in proxlb_config.get("proxmox_cluster", {}).get("maintenance_nodes", []):
|
||||
logger.warning(f"Node: {node_name} has been set to maintenance mode.")
|
||||
logger.info(f"Node: {node_name} has been set to maintenance mode (by ProxLB config).")
|
||||
return True
|
||||
else:
|
||||
logger.debug(f"Node: {node_name} is not in maintenance mode by ProxLB config.")
|
||||
|
||||
# Evaluate maintenance mode by Proxmox HA
|
||||
for ha_element in proxmox_api.cluster.ha.status.current.get():
|
||||
if ha_element.get("status"):
|
||||
if "maintenance mode" in ha_element.get("status"):
|
||||
if ha_element.get("node") == node_name:
|
||||
logger.info(f"Node: {node_name} has been set to maintenance mode (by Proxmox HA API).")
|
||||
return True
|
||||
else:
|
||||
logger.debug(f"Node: {node_name} is not in maintenance mode by Proxmox HA API.")
|
||||
|
||||
logger.debug("Finished: set_node_maintenance.")
|
||||
|
||||
@@ -165,7 +149,7 @@ class Nodes:
|
||||
if proxlb_config.get("proxmox_cluster", None).get("ignore_nodes", None) is not None:
|
||||
if len(proxlb_config.get("proxmox_cluster", {}).get("ignore_nodes", [])) > 0:
|
||||
if node_name in proxlb_config.get("proxmox_cluster", {}).get("ignore_nodes", []):
|
||||
logger.warning(f"Node: {node_name} has been set to be ignored. Not adding node!")
|
||||
logger.info(f"Node: {node_name} has been set to be ignored. Not adding node!")
|
||||
return True
|
||||
|
||||
logger.debug("Finished: set_node_ignore.")
|
||||
|
||||
@@ -12,7 +12,9 @@ __license__ = "GPL-3.0"
|
||||
|
||||
import time
|
||||
from typing import List
|
||||
from typing import Dict, Any
|
||||
from utils.logger import SystemdLogger
|
||||
from utils.helper import Helper
|
||||
|
||||
logger = SystemdLogger()
|
||||
|
||||
@@ -153,7 +155,7 @@ class Tags:
|
||||
return ignore_tag
|
||||
|
||||
@staticmethod
|
||||
def get_node_relationship(tags: List[str]) -> str:
|
||||
def get_node_relationships(tags: List[str], nodes: Dict[str, Any]) -> str:
|
||||
"""
|
||||
Get a node relationship tag for a guest from the Proxmox cluster by the API to pin
|
||||
a guest to a node.
|
||||
@@ -163,17 +165,25 @@ class Tags:
|
||||
|
||||
Args:
|
||||
tags (List): A list holding all defined tags for a given guest.
|
||||
nodes (Dict): A dictionary holding all available nodes in the cluster.
|
||||
|
||||
Returns:
|
||||
Str: The related hypervisor node name.
|
||||
"""
|
||||
logger.debug("Starting: get_node_relationship.")
|
||||
node_relationship_tag = False
|
||||
logger.debug("Starting: get_node_relationships.")
|
||||
node_relationship_tags = []
|
||||
|
||||
if len(tags) > 0:
|
||||
for tag in tags:
|
||||
if tag.startswith("plb_pin"):
|
||||
node_relationship_tag = tag.replace("plb_pin_", "")
|
||||
|
||||
logger.debug("Finished: get_node_relationship.")
|
||||
return node_relationship_tag
|
||||
# Validate if the node to pin is present in the cluster
|
||||
if Helper.validate_node_presence(node_relationship_tag, nodes):
|
||||
logger.info(f"Tag {node_relationship_tag} is valid! Defined node exists in the cluster.")
|
||||
node_relationship_tags.append(node_relationship_tag)
|
||||
else:
|
||||
logger.warning(f"Tag {node_relationship_tag} is invalid! Defined node does not exist in the cluster. Not applying pinning.")
|
||||
|
||||
logger.debug("Finished: get_node_relationships.")
|
||||
return node_relationship_tags
|
||||
|
||||
@@ -10,11 +10,13 @@ __license__ = "GPL-3.0"
|
||||
|
||||
import json
|
||||
import uuid
|
||||
import re
|
||||
import sys
|
||||
import time
|
||||
import utils.version
|
||||
from utils.logger import SystemdLogger
|
||||
from typing import Dict, Any
|
||||
from types import FrameType
|
||||
|
||||
logger = SystemdLogger()
|
||||
|
||||
@@ -40,6 +42,8 @@ class Helper:
|
||||
get_daemon_mode(proxlb_config: Dict[str, Any]) -> None:
|
||||
Checks if the daemon mode is active and handles the scheduling accordingly.
|
||||
"""
|
||||
proxlb_reload = False
|
||||
|
||||
def __init__(self):
|
||||
"""
|
||||
Initializes the general Helper clas.
|
||||
@@ -143,6 +147,39 @@ class Helper:
|
||||
|
||||
logger.debug("Finished: get_daemon_mode.")
|
||||
|
||||
@staticmethod
|
||||
def get_service_delay(proxlb_config: Dict[str, Any]) -> None:
|
||||
"""
|
||||
Checks if a start up delay for the service is defined and waits to proceed until
|
||||
the time is up.
|
||||
|
||||
Parameters:
|
||||
proxlb_config (Dict[str, Any]): A dictionary containing the ProxLB configuration.
|
||||
|
||||
Returns:
|
||||
None
|
||||
"""
|
||||
logger.debug("Starting: get_service_delay.")
|
||||
if proxlb_config.get("service", {}).get("delay", {}).get("enable", False):
|
||||
|
||||
# Convert hours to seconds
|
||||
if proxlb_config["service"]["delay"].get("format", "hours") == "hours":
|
||||
sleep_seconds = proxlb_config.get("service", {}).get("delay", {}).get("time", 1) * 3600
|
||||
# Convert minutes to seconds
|
||||
elif proxlb_config["service"]["delay"].get("format", "hours") == "minutes":
|
||||
sleep_seconds = proxlb_config.get("service", {}).get("delay", {}).get("time", 60) * 60
|
||||
else:
|
||||
logger.error("Invalid format for service delay. Please use 'hours' or 'minutes'.")
|
||||
sys.exit(1)
|
||||
|
||||
logger.info(f"Service delay active: First run in: {proxlb_config.get('service', {}).get('delay', {}).get('time', 1)} {proxlb_config['service']['delay'].get('format', 'hours')}.")
|
||||
time.sleep(sleep_seconds)
|
||||
|
||||
else:
|
||||
logger.debug("Service delay not active. Proceeding without delay.")
|
||||
|
||||
logger.debug("Finished: get_service_delay.")
|
||||
|
||||
@staticmethod
|
||||
def print_json(proxlb_config: Dict[str, Any], print_json: bool = False) -> None:
|
||||
"""
|
||||
@@ -162,3 +199,111 @@ class Helper:
|
||||
print(json.dumps(filtered_data, indent=4))
|
||||
|
||||
logger.debug("Finished: print_json.")
|
||||
|
||||
@staticmethod
|
||||
def handler_sighup(signum: int, frame: FrameType) -> None:
|
||||
"""
|
||||
Signal handler for SIGHUP.
|
||||
|
||||
This method is triggered when the process receives a SIGHUP signal.
|
||||
It sets the `proxlb_reload` class variable to True to indicate that
|
||||
configuration should be reloaded in the main loop.
|
||||
|
||||
Args:
|
||||
signum (int): The signal number (expected to be signal.SIGHUP).
|
||||
frame (frame object): Current stack frame (unused but required by signal handler signature).
|
||||
"""
|
||||
logger.debug("Starting: handle_sighup.")
|
||||
logger.debug("Got SIGHUP signal. Reloading...")
|
||||
Helper.proxlb_reload = True
|
||||
logger.debug("Finished: handle_sighup.")
|
||||
|
||||
@staticmethod
|
||||
def handler_sigint(signum: int, frame: FrameType) -> None:
|
||||
"""
|
||||
Signal handler for SIGINT. (triggered by CTRL+C).
|
||||
|
||||
Args:
|
||||
signum (int): The signal number (e.g., SIGINT).
|
||||
frame (FrameType): The current stack frame when the signal was received.
|
||||
|
||||
Returns:
|
||||
None
|
||||
"""
|
||||
exit_message = "ProxLB has been successfully terminated by user."
|
||||
logger.debug(exit_message)
|
||||
print(f"\n {exit_message}")
|
||||
sys.exit(0)
|
||||
|
||||
@staticmethod
|
||||
def get_host_port_from_string(host_object):
|
||||
"""
|
||||
Parses a string containing a host (IPv4, IPv6, or hostname) and an optional port, and returns a tuple of (host, port).
|
||||
|
||||
Supported formats:
|
||||
- Hostname or IPv4 without port: "example.com" or "192.168.0.1"
|
||||
- Hostname or IPv4 with port: "example.com:8006" or "192.168.0.1:8006"
|
||||
- IPv6 in brackets with optional port: "[fc00::1]" or "[fc00::1]:8006"
|
||||
- IPv6 without brackets, port is assumed after last colon: "fc00::1:8006"
|
||||
|
||||
If no port is specified, port 8006 is used as the default.
|
||||
|
||||
Args:
|
||||
host_object (str): A string representing a host with or without a port.
|
||||
|
||||
Returns:
|
||||
tuple: A tuple (host: str, port: int)
|
||||
"""
|
||||
logger.debug("Starting: get_host_port_from_string.")
|
||||
|
||||
# IPv6 (with or without port, written in brackets)
|
||||
match = re.match(r'^\[(.+)\](?::(\d+))?$', host_object)
|
||||
if match:
|
||||
host = match.group(1)
|
||||
port = int(match.group(2)) if match.group(2) else 8006
|
||||
return host, port
|
||||
|
||||
# Count colons to identify IPv6 addresses without brackets
|
||||
colon_count = host_object.count(':')
|
||||
|
||||
# IPv4 or hostname without port
|
||||
if colon_count == 0:
|
||||
return host_object, 8006
|
||||
|
||||
# IPv4 or hostname with port
|
||||
elif colon_count == 1:
|
||||
host, port = host_object.split(':')
|
||||
return host, int(port)
|
||||
|
||||
# IPv6 (with or without port, assume last colon is port)
|
||||
else:
|
||||
parts = host_object.rsplit(':', 1)
|
||||
try:
|
||||
port = int(parts[1])
|
||||
return parts[0], port
|
||||
except ValueError:
|
||||
return host_object, 8006
|
||||
|
||||
@staticmethod
|
||||
def validate_node_presence(node: str, nodes: Dict[str, Any]) -> bool:
|
||||
"""
|
||||
Validates whether a given node exists in the provided cluster nodes dictionary.
|
||||
|
||||
Args:
|
||||
node (str): The name of the node to validate.
|
||||
nodes (Dict[str, Any]): A dictionary containing cluster information.
|
||||
Must include a "nodes" key mapping to a dict of available nodes.
|
||||
|
||||
Returns:
|
||||
bool: True if the node exists in the cluster, False otherwise.
|
||||
"""
|
||||
logger.debug("Starting: validate_node_presence.")
|
||||
|
||||
if node in nodes["nodes"].keys():
|
||||
logger.info(f"Node {node} found in cluster. Applying pinning.")
|
||||
logger.debug("Finished: validate_node_presence.")
|
||||
return True
|
||||
else:
|
||||
logger.warning(f"Node {node} not found in cluster. Not applying pinning!")
|
||||
logger.debug("Finished: validate_node_presence.")
|
||||
return False
|
||||
|
||||
@@ -33,6 +33,7 @@ try:
|
||||
except ImportError:
|
||||
URLLIB3_PRESENT = False
|
||||
from typing import Dict, Any
|
||||
from utils.helper import Helper
|
||||
from utils.logger import SystemdLogger
|
||||
|
||||
|
||||
@@ -134,6 +135,14 @@ class ProxmoxApi:
|
||||
proxlb_credentials = proxlb_config["proxmox_api"]
|
||||
present_auth_pass = "pass" in proxlb_credentials
|
||||
present_auth_secret = "token_secret" in proxlb_credentials
|
||||
token_id = proxlb_credentials.get("token_id", None)
|
||||
|
||||
if token_id:
|
||||
non_allowed_chars = ["@", "!"]
|
||||
for char in non_allowed_chars:
|
||||
if char in token_id:
|
||||
logger.error(f"Wrong user/token format defined. User and token id must be splitted! Please see: https://github.com/gyptazy/ProxLB/blob/main/docs/03_configuration.md#required-permissions-for-a-user")
|
||||
sys.exit(1)
|
||||
|
||||
if present_auth_pass and present_auth_secret:
|
||||
logger.critical(f"Username/password and API token authentication are mutal exclusive. Please use only one!")
|
||||
@@ -189,9 +198,9 @@ class ProxmoxApi:
|
||||
api_connection_wait_time = proxlb_config["proxmox_api"].get("wait_time", 1)
|
||||
|
||||
for api_connection_attempt in range(api_connection_retries):
|
||||
validated = self.test_api_proxmox_host(host)
|
||||
if validated:
|
||||
validated_api_hosts.append(validated)
|
||||
validated_api_host, api_port = self.test_api_proxmox_host(host)
|
||||
if validated_api_host:
|
||||
validated_api_hosts.append(validated_api_host)
|
||||
break
|
||||
else:
|
||||
logger.warning(f"Attempt {api_connection_attempt + 1}/{api_connection_retries} failed for host {host}. Retrying in {api_connection_wait_time} seconds...")
|
||||
@@ -200,7 +209,7 @@ class ProxmoxApi:
|
||||
if len(validated_api_hosts) > 0:
|
||||
# Choose a random host to distribute the load across the cluster
|
||||
# as a simple load balancing mechanism.
|
||||
return random.choice(validated_api_hosts)
|
||||
return random.choice(validated_api_hosts), api_port
|
||||
|
||||
logger.critical("No valid Proxmox API hosts found.")
|
||||
print("No valid Proxmox API hosts found.")
|
||||
@@ -228,6 +237,10 @@ class ProxmoxApi:
|
||||
"""
|
||||
logger.debug("Starting: test_api_proxmox_host.")
|
||||
|
||||
# Validate for custom ports in API hosts which might indicate
|
||||
# that an external loadbalancer will be used.
|
||||
host, port = Helper.get_host_port_from_string(host)
|
||||
|
||||
# Try resolving DNS to IP and log non-resolvable ones
|
||||
try:
|
||||
ip = socket.getaddrinfo(host, None, socket.AF_UNSPEC)
|
||||
@@ -239,12 +252,12 @@ class ProxmoxApi:
|
||||
for address_type in ip:
|
||||
if address_type[0] == socket.AF_INET:
|
||||
logger.debug(f"{host} is type ipv4.")
|
||||
if self.test_api_proxmox_host_ipv4(host):
|
||||
return host
|
||||
if self.test_api_proxmox_host_ipv4(host, port):
|
||||
return host, port
|
||||
elif address_type[0] == socket.AF_INET6:
|
||||
logger.debug(f"{host} is type ipv6.")
|
||||
if self.test_api_proxmox_host_ipv6(host):
|
||||
return host
|
||||
if self.test_api_proxmox_host_ipv6(host, port):
|
||||
return host, port
|
||||
else:
|
||||
return False
|
||||
|
||||
@@ -331,7 +344,15 @@ class ProxmoxApi:
|
||||
permissions_available = []
|
||||
|
||||
# Get the permissions for the current user/token from API
|
||||
permissions = proxmox_api.access.permissions.get()
|
||||
try:
|
||||
permissions = proxmox_api.access.permissions.get()
|
||||
except proxmoxer.core.ResourceException as api_error:
|
||||
if "no such user" in str(api_error):
|
||||
logger.error("Authentication to Proxmox API not possible: User not known - please check your username and config file.")
|
||||
sys.exit(1)
|
||||
else:
|
||||
logger.error(f"Proxmox API error: {api_error}")
|
||||
sys.exit(1)
|
||||
|
||||
# Get all available permissions of the current user/token
|
||||
for path, permission in permissions.items():
|
||||
@@ -378,7 +399,7 @@ class ProxmoxApi:
|
||||
self.validate_config(proxlb_config)
|
||||
|
||||
# Get a valid Proxmox API endpoint
|
||||
proxmox_api_endpoint = self.api_connect_get_hosts(proxlb_config, proxlb_config.get("proxmox_api", {}).get("hosts", []))
|
||||
proxmox_api_endpoint, proxmox_api_port = self.api_connect_get_hosts(proxlb_config, proxlb_config.get("proxmox_api", {}).get("hosts", []))
|
||||
|
||||
# Disable warnings for SSL certificate validation
|
||||
if not proxlb_config.get("proxmox_api").get("ssl_verification", True):
|
||||
@@ -392,6 +413,7 @@ class ProxmoxApi:
|
||||
if proxlb_config.get("proxmox_api").get("token_secret", False):
|
||||
proxmox_api = proxmoxer.ProxmoxAPI(
|
||||
proxmox_api_endpoint,
|
||||
port=proxmox_api_port,
|
||||
user=proxlb_config.get("proxmox_api").get("user", True),
|
||||
token_name=proxlb_config.get("proxmox_api").get("token_id", True),
|
||||
token_value=proxlb_config.get("proxmox_api").get("token_secret", True),
|
||||
@@ -401,6 +423,7 @@ class ProxmoxApi:
|
||||
else:
|
||||
proxmox_api = proxmoxer.ProxmoxAPI(
|
||||
proxmox_api_endpoint,
|
||||
port=proxmox_api_port,
|
||||
user=proxlb_config.get("proxmox_api").get("user", True),
|
||||
password=proxlb_config.get("proxmox_api").get("pass", True),
|
||||
verify_ssl=proxlb_config.get("proxmox_api").get("ssl_verification", True),
|
||||
@@ -420,6 +443,5 @@ class ProxmoxApi:
|
||||
sys.exit(2)
|
||||
|
||||
logger.info(f"API connection to host {proxmox_api_endpoint} succeeded.")
|
||||
|
||||
logger.debug("Finished: api_connect.")
|
||||
return proxmox_api
|
||||
|
||||
@@ -3,5 +3,5 @@ __app_desc__ = "A DRS alike loadbalancer for Proxmox clusters."
|
||||
__author__ = "Florian Paul Azim Hoberg <gyptazy>"
|
||||
__copyright__ = "Copyright (C) 2025 Florian Paul Azim Hoberg (@gyptazy)"
|
||||
__license__ = "GPL-3.0"
|
||||
__version__ = "1.1.2b"
|
||||
__version__ = "1.1.7"
|
||||
__url__ = "https://github.com/gyptazy/ProxLB"
|
||||
|
||||
@@ -6,6 +6,8 @@ Wants=network-online.target pveproxy.service
|
||||
[Service]
|
||||
ExecStart=python3 /usr/lib/python3/dist-packages/proxlb/main.py -c /etc/proxlb/proxlb.yaml
|
||||
User=plb
|
||||
ExecReload=/bin/kill -HUP $MAINPID
|
||||
KillMode=process
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
|
||||
2
setup.py
2
setup.py
@@ -2,7 +2,7 @@ from setuptools import setup
|
||||
|
||||
setup(
|
||||
name="proxlb",
|
||||
version="1.1.2b",
|
||||
version="1.1.7",
|
||||
description="A DRS alike loadbalancer for Proxmox clusters.",
|
||||
long_description="An advanced DRS alike loadbalancer for Proxmox clusters that also supports maintenance modes and affinity/anti-affinity rules.",
|
||||
author="Florian Paul Azim Hoberg",
|
||||
|
||||
Reference in New Issue
Block a user