mirror of
https://github.com/databasus/databasus.git
synced 2026-04-06 00:32:03 +02:00
FEATURE (backups): Add retries in case of failed backups
This commit is contained in:
@@ -4,7 +4,6 @@ import (
|
||||
"log/slog"
|
||||
"postgresus-backend/internal/config"
|
||||
backups_config "postgresus-backend/internal/features/backups/config"
|
||||
"postgresus-backend/internal/features/databases"
|
||||
"postgresus-backend/internal/features/storages"
|
||||
"postgresus-backend/internal/util/period"
|
||||
"time"
|
||||
@@ -13,9 +12,8 @@ import (
|
||||
type BackupBackgroundService struct {
|
||||
backupService *BackupService
|
||||
backupRepository *BackupRepository
|
||||
databaseService *databases.DatabaseService
|
||||
storageService *storages.StorageService
|
||||
backupConfigService *backups_config.BackupConfigService
|
||||
storageService *storages.StorageService
|
||||
|
||||
lastBackupTime time.Time
|
||||
logger *slog.Logger
|
||||
@@ -51,7 +49,7 @@ func (s *BackupBackgroundService) Run() {
|
||||
}
|
||||
}
|
||||
|
||||
func (s *BackupBackgroundService) IsBackupsRunning() bool {
|
||||
func (s *BackupBackgroundService) IsBackupsWorkerRunning() bool {
|
||||
// if last backup time is more than 5 minutes ago, return false
|
||||
return s.lastBackupTime.After(time.Now().UTC().Add(-5 * time.Minute))
|
||||
}
|
||||
@@ -90,18 +88,12 @@ func (s *BackupBackgroundService) failBackupsInProgress() error {
|
||||
}
|
||||
|
||||
func (s *BackupBackgroundService) cleanOldBackups() error {
|
||||
allDatabases, err := s.databaseService.GetAllDatabases()
|
||||
enabledBackupConfigs, err := s.backupConfigService.GetBackupConfigsWithEnabledBackups()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, database := range allDatabases {
|
||||
backupConfig, err := s.backupConfigService.GetBackupConfigByDbId(database.ID)
|
||||
if err != nil {
|
||||
s.logger.Error("Failed to get backup config by database ID", "error", err)
|
||||
continue
|
||||
}
|
||||
|
||||
for _, backupConfig := range enabledBackupConfigs {
|
||||
backupStorePeriod := backupConfig.StorePeriod
|
||||
|
||||
if backupStorePeriod == period.PeriodForever {
|
||||
@@ -112,14 +104,14 @@ func (s *BackupBackgroundService) cleanOldBackups() error {
|
||||
dateBeforeBackupsShouldBeDeleted := time.Now().UTC().Add(-storeDuration)
|
||||
|
||||
oldBackups, err := s.backupRepository.FindBackupsBeforeDate(
|
||||
database.ID,
|
||||
backupConfig.DatabaseID,
|
||||
dateBeforeBackupsShouldBeDeleted,
|
||||
)
|
||||
if err != nil {
|
||||
s.logger.Error(
|
||||
"Failed to find old backups for database",
|
||||
"databaseId",
|
||||
database.ID,
|
||||
backupConfig.DatabaseID,
|
||||
"error",
|
||||
err,
|
||||
)
|
||||
@@ -149,7 +141,13 @@ func (s *BackupBackgroundService) cleanOldBackups() error {
|
||||
continue
|
||||
}
|
||||
|
||||
s.logger.Info("Deleted old backup", "backupId", backup.ID, "databaseId", database.ID)
|
||||
s.logger.Info(
|
||||
"Deleted old backup",
|
||||
"backupId",
|
||||
backup.ID,
|
||||
"databaseId",
|
||||
backupConfig.DatabaseID,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -157,28 +155,22 @@ func (s *BackupBackgroundService) cleanOldBackups() error {
|
||||
}
|
||||
|
||||
func (s *BackupBackgroundService) runPendingBackups() error {
|
||||
allDatabases, err := s.databaseService.GetAllDatabases()
|
||||
enabledBackupConfigs, err := s.backupConfigService.GetBackupConfigsWithEnabledBackups()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, database := range allDatabases {
|
||||
backupConfig, err := s.backupConfigService.GetBackupConfigByDbId(database.ID)
|
||||
if err != nil {
|
||||
s.logger.Error("Failed to get backup config by database ID", "error", err)
|
||||
continue
|
||||
}
|
||||
|
||||
for _, backupConfig := range enabledBackupConfigs {
|
||||
if backupConfig.BackupInterval == nil {
|
||||
continue
|
||||
}
|
||||
|
||||
lastBackup, err := s.backupRepository.FindLastByDatabaseID(database.ID)
|
||||
lastBackup, err := s.backupRepository.FindLastByDatabaseID(backupConfig.DatabaseID)
|
||||
if err != nil {
|
||||
s.logger.Error(
|
||||
"Failed to get last backup for database",
|
||||
"databaseId",
|
||||
database.ID,
|
||||
backupConfig.DatabaseID,
|
||||
"error",
|
||||
err,
|
||||
)
|
||||
@@ -190,19 +182,71 @@ func (s *BackupBackgroundService) runPendingBackups() error {
|
||||
lastBackupTime = &lastBackup.CreatedAt
|
||||
}
|
||||
|
||||
if backupConfig.BackupInterval.ShouldTriggerBackup(time.Now().UTC(), lastBackupTime) {
|
||||
remainedBackupTryCount := s.GetRemainedBackupTryCount(lastBackup)
|
||||
|
||||
if backupConfig.BackupInterval.ShouldTriggerBackup(time.Now().UTC(), lastBackupTime) ||
|
||||
remainedBackupTryCount > 0 {
|
||||
s.logger.Info(
|
||||
"Triggering scheduled backup",
|
||||
"databaseId",
|
||||
database.ID,
|
||||
backupConfig.DatabaseID,
|
||||
"intervalType",
|
||||
backupConfig.BackupInterval.Interval,
|
||||
)
|
||||
|
||||
go s.backupService.MakeBackup(database.ID)
|
||||
s.logger.Info("Successfully triggered scheduled backup", "databaseId", database.ID)
|
||||
go s.backupService.MakeBackup(backupConfig.DatabaseID, remainedBackupTryCount == 1)
|
||||
s.logger.Info(
|
||||
"Successfully triggered scheduled backup",
|
||||
"databaseId",
|
||||
backupConfig.DatabaseID,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// GetRemainedBackupTryCount returns the number of remaining backup tries for a given backup.
|
||||
// If the backup is not failed or the backup config does not allow retries, it returns 0.
|
||||
// If the backup is failed and the backup config allows retries, it returns the number of remaining tries.
|
||||
// If the backup is failed and the backup config does not allow retries, it returns 0.
|
||||
func (s *BackupBackgroundService) GetRemainedBackupTryCount(lastBackup *Backup) int {
|
||||
if lastBackup == nil {
|
||||
return 0
|
||||
}
|
||||
|
||||
if lastBackup.Status != BackupStatusFailed {
|
||||
return 0
|
||||
}
|
||||
|
||||
backupConfig, err := s.backupConfigService.GetBackupConfigByDbId(lastBackup.DatabaseID)
|
||||
if err != nil {
|
||||
s.logger.Error("Failed to get backup config by database ID", "error", err)
|
||||
return 0
|
||||
}
|
||||
|
||||
if !backupConfig.IsRetryIfFailed {
|
||||
return 0
|
||||
}
|
||||
|
||||
maxFailedTriesCount := backupConfig.MaxFailedTriesCount
|
||||
|
||||
lastBackups, err := s.backupRepository.FindByDatabaseIDWithLimit(
|
||||
lastBackup.DatabaseID,
|
||||
maxFailedTriesCount,
|
||||
)
|
||||
if err != nil {
|
||||
s.logger.Error("Failed to find last backups by database ID", "error", err)
|
||||
return 0
|
||||
}
|
||||
|
||||
lastFailedBackups := make([]*Backup, 0)
|
||||
|
||||
for _, backup := range lastBackups {
|
||||
if backup.Status == BackupStatusFailed {
|
||||
lastFailedBackups = append(lastFailedBackups, backup)
|
||||
}
|
||||
}
|
||||
|
||||
return maxFailedTriesCount - len(lastFailedBackups)
|
||||
}
|
||||
|
||||
@@ -0,0 +1,314 @@
|
||||
package backups
|
||||
|
||||
import (
|
||||
backups_config "postgresus-backend/internal/features/backups/config"
|
||||
"postgresus-backend/internal/features/databases"
|
||||
"postgresus-backend/internal/features/intervals"
|
||||
"postgresus-backend/internal/features/notifiers"
|
||||
"postgresus-backend/internal/features/storages"
|
||||
"postgresus-backend/internal/features/users"
|
||||
"postgresus-backend/internal/util/period"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func Test_MakeBackupForDbHavingBackupDayAgo_BackupCreated(t *testing.T) {
|
||||
// setup data
|
||||
user := users.GetTestUser()
|
||||
storage := storages.CreateTestStorage(user.UserID)
|
||||
notifier := notifiers.CreateTestNotifier(user.UserID)
|
||||
database := databases.CreateTestDatabase(user.UserID, storage, notifier)
|
||||
|
||||
// Enable backups for the database
|
||||
backupConfig, err := backups_config.GetBackupConfigService().GetBackupConfigByDbId(database.ID)
|
||||
assert.NoError(t, err)
|
||||
|
||||
timeOfDay := "04:00"
|
||||
backupConfig.BackupInterval = &intervals.Interval{
|
||||
Interval: intervals.IntervalDaily,
|
||||
TimeOfDay: &timeOfDay,
|
||||
}
|
||||
backupConfig.IsBackupsEnabled = true
|
||||
backupConfig.StorePeriod = period.PeriodWeek
|
||||
backupConfig.Storage = storage
|
||||
backupConfig.StorageID = &storage.ID
|
||||
|
||||
_, err = backups_config.GetBackupConfigService().SaveBackupConfig(backupConfig)
|
||||
assert.NoError(t, err)
|
||||
|
||||
// add old backup
|
||||
backupRepository.Save(&Backup{
|
||||
Database: database,
|
||||
DatabaseID: database.ID,
|
||||
|
||||
Storage: storage,
|
||||
StorageID: storage.ID,
|
||||
|
||||
Status: BackupStatusCompleted,
|
||||
|
||||
CreatedAt: time.Now().UTC().Add(-24 * time.Hour),
|
||||
})
|
||||
|
||||
GetBackupBackgroundService().runPendingBackups()
|
||||
|
||||
time.Sleep(100 * time.Millisecond)
|
||||
|
||||
// assertions
|
||||
backups, err := backupRepository.FindByDatabaseID(database.ID)
|
||||
assert.NoError(t, err)
|
||||
assert.Len(t, backups, 2)
|
||||
|
||||
// cleanup
|
||||
for _, backup := range backups {
|
||||
err := backupRepository.DeleteByID(backup.ID)
|
||||
assert.NoError(t, err)
|
||||
}
|
||||
|
||||
databases.RemoveTestDatabase(database)
|
||||
storages.RemoveTestStorage(storage.ID)
|
||||
notifiers.RemoveTestNotifier(notifier)
|
||||
}
|
||||
|
||||
func Test_MakeBackupForDbHavingHourAgoBackup_BackupSkipped(t *testing.T) {
|
||||
// setup data
|
||||
user := users.GetTestUser()
|
||||
storage := storages.CreateTestStorage(user.UserID)
|
||||
notifier := notifiers.CreateTestNotifier(user.UserID)
|
||||
database := databases.CreateTestDatabase(user.UserID, storage, notifier)
|
||||
|
||||
// Enable backups for the database
|
||||
backupConfig, err := backups_config.GetBackupConfigService().GetBackupConfigByDbId(database.ID)
|
||||
assert.NoError(t, err)
|
||||
|
||||
timeOfDay := "04:00"
|
||||
backupConfig.BackupInterval = &intervals.Interval{
|
||||
Interval: intervals.IntervalDaily,
|
||||
TimeOfDay: &timeOfDay,
|
||||
}
|
||||
backupConfig.IsBackupsEnabled = true
|
||||
backupConfig.StorePeriod = period.PeriodWeek
|
||||
backupConfig.Storage = storage
|
||||
backupConfig.StorageID = &storage.ID
|
||||
|
||||
_, err = backups_config.GetBackupConfigService().SaveBackupConfig(backupConfig)
|
||||
assert.NoError(t, err)
|
||||
|
||||
// add recent backup (1 hour ago)
|
||||
backupRepository.Save(&Backup{
|
||||
Database: database,
|
||||
DatabaseID: database.ID,
|
||||
|
||||
Storage: storage,
|
||||
StorageID: storage.ID,
|
||||
|
||||
Status: BackupStatusCompleted,
|
||||
|
||||
CreatedAt: time.Now().UTC().Add(-1 * time.Hour),
|
||||
})
|
||||
|
||||
GetBackupBackgroundService().runPendingBackups()
|
||||
|
||||
time.Sleep(100 * time.Millisecond)
|
||||
|
||||
// assertions
|
||||
backups, err := backupRepository.FindByDatabaseID(database.ID)
|
||||
assert.NoError(t, err)
|
||||
assert.Len(t, backups, 1) // Should still be 1 backup, no new backup created
|
||||
|
||||
// cleanup
|
||||
for _, backup := range backups {
|
||||
err := backupRepository.DeleteByID(backup.ID)
|
||||
assert.NoError(t, err)
|
||||
}
|
||||
|
||||
databases.RemoveTestDatabase(database)
|
||||
storages.RemoveTestStorage(storage.ID)
|
||||
notifiers.RemoveTestNotifier(notifier)
|
||||
}
|
||||
|
||||
func Test_MakeBackupHavingFailedBackupWithoutRetries_BackupSkipped(t *testing.T) {
|
||||
// setup data
|
||||
user := users.GetTestUser()
|
||||
storage := storages.CreateTestStorage(user.UserID)
|
||||
notifier := notifiers.CreateTestNotifier(user.UserID)
|
||||
database := databases.CreateTestDatabase(user.UserID, storage, notifier)
|
||||
|
||||
// Enable backups for the database with retries disabled
|
||||
backupConfig, err := backups_config.GetBackupConfigService().GetBackupConfigByDbId(database.ID)
|
||||
assert.NoError(t, err)
|
||||
|
||||
timeOfDay := "04:00"
|
||||
backupConfig.BackupInterval = &intervals.Interval{
|
||||
Interval: intervals.IntervalDaily,
|
||||
TimeOfDay: &timeOfDay,
|
||||
}
|
||||
backupConfig.IsBackupsEnabled = true
|
||||
backupConfig.StorePeriod = period.PeriodWeek
|
||||
backupConfig.Storage = storage
|
||||
backupConfig.StorageID = &storage.ID
|
||||
backupConfig.IsRetryIfFailed = false
|
||||
backupConfig.MaxFailedTriesCount = 0
|
||||
|
||||
_, err = backups_config.GetBackupConfigService().SaveBackupConfig(backupConfig)
|
||||
assert.NoError(t, err)
|
||||
|
||||
// add failed backup
|
||||
failMessage := "backup failed"
|
||||
backupRepository.Save(&Backup{
|
||||
Database: database,
|
||||
DatabaseID: database.ID,
|
||||
|
||||
Storage: storage,
|
||||
StorageID: storage.ID,
|
||||
|
||||
Status: BackupStatusFailed,
|
||||
FailMessage: &failMessage,
|
||||
|
||||
CreatedAt: time.Now().UTC().Add(-1 * time.Hour),
|
||||
})
|
||||
|
||||
GetBackupBackgroundService().runPendingBackups()
|
||||
|
||||
time.Sleep(100 * time.Millisecond)
|
||||
|
||||
// assertions
|
||||
backups, err := backupRepository.FindByDatabaseID(database.ID)
|
||||
assert.NoError(t, err)
|
||||
assert.Len(t, backups, 1) // Should still be 1 backup, no retry attempted
|
||||
|
||||
// cleanup
|
||||
for _, backup := range backups {
|
||||
err := backupRepository.DeleteByID(backup.ID)
|
||||
assert.NoError(t, err)
|
||||
}
|
||||
|
||||
databases.RemoveTestDatabase(database)
|
||||
storages.RemoveTestStorage(storage.ID)
|
||||
notifiers.RemoveTestNotifier(notifier)
|
||||
}
|
||||
|
||||
func Test_MakeBackupHavingFailedBackupWithRetries_BackupCreated(t *testing.T) {
|
||||
// setup data
|
||||
user := users.GetTestUser()
|
||||
storage := storages.CreateTestStorage(user.UserID)
|
||||
notifier := notifiers.CreateTestNotifier(user.UserID)
|
||||
database := databases.CreateTestDatabase(user.UserID, storage, notifier)
|
||||
|
||||
// Enable backups for the database with retries enabled
|
||||
backupConfig, err := backups_config.GetBackupConfigService().GetBackupConfigByDbId(database.ID)
|
||||
assert.NoError(t, err)
|
||||
|
||||
timeOfDay := "04:00"
|
||||
backupConfig.BackupInterval = &intervals.Interval{
|
||||
Interval: intervals.IntervalDaily,
|
||||
TimeOfDay: &timeOfDay,
|
||||
}
|
||||
backupConfig.IsBackupsEnabled = true
|
||||
backupConfig.StorePeriod = period.PeriodWeek
|
||||
backupConfig.Storage = storage
|
||||
backupConfig.StorageID = &storage.ID
|
||||
backupConfig.IsRetryIfFailed = true
|
||||
backupConfig.MaxFailedTriesCount = 3
|
||||
|
||||
_, err = backups_config.GetBackupConfigService().SaveBackupConfig(backupConfig)
|
||||
assert.NoError(t, err)
|
||||
|
||||
// add failed backup
|
||||
failMessage := "backup failed"
|
||||
backupRepository.Save(&Backup{
|
||||
Database: database,
|
||||
DatabaseID: database.ID,
|
||||
|
||||
Storage: storage,
|
||||
StorageID: storage.ID,
|
||||
|
||||
Status: BackupStatusFailed,
|
||||
FailMessage: &failMessage,
|
||||
|
||||
CreatedAt: time.Now().UTC().Add(-1 * time.Hour),
|
||||
})
|
||||
|
||||
GetBackupBackgroundService().runPendingBackups()
|
||||
|
||||
time.Sleep(100 * time.Millisecond)
|
||||
|
||||
// assertions
|
||||
backups, err := backupRepository.FindByDatabaseID(database.ID)
|
||||
assert.NoError(t, err)
|
||||
assert.Len(t, backups, 2) // Should have 2 backups, retry was attempted
|
||||
|
||||
// cleanup
|
||||
for _, backup := range backups {
|
||||
err := backupRepository.DeleteByID(backup.ID)
|
||||
assert.NoError(t, err)
|
||||
}
|
||||
|
||||
databases.RemoveTestDatabase(database)
|
||||
storages.RemoveTestStorage(storage.ID)
|
||||
notifiers.RemoveTestNotifier(notifier)
|
||||
}
|
||||
|
||||
func Test_MakeBackupHavingFailedBackupWithRetries_RetriesCountNotExceeded(t *testing.T) {
|
||||
// setup data
|
||||
user := users.GetTestUser()
|
||||
storage := storages.CreateTestStorage(user.UserID)
|
||||
notifier := notifiers.CreateTestNotifier(user.UserID)
|
||||
database := databases.CreateTestDatabase(user.UserID, storage, notifier)
|
||||
|
||||
// Enable backups for the database with retries enabled
|
||||
backupConfig, err := backups_config.GetBackupConfigService().GetBackupConfigByDbId(database.ID)
|
||||
assert.NoError(t, err)
|
||||
|
||||
timeOfDay := "04:00"
|
||||
backupConfig.BackupInterval = &intervals.Interval{
|
||||
Interval: intervals.IntervalDaily,
|
||||
TimeOfDay: &timeOfDay,
|
||||
}
|
||||
backupConfig.IsBackupsEnabled = true
|
||||
backupConfig.StorePeriod = period.PeriodWeek
|
||||
backupConfig.Storage = storage
|
||||
backupConfig.StorageID = &storage.ID
|
||||
backupConfig.IsRetryIfFailed = true
|
||||
backupConfig.MaxFailedTriesCount = 3
|
||||
|
||||
_, err = backups_config.GetBackupConfigService().SaveBackupConfig(backupConfig)
|
||||
assert.NoError(t, err)
|
||||
|
||||
failMessage := "backup failed"
|
||||
|
||||
for i := 0; i < 3; i++ {
|
||||
backupRepository.Save(&Backup{
|
||||
Database: database,
|
||||
DatabaseID: database.ID,
|
||||
|
||||
Storage: storage,
|
||||
StorageID: storage.ID,
|
||||
|
||||
Status: BackupStatusFailed,
|
||||
FailMessage: &failMessage,
|
||||
|
||||
CreatedAt: time.Now().UTC().Add(-1 * time.Hour),
|
||||
})
|
||||
}
|
||||
|
||||
GetBackupBackgroundService().runPendingBackups()
|
||||
|
||||
time.Sleep(100 * time.Millisecond)
|
||||
|
||||
// assertions
|
||||
backups, err := backupRepository.FindByDatabaseID(database.ID)
|
||||
assert.NoError(t, err)
|
||||
assert.Len(t, backups, 3) // Should have 3 backups, not more than max
|
||||
|
||||
// cleanup
|
||||
for _, backup := range backups {
|
||||
err := backupRepository.DeleteByID(backup.ID)
|
||||
assert.NoError(t, err)
|
||||
}
|
||||
|
||||
databases.RemoveTestDatabase(database)
|
||||
storages.RemoveTestStorage(storage.ID)
|
||||
notifiers.RemoveTestNotifier(notifier)
|
||||
}
|
||||
@@ -27,9 +27,8 @@ var backupService = &BackupService{
|
||||
var backupBackgroundService = &BackupBackgroundService{
|
||||
backupService,
|
||||
backupRepository,
|
||||
databases.GetDatabaseService(),
|
||||
storages.GetStorageService(),
|
||||
backups_config.GetBackupConfigService(),
|
||||
storages.GetStorageService(),
|
||||
time.Now().UTC(),
|
||||
logger.GetLogger(),
|
||||
}
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
package backups
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"postgresus-backend/internal/storage"
|
||||
|
||||
"time"
|
||||
@@ -43,6 +44,30 @@ func (r *BackupRepository) FindByDatabaseID(databaseID uuid.UUID) ([]*Backup, er
|
||||
return backups, nil
|
||||
}
|
||||
|
||||
func (r *BackupRepository) FindByDatabaseIDWithLimit(
|
||||
databaseID uuid.UUID,
|
||||
limit int,
|
||||
) ([]*Backup, error) {
|
||||
if limit <= 0 {
|
||||
return nil, errors.New("limit must be greater than 0")
|
||||
}
|
||||
|
||||
var backups []*Backup
|
||||
|
||||
if err := storage.
|
||||
GetDb().
|
||||
Preload("Database").
|
||||
Preload("Storage").
|
||||
Where("database_id = ?", databaseID).
|
||||
Order("created_at DESC").
|
||||
Limit(limit).
|
||||
Find(&backups).Error; err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return backups, nil
|
||||
}
|
||||
|
||||
func (r *BackupRepository) FindByStorageID(storageID uuid.UUID) ([]*Backup, error) {
|
||||
var backups []*Backup
|
||||
|
||||
|
||||
@@ -68,7 +68,7 @@ func (s *BackupService) MakeBackupWithAuth(
|
||||
return errors.New("user does not have access to this database")
|
||||
}
|
||||
|
||||
go s.MakeBackup(databaseID)
|
||||
go s.MakeBackup(databaseID, true)
|
||||
|
||||
return nil
|
||||
}
|
||||
@@ -114,7 +114,7 @@ func (s *BackupService) DeleteBackup(
|
||||
return s.deleteBackup(backup)
|
||||
}
|
||||
|
||||
func (s *BackupService) MakeBackup(databaseID uuid.UUID) {
|
||||
func (s *BackupService) MakeBackup(databaseID uuid.UUID, isLastTry bool) {
|
||||
database, err := s.databaseService.GetDatabaseByID(databaseID)
|
||||
if err != nil {
|
||||
s.logger.Error("Failed to get database by ID", "error", err)
|
||||
@@ -244,6 +244,10 @@ func (s *BackupService) MakeBackup(databaseID uuid.UUID) {
|
||||
)
|
||||
}
|
||||
|
||||
if !isLastTry {
|
||||
return
|
||||
}
|
||||
|
||||
s.SendBackupNotification(
|
||||
backupConfig,
|
||||
backup,
|
||||
|
||||
@@ -52,7 +52,7 @@ func Test_BackupExecuted_NotificationSent(t *testing.T) {
|
||||
}),
|
||||
).Once()
|
||||
|
||||
backupService.MakeBackup(database.ID)
|
||||
backupService.MakeBackup(database.ID, true)
|
||||
|
||||
// Verify all expectations were met
|
||||
mockNotificationSender.AssertExpectations(t)
|
||||
@@ -84,7 +84,7 @@ func Test_BackupExecuted_NotificationSent(t *testing.T) {
|
||||
[]BackupRemoveListener{},
|
||||
}
|
||||
|
||||
backupService.MakeBackup(database.ID)
|
||||
backupService.MakeBackup(database.ID, true)
|
||||
|
||||
// Verify all expectations were met
|
||||
mockNotificationSender.AssertExpectations(t)
|
||||
@@ -119,7 +119,7 @@ func Test_BackupExecuted_NotificationSent(t *testing.T) {
|
||||
capturedMessage = args.Get(2).(string)
|
||||
}).Once()
|
||||
|
||||
backupService.MakeBackup(database.ID)
|
||||
backupService.MakeBackup(database.ID, true)
|
||||
|
||||
// Verify expectations were met
|
||||
mockNotificationSender.AssertExpectations(t)
|
||||
|
||||
@@ -27,6 +27,9 @@ type BackupConfig struct {
|
||||
SendNotificationsOn []BackupNotificationType `json:"sendNotificationsOn" gorm:"-"`
|
||||
SendNotificationsOnString string `json:"-" gorm:"column:send_notifications_on;type:text;not null"`
|
||||
|
||||
IsRetryIfFailed bool `json:"isRetryIfFailed" gorm:"column:is_retry_if_failed;type:boolean;not null"`
|
||||
MaxFailedTriesCount int `json:"maxFailedTriesCount" gorm:"column:max_failed_tries_count;type:int;not null"`
|
||||
|
||||
CpuCount int `json:"cpuCount" gorm:"type:int;not null"`
|
||||
}
|
||||
|
||||
@@ -81,5 +84,9 @@ func (b *BackupConfig) Validate() error {
|
||||
return errors.New("cpu count is required")
|
||||
}
|
||||
|
||||
if b.IsRetryIfFailed && b.MaxFailedTriesCount <= 0 {
|
||||
return errors.New("max failed tries count must be greater than 0")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -74,7 +74,7 @@ func (r *BackupConfigRepository) FindByDatabaseID(databaseID uuid.UUID) (*Backup
|
||||
return &backupConfig, nil
|
||||
}
|
||||
|
||||
func (r *BackupConfigRepository) FindWithEnabledBackups() ([]*BackupConfig, error) {
|
||||
func (r *BackupConfigRepository) GetWithEnabledBackups() ([]*BackupConfig, error) {
|
||||
var backupConfigs []*BackupConfig
|
||||
|
||||
if err := storage.
|
||||
|
||||
@@ -133,6 +133,10 @@ func (s *BackupConfigService) IsStorageUsing(
|
||||
return s.backupConfigRepository.IsStorageUsing(storageID)
|
||||
}
|
||||
|
||||
func (s *BackupConfigService) GetBackupConfigsWithEnabledBackups() ([]*BackupConfig, error) {
|
||||
return s.backupConfigRepository.GetWithEnabledBackups()
|
||||
}
|
||||
|
||||
func (s *BackupConfigService) initializeDefaultConfig(
|
||||
databaseID uuid.UUID,
|
||||
) error {
|
||||
@@ -150,7 +154,9 @@ func (s *BackupConfigService) initializeDefaultConfig(
|
||||
NotificationBackupFailed,
|
||||
NotificationBackupSuccess,
|
||||
},
|
||||
CpuCount: 1,
|
||||
CpuCount: 1,
|
||||
IsRetryIfFailed: true,
|
||||
MaxFailedTriesCount: 3,
|
||||
})
|
||||
|
||||
return err
|
||||
|
||||
@@ -18,6 +18,7 @@ import (
|
||||
|
||||
func Test_CheckPgHealthUseCase(t *testing.T) {
|
||||
user := users.GetTestUser()
|
||||
|
||||
storage := storages.CreateTestStorage(user.UserID)
|
||||
notifier := notifiers.CreateTestNotifier(user.UserID)
|
||||
|
||||
|
||||
@@ -29,7 +29,7 @@ func (s *HealthcheckService) IsHealthy() error {
|
||||
return errors.New("cannot connect to the database")
|
||||
}
|
||||
|
||||
if !s.backupBackgroundService.IsBackupsRunning() {
|
||||
if !s.backupBackgroundService.IsBackupsWorkerRunning() {
|
||||
return errors.New("backups are not running for more than 5 minutes")
|
||||
}
|
||||
|
||||
|
||||
13
backend/migrations/20250713160827_add_backup_retries.sql
Normal file
13
backend/migrations/20250713160827_add_backup_retries.sql
Normal file
@@ -0,0 +1,13 @@
|
||||
-- +goose Up
|
||||
-- +goose StatementBegin
|
||||
ALTER TABLE backup_configs
|
||||
ADD COLUMN is_retry_if_failed BOOLEAN NOT NULL DEFAULT FALSE,
|
||||
ADD COLUMN max_failed_tries_count INT NOT NULL DEFAULT 0;
|
||||
-- +goose StatementEnd
|
||||
|
||||
-- +goose Down
|
||||
-- +goose StatementBegin
|
||||
ALTER TABLE backup_configs
|
||||
DROP COLUMN is_retry_if_failed,
|
||||
DROP COLUMN max_failed_tries_count;
|
||||
-- +goose StatementEnd
|
||||
@@ -12,4 +12,6 @@ export interface BackupConfig {
|
||||
storage?: Storage;
|
||||
sendNotificationsOn: BackupNotificationType[];
|
||||
cpuCount: number;
|
||||
isRetryIfFailed: boolean;
|
||||
maxFailedTriesCount: number;
|
||||
}
|
||||
|
||||
@@ -151,6 +151,8 @@ export const EditBackupConfigComponent = ({
|
||||
cpuCount: 1,
|
||||
storePeriod: Period.WEEK,
|
||||
sendNotificationsOn: [],
|
||||
isRetryIfFailed: true,
|
||||
maxFailedTriesCount: 3,
|
||||
});
|
||||
}
|
||||
loadStorages();
|
||||
@@ -289,6 +291,43 @@ export const EditBackupConfigComponent = ({
|
||||
</div>
|
||||
)}
|
||||
|
||||
<div className="mt-4 mb-1 flex w-full items-center">
|
||||
<div className="min-w-[150px]">Retry backup if failed</div>
|
||||
<Switch
|
||||
size="small"
|
||||
checked={backupConfig.isRetryIfFailed}
|
||||
onChange={(checked) => updateBackupConfig({ isRetryIfFailed: checked })}
|
||||
/>
|
||||
|
||||
<Tooltip
|
||||
className="cursor-pointer"
|
||||
title="Automatically retry failed backups. Backups can fail due to network failures, storage issues or temporary database unavailability."
|
||||
>
|
||||
<InfoCircleOutlined className="ml-2" style={{ color: 'gray' }} />
|
||||
</Tooltip>
|
||||
</div>
|
||||
|
||||
{backupConfig.isRetryIfFailed && (
|
||||
<div className="mb-1 flex w-full items-center">
|
||||
<div className="min-w-[150px]">Max failed tries count</div>
|
||||
<InputNumber
|
||||
min={1}
|
||||
max={10}
|
||||
value={backupConfig.maxFailedTriesCount}
|
||||
onChange={(value) => updateBackupConfig({ maxFailedTriesCount: value || 1 })}
|
||||
size="small"
|
||||
className="max-w-[200px] grow"
|
||||
/>
|
||||
|
||||
<Tooltip
|
||||
className="cursor-pointer"
|
||||
title="Maximum number of retry attempts for failed backups. You will receive a notification when all tries have failed."
|
||||
>
|
||||
<InfoCircleOutlined className="ml-2" style={{ color: 'gray' }} />
|
||||
</Tooltip>
|
||||
</div>
|
||||
)}
|
||||
|
||||
<div className="mt-5 mb-1 flex w-full items-center">
|
||||
<div className="min-w-[150px]">CPU count</div>
|
||||
<InputNumber
|
||||
|
||||
@@ -134,6 +134,18 @@ export const ShowBackupConfigComponent = ({ database }: Props) => {
|
||||
</div>
|
||||
)}
|
||||
|
||||
<div className="mb-1 flex w-full items-center">
|
||||
<div className="min-w-[150px]">Retry if failed</div>
|
||||
<div>{backupConfig.isRetryIfFailed ? 'Yes' : 'No'}</div>
|
||||
</div>
|
||||
|
||||
{backupConfig.isRetryIfFailed && (
|
||||
<div className="mb-1 flex w-full items-center">
|
||||
<div className="min-w-[150px]">Max failed tries count</div>
|
||||
<div>{backupConfig.maxFailedTriesCount}</div>
|
||||
</div>
|
||||
)}
|
||||
|
||||
<div className="mb-1 flex w-full items-center">
|
||||
<div className="min-w-[150px]">Store period</div>
|
||||
<div>{backupConfig.storePeriod ? periodLabels[backupConfig.storePeriod] : ''}</div>
|
||||
|
||||
@@ -78,29 +78,31 @@ export const HealthckeckAttemptsComponent = ({ database }: Props) => {
|
||||
};
|
||||
|
||||
useEffect(() => {
|
||||
let isHealthcheckEnabled = false;
|
||||
let interval: number | null = null;
|
||||
|
||||
setIsHealthcheckConfigLoading(true);
|
||||
healthcheckConfigApi.getHealthcheckConfig(database.id).then((healthcheckConfig) => {
|
||||
setIsHealthcheckConfigLoading(false);
|
||||
|
||||
if (healthcheckConfig.isHealthcheckEnabled) {
|
||||
isHealthcheckEnabled = true;
|
||||
setIsShowHealthcheckConfig(true);
|
||||
|
||||
loadHealthcheckAttempts();
|
||||
|
||||
// Set up interval only if healthcheck
|
||||
// is enabled and period is 'today'
|
||||
if (period === 'today') {
|
||||
interval = setInterval(() => {
|
||||
loadHealthcheckAttempts(false);
|
||||
}, 60_000); // 5 seconds
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
if (period === 'today') {
|
||||
if (isHealthcheckEnabled) {
|
||||
const interval = setInterval(() => {
|
||||
loadHealthcheckAttempts(false);
|
||||
}, 60_000); // 1 minute
|
||||
|
||||
return () => clearInterval(interval);
|
||||
return () => {
|
||||
if (interval) {
|
||||
clearInterval(interval);
|
||||
}
|
||||
}
|
||||
};
|
||||
}, [period]);
|
||||
|
||||
if (isHealthcheckConfigLoading) {
|
||||
|
||||
Reference in New Issue
Block a user