From 8a601c7f68cedf0ff54747b15745e71be9cc4983 Mon Sep 17 00:00:00 2001 From: Rostislav Dugin Date: Tue, 17 Mar 2026 22:20:09 +0300 Subject: [PATCH] FEATURE (agent): Add restore from WAL-backup --- .github/workflows/ci-release.yml | 27 +- agent/.env.example | 2 + agent/.gitignore | 3 +- agent/Makefile | 21 +- agent/cmd/main.go | 29 +- agent/docker-compose.yml.example | 58 ++ agent/e2e/.gitignore | 1 + agent/e2e/Dockerfile.agent-docker | 22 +- agent/e2e/Dockerfile.agent-runner | 6 +- agent/e2e/Dockerfile.backup-restore-runner | 16 + agent/e2e/docker-compose.backup-restore.yml | 33 + agent/e2e/docker-compose.yml | 22 +- agent/e2e/mock-server/main.go | 371 ++++++++++- agent/e2e/scripts/backup-restore-helpers.sh | 357 ++++++++++ agent/e2e/scripts/run-all.sh | 6 +- agent/e2e/scripts/test-pg-docker-exec.sh | 122 ++-- agent/e2e/scripts/test-pg-host-bindir.sh | 99 ++- agent/e2e/scripts/test-pg-host-path.sh | 88 +-- agent/internal/features/api/api.go | 194 ++++-- agent/internal/features/api/dto.go | 28 + .../internal/features/full_backup/backuper.go | 20 +- .../features/full_backup/backuper_test.go | 6 +- .../features/full_backup/stderr_parser.go | 2 +- .../full_backup/stderr_parser_test.go | 19 +- agent/internal/features/restore/restorer.go | 413 ++++++++++++ .../features/restore/restorer_test.go | 616 +++++++++++++++++ .../backups/backups/backuping/scheduler.go | 4 + .../backups/backuping/scheduler_test.go | 123 ++++ .../postgres_wal_controller_test.go | 170 +++++ .../backups/backups/core/repository.go | 28 + .../backups/services/postgres_wal_service.go | 89 ++- backend/internal/features/databases/model.go | 6 + .../internal/features/databases/testing.go | 70 ++ .../features/healthcheck/config/service.go | 13 +- frontend/src/entity/backups/index.ts | 1 + frontend/src/entity/backups/model/Backup.ts | 8 +- .../entity/backups/model/PgWalBackupType.ts | 4 + .../src/entity/databases/api/databaseApi.ts | 8 + frontend/src/entity/databases/index.ts | 1 + 
.../src/entity/databases/model/Database.ts | 2 + .../model/postgresql/PostgresBackupType.ts | 4 + .../model/postgresql/PostgresqlDatabase.ts | 2 + .../backups/ui/AgentRestoreComponent.tsx | 227 +++++++ .../features/backups/ui/BackupsComponent.tsx | 117 +++- .../ui/AgentInstallationComponent.tsx | 358 ++++++++++ .../databases/ui/CreateDatabaseComponent.tsx | 26 +- .../databases/ui/DatabaseComponent.tsx | 48 +- .../EditDatabaseSpecificDataComponent.tsx | 16 +- .../EditPostgreSqlSpecificDataComponent.tsx | 623 ++++++++++-------- .../ShowPostgreSqlSpecificDataComponent.tsx | 47 +- .../ui/HealthckeckAttemptsComponent.tsx | 12 +- 51 files changed, 4016 insertions(+), 572 deletions(-) create mode 100644 agent/docker-compose.yml.example create mode 100644 agent/e2e/Dockerfile.backup-restore-runner create mode 100644 agent/e2e/docker-compose.backup-restore.yml create mode 100644 agent/e2e/scripts/backup-restore-helpers.sh create mode 100644 agent/internal/features/restore/restorer.go create mode 100644 agent/internal/features/restore/restorer_test.go create mode 100644 frontend/src/entity/backups/model/PgWalBackupType.ts create mode 100644 frontend/src/entity/databases/model/postgresql/PostgresBackupType.ts create mode 100644 frontend/src/features/backups/ui/AgentRestoreComponent.tsx create mode 100644 frontend/src/features/databases/ui/AgentInstallationComponent.tsx diff --git a/.github/workflows/ci-release.yml b/.github/workflows/ci-release.yml index e5eeeba..fcdda68 100644 --- a/.github/workflows/ci-release.yml +++ b/.github/workflows/ci-release.yml @@ -183,6 +183,29 @@ jobs: docker compose down -v --rmi local || true rm -rf artifacts || true + e2e-agent-backup-restore: + runs-on: ubuntu-latest + needs: [lint-agent] + strategy: + matrix: + pg_version: [15, 16, 17, 18] + fail-fast: false + steps: + - name: Check out code + uses: actions/checkout@v4 + + - name: Run backup-restore e2e (PG ${{ matrix.pg_version }}) + run: | + cd agent + make e2e-backup-restore PG_VERSION=${{ 
matrix.pg_version }} + + - name: Cleanup + if: always() + run: | + cd agent/e2e + docker compose -f docker-compose.backup-restore.yml down -v --rmi local || true + rm -rf artifacts || true + # Self-hosted: performant high-frequency CPU is used to start many containers and run tests fast. Tests # step is bottle-neck, because we need a lot of containers and cannot parallelize tests due to shared resources test-backend: @@ -518,7 +541,7 @@ jobs: runs-on: self-hosted container: image: node:20 - needs: [test-backend, test-frontend, test-agent, e2e-agent] + needs: [test-backend, test-frontend, test-agent, e2e-agent, e2e-agent-backup-restore] if: ${{ github.ref == 'refs/heads/main' && !contains(github.event.head_commit.message, '[skip-release]') }} outputs: should_release: ${{ steps.version_bump.outputs.should_release }} @@ -611,7 +634,7 @@ jobs: build-only: runs-on: self-hosted - needs: [test-backend, test-frontend, test-agent, e2e-agent] + needs: [test-backend, test-frontend, test-agent, e2e-agent, e2e-agent-backup-restore] if: ${{ github.ref == 'refs/heads/main' && contains(github.event.head_commit.message, '[skip-release]') }} steps: - name: Clean workspace diff --git a/agent/.env.example b/agent/.env.example index f655030..8471e74 100644 --- a/agent/.env.example +++ b/agent/.env.example @@ -1 +1,3 @@ ENV_MODE=development +AGENT_DB_ID=your-database-id +AGENT_TOKEN=your-agent-token diff --git a/agent/.gitignore b/agent/.gitignore index 554a966..d827ac7 100644 --- a/agent/.gitignore +++ b/agent/.gitignore @@ -23,4 +23,5 @@ valkey-data/ victoria-logs-data/ databasus.json .test-tmp/ -databasus.log \ No newline at end of file +databasus.log +wal-queue/ \ No newline at end of file diff --git a/agent/Makefile b/agent/Makefile index c437aff..46e83ae 100644 --- a/agent/Makefile +++ b/agent/Makefile @@ -1,8 +1,21 @@ -.PHONY: run build test lint e2e e2e-clean +.PHONY: run build test lint e2e e2e-clean e2e-backup-restore e2e-backup-restore-clean + +include .env +export -# Usage: 
make run ARGS="start --pg-host localhost" run: - go run cmd/main.go $(ARGS) + go run cmd/main.go start \ + --databasus-host http://localhost:4005 \ + --db-id $(AGENT_DB_ID) \ + --token $(AGENT_TOKEN) \ + --pg-host 127.0.0.1 \ + --pg-port 7433 \ + --pg-user devuser \ + --pg-password devpassword \ + --pg-type docker \ + --pg-docker-container-name dev-postgres \ + --pg-wal-dir ./wal-queue \ + --skip-update build: CGO_ENABLED=0 go build -ldflags "-X main.Version=$(VERSION)" -o databasus-agent ./cmd/main.go @@ -14,6 +27,7 @@ lint: golangci-lint fmt ./cmd/... ./internal/... ./e2e/... && golangci-lint run ./cmd/... ./internal/... ./e2e/... e2e: + cd e2e && docker compose build --no-cache e2e-mock-server cd e2e && docker compose build cd e2e && docker compose run --rm e2e-agent-builder cd e2e && docker compose up -d e2e-postgres e2e-mock-server @@ -23,4 +37,5 @@ e2e: e2e-clean: cd e2e && docker compose down -v --rmi local + cd e2e && docker compose -f docker-compose.backup-restore.yml down -v --rmi local 2>/dev/null || true rm -rf e2e/artifacts diff --git a/agent/cmd/main.go b/agent/cmd/main.go index 8665002..8497502 100644 --- a/agent/cmd/main.go +++ b/agent/cmd/main.go @@ -1,6 +1,7 @@ package main import ( + "context" "errors" "flag" "fmt" @@ -12,6 +13,7 @@ import ( "databasus-agent/internal/config" "databasus-agent/internal/features/api" + "databasus-agent/internal/features/restore" "databasus-agent/internal/features/start" "databasus-agent/internal/features/upgrade" "databasus-agent/internal/logger" @@ -115,10 +117,9 @@ func runStatus() { func runRestore(args []string) { fs := flag.NewFlagSet("restore", flag.ExitOnError) - targetDir := fs.String("target-dir", "", "Target pgdata directory") + pgDataDir := fs.String("pgdata", "", "Target pgdata directory (required)") backupID := fs.String("backup-id", "", "Full backup UUID (optional)") targetTime := fs.String("target-time", "", "PITR target time in RFC3339 (optional)") - isYes := fs.Bool("yes", false, "Skip confirmation 
prompt") isSkipUpdate := fs.Bool("skip-update", false, "Skip auto-update check") cfg := &config.Config{} @@ -133,12 +134,24 @@ func runRestore(args []string) { isDev := checkIsDevelopment() runUpdateCheck(cfg.DatabasusHost, *isSkipUpdate, isDev, log) - log.Info("restore: stub — not yet implemented", - "targetDir", *targetDir, - "backupId", *backupID, - "targetTime", *targetTime, - "yes", *isYes, - ) + if *pgDataDir == "" { + fmt.Fprintln(os.Stderr, "Error: --pgdata is required") + os.Exit(1) + } + + if cfg.DatabasusHost == "" || cfg.Token == "" { + fmt.Fprintln(os.Stderr, "Error: databasus-host and token must be configured") + os.Exit(1) + } + + apiClient := api.NewClient(cfg.DatabasusHost, cfg.Token, log) + restorer := restore.NewRestorer(apiClient, log, *pgDataDir, *backupID, *targetTime) + + ctx := context.Background() + if err := restorer.Run(ctx); err != nil { + fmt.Fprintf(os.Stderr, "Error: %v\n", err) + os.Exit(1) + } } func printUsage() { diff --git a/agent/docker-compose.yml.example b/agent/docker-compose.yml.example new file mode 100644 index 0000000..4e3fd28 --- /dev/null +++ b/agent/docker-compose.yml.example @@ -0,0 +1,58 @@ +services: + dev-postgres: + image: postgres:17 + container_name: dev-postgres + environment: + POSTGRES_DB: devdb + POSTGRES_USER: devuser + POSTGRES_PASSWORD: devpassword + ports: + - "7433:5432" + command: + - bash + - -c + - | + mkdir -p /wal-queue && chown postgres:postgres /wal-queue + exec docker-entrypoint.sh postgres \ + -c wal_level=replica \ + -c max_wal_senders=3 \ + -c archive_mode=on \ + -c "archive_command=cp %p /wal-queue/%f" + volumes: + - ./wal-queue:/wal-queue + healthcheck: + test: ["CMD-SHELL", "pg_isready -U devuser -d devdb"] + interval: 2s + timeout: 5s + retries: 30 + + db-writer: + image: postgres:17 + container_name: dev-db-writer + depends_on: + dev-postgres: + condition: service_healthy + environment: + PGHOST: dev-postgres + PGPORT: "5432" + PGUSER: devuser + PGPASSWORD: devpassword + PGDATABASE: 
devdb + command: + - bash + - -c + - | + echo "Waiting for postgres..." + until pg_isready -h dev-postgres -U devuser -d devdb; do sleep 1; done + + psql -c "DROP TABLE IF EXISTS wal_generator;" + psql -c "CREATE TABLE wal_generator (id SERIAL PRIMARY KEY, data TEXT NOT NULL);" + echo "Starting WAL generation loop..." + while true; do + echo "Inserting ~50MB of data..." + psql -c "INSERT INTO wal_generator (data) SELECT repeat(md5(random()::text), 640) FROM generate_series(1, 2500);" + echo "Deleting data..." + psql -c "DELETE FROM wal_generator;" + echo "Cycle complete, sleeping 5s..." + sleep 5 + done diff --git a/agent/e2e/.gitignore b/agent/e2e/.gitignore index d4f588e..f92417e 100644 --- a/agent/e2e/.gitignore +++ b/agent/e2e/.gitignore @@ -1 +1,2 @@ artifacts/ +pgdata/ diff --git a/agent/e2e/Dockerfile.agent-docker b/agent/e2e/Dockerfile.agent-docker index 2c5080f..66cdf99 100644 --- a/agent/e2e/Dockerfile.agent-docker +++ b/agent/e2e/Dockerfile.agent-docker @@ -1,8 +1,22 @@ -# Runs pg_basebackup-via-docker-exec test (test 5) which tests -# that the agent can connect to Postgres inside Docker container -FROM docker:27-cli +# Runs backup-restore via docker exec test (test 6). Needs both Docker +# CLI (for pg_basebackup via docker exec) and PostgreSQL server (for +# restore verification). 
+FROM debian:bookworm-slim -RUN apk add --no-cache bash curl +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + ca-certificates curl gnupg2 locales postgresql-common && \ + sed -i '/en_US.UTF-8/s/^# //g' /etc/locale.gen && \ + locale-gen && \ + /usr/share/postgresql-common/pgdg/apt.postgresql.org.sh -y && \ + apt-get install -y --no-install-recommends \ + postgresql-17 && \ + install -m 0755 -d /etc/apt/keyrings && \ + curl -fsSL https://download.docker.com/linux/debian/gpg -o /etc/apt/keyrings/docker.asc && \ + echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/debian bookworm stable" > /etc/apt/sources.list.d/docker.list && \ + apt-get update && \ + apt-get install -y --no-install-recommends docker-ce-cli && \ + rm -rf /var/lib/apt/lists/* WORKDIR /tmp ENTRYPOINT [] diff --git a/agent/e2e/Dockerfile.agent-runner b/agent/e2e/Dockerfile.agent-runner index 1c0afb9..d5747f9 100644 --- a/agent/e2e/Dockerfile.agent-runner +++ b/agent/e2e/Dockerfile.agent-runner @@ -1,5 +1,5 @@ -# Runs upgrade and host-mode pg_basebackup tests (tests 1-4). Needs -# Postgres client tools to be installed inside the system +# Runs upgrade and host-mode backup-restore tests (tests 1-5). Needs +# full PostgreSQL server for backup-restore lifecycle tests. FROM debian:bookworm-slim RUN apt-get update && \ @@ -7,7 +7,7 @@ RUN apt-get update && \ ca-certificates curl gnupg2 postgresql-common && \ /usr/share/postgresql-common/pgdg/apt.postgresql.org.sh -y && \ apt-get install -y --no-install-recommends \ - postgresql-client-17 && \ + postgresql-17 && \ rm -rf /var/lib/apt/lists/* WORKDIR /tmp diff --git a/agent/e2e/Dockerfile.backup-restore-runner b/agent/e2e/Dockerfile.backup-restore-runner new file mode 100644 index 0000000..6ec8b5b --- /dev/null +++ b/agent/e2e/Dockerfile.backup-restore-runner @@ -0,0 +1,16 @@ +# Runs backup-restore lifecycle tests with a specific PostgreSQL version. 
+# Used for PG version matrix testing (15, 16, 17, 18). +FROM debian:bookworm-slim + +ARG PG_VERSION=17 + +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + ca-certificates curl gnupg2 postgresql-common && \ + /usr/share/postgresql-common/pgdg/apt.postgresql.org.sh -y && \ + apt-get install -y --no-install-recommends \ + postgresql-${PG_VERSION} && \ + rm -rf /var/lib/apt/lists/* + +WORKDIR /tmp +ENTRYPOINT [] diff --git a/agent/e2e/docker-compose.backup-restore.yml b/agent/e2e/docker-compose.backup-restore.yml new file mode 100644 index 0000000..8cd694a --- /dev/null +++ b/agent/e2e/docker-compose.backup-restore.yml @@ -0,0 +1,33 @@ +services: + e2e-br-mock-server: + build: + context: . + dockerfile: Dockerfile.mock-server + volumes: + - backup-storage:/backup-storage + container_name: e2e-br-mock-server + healthcheck: + test: ["CMD", "wget", "-q", "--spider", "http://localhost:4050/health"] + interval: 2s + timeout: 5s + retries: 10 + + e2e-br-runner: + build: + context: . 
+ dockerfile: Dockerfile.backup-restore-runner + args: + PG_VERSION: ${PG_VERSION:-17} + volumes: + - ./artifacts:/opt/agent/artifacts:ro + - ./scripts:/opt/agent/scripts:ro + depends_on: + e2e-br-mock-server: + condition: service_healthy + container_name: e2e-br-runner + command: ["bash", "/opt/agent/scripts/test-pg-host-path.sh"] + environment: + MOCK_SERVER_OVERRIDE: "http://e2e-br-mock-server:4050" + +volumes: + backup-storage: diff --git a/agent/e2e/docker-compose.yml b/agent/e2e/docker-compose.yml index 0dd4cbb..b6cf8a0 100644 --- a/agent/e2e/docker-compose.yml +++ b/agent/e2e/docker-compose.yml @@ -14,7 +14,19 @@ services: POSTGRES_USER: testuser POSTGRES_PASSWORD: testpassword container_name: e2e-agent-postgres - command: postgres -c wal_level=replica -c max_wal_senders=3 + command: + - bash + - -c + - | + mkdir -p /wal-queue && chown postgres:postgres /wal-queue + exec docker-entrypoint.sh postgres \ + -c wal_level=replica \ + -c max_wal_senders=3 \ + -c archive_mode=on \ + -c "archive_command=cp %p /wal-queue/%f" + volumes: + - ./pgdata:/var/lib/postgresql/data + - wal-queue:/wal-queue healthcheck: test: ["CMD-SHELL", "pg_isready -U testuser -d testdb"] interval: 2s @@ -27,6 +39,7 @@ services: dockerfile: Dockerfile.mock-server volumes: - ./artifacts:/artifacts:ro + - backup-storage:/backup-storage container_name: e2e-mock-server healthcheck: test: ["CMD", "wget", "-q", "--spider", "http://localhost:4050/health"] @@ -57,8 +70,15 @@ services: - ./artifacts:/opt/agent/artifacts:ro - ./scripts:/opt/agent/scripts:ro - /var/run/docker.sock:/var/run/docker.sock + - wal-queue:/wal-queue depends_on: e2e-postgres: condition: service_healthy + e2e-mock-server: + condition: service_healthy container_name: e2e-agent-docker command: ["bash", "/opt/agent/scripts/run-all.sh", "docker"] + +volumes: + wal-queue: + backup-storage: diff --git a/agent/e2e/mock-server/main.go b/agent/e2e/mock-server/main.go index d55d3f6..ed75cb2 100644 --- a/agent/e2e/mock-server/main.go +++ 
b/agent/e2e/mock-server/main.go @@ -1,17 +1,39 @@ package main import ( + "crypto/rand" "encoding/json" "fmt" + "io" "log" "net/http" + "os" + "path/filepath" "sync" + "time" ) +const backupStorageDir = "/backup-storage" + +type walSegment struct { + BackupID string + SegmentName string + FilePath string + SizeBytes int64 +} + type server struct { mu sync.RWMutex version string binaryPath string + + backupID string + backupFilePath string + startSegment string + stopSegment string + isFinalized bool + walSegments []walSegment + backupCreatedAt time.Time } func main() { @@ -19,12 +41,31 @@ func main() { binaryPath := "/artifacts/agent-v2" port := "4050" + _ = os.MkdirAll(backupStorageDir, 0o755) + s := &server{version: version, binaryPath: binaryPath} + // System endpoints http.HandleFunc("/api/v1/system/version", s.handleVersion) http.HandleFunc("/api/v1/system/agent", s.handleAgentDownload) + + // Backup endpoints + http.HandleFunc("/api/v1/backups/postgres/wal/is-wal-chain-valid-since-last-full-backup", s.handleChainValidity) + http.HandleFunc("/api/v1/backups/postgres/wal/next-full-backup-time", s.handleNextBackupTime) + http.HandleFunc("/api/v1/backups/postgres/wal/upload/full-start", s.handleFullStart) + http.HandleFunc("/api/v1/backups/postgres/wal/upload/full-complete", s.handleFullComplete) + http.HandleFunc("/api/v1/backups/postgres/wal/upload/wal", s.handleWalUpload) + http.HandleFunc("/api/v1/backups/postgres/wal/error", s.handleError) + + // Restore endpoints + http.HandleFunc("/api/v1/backups/postgres/wal/restore/plan", s.handleRestorePlan) + http.HandleFunc("/api/v1/backups/postgres/wal/restore/download", s.handleRestoreDownload) + + // Mock control endpoints http.HandleFunc("/mock/set-version", s.handleSetVersion) http.HandleFunc("/mock/set-binary-path", s.handleSetBinaryPath) + http.HandleFunc("/mock/backup-status", s.handleBackupStatus) + http.HandleFunc("/mock/reset", s.handleReset) http.HandleFunc("/health", s.handleHealth) addr := ":" + port @@ 
-35,7 +76,9 @@ func main() { } } -func (s *server) handleVersion(w http.ResponseWriter, r *http.Request) { +// --- System handlers --- + +func (s *server) handleVersion(w http.ResponseWriter, _ *http.Request) { s.mu.RLock() v := s.version s.mu.RUnlock() @@ -56,6 +99,263 @@ func (s *server) handleAgentDownload(w http.ResponseWriter, r *http.Request) { http.ServeFile(w, r, path) } +// --- Backup handlers --- + +func (s *server) handleChainValidity(w http.ResponseWriter, _ *http.Request) { + s.mu.RLock() + isFinalized := s.isFinalized + s.mu.RUnlock() + + log.Printf("GET chain-validity -> isFinalized=%v", isFinalized) + + w.Header().Set("Content-Type", "application/json") + + if isFinalized { + _ = json.NewEncoder(w).Encode(map[string]any{ + "isValid": true, + }) + } else { + _ = json.NewEncoder(w).Encode(map[string]any{ + "isValid": false, + "error": "no full backup found", + }) + } +} + +func (s *server) handleNextBackupTime(w http.ResponseWriter, _ *http.Request) { + log.Printf("GET next-full-backup-time") + + nextTime := time.Now().UTC().Add(1 * time.Hour) + + w.Header().Set("Content-Type", "application/json") + _ = json.NewEncoder(w).Encode(map[string]any{ + "nextFullBackupTime": nextTime.Format(time.RFC3339), + }) +} + +func (s *server) handleFullStart(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodPost { + http.Error(w, "POST only", http.StatusMethodNotAllowed) + return + } + + backupID := generateID() + filePath := filepath.Join(backupStorageDir, backupID+".zst") + + file, err := os.Create(filePath) + if err != nil { + log.Printf("ERROR creating backup file: %v", err) + http.Error(w, "internal error", http.StatusInternalServerError) + return + } + + bytesWritten, err := io.Copy(file, r.Body) + _ = file.Close() + + if err != nil { + log.Printf("ERROR writing backup data: %v", err) + http.Error(w, "internal error", http.StatusInternalServerError) + return + } + + s.mu.Lock() + s.backupID = backupID + s.backupFilePath = filePath + 
s.backupCreatedAt = time.Now().UTC() + s.mu.Unlock() + + log.Printf("POST full-start -> backupID=%s, size=%d bytes", backupID, bytesWritten) + + w.Header().Set("Content-Type", "application/json") + _ = json.NewEncoder(w).Encode(map[string]string{"backupId": backupID}) +} + +func (s *server) handleFullComplete(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodPost { + http.Error(w, "POST only", http.StatusMethodNotAllowed) + return + } + + var body struct { + BackupID string `json:"backupId"` + StartSegment string `json:"startSegment"` + StopSegment string `json:"stopSegment"` + Error *string `json:"error,omitempty"` + } + + if err := json.NewDecoder(r.Body).Decode(&body); err != nil { + http.Error(w, err.Error(), http.StatusBadRequest) + return + } + + if body.Error != nil { + log.Printf("POST full-complete -> backupID=%s ERROR: %s", body.BackupID, *body.Error) + w.WriteHeader(http.StatusOK) + return + } + + s.mu.Lock() + s.startSegment = body.StartSegment + s.stopSegment = body.StopSegment + s.isFinalized = true + s.mu.Unlock() + + log.Printf( + "POST full-complete -> backupID=%s, start=%s, stop=%s", + body.BackupID, + body.StartSegment, + body.StopSegment, + ) + + w.WriteHeader(http.StatusOK) +} + +func (s *server) handleWalUpload(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodPost { + http.Error(w, "POST only", http.StatusMethodNotAllowed) + return + } + + segmentName := r.Header.Get("X-Wal-Segment-Name") + if segmentName == "" { + http.Error(w, "missing X-Wal-Segment-Name header", http.StatusBadRequest) + return + } + + walBackupID := generateID() + filePath := filepath.Join(backupStorageDir, walBackupID+".zst") + + file, err := os.Create(filePath) + if err != nil { + log.Printf("ERROR creating WAL file: %v", err) + http.Error(w, "internal error", http.StatusInternalServerError) + return + } + + bytesWritten, err := io.Copy(file, r.Body) + _ = file.Close() + + if err != nil { + log.Printf("ERROR writing WAL data: %v", 
err) + http.Error(w, "internal error", http.StatusInternalServerError) + return + } + + s.mu.Lock() + s.walSegments = append(s.walSegments, walSegment{ + BackupID: walBackupID, + SegmentName: segmentName, + FilePath: filePath, + SizeBytes: bytesWritten, + }) + s.mu.Unlock() + + log.Printf("POST wal-upload -> segment=%s, walBackupID=%s, size=%d", segmentName, walBackupID, bytesWritten) + + w.WriteHeader(http.StatusNoContent) +} + +func (s *server) handleError(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodPost { + http.Error(w, "POST only", http.StatusMethodNotAllowed) + return + } + + var body struct { + Error string `json:"error"` + } + + if err := json.NewDecoder(r.Body).Decode(&body); err != nil { + log.Printf("POST error -> failed to decode: %v", err) + } else { + log.Printf("POST error -> %s", body.Error) + } + + w.WriteHeader(http.StatusOK) +} + +// --- Restore handlers --- + +func (s *server) handleRestorePlan(w http.ResponseWriter, _ *http.Request) { + s.mu.RLock() + defer s.mu.RUnlock() + + if !s.isFinalized { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusBadRequest) + _ = json.NewEncoder(w).Encode(map[string]string{ + "error": "no_backups", + "message": "No full backups available", + }) + return + } + + backupFileInfo, err := os.Stat(s.backupFilePath) + if err != nil { + log.Printf("ERROR stat backup file: %v", err) + http.Error(w, "internal error", http.StatusInternalServerError) + return + } + + backupSizeBytes := backupFileInfo.Size() + totalSizeBytes := backupSizeBytes + + walSegmentsJSON := make([]map[string]any, 0, len(s.walSegments)) + + latestSegment := "" + + for _, segment := range s.walSegments { + totalSizeBytes += segment.SizeBytes + latestSegment = segment.SegmentName + + walSegmentsJSON = append(walSegmentsJSON, map[string]any{ + "backupId": segment.BackupID, + "segmentName": segment.SegmentName, + "sizeBytes": segment.SizeBytes, + }) + } + + response := map[string]any{ + 
"fullBackup": map[string]any{ + "id": s.backupID, + "fullBackupWalStartSegment": s.startSegment, + "fullBackupWalStopSegment": s.stopSegment, + "pgVersion": "17", + "createdAt": s.backupCreatedAt.Format(time.RFC3339), + "sizeBytes": backupSizeBytes, + }, + "walSegments": walSegmentsJSON, + "totalSizeBytes": totalSizeBytes, + "latestAvailableSegment": latestSegment, + } + + log.Printf("GET restore-plan -> backupID=%s, walSegments=%d", s.backupID, len(s.walSegments)) + + w.Header().Set("Content-Type", "application/json") + _ = json.NewEncoder(w).Encode(response) +} + +func (s *server) handleRestoreDownload(w http.ResponseWriter, r *http.Request) { + requestedBackupID := r.URL.Query().Get("backupId") + if requestedBackupID == "" { + http.Error(w, "missing backupId query param", http.StatusBadRequest) + return + } + + filePath := s.findBackupFile(requestedBackupID) + if filePath == "" { + log.Printf("GET restore-download -> backupId=%s NOT FOUND", requestedBackupID) + http.Error(w, "backup not found", http.StatusNotFound) + return + } + + log.Printf("GET restore-download -> backupId=%s, file=%s", requestedBackupID, filePath) + + http.ServeFile(w, r, filePath) +} + +// --- Mock control handlers --- + func (s *server) handleSetVersion(w http.ResponseWriter, r *http.Request) { if r.Method != http.MethodPost { http.Error(w, "POST only", http.StatusMethodNotAllowed) @@ -65,6 +365,7 @@ func (s *server) handleSetVersion(w http.ResponseWriter, r *http.Request) { var body struct { Version string `json:"version"` } + if err := json.NewDecoder(r.Body).Decode(&body); err != nil { http.Error(w, err.Error(), http.StatusBadRequest) return @@ -88,6 +389,7 @@ func (s *server) handleSetBinaryPath(w http.ResponseWriter, r *http.Request) { var body struct { BinaryPath string `json:"binaryPath"` } + if err := json.NewDecoder(r.Body).Decode(&body); err != nil { http.Error(w, err.Error(), http.StatusBadRequest) return @@ -102,7 +404,74 @@ func (s *server) handleSetBinaryPath(w 
http.ResponseWriter, r *http.Request) { _, _ = fmt.Fprintf(w, "binary path set to %s", body.BinaryPath) } +func (s *server) handleBackupStatus(w http.ResponseWriter, _ *http.Request) { + s.mu.RLock() + isFinalized := s.isFinalized + walSegmentCount := len(s.walSegments) + s.mu.RUnlock() + + w.Header().Set("Content-Type", "application/json") + _ = json.NewEncoder(w).Encode(map[string]any{ + "isFinalized": isFinalized, + "walSegmentCount": walSegmentCount, + }) +} + +func (s *server) handleReset(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodPost { + http.Error(w, "POST only", http.StatusMethodNotAllowed) + return + } + + s.mu.Lock() + s.backupID = "" + s.backupFilePath = "" + s.startSegment = "" + s.stopSegment = "" + s.isFinalized = false + s.walSegments = nil + s.backupCreatedAt = time.Time{} + s.mu.Unlock() + + // Clean stored files + entries, _ := os.ReadDir(backupStorageDir) + for _, entry := range entries { + _ = os.Remove(filepath.Join(backupStorageDir, entry.Name())) + } + + log.Printf("POST /mock/reset -> state cleared") + + w.WriteHeader(http.StatusOK) + _, _ = w.Write([]byte("ok")) +} + func (s *server) handleHealth(w http.ResponseWriter, _ *http.Request) { w.WriteHeader(http.StatusOK) _, _ = w.Write([]byte("ok")) } + +// --- Private helpers --- + +func generateID() string { + b := make([]byte, 16) + _, _ = rand.Read(b) + + return fmt.Sprintf("%08x-%04x-%04x-%04x-%012x", b[0:4], b[4:6], b[6:8], b[8:10], b[10:16]) +} + +func (s *server) findBackupFile(backupID string) string { + s.mu.RLock() + defer s.mu.RUnlock() + + if s.backupID == backupID { + return s.backupFilePath + } + + for _, segment := range s.walSegments { + if segment.BackupID == backupID { + return segment.FilePath + } + } + + return "" +} diff --git a/agent/e2e/scripts/backup-restore-helpers.sh b/agent/e2e/scripts/backup-restore-helpers.sh new file mode 100644 index 0000000..499c1c4 --- /dev/null +++ b/agent/e2e/scripts/backup-restore-helpers.sh @@ -0,0 +1,357 @@ 
+#!/bin/bash +# Shared helper functions for backup-restore E2E tests. +# Source this file from test scripts: source "$(dirname "$0")/backup-restore-helpers.sh" + +AGENT="/tmp/test-agent" +AGENT_PID="" + +cleanup_agent() { + if [ -n "$AGENT_PID" ]; then + kill "$AGENT_PID" 2>/dev/null || true + wait "$AGENT_PID" 2>/dev/null || true + AGENT_PID="" + fi + + pkill -f "test-agent" 2>/dev/null || true + for i in $(seq 1 20); do + pgrep -f "test-agent" > /dev/null 2>&1 || break + sleep 0.5 + done + pkill -9 -f "test-agent" 2>/dev/null || true + sleep 0.5 + + rm -f "$AGENT" "$AGENT.update" databasus.lock databasus.log databasus.log.old databasus.json 2>/dev/null || true +} + +setup_agent() { + local artifacts="${1:-/opt/agent/artifacts}" + + cleanup_agent + cp "$artifacts/agent-v1" "$AGENT" + chmod +x "$AGENT" +} + +init_pg_local() { + local pgdata="$1" + local port="$2" + local wal_queue="$3" + local pg_bin_dir="$4" + + # Stop any leftover PG from previous test runs + su postgres -c "$pg_bin_dir/pg_ctl -D $pgdata stop -m immediate" 2>/dev/null || true + su postgres -c "$pg_bin_dir/pg_ctl -D /tmp/restore-pgdata stop -m immediate" 2>/dev/null || true + + mkdir -p "$wal_queue" + chown postgres:postgres "$wal_queue" + rm -rf "$pgdata" + + su postgres -c "$pg_bin_dir/initdb -D $pgdata" > /dev/null + + cat >> "$pgdata/postgresql.conf" < "$pgdata/pg_hba.conf" + echo "host all all 127.0.0.1/32 trust" >> "$pgdata/pg_hba.conf" + echo "host all all ::1/128 trust" >> "$pgdata/pg_hba.conf" + echo "local replication all trust" >> "$pgdata/pg_hba.conf" + echo "host replication all 127.0.0.1/32 trust" >> "$pgdata/pg_hba.conf" + echo "host replication all ::1/128 trust" >> "$pgdata/pg_hba.conf" + + su postgres -c "$pg_bin_dir/pg_ctl -D $pgdata -l /tmp/pg.log start -w" + + su postgres -c "$pg_bin_dir/psql -p $port -c \"CREATE USER testuser WITH SUPERUSER REPLICATION;\"" > /dev/null 2>&1 || true + su postgres -c "$pg_bin_dir/psql -p $port -c \"CREATE DATABASE testdb OWNER testuser;\"" > 
/dev/null 2>&1 || true + + echo "PostgreSQL initialized and started on port $port" +} + +insert_test_data() { + local port="$1" + local pg_bin_dir="$2" + + su postgres -c "$pg_bin_dir/psql -p $port -U testuser -d testdb" < /dev/null + echo "Checkpoint forced" +} + +run_agent_backup() { + local mock_server="$1" + local pg_host="$2" + local pg_port="$3" + local wal_queue="$4" + local pg_type="$5" + local pg_host_bin_dir="${6:-}" + local pg_docker_container="${7:-}" + + # Reset mock server state and set version to match agent (prevents background upgrade loop) + curl -sf -X POST "$mock_server/mock/reset" > /dev/null + curl -sf -X POST "$mock_server/mock/set-version" \ + -H "Content-Type: application/json" \ + -d '{"version":"v1.0.0"}' > /dev/null + + # Build JSON config + cd /tmp + + local extra_fields="" + if [ -n "$pg_host_bin_dir" ]; then + extra_fields="$extra_fields\"pgHostBinDir\": \"$pg_host_bin_dir\"," + fi + if [ -n "$pg_docker_container" ]; then + extra_fields="$extra_fields\"pgDockerContainerName\": \"$pg_docker_container\"," + fi + + cat > databasus.json < /tmp/agent-output.log 2>&1 & + AGENT_PID=$! + + echo "Agent started with PID $AGENT_PID" +} + +generate_wal_background() { + local port="$1" + local pg_bin_dir="$2" + + while true; do + su postgres -c "$pg_bin_dir/psql -p $port -U testuser -d testdb -c \" + INSERT INTO e2e_test_data (name, value) + SELECT 'bulk_' || g, g FROM generate_series(1, 1000) g; + SELECT pg_switch_wal(); + \"" > /dev/null 2>&1 || break + sleep 2 + done +} + +generate_wal_docker_background() { + local container="$1" + + while true; do + docker exec "$container" psql -U testuser -d testdb -c " + INSERT INTO e2e_test_data (name, value) + SELECT 'bulk_' || g, g FROM generate_series(1, 1000) g; + SELECT pg_switch_wal(); + " > /dev/null 2>&1 || break + sleep 2 + done +} + +wait_for_backup_complete() { + local mock_server="$1" + local timeout="${2:-120}" + + echo "Waiting for backup to complete (timeout: ${timeout}s)..." 
+ + for i in $(seq 1 "$timeout"); do + STATUS=$(curl -sf "$mock_server/mock/backup-status" 2>/dev/null || echo '{}') + IS_FINALIZED=$(echo "$STATUS" | grep -o '"isFinalized":true' || true) + WAL_COUNT=$(echo "$STATUS" | grep -o '"walSegmentCount":[0-9]*' | grep -o '[0-9]*$' || echo "0") + + if [ -n "$IS_FINALIZED" ] && [ "$WAL_COUNT" -gt 0 ]; then + echo "Backup complete: finalized with $WAL_COUNT WAL segments" + return 0 + fi + + sleep 1 + done + + echo "FAIL: Backup did not complete within ${timeout} seconds" + echo "Last status: $STATUS" + echo "Agent output:" + cat /tmp/agent-output.log 2>/dev/null || true + return 1 +} + +stop_agent() { + if [ -n "$AGENT_PID" ]; then + kill "$AGENT_PID" 2>/dev/null || true + wait "$AGENT_PID" 2>/dev/null || true + AGENT_PID="" + fi + + echo "Agent stopped" +} + +stop_pg() { + local pgdata="$1" + local pg_bin_dir="$2" + + su postgres -c "$pg_bin_dir/pg_ctl -D $pgdata stop -m fast" 2>/dev/null || true + + echo "PostgreSQL stopped" +} + +run_agent_restore() { + local mock_server="$1" + local restore_dir="$2" + + rm -rf "$restore_dir" + mkdir -p "$restore_dir" + chown postgres:postgres "$restore_dir" + + cd /tmp + + "$AGENT" restore \ + --skip-update \ + --databasus-host "$mock_server" \ + --token test-token \ + --pgdata "$restore_dir" + + echo "Agent restore completed" +} + +start_restored_pg() { + local restore_dir="$1" + local port="$2" + local pg_bin_dir="$3" + + # Ensure port is set in restored config + if ! grep -q "^port" "$restore_dir/postgresql.conf" 2>/dev/null; then + echo "port = $port" >> "$restore_dir/postgresql.conf" + fi + + # Ensure listen_addresses is set + if ! grep -q "^listen_addresses" "$restore_dir/postgresql.conf" 2>/dev/null; then + echo "listen_addresses = 'localhost'" >> "$restore_dir/postgresql.conf" + fi + + chown -R postgres:postgres "$restore_dir" + chmod 700 "$restore_dir" + + if ! 
su postgres -c "$pg_bin_dir/pg_ctl -D $restore_dir -l /tmp/pg-restore.log start -w"; then + echo "FAIL: PostgreSQL failed to start on restored data" + echo "--- pg-restore.log ---" + cat /tmp/pg-restore.log 2>/dev/null || echo "(no log file)" + echo "--- postgresql.auto.conf ---" + cat "$restore_dir/postgresql.auto.conf" 2>/dev/null || echo "(no file)" + echo "--- pg_wal/ listing ---" + ls -la "$restore_dir/pg_wal/" 2>/dev/null || echo "(no pg_wal dir)" + echo "--- databasus-wal-restore/ listing ---" + ls -la "$restore_dir/databasus-wal-restore/" 2>/dev/null || echo "(no dir)" + echo "--- end diagnostics ---" + return 1 + fi + + echo "PostgreSQL started on restored data" +} + +wait_for_recovery_complete() { + local port="$1" + local pg_bin_dir="$2" + local timeout="${3:-60}" + + echo "Waiting for recovery to complete (timeout: ${timeout}s)..." + + for i in $(seq 1 "$timeout"); do + IS_READY=$(su postgres -c "$pg_bin_dir/pg_isready -p $port" 2>&1 || true) + + if echo "$IS_READY" | grep -q "accepting connections"; then + IN_RECOVERY=$(su postgres -c "$pg_bin_dir/psql -p $port -U testuser -d testdb -t -c 'SELECT pg_is_in_recovery();'" 2>/dev/null | tr -d ' \n' || echo "t") + + if [ "$IN_RECOVERY" = "f" ]; then + echo "PostgreSQL recovered and promoted to primary" + return 0 + fi + fi + + sleep 1 + done + + echo "FAIL: PostgreSQL did not recover within ${timeout} seconds" + echo "Recovery log:" + cat /tmp/pg-restore.log 2>/dev/null || true + return 1 +} + +verify_restored_data() { + local port="$1" + local pg_bin_dir="$2" + + ROW_COUNT=$(su postgres -c "$pg_bin_dir/psql -p $port -U testuser -d testdb -t -c 'SELECT COUNT(*) FROM e2e_test_data;'" | tr -d ' \n') + + if [ "$ROW_COUNT" -lt 3 ]; then + echo "FAIL: Expected at least 3 rows, got $ROW_COUNT" + su postgres -c "$pg_bin_dir/psql -p $port -U testuser -d testdb -c 'SELECT * FROM e2e_test_data;'" + return 1 + fi + + RESULT=$(su postgres -c "$pg_bin_dir/psql -p $port -U testuser -d testdb -t -c \"SELECT value FROM 
e2e_test_data WHERE name='row1';\"" | tr -d ' \n') + + if [ "$RESULT" != "100" ]; then + echo "FAIL: Expected row1 value=100, got $RESULT" + return 1 + fi + + RESULT2=$(su postgres -c "$pg_bin_dir/psql -p $port -U testuser -d testdb -t -c \"SELECT value FROM e2e_test_data WHERE name='row3';\"" | tr -d ' \n') + + if [ "$RESULT2" != "300" ]; then + echo "FAIL: Expected row3 value=300, got $RESULT2" + return 1 + fi + + echo "PASS: Found $ROW_COUNT rows, data integrity verified" + return 0 +} + +find_pg_bin_dir() { + # Find the PG bin dir from the installed version + local pg_config_path + pg_config_path=$(which pg_config 2>/dev/null || true) + + if [ -n "$pg_config_path" ]; then + pg_config --bindir + return + fi + + # Fallback: search common locations + for version in 18 17 16 15; do + if [ -d "/usr/lib/postgresql/$version/bin" ]; then + echo "/usr/lib/postgresql/$version/bin" + return + fi + done + + echo "ERROR: Cannot find PostgreSQL bin directory" >&2 + return 1 +} diff --git a/agent/e2e/scripts/run-all.sh b/agent/e2e/scripts/run-all.sh index a026e4d..05894d7 100644 --- a/agent/e2e/scripts/run-all.sh +++ b/agent/e2e/scripts/run-all.sh @@ -28,11 +28,11 @@ if [ "$MODE" = "host" ]; then run_test "Test 1: Upgrade success (v1 -> v2)" "$SCRIPT_DIR/test-upgrade-success.sh" run_test "Test 2: Upgrade skip (version matches)" "$SCRIPT_DIR/test-upgrade-skip.sh" run_test "Test 3: Background upgrade (v1 -> v2 while running)" "$SCRIPT_DIR/test-upgrade-background.sh" - run_test "Test 4: pg_basebackup in PATH" "$SCRIPT_DIR/test-pg-host-path.sh" - run_test "Test 5: pg_basebackup via bindir" "$SCRIPT_DIR/test-pg-host-bindir.sh" + run_test "Test 4: Backup-restore via host PATH" "$SCRIPT_DIR/test-pg-host-path.sh" + run_test "Test 5: Backup-restore via host bindir" "$SCRIPT_DIR/test-pg-host-bindir.sh" elif [ "$MODE" = "docker" ]; then - run_test "Test 6: pg_basebackup via docker exec" "$SCRIPT_DIR/test-pg-docker-exec.sh" + run_test "Test 6: Backup-restore via docker exec" 
"$SCRIPT_DIR/test-pg-docker-exec.sh" else echo "Unknown mode: $MODE (expected 'host' or 'docker')" diff --git a/agent/e2e/scripts/test-pg-docker-exec.sh b/agent/e2e/scripts/test-pg-docker-exec.sh index bc166bb..71473aa 100644 --- a/agent/e2e/scripts/test-pg-docker-exec.sh +++ b/agent/e2e/scripts/test-pg-docker-exec.sh @@ -1,23 +1,18 @@ #!/bin/bash set -euo pipefail -ARTIFACTS="/opt/agent/artifacts" -AGENT="/tmp/test-agent" +SCRIPT_DIR="$(dirname "$0")" +source "$SCRIPT_DIR/backup-restore-helpers.sh" + +MOCK_SERVER="${MOCK_SERVER_OVERRIDE:-http://e2e-mock-server:4050}" PG_CONTAINER="e2e-agent-postgres" +RESTORE_PGDATA="/tmp/restore-pgdata" +WAL_QUEUE="/wal-queue" +PG_PORT=5432 -# Cleanup from previous runs -pkill -f "test-agent" 2>/dev/null || true -for i in $(seq 1 20); do - pgrep -f "test-agent" > /dev/null 2>&1 || break - sleep 0.5 -done -pkill -9 -f "test-agent" 2>/dev/null || true -sleep 0.5 -rm -f "$AGENT" "$AGENT.update" databasus.lock databasus.log databasus.log.old databasus.json 2>/dev/null || true - -# Copy agent binary -cp "$ARTIFACTS/agent-v1" "$AGENT" -chmod +x "$AGENT" +# For restore verification we need a local PG bin dir +PG_BIN_DIR=$(find_pg_bin_dir) +echo "Using local PG bin dir for restore verification: $PG_BIN_DIR" # Verify docker CLI works and PG container is accessible if ! docker exec "$PG_CONTAINER" pg_basebackup --version > /dev/null 2>&1; then @@ -25,37 +20,76 @@ if ! docker exec "$PG_CONTAINER" pg_basebackup --version > /dev/null 2>&1; then exit 1 fi -# Run start with --skip-update and pg-type=docker -echo "Running agent start (pg_basebackup via docker exec)..." 
-OUTPUT=$("$AGENT" start \ - --skip-update \ - --databasus-host http://e2e-mock-server:4050 \ - --db-id test-db-id \ - --token test-token \ - --pg-host e2e-postgres \ - --pg-port 5432 \ - --pg-user testuser \ - --pg-password testpassword \ - --pg-wal-dir /tmp/wal \ - --pg-type docker \ - --pg-docker-container-name "$PG_CONTAINER" 2>&1) +echo "=== Phase 1: Setup agent ===" +setup_agent -EXIT_CODE=$? -echo "$OUTPUT" +echo "=== Phase 2: Insert test data into containerized PostgreSQL ===" +docker exec "$PG_CONTAINER" psql -U testuser -d testdb -c " +CREATE TABLE IF NOT EXISTS e2e_test_data ( + id SERIAL PRIMARY KEY, + name TEXT NOT NULL, + value INT NOT NULL, + created_at TIMESTAMPTZ DEFAULT NOW() +); +DELETE FROM e2e_test_data; +INSERT INTO e2e_test_data (name, value) VALUES + ('row1', 100), + ('row2', 200), + ('row3', 300); +" +echo "Test data inserted (3 rows)" -if [ "$EXIT_CODE" -ne 0 ]; then - echo "FAIL: Agent exited with code $EXIT_CODE" - exit 1 -fi +echo "=== Phase 3: Start agent backup (docker exec mode) ===" +curl -sf -X POST "$MOCK_SERVER/mock/reset" > /dev/null -if ! echo "$OUTPUT" | grep -q "pg_basebackup verified (docker)"; then - echo "FAIL: Expected output to contain 'pg_basebackup verified (docker)'" - exit 1 -fi +cd /tmp +cat > databasus.json < /tmp/agent-output.log 2>&1 & +AGENT_PID=$! +echo "Agent started with PID $AGENT_PID" -echo "pg_basebackup found via docker exec and DB connection verified" +echo "=== Phase 4: Generate WAL in background ===" +generate_wal_docker_background "$PG_CONTAINER" & +WAL_GEN_PID=$! 
+ +echo "=== Phase 5: Wait for backup to complete ===" +wait_for_backup_complete "$MOCK_SERVER" 120 + +echo "=== Phase 6: Stop WAL generator and agent ===" +kill $WAL_GEN_PID 2>/dev/null || true +wait $WAL_GEN_PID 2>/dev/null || true +stop_agent + +echo "=== Phase 7: Restore to local directory ===" +run_agent_restore "$MOCK_SERVER" "$RESTORE_PGDATA" + +echo "=== Phase 8: Start local PostgreSQL on restored data ===" +# Use a different port to avoid conflict with the containerized PG +RESTORE_PORT=5433 +start_restored_pg "$RESTORE_PGDATA" "$RESTORE_PORT" "$PG_BIN_DIR" + +echo "=== Phase 9: Wait for recovery ===" +wait_for_recovery_complete "$RESTORE_PORT" "$PG_BIN_DIR" 60 + +echo "=== Phase 10: Verify data ===" +verify_restored_data "$RESTORE_PORT" "$PG_BIN_DIR" + +echo "=== Phase 11: Cleanup ===" +stop_pg "$RESTORE_PGDATA" "$PG_BIN_DIR" + +echo "pg_basebackup via docker exec: full backup-restore lifecycle passed" diff --git a/agent/e2e/scripts/test-pg-host-bindir.sh b/agent/e2e/scripts/test-pg-host-bindir.sh index 3be4033..fb681f9 100644 --- a/agent/e2e/scripts/test-pg-host-bindir.sh +++ b/agent/e2e/scripts/test-pg-host-bindir.sh @@ -1,67 +1,62 @@ #!/bin/bash set -euo pipefail -ARTIFACTS="/opt/agent/artifacts" -AGENT="/tmp/test-agent" +SCRIPT_DIR="$(dirname "$0")" +source "$SCRIPT_DIR/backup-restore-helpers.sh" + +MOCK_SERVER="${MOCK_SERVER_OVERRIDE:-http://e2e-mock-server:4050}" +PGDATA="/tmp/pgdata" +RESTORE_PGDATA="/tmp/restore-pgdata" +WAL_QUEUE="/tmp/wal-queue" +PG_PORT=5433 CUSTOM_BIN_DIR="/opt/pg/bin" -# Cleanup from previous runs -pkill -f "test-agent" 2>/dev/null || true -for i in $(seq 1 20); do - pgrep -f "test-agent" > /dev/null 2>&1 || break - sleep 0.5 -done -pkill -9 -f "test-agent" 2>/dev/null || true -sleep 0.5 -rm -f "$AGENT" "$AGENT.update" databasus.lock databasus.log databasus.log.old databasus.json 2>/dev/null || true +PG_BIN_DIR=$(find_pg_bin_dir) +echo "Using PG bin dir: $PG_BIN_DIR" -# Copy agent binary -cp "$ARTIFACTS/agent-v1" "$AGENT" 
-chmod +x "$AGENT" - -# Move pg_basebackup out of PATH into custom directory +# Copy pg_basebackup to a custom directory (simulates non-PATH installation) mkdir -p "$CUSTOM_BIN_DIR" -cp "$(which pg_basebackup)" "$CUSTOM_BIN_DIR/pg_basebackup" +cp "$PG_BIN_DIR/pg_basebackup" "$CUSTOM_BIN_DIR/pg_basebackup" -# Hide the system one by prepending an empty dir to PATH -export PATH="/opt/empty-path:$PATH" -mkdir -p /opt/empty-path +echo "=== Phase 1: Setup agent ===" +setup_agent -# Verify pg_basebackup is NOT directly callable from default location -# (we copied it, but the original is still there in debian — so we test -# that the agent uses the custom dir, not PATH, by checking the output) +echo "=== Phase 2: Initialize PostgreSQL ===" +init_pg_local "$PGDATA" "$PG_PORT" "$WAL_QUEUE" "$PG_BIN_DIR" -# Run start with --skip-update and custom bin dir -echo "Running agent start (pg_basebackup via --pg-host-bin-dir)..." -OUTPUT=$("$AGENT" start \ - --skip-update \ - --databasus-host http://e2e-mock-server:4050 \ - --db-id test-db-id \ - --token test-token \ - --pg-host e2e-postgres \ - --pg-port 5432 \ - --pg-user testuser \ - --pg-password testpassword \ - --pg-wal-dir /tmp/wal \ - --pg-type host \ - --pg-host-bin-dir "$CUSTOM_BIN_DIR" 2>&1) +echo "=== Phase 3: Insert test data ===" +insert_test_data "$PG_PORT" "$PG_BIN_DIR" -EXIT_CODE=$? -echo "$OUTPUT" +echo "=== Phase 4: Force checkpoint and start agent backup (using --pg-host-bin-dir) ===" +force_checkpoint "$PG_PORT" "$PG_BIN_DIR" +run_agent_backup "$MOCK_SERVER" "127.0.0.1" "$PG_PORT" "$WAL_QUEUE" "host" "$CUSTOM_BIN_DIR" -if [ "$EXIT_CODE" -ne 0 ]; then - echo "FAIL: Agent exited with code $EXIT_CODE" - exit 1 -fi +echo "=== Phase 5: Generate WAL in background ===" +generate_wal_background "$PG_PORT" "$PG_BIN_DIR" & +WAL_GEN_PID=$! -if ! 
echo "$OUTPUT" | grep -q "pg_basebackup verified"; then - echo "FAIL: Expected output to contain 'pg_basebackup verified'" - exit 1 -fi +echo "=== Phase 6: Wait for backup to complete ===" +wait_for_backup_complete "$MOCK_SERVER" 120 -if ! echo "$OUTPUT" | grep -q "PostgreSQL connection verified"; then - echo "FAIL: Expected output to contain 'PostgreSQL connection verified'" - exit 1 -fi +echo "=== Phase 7: Stop WAL generator, agent, and PostgreSQL ===" +kill $WAL_GEN_PID 2>/dev/null || true +wait $WAL_GEN_PID 2>/dev/null || true +stop_agent +stop_pg "$PGDATA" "$PG_BIN_DIR" -echo "pg_basebackup found via custom bin dir and DB connection verified" +echo "=== Phase 8: Restore ===" +run_agent_restore "$MOCK_SERVER" "$RESTORE_PGDATA" + +echo "=== Phase 9: Start PostgreSQL on restored data ===" +start_restored_pg "$RESTORE_PGDATA" "$PG_PORT" "$PG_BIN_DIR" + +echo "=== Phase 10: Wait for recovery ===" +wait_for_recovery_complete "$PG_PORT" "$PG_BIN_DIR" 60 + +echo "=== Phase 11: Verify data ===" +verify_restored_data "$PG_PORT" "$PG_BIN_DIR" + +echo "=== Phase 12: Cleanup ===" +stop_pg "$RESTORE_PGDATA" "$PG_BIN_DIR" + +echo "pg_basebackup via custom bindir: full backup-restore lifecycle passed" diff --git a/agent/e2e/scripts/test-pg-host-path.sh b/agent/e2e/scripts/test-pg-host-path.sh index bf6f5d8..ff081ed 100644 --- a/agent/e2e/scripts/test-pg-host-path.sh +++ b/agent/e2e/scripts/test-pg-host-path.sh @@ -1,22 +1,17 @@ #!/bin/bash set -euo pipefail -ARTIFACTS="/opt/agent/artifacts" -AGENT="/tmp/test-agent" +SCRIPT_DIR="$(dirname "$0")" +source "$SCRIPT_DIR/backup-restore-helpers.sh" -# Cleanup from previous runs -pkill -f "test-agent" 2>/dev/null || true -for i in $(seq 1 20); do - pgrep -f "test-agent" > /dev/null 2>&1 || break - sleep 0.5 -done -pkill -9 -f "test-agent" 2>/dev/null || true -sleep 0.5 -rm -f "$AGENT" "$AGENT.update" databasus.lock databasus.log databasus.log.old databasus.json 2>/dev/null || true 
+MOCK_SERVER="${MOCK_SERVER_OVERRIDE:-http://e2e-mock-server:4050}" +PGDATA="/tmp/pgdata" +RESTORE_PGDATA="/tmp/restore-pgdata" +WAL_QUEUE="/tmp/wal-queue" +PG_PORT=5433 -# Copy agent binary -cp "$ARTIFACTS/agent-v1" "$AGENT" -chmod +x "$AGENT" +PG_BIN_DIR=$(find_pg_bin_dir) +echo "Using PG bin dir: $PG_BIN_DIR" # Verify pg_basebackup is in PATH if ! which pg_basebackup > /dev/null 2>&1; then @@ -24,36 +19,45 @@ if ! which pg_basebackup > /dev/null 2>&1; then exit 1 fi -# Run start with --skip-update and pg-type=host -echo "Running agent start (pg_basebackup in PATH)..." -OUTPUT=$("$AGENT" start \ - --skip-update \ - --databasus-host http://e2e-mock-server:4050 \ - --db-id test-db-id \ - --token test-token \ - --pg-host e2e-postgres \ - --pg-port 5432 \ - --pg-user testuser \ - --pg-password testpassword \ - --pg-wal-dir /tmp/wal \ - --pg-type host 2>&1) +echo "=== Phase 1: Setup agent ===" +setup_agent -EXIT_CODE=$? -echo "$OUTPUT" +echo "=== Phase 2: Initialize PostgreSQL ===" +init_pg_local "$PGDATA" "$PG_PORT" "$WAL_QUEUE" "$PG_BIN_DIR" -if [ "$EXIT_CODE" -ne 0 ]; then - echo "FAIL: Agent exited with code $EXIT_CODE" - exit 1 -fi +echo "=== Phase 3: Insert test data ===" +insert_test_data "$PG_PORT" "$PG_BIN_DIR" -if ! echo "$OUTPUT" | grep -q "pg_basebackup verified"; then - echo "FAIL: Expected output to contain 'pg_basebackup verified'" - exit 1 -fi +echo "=== Phase 4: Force checkpoint and start agent backup ===" +force_checkpoint "$PG_PORT" "$PG_BIN_DIR" +run_agent_backup "$MOCK_SERVER" "127.0.0.1" "$PG_PORT" "$WAL_QUEUE" "host" -if ! echo "$OUTPUT" | grep -q "PostgreSQL connection verified"; then - echo "FAIL: Expected output to contain 'PostgreSQL connection verified'" - exit 1 -fi +echo "=== Phase 5: Generate WAL in background ===" +generate_wal_background "$PG_PORT" "$PG_BIN_DIR" & +WAL_GEN_PID=$! 
-echo "pg_basebackup found in PATH and DB connection verified" +echo "=== Phase 6: Wait for backup to complete ===" +wait_for_backup_complete "$MOCK_SERVER" 120 + +echo "=== Phase 7: Stop WAL generator, agent, and PostgreSQL ===" +kill $WAL_GEN_PID 2>/dev/null || true +wait $WAL_GEN_PID 2>/dev/null || true +stop_agent +stop_pg "$PGDATA" "$PG_BIN_DIR" + +echo "=== Phase 8: Restore ===" +run_agent_restore "$MOCK_SERVER" "$RESTORE_PGDATA" + +echo "=== Phase 9: Start PostgreSQL on restored data ===" +start_restored_pg "$RESTORE_PGDATA" "$PG_PORT" "$PG_BIN_DIR" + +echo "=== Phase 10: Wait for recovery ===" +wait_for_recovery_complete "$PG_PORT" "$PG_BIN_DIR" 60 + +echo "=== Phase 11: Verify data ===" +verify_restored_data "$PG_PORT" "$PG_BIN_DIR" + +echo "=== Phase 12: Cleanup ===" +stop_pg "$RESTORE_PGDATA" "$PG_BIN_DIR" + +echo "pg_basebackup in PATH: full backup-restore lifecycle passed" diff --git a/agent/internal/features/api/api.go b/agent/internal/features/api/api.go index 54da8f3..ee0c952 100644 --- a/agent/internal/features/api/api.go +++ b/agent/internal/features/api/api.go @@ -7,6 +7,7 @@ import ( "io" "log/slog" "net/http" + "net/url" "os" "time" @@ -14,25 +15,30 @@ import ( ) const ( - chainValidPath = "/api/v1/backups/postgres/wal/is-wal-chain-valid-since-last-full-backup" - nextBackupTimePath = "/api/v1/backups/postgres/wal/next-full-backup-time" - walUploadPath = "/api/v1/backups/postgres/wal/upload/wal" - fullStartPath = "/api/v1/backups/postgres/wal/upload/full-start" - fullCompletePath = "/api/v1/backups/postgres/wal/upload/full-complete" - reportErrorPath = "/api/v1/backups/postgres/wal/error" - versionPath = "/api/v1/system/version" - agentBinaryPath = "/api/v1/system/agent" + chainValidPath = "/api/v1/backups/postgres/wal/is-wal-chain-valid-since-last-full-backup" + nextBackupTimePath = "/api/v1/backups/postgres/wal/next-full-backup-time" + walUploadPath = "/api/v1/backups/postgres/wal/upload/wal" + fullStartPath = 
"/api/v1/backups/postgres/wal/upload/full-start" + fullCompletePath = "/api/v1/backups/postgres/wal/upload/full-complete" + reportErrorPath = "/api/v1/backups/postgres/wal/error" + restorePlanPath = "/api/v1/backups/postgres/wal/restore/plan" + restoreDownloadPath = "/api/v1/backups/postgres/wal/restore/download" + versionPath = "/api/v1/system/version" + agentBinaryPath = "/api/v1/system/agent" apiCallTimeout = 30 * time.Second maxRetryAttempts = 3 retryBaseDelay = 1 * time.Second ) +// For stream uploads (basebackup and WAL segments) the standard resty client is not used, +// because it buffers the entire body in memory before sending. type Client struct { - json *resty.Client - stream *resty.Client - host string - log *slog.Logger + json *resty.Client + streamHTTP *http.Client + host string + token string + log *slog.Logger } func NewClient(host, token string, log *slog.Logger) *Client { @@ -54,14 +60,12 @@ func NewClient(host, token string, log *slog.Logger) *Client { }). OnBeforeRequest(setAuth) - streamClient := resty.New(). - OnBeforeRequest(setAuth) - return &Client{ - json: jsonClient, - stream: streamClient, - host: host, - log: log, + json: jsonClient, + streamHTTP: &http.Client{}, + host: host, + token: token, + log: log, } } @@ -117,25 +121,28 @@ func (c *Client) UploadBasebackup( ctx context.Context, body io.Reader, ) (*UploadBasebackupResponse, error) { - resp, err := c.stream.R(). - SetContext(ctx). - SetBody(body). - SetHeader("Content-Type", "application/octet-stream"). - SetDoNotParseResponse(true). 
- Post(c.buildURL(fullStartPath)) + req, err := http.NewRequestWithContext(ctx, http.MethodPost, c.buildURL(fullStartPath), body) + if err != nil { + return nil, fmt.Errorf("create upload request: %w", err) + } + + c.setStreamHeaders(req) + req.Header.Set("Content-Type", "application/octet-stream") + + resp, err := c.streamHTTP.Do(req) if err != nil { return nil, fmt.Errorf("upload request: %w", err) } - defer func() { _ = resp.RawBody().Close() }() + defer func() { _ = resp.Body.Close() }() - if resp.StatusCode() != http.StatusOK { - respBody, _ := io.ReadAll(resp.RawBody()) + if resp.StatusCode != http.StatusOK { + respBody, _ := io.ReadAll(resp.Body) - return nil, fmt.Errorf("upload failed with status %d: %s", resp.StatusCode(), string(respBody)) + return nil, fmt.Errorf("upload failed with status %d: %s", resp.StatusCode, string(respBody)) } var result UploadBasebackupResponse - if err := json.NewDecoder(resp.RawBody()).Decode(&result); err != nil { + if err := json.NewDecoder(resp.Body).Decode(&result); err != nil { return nil, fmt.Errorf("decode upload response: %w", err) } @@ -195,26 +202,29 @@ func (c *Client) UploadWalSegment( segmentName string, body io.Reader, ) (*UploadWalSegmentResult, error) { - resp, err := c.stream.R(). - SetContext(ctx). - SetBody(body). - SetHeader("Content-Type", "application/octet-stream"). - SetHeader("X-Wal-Segment-Name", segmentName). - SetDoNotParseResponse(true). 
- Post(c.buildURL(walUploadPath)) + req, err := http.NewRequestWithContext(ctx, http.MethodPost, c.buildURL(walUploadPath), body) + if err != nil { + return nil, fmt.Errorf("create WAL upload request: %w", err) + } + + c.setStreamHeaders(req) + req.Header.Set("Content-Type", "application/octet-stream") + req.Header.Set("X-Wal-Segment-Name", segmentName) + + resp, err := c.streamHTTP.Do(req) if err != nil { return nil, fmt.Errorf("upload request: %w", err) } - defer func() { _ = resp.RawBody().Close() }() + defer func() { _ = resp.Body.Close() }() - switch resp.StatusCode() { + switch resp.StatusCode { case http.StatusNoContent: return &UploadWalSegmentResult{IsGapDetected: false}, nil case http.StatusConflict: var errResp uploadErrorResponse - if err := json.NewDecoder(resp.RawBody()).Decode(&errResp); err != nil { + if err := json.NewDecoder(resp.Body).Decode(&errResp); err != nil { return &UploadWalSegmentResult{IsGapDetected: true}, nil } @@ -225,12 +235,79 @@ func (c *Client) UploadWalSegment( }, nil default: - respBody, _ := io.ReadAll(resp.RawBody()) + respBody, _ := io.ReadAll(resp.Body) - return nil, fmt.Errorf("upload failed with status %d: %s", resp.StatusCode(), string(respBody)) + return nil, fmt.Errorf("upload failed with status %d: %s", resp.StatusCode, string(respBody)) } } +func (c *Client) GetRestorePlan( + ctx context.Context, + backupID string, +) (*GetRestorePlanResponse, *GetRestorePlanErrorResponse, error) { + request := c.json.R().SetContext(ctx) + + if backupID != "" { + request.SetQueryParam("backupId", backupID) + } + + httpResp, err := request.Get(c.buildURL(restorePlanPath)) + if err != nil { + return nil, nil, fmt.Errorf("get restore plan: %w", err) + } + + switch httpResp.StatusCode() { + case http.StatusOK: + var response GetRestorePlanResponse + if err := json.Unmarshal(httpResp.Body(), &response); err != nil { + return nil, nil, fmt.Errorf("decode restore plan response: %w", err) + } + + return &response, nil, nil + + case 
http.StatusBadRequest: + var errorResponse GetRestorePlanErrorResponse + if err := json.Unmarshal(httpResp.Body(), &errorResponse); err != nil { + return nil, nil, fmt.Errorf("decode restore plan error: %w", err) + } + + return nil, &errorResponse, nil + + default: + return nil, nil, fmt.Errorf("get restore plan: server returned status %d: %s", + httpResp.StatusCode(), httpResp.String()) + } +} + +func (c *Client) DownloadBackupFile( + ctx context.Context, + backupID string, +) (io.ReadCloser, error) { + requestURL := c.buildURL(restoreDownloadPath) + "?" + url.Values{"backupId": {backupID}}.Encode() + + req, err := http.NewRequestWithContext(ctx, http.MethodGet, requestURL, nil) + if err != nil { + return nil, fmt.Errorf("create download request: %w", err) + } + + c.setStreamHeaders(req) + + resp, err := c.streamHTTP.Do(req) + if err != nil { + return nil, fmt.Errorf("download backup file: %w", err) + } + + if resp.StatusCode != http.StatusOK { + respBody, _ := io.ReadAll(resp.Body) + _ = resp.Body.Close() + + return nil, fmt.Errorf("download backup file: server returned status %d: %s", + resp.StatusCode, string(respBody)) + } + + return resp.Body, nil +} + func (c *Client) FetchServerVersion(ctx context.Context) (string, error) { var ver versionResponse @@ -250,27 +327,32 @@ func (c *Client) FetchServerVersion(ctx context.Context) (string, error) { } func (c *Client) DownloadAgentBinary(ctx context.Context, arch, destPath string) error { - resp, err := c.stream.R(). - SetContext(ctx). - SetQueryParam("arch", arch). - SetDoNotParseResponse(true). - Get(c.buildURL(agentBinaryPath)) + requestURL := c.buildURL(agentBinaryPath) + "?" 
+ url.Values{"arch": {arch}}.Encode() + + req, err := http.NewRequestWithContext(ctx, http.MethodGet, requestURL, nil) + if err != nil { + return fmt.Errorf("create agent download request: %w", err) + } + + c.setStreamHeaders(req) + + resp, err := c.streamHTTP.Do(req) if err != nil { return err } - defer func() { _ = resp.RawBody().Close() }() + defer func() { _ = resp.Body.Close() }() - if resp.StatusCode() != http.StatusOK { - return fmt.Errorf("server returned %d for agent download", resp.StatusCode()) + if resp.StatusCode != http.StatusOK { + return fmt.Errorf("server returned %d for agent download", resp.StatusCode) } - f, err := os.Create(destPath) + file, err := os.Create(destPath) if err != nil { return err } - defer func() { _ = f.Close() }() + defer func() { _ = file.Close() }() - _, err = io.Copy(f, resp.RawBody()) + _, err = io.Copy(file, resp.Body) return err } @@ -286,3 +368,9 @@ func (c *Client) checkResponse(resp *resty.Response, method string) error { return nil } + +func (c *Client) setStreamHeaders(req *http.Request) { + if c.token != "" { + req.Header.Set("Authorization", c.token) + } +} diff --git a/agent/internal/features/api/dto.go b/agent/internal/features/api/dto.go index 3495b96..f817f6e 100644 --- a/agent/internal/features/api/dto.go +++ b/agent/internal/features/api/dto.go @@ -42,3 +42,31 @@ type uploadErrorResponse struct { ExpectedSegmentName string `json:"expectedSegmentName"` ReceivedSegmentName string `json:"receivedSegmentName"` } + +type RestorePlanFullBackup struct { + BackupID string `json:"id"` + FullBackupWalStartSegment string `json:"fullBackupWalStartSegment"` + FullBackupWalStopSegment string `json:"fullBackupWalStopSegment"` + PgVersion string `json:"pgVersion"` + CreatedAt time.Time `json:"createdAt"` + SizeBytes int64 `json:"sizeBytes"` +} + +type RestorePlanWalSegment struct { + BackupID string `json:"backupId"` + SegmentName string `json:"segmentName"` + SizeBytes int64 `json:"sizeBytes"` +} + +type 
GetRestorePlanResponse struct { + FullBackup RestorePlanFullBackup `json:"fullBackup"` + WalSegments []RestorePlanWalSegment `json:"walSegments"` + TotalSizeBytes int64 `json:"totalSizeBytes"` + LatestAvailableSegment string `json:"latestAvailableSegment"` +} + +type GetRestorePlanErrorResponse struct { + Error string `json:"error"` + Message string `json:"message"` + LastContiguousSegment string `json:"lastContiguousSegment,omitempty"` +} diff --git a/agent/internal/features/full_backup/backuper.go b/agent/internal/features/full_backup/backuper.go index 478809e..e336311 100644 --- a/agent/internal/features/full_backup/backuper.go +++ b/agent/internal/features/full_backup/backuper.go @@ -38,8 +38,9 @@ type CmdBuilder func(ctx context.Context) *exec.Cmd // On failure the error is reported to the server and the backup retries after 1 minute, indefinitely. // WAL segment uploads (handled by wal.Streamer) continue independently and are not paused. // -// pg_basebackup runs as "pg_basebackup -Ft -D - -X none --verbose". Stdout (tar) is zstd-compressed -// and uploaded to the server. Stderr is parsed for WAL start/stop segment names (LSN → segment arithmetic). +// pg_basebackup runs as "pg_basebackup -Ft -D - -X fetch --verbose --checkpoint=fast". +// Stdout (tar) is zstd-compressed and uploaded to the server. +// Stderr is parsed for WAL start/stop segment names (LSN → segment arithmetic). 
type FullBackuper struct { cfg *config.Config apiClient *api.Client @@ -185,6 +186,11 @@ func (backuper *FullBackuper) executeAndUploadBasebackup(ctx context.Context) er cmdErr := cmd.Wait() if uploadErr != nil { + stderrStr := stderrBuf.String() + if stderrStr != "" { + return fmt.Errorf("upload basebackup: %w (pg_basebackup stderr: %s)", uploadErr, stderrStr) + } + return fmt.Errorf("upload basebackup: %w", uploadErr) } @@ -192,7 +198,7 @@ func (backuper *FullBackuper) executeAndUploadBasebackup(ctx context.Context) er errMsg := fmt.Sprintf("pg_basebackup exited with error: %v (stderr: %s)", cmdErr, stderrBuf.String()) _ = backuper.apiClient.FinalizeBasebackupWithError(ctx, uploadResp.BackupID, errMsg) - return fmt.Errorf("pg_basebackup: %w", cmdErr) + return fmt.Errorf("%s", errMsg) } // Phase 2: Parse stderr for WAL segments and finalize the backup. @@ -266,7 +272,7 @@ func (backuper *FullBackuper) buildHostCmd(ctx context.Context) *exec.Cmd { } cmd := exec.CommandContext(ctx, binary, - "-Ft", "-D", "-", "-X", "none", "--verbose", + "-Ft", "-D", "-", "-X", "fetch", "--verbose", "--checkpoint=fast", "-h", backuper.cfg.PgHost, "-p", fmt.Sprintf("%d", backuper.cfg.PgPort), "-U", backuper.cfg.PgUser, @@ -282,9 +288,9 @@ func (backuper *FullBackuper) buildDockerCmd(ctx context.Context) *exec.Cmd { "-e", "PGPASSWORD="+backuper.cfg.PgPassword, "-i", backuper.cfg.PgDockerContainerName, "pg_basebackup", - "-Ft", "-D", "-", "-X", "none", "--verbose", - "-h", backuper.cfg.PgHost, - "-p", fmt.Sprintf("%d", backuper.cfg.PgPort), + "-Ft", "-D", "-", "-X", "fetch", "--verbose", "--checkpoint=fast", + "-h", "localhost", + "-p", "5432", "-U", backuper.cfg.PgUser, ) diff --git a/agent/internal/features/full_backup/backuper_test.go b/agent/internal/features/full_backup/backuper_test.go index 72ecc09..0f4a8f1 100644 --- a/agent/internal/features/full_backup/backuper_test.go +++ b/agent/internal/features/full_backup/backuper_test.go @@ -632,9 +632,11 @@ func TestHelperProcess(t 
*testing.T) { func validStderr() string { return `pg_basebackup: initiating base backup, waiting for checkpoint to complete pg_basebackup: checkpoint completed -pg_basebackup: write-ahead log start point: 0/2000028, on timeline 1 -pg_basebackup: checkpoint redo point at 0/2000028 +pg_basebackup: write-ahead log start point: 0/2000028 on timeline 1 +pg_basebackup: starting background WAL receiver pg_basebackup: write-ahead log end point: 0/2000100 +pg_basebackup: waiting for background process to finish streaming ... +pg_basebackup: syncing data to disk ... pg_basebackup: base backup completed` } diff --git a/agent/internal/features/full_backup/stderr_parser.go b/agent/internal/features/full_backup/stderr_parser.go index 7c6069c..1414fff 100644 --- a/agent/internal/features/full_backup/stderr_parser.go +++ b/agent/internal/features/full_backup/stderr_parser.go @@ -10,7 +10,7 @@ import ( const defaultWalSegmentSize uint32 = 16 * 1024 * 1024 // 16 MB var ( - startLSNRegex = regexp.MustCompile(`checkpoint redo point at ([0-9A-Fa-f]+/[0-9A-Fa-f]+)`) + startLSNRegex = regexp.MustCompile(`write-ahead log start point: ([0-9A-Fa-f]+/[0-9A-Fa-f]+)`) stopLSNRegex = regexp.MustCompile(`write-ahead log end point: ([0-9A-Fa-f]+/[0-9A-Fa-f]+)`) ) diff --git a/agent/internal/features/full_backup/stderr_parser_test.go b/agent/internal/features/full_backup/stderr_parser_test.go index cd833fc..ad5c492 100644 --- a/agent/internal/features/full_backup/stderr_parser_test.go +++ b/agent/internal/features/full_backup/stderr_parser_test.go @@ -7,12 +7,11 @@ import ( "github.com/stretchr/testify/require" ) -func Test_ParseBasebackupStderr_WithPG17Output_ExtractsCorrectSegments(t *testing.T) { +func Test_ParseBasebackupStderr_WithPG17FetchOutput_ExtractsCorrectSegments(t *testing.T) { stderr := `pg_basebackup: initiating base backup, waiting for checkpoint to complete pg_basebackup: checkpoint completed -pg_basebackup: write-ahead log start point: 0/2000028, on timeline 1 +pg_basebackup: 
write-ahead log start point: 0/2000028 on timeline 1 pg_basebackup: starting background WAL receiver -pg_basebackup: checkpoint redo point at 0/2000028 pg_basebackup: write-ahead log end point: 0/2000100 pg_basebackup: waiting for background process to finish streaming ... pg_basebackup: syncing data to disk ... @@ -26,13 +25,9 @@ pg_basebackup: base backup completed` assert.Equal(t, "000000010000000000000002", stopSeg) } -func Test_ParseBasebackupStderr_WithPG15Output_ExtractsCorrectSegments(t *testing.T) { - stderr := `pg_basebackup: initiating base backup, waiting for checkpoint to complete -pg_basebackup: checkpoint completed -pg_basebackup: write-ahead log start point: 1/AB000028, on timeline 1 -pg_basebackup: checkpoint redo point at 1/AB000028 -pg_basebackup: write-ahead log end point: 1/AC000000 -pg_basebackup: base backup completed` +func Test_ParseBasebackupStderr_WithHighLSNValues_ExtractsCorrectSegments(t *testing.T) { + stderr := `pg_basebackup: write-ahead log start point: 1/AB000028 on timeline 1 +pg_basebackup: write-ahead log end point: 1/AC000000` startSeg, stopSeg, err := ParseBasebackupStderr(stderr) @@ -42,7 +37,7 @@ pg_basebackup: base backup completed` } func Test_ParseBasebackupStderr_WithHighLogID_ExtractsCorrectSegments(t *testing.T) { - stderr := `pg_basebackup: checkpoint redo point at A/FF000028 + stderr := `pg_basebackup: write-ahead log start point: A/FF000028 on timeline 1 pg_basebackup: write-ahead log end point: B/1000000` startSeg, stopSeg, err := ParseBasebackupStderr(stderr) @@ -63,7 +58,7 @@ pg_basebackup: base backup completed` } func Test_ParseBasebackupStderr_WhenStopLSNMissing_ReturnsError(t *testing.T) { - stderr := `pg_basebackup: checkpoint redo point at 0/2000028 + stderr := `pg_basebackup: write-ahead log start point: 0/2000028 on timeline 1 pg_basebackup: base backup completed` _, _, err := ParseBasebackupStderr(stderr) diff --git a/agent/internal/features/restore/restorer.go 
b/agent/internal/features/restore/restorer.go new file mode 100644 index 0000000..5200ffa --- /dev/null +++ b/agent/internal/features/restore/restorer.go @@ -0,0 +1,413 @@ +package restore + +import ( + "archive/tar" + "context" + "errors" + "fmt" + "io" + "log/slog" + "os" + "path/filepath" + "strings" + "time" + + "github.com/klauspost/compress/zstd" + + "databasus-agent/internal/features/api" +) + +const ( + walRestoreDir = "databasus-wal-restore" + maxRetryAttempts = 3 + retryBaseDelay = 1 * time.Second + recoverySignalFile = "recovery.signal" + autoConfFile = "postgresql.auto.conf" +) + +var retryDelayOverride *time.Duration + +type Restorer struct { + apiClient *api.Client + log *slog.Logger + targetPgDataDir string + backupID string + targetTime string +} + +func NewRestorer( + apiClient *api.Client, + log *slog.Logger, + targetPgDataDir string, + backupID string, + targetTime string, +) *Restorer { + return &Restorer{ + apiClient, + log, + targetPgDataDir, + backupID, + targetTime, + } +} + +func (r *Restorer) Run(ctx context.Context) error { + var parsedTargetTime *time.Time + + if r.targetTime != "" { + parsed, err := time.Parse(time.RFC3339, r.targetTime) + if err != nil { + return fmt.Errorf("invalid --target-time format (expected RFC3339, e.g. 
2026-02-28T14:30:00Z): %w", err) + } + + parsedTargetTime = &parsed + } + + if err := r.validateTargetPgDataDir(); err != nil { + return err + } + + plan, err := r.getRestorePlanFromServer(ctx) + if err != nil { + return err + } + + r.logRestorePlan(plan, parsedTargetTime) + + r.log.Info("Downloading and extracting basebackup...") + if err := r.downloadAndExtractBasebackup(ctx, plan.FullBackup.BackupID); err != nil { + return fmt.Errorf("basebackup download failed: %w", err) + } + r.log.Info("Basebackup extracted successfully") + + if err := r.downloadAllWalSegments(ctx, plan.WalSegments); err != nil { + return err + } + + if err := r.configurePostgresRecovery(parsedTargetTime); err != nil { + return fmt.Errorf("failed to configure recovery: %w", err) + } + + if err := os.Chmod(r.targetPgDataDir, 0o700); err != nil { + return fmt.Errorf("set PGDATA permissions: %w", err) + } + + r.printCompletionMessage() + + return nil +} + +func (r *Restorer) validateTargetPgDataDir() error { + info, err := os.Stat(r.targetPgDataDir) + if err != nil { + if os.IsNotExist(err) { + return fmt.Errorf("target pgdata directory does not exist: %s", r.targetPgDataDir) + } + + return fmt.Errorf("cannot access target pgdata directory: %w", err) + } + + if !info.IsDir() { + return fmt.Errorf("target pgdata path is not a directory: %s", r.targetPgDataDir) + } + + entries, err := os.ReadDir(r.targetPgDataDir) + if err != nil { + return fmt.Errorf("cannot read target pgdata directory: %w", err) + } + + if len(entries) > 0 { + return fmt.Errorf("target pgdata directory is not empty: %s", r.targetPgDataDir) + } + + return nil +} + +func (r *Restorer) getRestorePlanFromServer(ctx context.Context) (*api.GetRestorePlanResponse, error) { + plan, planErr, err := r.apiClient.GetRestorePlan(ctx, r.backupID) + if err != nil { + return nil, fmt.Errorf("failed to fetch restore plan: %w", err) + } + + if planErr != nil { + if planErr.LastContiguousSegment != "" { + return nil, fmt.Errorf("restore plan 
error: %s (last contiguous segment: %s)", + planErr.Message, planErr.LastContiguousSegment) + } + + return nil, fmt.Errorf("restore plan error: %s", planErr.Message) + } + + return plan, nil +} + +func (r *Restorer) logRestorePlan(plan *api.GetRestorePlanResponse, parsedTargetTime *time.Time) { + recoveryTarget := "full recovery (all available WAL)" + if parsedTargetTime != nil { + recoveryTarget = parsedTargetTime.Format(time.RFC3339) + } + + r.log.Info("Restore plan", + "fullBackupID", plan.FullBackup.BackupID, + "fullBackupCreatedAt", plan.FullBackup.CreatedAt.Format(time.RFC3339), + "pgVersion", plan.FullBackup.PgVersion, + "walSegmentCount", len(plan.WalSegments), + "totalDownloadSize", formatSizeBytes(plan.TotalSizeBytes), + "latestAvailableSegment", plan.LatestAvailableSegment, + "recoveryTarget", recoveryTarget, + ) +} + +func (r *Restorer) downloadAndExtractBasebackup(ctx context.Context, backupID string) error { + body, err := r.apiClient.DownloadBackupFile(ctx, backupID) + if err != nil { + return err + } + defer func() { _ = body.Close() }() + + zstdReader, err := zstd.NewReader(body) + if err != nil { + return fmt.Errorf("create zstd decompressor: %w", err) + } + defer zstdReader.Close() + + tarReader := tar.NewReader(zstdReader) + + return r.extractTarArchive(tarReader) +} + +func (r *Restorer) extractTarArchive(tarReader *tar.Reader) error { + for { + header, err := tarReader.Next() + if errors.Is(err, io.EOF) { + return nil + } + + if err != nil { + return fmt.Errorf("read tar entry: %w", err) + } + + targetPath := filepath.Join(r.targetPgDataDir, header.Name) + + relativePath, err := filepath.Rel(r.targetPgDataDir, targetPath) + if err != nil || strings.HasPrefix(relativePath, "..") { + return fmt.Errorf("tar entry attempts path traversal: %s", header.Name) + } + + switch header.Typeflag { + case tar.TypeDir: + if err := os.MkdirAll(targetPath, os.FileMode(header.Mode)); err != nil { + return fmt.Errorf("create directory %s: %w", header.Name, err) 
+ } + + case tar.TypeReg: + parentDir := filepath.Dir(targetPath) + if err := os.MkdirAll(parentDir, 0o755); err != nil { + return fmt.Errorf("create parent directory for %s: %w", header.Name, err) + } + + file, err := os.OpenFile(targetPath, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, os.FileMode(header.Mode)) + if err != nil { + return fmt.Errorf("create file %s: %w", header.Name, err) + } + + if _, err := io.Copy(file, tarReader); err != nil { + _ = file.Close() + return fmt.Errorf("write file %s: %w", header.Name, err) + } + + _ = file.Close() + + case tar.TypeSymlink: + if err := os.Symlink(header.Linkname, targetPath); err != nil { + return fmt.Errorf("create symlink %s: %w", header.Name, err) + } + + case tar.TypeLink: + linkTarget := filepath.Join(r.targetPgDataDir, header.Linkname) + if err := os.Link(linkTarget, targetPath); err != nil { + return fmt.Errorf("create hard link %s: %w", header.Name, err) + } + + default: + r.log.Warn("Skipping unsupported tar entry type", + "name", header.Name, + "type", header.Typeflag, + ) + } + } +} + +func (r *Restorer) downloadAllWalSegments(ctx context.Context, segments []api.RestorePlanWalSegment) error { + walRestorePath := filepath.Join(r.targetPgDataDir, walRestoreDir) + if err := os.MkdirAll(walRestorePath, 0o755); err != nil { + return fmt.Errorf("create WAL restore directory: %w", err) + } + + for segmentIndex, segment := range segments { + if err := r.downloadWalSegmentWithRetry(ctx, segment, segmentIndex, len(segments)); err != nil { + return err + } + } + + return nil +} + +func (r *Restorer) downloadWalSegmentWithRetry( + ctx context.Context, + segment api.RestorePlanWalSegment, + segmentIndex int, + segmentsTotal int, +) error { + r.log.Info("Downloading WAL segment", + "segment", segment.SegmentName, + "progress", fmt.Sprintf("%d/%d", segmentIndex+1, segmentsTotal), + ) + + var lastErr error + + for attempt := range maxRetryAttempts { + if err := r.downloadWalSegment(ctx, segment); err != nil { + lastErr = err + + 
delay := r.getRetryDelay(attempt) + r.log.Warn("WAL segment download failed, retrying", + "segment", segment.SegmentName, + "attempt", attempt+1, + "maxAttempts", maxRetryAttempts, + "retryDelay", delay, + "error", err, + ) + + select { + case <-ctx.Done(): + return ctx.Err() + case <-time.After(delay): + continue + } + } + + return nil + } + + return fmt.Errorf("failed to download WAL segment %s after %d attempts: %w", + segment.SegmentName, maxRetryAttempts, lastErr) +} + +func (r *Restorer) downloadWalSegment(ctx context.Context, segment api.RestorePlanWalSegment) error { + body, err := r.apiClient.DownloadBackupFile(ctx, segment.BackupID) + if err != nil { + return err + } + defer func() { _ = body.Close() }() + + zstdReader, err := zstd.NewReader(body) + if err != nil { + return fmt.Errorf("create zstd decompressor: %w", err) + } + defer zstdReader.Close() + + segmentPath := filepath.Join(r.targetPgDataDir, walRestoreDir, segment.SegmentName) + + file, err := os.Create(segmentPath) + if err != nil { + return fmt.Errorf("create WAL segment file: %w", err) + } + defer func() { _ = file.Close() }() + + if _, err := io.Copy(file, zstdReader); err != nil { + return fmt.Errorf("write WAL segment: %w", err) + } + + return nil +} + +func (r *Restorer) configurePostgresRecovery(parsedTargetTime *time.Time) error { + recoverySignalPath := filepath.Join(r.targetPgDataDir, recoverySignalFile) + if err := os.WriteFile(recoverySignalPath, []byte{}, 0o644); err != nil { + return fmt.Errorf("create recovery.signal: %w", err) + } + + absPgDataDir, err := filepath.Abs(r.targetPgDataDir) + if err != nil { + return fmt.Errorf("resolve absolute path: %w", err) + } + + absPgDataDir = filepath.ToSlash(absPgDataDir) + walRestoreAbsPath := absPgDataDir + "/" + walRestoreDir + + autoConfPath := filepath.Join(r.targetPgDataDir, autoConfFile) + + autoConfFile, err := os.OpenFile(autoConfPath, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0o644) + if err != nil { + return fmt.Errorf("open 
postgresql.auto.conf: %w", err) + } + defer func() { _ = autoConfFile.Close() }() + + var configLines strings.Builder + configLines.WriteString("\n# Added by databasus-agent restore\n") + fmt.Fprintf(&configLines, "restore_command = 'cp %s/%%f %%p'\n", walRestoreAbsPath) + fmt.Fprintf(&configLines, "recovery_end_command = 'rm -rf %s'\n", walRestoreAbsPath) + configLines.WriteString("recovery_target_action = 'promote'\n") + + if parsedTargetTime != nil { + fmt.Fprintf(&configLines, "recovery_target_time = '%s'\n", parsedTargetTime.Format(time.RFC3339)) + } + + if _, err := autoConfFile.WriteString(configLines.String()); err != nil { + return fmt.Errorf("write to postgresql.auto.conf: %w", err) + } + + return nil +} + +func (r *Restorer) printCompletionMessage() { + absPgDataDir, _ := filepath.Abs(r.targetPgDataDir) + + fmt.Printf(` +Restore complete. PGDATA directory is ready at %s. + +What happens when you start PostgreSQL: + 1. PostgreSQL detects recovery.signal and enters recovery mode + 2. It replays WAL from the basebackup's consistency point + 3. It executes restore_command to fetch WAL segments from databasus-wal-restore/ + 4. WAL replay continues until target_time (if PITR) or end of available WAL + 5. recovery_end_command automatically removes databasus-wal-restore/ + 6. PostgreSQL promotes to primary and removes recovery.signal + 7. Normal operations resume + +Start PostgreSQL: + pg_ctl -D %s start + +Note: If you move the PGDATA directory before starting PostgreSQL, +update restore_command and recovery_end_command paths in +postgresql.auto.conf accordingly. 
+`, absPgDataDir, absPgDataDir) +} + +func (r *Restorer) getRetryDelay(attempt int) time.Duration { + if retryDelayOverride != nil { + return *retryDelayOverride + } + + return retryBaseDelay * time.Duration(1<<attempt) +} + +func formatSizeBytes(sizeBytes int64) string { + const ( + kilobyte = int64(1024) + megabyte = kilobyte * 1024 + gigabyte = megabyte * 1024 + ) + + switch { + case sizeBytes >= gigabyte: + return fmt.Sprintf("%.2f GB", float64(sizeBytes)/float64(gigabyte)) + case sizeBytes >= megabyte: + return fmt.Sprintf("%.2f MB", float64(sizeBytes)/float64(megabyte)) + case sizeBytes >= kilobyte: + return fmt.Sprintf("%.2f KB", float64(sizeBytes)/float64(kilobyte)) + default: + return fmt.Sprintf("%d B", sizeBytes) + } +} diff --git a/agent/internal/features/restore/restorer_test.go b/agent/internal/features/restore/restorer_test.go new file mode 100644 index 0000000..b110e7f --- /dev/null +++ b/agent/internal/features/restore/restorer_test.go @@ -0,0 +1,616 @@ +package restore + +import ( + "archive/tar" + "bytes" + "context" + "encoding/json" + "net/http" + "net/http/httptest" + "os" + "path/filepath" + "strings" + "sync" + "sync/atomic" + "testing" + "time" + + "github.com/klauspost/compress/zstd" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "databasus-agent/internal/features/api" + "databasus-agent/internal/logger" +) + +const ( + testRestorePlanPath = "/api/v1/backups/postgres/wal/restore/plan" + testRestoreDownloadPath = "/api/v1/backups/postgres/wal/restore/download" + + testFullBackupID = "full-backup-id-1234" + testWalSegment1 = "000000010000000100000001" + testWalSegment2 = "000000010000000100000002" +) + +func Test_RunRestore_WhenBasebackupAndWalSegmentsAvailable_FilesExtractedAndRecoveryConfigured(t *testing.T) { + tarFiles := map[string][]byte{ + "PG_VERSION": []byte("16"), + "base/1/somefile": []byte("table-data"), + } + zstdTarData := createZstdTar(t, tarFiles) + walData1 := createZstdData(t, []byte("wal-segment-1-data")) + walData2 := createZstdData(t, []byte("wal-segment-2-data")) + + server := newTestServer(t, func(w http.ResponseWriter, r *http.Request) { + switch r.URL.Path { + case 
testRestorePlanPath: + writeJSON(w, api.GetRestorePlanResponse{ + FullBackup: api.RestorePlanFullBackup{ + BackupID: testFullBackupID, + FullBackupWalStartSegment: testWalSegment1, + FullBackupWalStopSegment: testWalSegment1, + PgVersion: "16", + CreatedAt: time.Now().UTC(), + SizeBytes: 1024, + }, + WalSegments: []api.RestorePlanWalSegment{ + {BackupID: "wal-1", SegmentName: testWalSegment1, SizeBytes: 512}, + {BackupID: "wal-2", SegmentName: testWalSegment2, SizeBytes: 512}, + }, + TotalSizeBytes: 2048, + LatestAvailableSegment: testWalSegment2, + }) + + case testRestoreDownloadPath: + backupID := r.URL.Query().Get("backupId") + switch backupID { + case testFullBackupID: + w.Header().Set("Content-Type", "application/octet-stream") + _, _ = w.Write(zstdTarData) + case "wal-1": + w.Header().Set("Content-Type", "application/octet-stream") + _, _ = w.Write(walData1) + case "wal-2": + w.Header().Set("Content-Type", "application/octet-stream") + _, _ = w.Write(walData2) + default: + w.WriteHeader(http.StatusBadRequest) + } + + default: + w.WriteHeader(http.StatusNotFound) + } + }) + + targetDir := createTestTargetDir(t) + restorer := newTestRestorer(server.URL, targetDir, "", "") + + err := restorer.Run(context.Background()) + require.NoError(t, err) + + pgVersionContent, err := os.ReadFile(filepath.Join(targetDir, "PG_VERSION")) + require.NoError(t, err) + assert.Equal(t, "16", string(pgVersionContent)) + + someFileContent, err := os.ReadFile(filepath.Join(targetDir, "base", "1", "somefile")) + require.NoError(t, err) + assert.Equal(t, "table-data", string(someFileContent)) + + walSegment1Content, err := os.ReadFile(filepath.Join(targetDir, walRestoreDir, testWalSegment1)) + require.NoError(t, err) + assert.Equal(t, "wal-segment-1-data", string(walSegment1Content)) + + walSegment2Content, err := os.ReadFile(filepath.Join(targetDir, walRestoreDir, testWalSegment2)) + require.NoError(t, err) + assert.Equal(t, "wal-segment-2-data", string(walSegment2Content)) + + 
recoverySignalPath := filepath.Join(targetDir, "recovery.signal") + recoverySignalInfo, err := os.Stat(recoverySignalPath) + require.NoError(t, err) + assert.Equal(t, int64(0), recoverySignalInfo.Size()) + + autoConfContent, err := os.ReadFile(filepath.Join(targetDir, "postgresql.auto.conf")) + require.NoError(t, err) + autoConfStr := string(autoConfContent) + + assert.Contains(t, autoConfStr, "restore_command") + assert.Contains(t, autoConfStr, walRestoreDir) + assert.Contains(t, autoConfStr, "recovery_target_action = 'promote'") + assert.Contains(t, autoConfStr, "recovery_end_command") + assert.NotContains(t, autoConfStr, "recovery_target_time") +} + +func Test_RunRestore_WhenTargetTimeProvided_RecoveryTargetTimeWrittenToConfig(t *testing.T) { + tarFiles := map[string][]byte{"PG_VERSION": []byte("16")} + zstdTarData := createZstdTar(t, tarFiles) + + server := newTestServer(t, func(w http.ResponseWriter, r *http.Request) { + switch r.URL.Path { + case testRestorePlanPath: + writeJSON(w, api.GetRestorePlanResponse{ + FullBackup: api.RestorePlanFullBackup{ + BackupID: testFullBackupID, + PgVersion: "16", + CreatedAt: time.Now().UTC(), + SizeBytes: 1024, + }, + WalSegments: []api.RestorePlanWalSegment{}, + TotalSizeBytes: 1024, + LatestAvailableSegment: "", + }) + + case testRestoreDownloadPath: + w.Header().Set("Content-Type", "application/octet-stream") + _, _ = w.Write(zstdTarData) + + default: + w.WriteHeader(http.StatusNotFound) + } + }) + + targetDir := createTestTargetDir(t) + restorer := newTestRestorer(server.URL, targetDir, "", "2026-02-28T14:30:00Z") + + err := restorer.Run(context.Background()) + require.NoError(t, err) + + autoConfContent, err := os.ReadFile(filepath.Join(targetDir, "postgresql.auto.conf")) + require.NoError(t, err) + + assert.Contains(t, string(autoConfContent), "recovery_target_time = '2026-02-28T14:30:00Z'") +} + +func Test_RunRestore_WhenPgDataDirNotEmpty_ReturnsError(t *testing.T) { + targetDir := createTestTargetDir(t) + + err := 
os.WriteFile(filepath.Join(targetDir, "existing-file"), []byte("data"), 0o644) + require.NoError(t, err) + + restorer := newTestRestorer("http://localhost:0", targetDir, "", "") + + err = restorer.Run(context.Background()) + require.Error(t, err) + assert.Contains(t, err.Error(), "not empty") +} + +func Test_RunRestore_WhenPgDataDirDoesNotExist_ReturnsError(t *testing.T) { + nonExistentDir := filepath.Join(os.TempDir(), "databasus-test-nonexistent-dir-12345") + + restorer := newTestRestorer("http://localhost:0", nonExistentDir, "", "") + + err := restorer.Run(context.Background()) + require.Error(t, err) + assert.Contains(t, err.Error(), "does not exist") +} + +func Test_RunRestore_WhenNoBackupsAvailable_ReturnsError(t *testing.T) { + server := newTestServer(t, func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusBadRequest) + _ = json.NewEncoder(w).Encode(api.GetRestorePlanErrorResponse{ + Error: "no_backups", + Message: "No full backups available", + }) + }) + + targetDir := createTestTargetDir(t) + restorer := newTestRestorer(server.URL, targetDir, "", "") + + err := restorer.Run(context.Background()) + require.Error(t, err) + assert.Contains(t, err.Error(), "No full backups available") +} + +func Test_RunRestore_WhenWalChainBroken_ReturnsError(t *testing.T) { + server := newTestServer(t, func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusBadRequest) + _ = json.NewEncoder(w).Encode(api.GetRestorePlanErrorResponse{ + Error: "wal_chain_broken", + Message: "WAL chain broken", + LastContiguousSegment: testWalSegment1, + }) + }) + + targetDir := createTestTargetDir(t) + restorer := newTestRestorer(server.URL, targetDir, "", "") + + err := restorer.Run(context.Background()) + require.Error(t, err) + assert.Contains(t, err.Error(), "WAL chain broken") + assert.Contains(t, err.Error(), testWalSegment1) +} + +func 
Test_DownloadWalSegment_WhenFirstAttemptFails_RetriesAndSucceeds(t *testing.T) { + tarFiles := map[string][]byte{"PG_VERSION": []byte("16")} + zstdTarData := createZstdTar(t, tarFiles) + walData := createZstdData(t, []byte("wal-segment-data")) + + var mu sync.Mutex + var walDownloadAttempts int + + server := newTestServer(t, func(w http.ResponseWriter, r *http.Request) { + switch r.URL.Path { + case testRestorePlanPath: + writeJSON(w, api.GetRestorePlanResponse{ + FullBackup: api.RestorePlanFullBackup{ + BackupID: testFullBackupID, + PgVersion: "16", + CreatedAt: time.Now().UTC(), + SizeBytes: 1024, + }, + WalSegments: []api.RestorePlanWalSegment{ + {BackupID: "wal-1", SegmentName: testWalSegment1, SizeBytes: 512}, + }, + TotalSizeBytes: 1536, + LatestAvailableSegment: testWalSegment1, + }) + + case testRestoreDownloadPath: + backupID := r.URL.Query().Get("backupId") + if backupID == testFullBackupID { + w.Header().Set("Content-Type", "application/octet-stream") + _, _ = w.Write(zstdTarData) + return + } + + mu.Lock() + walDownloadAttempts++ + attempt := walDownloadAttempts + mu.Unlock() + + if attempt == 1 { + w.WriteHeader(http.StatusInternalServerError) + _, _ = w.Write([]byte(`{"error":"storage unavailable"}`)) + return + } + + w.Header().Set("Content-Type", "application/octet-stream") + _, _ = w.Write(walData) + + default: + w.WriteHeader(http.StatusNotFound) + } + }) + + targetDir := createTestTargetDir(t) + restorer := newTestRestorer(server.URL, targetDir, "", "") + + origDelay := retryDelayOverride + testDelay := 10 * time.Millisecond + retryDelayOverride = &testDelay + defer func() { retryDelayOverride = origDelay }() + + err := restorer.Run(context.Background()) + require.NoError(t, err) + + mu.Lock() + attempts := walDownloadAttempts + mu.Unlock() + + assert.Equal(t, 2, attempts) + + walContent, err := os.ReadFile(filepath.Join(targetDir, walRestoreDir, testWalSegment1)) + require.NoError(t, err) + assert.Equal(t, "wal-segment-data", string(walContent)) 
+} + +func Test_DownloadWalSegment_WhenAllAttemptsFail_ReturnsErrorWithSegmentName(t *testing.T) { + tarFiles := map[string][]byte{"PG_VERSION": []byte("16")} + zstdTarData := createZstdTar(t, tarFiles) + + server := newTestServer(t, func(w http.ResponseWriter, r *http.Request) { + switch r.URL.Path { + case testRestorePlanPath: + writeJSON(w, api.GetRestorePlanResponse{ + FullBackup: api.RestorePlanFullBackup{ + BackupID: testFullBackupID, + PgVersion: "16", + CreatedAt: time.Now().UTC(), + SizeBytes: 1024, + }, + WalSegments: []api.RestorePlanWalSegment{ + {BackupID: "wal-1", SegmentName: testWalSegment1, SizeBytes: 512}, + }, + TotalSizeBytes: 1536, + LatestAvailableSegment: testWalSegment1, + }) + + case testRestoreDownloadPath: + backupID := r.URL.Query().Get("backupId") + if backupID == testFullBackupID { + w.Header().Set("Content-Type", "application/octet-stream") + _, _ = w.Write(zstdTarData) + return + } + + w.WriteHeader(http.StatusInternalServerError) + _, _ = w.Write([]byte(`{"error":"storage unavailable"}`)) + + default: + w.WriteHeader(http.StatusNotFound) + } + }) + + targetDir := createTestTargetDir(t) + restorer := newTestRestorer(server.URL, targetDir, "", "") + + origDelay := retryDelayOverride + testDelay := 10 * time.Millisecond + retryDelayOverride = &testDelay + defer func() { retryDelayOverride = origDelay }() + + err := restorer.Run(context.Background()) + require.Error(t, err) + assert.Contains(t, err.Error(), testWalSegment1) + assert.Contains(t, err.Error(), "3 attempts") +} + +func Test_RunRestore_WhenInvalidTargetTimeFormat_ReturnsError(t *testing.T) { + targetDir := createTestTargetDir(t) + restorer := newTestRestorer("http://localhost:0", targetDir, "", "not-a-valid-time") + + err := restorer.Run(context.Background()) + require.Error(t, err) + assert.Contains(t, err.Error(), "invalid --target-time format") +} + +func Test_RunRestore_WhenBasebackupDownloadFails_ReturnsError(t *testing.T) { + server := newTestServer(t, func(w 
http.ResponseWriter, r *http.Request) { + switch r.URL.Path { + case testRestorePlanPath: + writeJSON(w, api.GetRestorePlanResponse{ + FullBackup: api.RestorePlanFullBackup{ + BackupID: testFullBackupID, + PgVersion: "16", + CreatedAt: time.Now().UTC(), + SizeBytes: 1024, + }, + WalSegments: []api.RestorePlanWalSegment{}, + TotalSizeBytes: 1024, + LatestAvailableSegment: "", + }) + + case testRestoreDownloadPath: + w.WriteHeader(http.StatusInternalServerError) + _, _ = w.Write([]byte(`{"error":"storage error"}`)) + + default: + w.WriteHeader(http.StatusNotFound) + } + }) + + targetDir := createTestTargetDir(t) + restorer := newTestRestorer(server.URL, targetDir, "", "") + + err := restorer.Run(context.Background()) + require.Error(t, err) + assert.Contains(t, err.Error(), "basebackup download failed") +} + +func Test_RunRestore_WhenNoWalSegmentsInPlan_BasebackupRestoredSuccessfully(t *testing.T) { + tarFiles := map[string][]byte{ + "PG_VERSION": []byte("16"), + "global/pg_control": []byte("control-data"), + } + zstdTarData := createZstdTar(t, tarFiles) + + server := newTestServer(t, func(w http.ResponseWriter, r *http.Request) { + switch r.URL.Path { + case testRestorePlanPath: + writeJSON(w, api.GetRestorePlanResponse{ + FullBackup: api.RestorePlanFullBackup{ + BackupID: testFullBackupID, + PgVersion: "16", + CreatedAt: time.Now().UTC(), + SizeBytes: 1024, + }, + WalSegments: []api.RestorePlanWalSegment{}, + TotalSizeBytes: 1024, + LatestAvailableSegment: "", + }) + + case testRestoreDownloadPath: + w.Header().Set("Content-Type", "application/octet-stream") + _, _ = w.Write(zstdTarData) + + default: + w.WriteHeader(http.StatusNotFound) + } + }) + + targetDir := createTestTargetDir(t) + restorer := newTestRestorer(server.URL, targetDir, "", "") + + err := restorer.Run(context.Background()) + require.NoError(t, err) + + pgVersionContent, err := os.ReadFile(filepath.Join(targetDir, "PG_VERSION")) + require.NoError(t, err) + assert.Equal(t, "16", 
string(pgVersionContent)) + + walRestoreDirInfo, err := os.Stat(filepath.Join(targetDir, walRestoreDir)) + require.NoError(t, err) + assert.True(t, walRestoreDirInfo.IsDir()) + + _, err = os.Stat(filepath.Join(targetDir, "recovery.signal")) + require.NoError(t, err) + + autoConfContent, err := os.ReadFile(filepath.Join(targetDir, "postgresql.auto.conf")) + require.NoError(t, err) + assert.Contains(t, string(autoConfContent), "restore_command") +} + +func Test_RunRestore_WhenMakingApiCalls_AuthTokenIncludedInRequests(t *testing.T) { + tarFiles := map[string][]byte{"PG_VERSION": []byte("16")} + zstdTarData := createZstdTar(t, tarFiles) + + var receivedAuthHeaders atomic.Int32 + var mu sync.Mutex + var authHeaderValues []string + + server := newTestServer(t, func(w http.ResponseWriter, r *http.Request) { + authHeader := r.Header.Get("Authorization") + if authHeader != "" { + receivedAuthHeaders.Add(1) + + mu.Lock() + authHeaderValues = append(authHeaderValues, authHeader) + mu.Unlock() + } + + switch r.URL.Path { + case testRestorePlanPath: + writeJSON(w, api.GetRestorePlanResponse{ + FullBackup: api.RestorePlanFullBackup{ + BackupID: testFullBackupID, + PgVersion: "16", + CreatedAt: time.Now().UTC(), + SizeBytes: 1024, + }, + WalSegments: []api.RestorePlanWalSegment{}, + TotalSizeBytes: 1024, + LatestAvailableSegment: "", + }) + + case testRestoreDownloadPath: + w.Header().Set("Content-Type", "application/octet-stream") + _, _ = w.Write(zstdTarData) + + default: + w.WriteHeader(http.StatusNotFound) + } + }) + + targetDir := createTestTargetDir(t) + restorer := newTestRestorer(server.URL, targetDir, "", "") + + err := restorer.Run(context.Background()) + require.NoError(t, err) + + assert.GreaterOrEqual(t, int(receivedAuthHeaders.Load()), 2) + + mu.Lock() + defer mu.Unlock() + + for _, headerValue := range authHeaderValues { + assert.Equal(t, "test-token", headerValue) + } +} + +func newTestServer(t *testing.T, handler http.HandlerFunc) *httptest.Server { + t.Helper() 
+ + server := httptest.NewServer(handler) + t.Cleanup(server.Close) + + return server +} + +func createTestTargetDir(t *testing.T) string { + t.Helper() + + baseDir := filepath.Join(".", ".test-tmp") + if err := os.MkdirAll(baseDir, 0o755); err != nil { + t.Fatalf("failed to create base test dir: %v", err) + } + + dir, err := os.MkdirTemp(baseDir, t.Name()+"-*") + if err != nil { + t.Fatalf("failed to create test target dir: %v", err) + } + + t.Cleanup(func() { + _ = os.RemoveAll(dir) + }) + + return dir +} + +func createZstdTar(t *testing.T, files map[string][]byte) []byte { + t.Helper() + + var tarBuffer bytes.Buffer + tarWriter := tar.NewWriter(&tarBuffer) + + createdDirs := make(map[string]bool) + + for name, content := range files { + dir := filepath.Dir(name) + if dir != "." && !createdDirs[dir] { + parts := strings.Split(filepath.ToSlash(dir), "/") + for partIndex := range parts { + partialDir := strings.Join(parts[:partIndex+1], "/") + if !createdDirs[partialDir] { + err := tarWriter.WriteHeader(&tar.Header{ + Name: partialDir + "/", + Typeflag: tar.TypeDir, + Mode: 0o755, + }) + require.NoError(t, err) + + createdDirs[partialDir] = true + } + } + } + + err := tarWriter.WriteHeader(&tar.Header{ + Name: name, + Size: int64(len(content)), + Mode: 0o644, + Typeflag: tar.TypeReg, + }) + require.NoError(t, err) + + _, err = tarWriter.Write(content) + require.NoError(t, err) + } + + require.NoError(t, tarWriter.Close()) + + var zstdBuffer bytes.Buffer + + encoder, err := zstd.NewWriter(&zstdBuffer, + zstd.WithEncoderLevel(zstd.EncoderLevelFromZstd(5)), + zstd.WithEncoderCRC(true), + ) + require.NoError(t, err) + + _, err = encoder.Write(tarBuffer.Bytes()) + require.NoError(t, err) + require.NoError(t, encoder.Close()) + + return zstdBuffer.Bytes() +} + +func createZstdData(t *testing.T, data []byte) []byte { + t.Helper() + + var buffer bytes.Buffer + + encoder, err := zstd.NewWriter(&buffer, + zstd.WithEncoderLevel(zstd.EncoderLevelFromZstd(5)), + 
zstd.WithEncoderCRC(true), + ) + require.NoError(t, err) + + _, err = encoder.Write(data) + require.NoError(t, err) + require.NoError(t, encoder.Close()) + + return buffer.Bytes() +} + +func newTestRestorer(serverURL, targetPgDataDir, backupID, targetTime string) *Restorer { + apiClient := api.NewClient(serverURL, "test-token", logger.GetLogger()) + + return NewRestorer(apiClient, logger.GetLogger(), targetPgDataDir, backupID, targetTime) +} + +func writeJSON(w http.ResponseWriter, value any) { + w.Header().Set("Content-Type", "application/json") + + if err := json.NewEncoder(w).Encode(value); err != nil { + w.WriteHeader(http.StatusInternalServerError) + } +} diff --git a/backend/internal/features/backups/backups/backuping/scheduler.go b/backend/internal/features/backups/backups/backuping/scheduler.go index 4e032c9..3ae639d 100644 --- a/backend/internal/features/backups/backups/backuping/scheduler.go +++ b/backend/internal/features/backups/backups/backuping/scheduler.go @@ -342,6 +342,10 @@ func (s *BackupsScheduler) runPendingBackups() error { continue } + if database.IsAgentManagedBackup() { + continue + } + s.StartBackup(database, remainedBackupTryCount == 1) continue } diff --git a/backend/internal/features/backups/backups/backuping/scheduler_test.go b/backend/internal/features/backups/backups/backuping/scheduler_test.go index 75bc16d..a7440ab 100644 --- a/backend/internal/features/backups/backups/backuping/scheduler_test.go +++ b/backend/internal/features/backups/backups/backuping/scheduler_test.go @@ -1,6 +1,7 @@ package backuping import ( + "context" "testing" "time" @@ -20,6 +21,128 @@ import ( "databasus-backend/internal/util/period" ) +func Test_RunPendingBackups_ByDatabaseType_OnlySchedulesNonAgentManagedBackups(t *testing.T) { + type testCase struct { + name string + createDatabase func(workspaceID uuid.UUID, storage *storages.Storage, notifier *notifiers.Notifier) *databases.Database + isBackupExpected bool + needsBackuperNode bool + } + + testCases 
:= []testCase{ + { + name: "PostgreSQL PG_DUMP - backup runs", + createDatabase: func(workspaceID uuid.UUID, storage *storages.Storage, notifier *notifiers.Notifier) *databases.Database { + return databases.CreateTestDatabase(workspaceID, storage, notifier) + }, + isBackupExpected: true, + needsBackuperNode: true, + }, + { + name: "PostgreSQL WAL_V1 - backup skipped (agent-managed)", + createDatabase: func(workspaceID uuid.UUID, _ *storages.Storage, notifier *notifiers.Notifier) *databases.Database { + return databases.CreateTestPostgresWalDatabase(workspaceID, notifier) + }, + isBackupExpected: false, + needsBackuperNode: false, + }, + { + name: "MariaDB - backup runs", + createDatabase: func(workspaceID uuid.UUID, _ *storages.Storage, notifier *notifiers.Notifier) *databases.Database { + return databases.CreateTestMariadbDatabase(workspaceID, notifier) + }, + isBackupExpected: true, + needsBackuperNode: true, + }, + { + name: "MongoDB - backup runs", + createDatabase: func(workspaceID uuid.UUID, _ *storages.Storage, notifier *notifiers.Notifier) *databases.Database { + return databases.CreateTestMongodbDatabase(workspaceID, notifier) + }, + isBackupExpected: true, + needsBackuperNode: true, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + cache_utils.ClearAllCache() + + var backuperNode *BackuperNode + var cancel context.CancelFunc + + if tc.needsBackuperNode { + backuperNode = CreateTestBackuperNode() + cancel = StartBackuperNodeForTest(t, backuperNode) + defer StopBackuperNodeForTest(t, cancel, backuperNode) + } + + user := users_testing.CreateTestUser(users_enums.UserRoleAdmin) + router := CreateTestRouter() + workspace := workspaces_testing.CreateTestWorkspace("Test Workspace", user, router) + storage := storages.CreateTestStorage(workspace.ID) + notifier := notifiers.CreateTestNotifier(workspace.ID) + database := tc.createDatabase(workspace.ID, storage, notifier) + + defer func() { + backups, _ := 
backupRepository.FindByDatabaseID(database.ID) + for _, backup := range backups { + backupRepository.DeleteByID(backup.ID) + } + + databases.RemoveTestDatabase(database) + time.Sleep(50 * time.Millisecond) + storages.RemoveTestStorage(storage.ID) + notifiers.RemoveTestNotifier(notifier) + workspaces_testing.RemoveTestWorkspace(workspace, router) + }() + + backupConfig, err := backups_config.GetBackupConfigService().GetBackupConfigByDbId(database.ID) + assert.NoError(t, err) + + timeOfDay := "04:00" + backupConfig.BackupInterval = &intervals.Interval{ + Interval: intervals.IntervalDaily, + TimeOfDay: &timeOfDay, + } + backupConfig.IsBackupsEnabled = true + backupConfig.RetentionPolicyType = backups_config.RetentionPolicyTypeTimePeriod + backupConfig.RetentionTimePeriod = period.PeriodWeek + backupConfig.Storage = storage + backupConfig.StorageID = &storage.ID + + _, err = backups_config.GetBackupConfigService().SaveBackupConfig(backupConfig) + assert.NoError(t, err) + + // add old backup (24h ago) + backupRepository.Save(&backups_core.Backup{ + DatabaseID: database.ID, + StorageID: storage.ID, + Status: backups_core.BackupStatusCompleted, + CreatedAt: time.Now().UTC().Add(-24 * time.Hour), + }) + + GetBackupsScheduler().runPendingBackups() + + if tc.isBackupExpected { + WaitForBackupCompletion(t, database.ID, 1, 10*time.Second) + + backups, err := backupRepository.FindByDatabaseID(database.ID) + assert.NoError(t, err) + assert.Len(t, backups, 2) + } else { + time.Sleep(100 * time.Millisecond) + + backups, err := backupRepository.FindByDatabaseID(database.ID) + assert.NoError(t, err) + assert.Len(t, backups, 1) + } + + time.Sleep(200 * time.Millisecond) + }) + } +} + func Test_RunPendingBackups_WhenLastBackupWasYesterday_CreatesNewBackup(t *testing.T) { cache_utils.ClearAllCache() backuperNode := CreateTestBackuperNode() diff --git a/backend/internal/features/backups/backups/controllers/postgres_wal_controller_test.go 
b/backend/internal/features/backups/backups/controllers/postgres_wal_controller_test.go index 2bafb90..5255a90 100644 --- a/backend/internal/features/backups/backups/controllers/postgres_wal_controller_test.go +++ b/backend/internal/features/backups/backups/controllers/postgres_wal_controller_test.go @@ -938,6 +938,42 @@ func Test_GetRestorePlan_WithInvalidBackupId_Returns400(t *testing.T) { assert.Equal(t, "no_backups", errResp.Error) } +func Test_GetRestorePlan_WithWalSegmentId_ResolvesFullBackupAndReturnsWals(t *testing.T) { + router, db, storage, agentToken, _ := createWalTestSetup(t) + defer removeWalTestSetup(db, storage) + + uploadBasebackup(t, router, agentToken, "000000010000000100000001", "000000010000000100000010") + uploadWalSegment(t, router, agentToken, "000000010000000100000011") + uploadWalSegment(t, router, agentToken, "000000010000000100000012") + uploadWalSegment(t, router, agentToken, "000000010000000100000013") + + WaitForBackupCompletion(t, db.ID, 3, 5*time.Second) + + walSegment, err := backups_core.GetBackupRepository().FindWalSegmentByName( + db.ID, "000000010000000100000012", + ) + require.NoError(t, err) + require.NotNil(t, walSegment) + + var response backups_dto.GetRestorePlanResponse + test_utils.MakeGetRequestAndUnmarshal( + t, router, + "/api/v1/backups/postgres/wal/restore/plan?backupId="+walSegment.ID.String(), + agentToken, + http.StatusOK, + &response, + ) + + assert.NotEqual(t, uuid.Nil, response.FullBackup.BackupID) + assert.Equal(t, "000000010000000100000001", response.FullBackup.FullBackupWalStartSegment) + assert.Equal(t, "000000010000000100000010", response.FullBackup.FullBackupWalStopSegment) + require.Len(t, response.WalSegments, 3) + assert.Equal(t, "000000010000000100000011", response.WalSegments[0].SegmentName) + assert.Equal(t, "000000010000000100000012", response.WalSegments[1].SegmentName) + assert.Equal(t, "000000010000000100000013", response.WalSegments[2].SegmentName) + assert.Greater(t, response.TotalSizeBytes, 
int64(0)) +} + func Test_GetRestorePlan_WithInvalidToken_Returns401(t *testing.T) { router, db, storage, _, _ := createWalTestSetup(t) defer removeWalTestSetup(db, storage) @@ -995,6 +1031,140 @@ func Test_GetRestorePlan_WithInvalidBackupIdFormat_Returns400(t *testing.T) { assert.Contains(t, string(resp.Body), "invalid backupId format") } +func Test_WalUpload_WalSegment_CompletedBackup_HasNonZeroDuration(t *testing.T) { + router, db, storage, agentToken, _ := createWalTestSetup(t) + defer removeWalTestSetup(db, storage) + + uploadBasebackup(t, router, agentToken, "000000010000000100000001", "000000010000000100000010") + uploadWalSegment(t, router, agentToken, "000000010000000100000011") + + WaitForBackupCompletion(t, db.ID, 1, 5*time.Second) + + backups, err := backups_core.GetBackupRepository().FindByDatabaseID(db.ID) + require.NoError(t, err) + + var walBackup *backups_core.Backup + for _, b := range backups { + if b.PgWalBackupType != nil && + *b.PgWalBackupType == backups_core.PgWalBackupTypeWalSegment { + walBackup = b + break + } + } + + require.NotNil(t, walBackup) + assert.Equal(t, backups_core.BackupStatusCompleted, walBackup.Status) + assert.Greater(t, walBackup.BackupDurationMs, int64(0), + "WAL segment backup should have non-zero duration") +} + +func Test_WalUpload_Basebackup_CompletedBackup_HasNonZeroDuration(t *testing.T) { + router, db, storage, agentToken, _ := createWalTestSetup(t) + defer removeWalTestSetup(db, storage) + + backupID := uploadBasebackupPhase1(t, router, agentToken) + completeFullBackupUpload(t, router, agentToken, backupID, + "000000010000000100000001", "000000010000000100000010", nil) + + backup, err := backups_core.GetBackupRepository().FindByID(backupID) + require.NoError(t, err) + assert.Equal(t, backups_core.BackupStatusCompleted, backup.Status) + assert.Greater(t, backup.BackupDurationMs, int64(0), + "base backup should have non-zero duration") +} + +func Test_WalUpload_WalSegment_ProgressUpdatedDuringStream(t *testing.T) { 
+ router, db, storage, agentToken, _ := createWalTestSetup(t) + defer removeWalTestSetup(db, storage) + + uploadBasebackup(t, router, agentToken, "000000010000000100000001", "000000010000000100000010") + + pipeReader, pipeWriter := io.Pipe() + req := newWalSegmentUploadRequest(pipeReader, agentToken, "000000010000000100000011") + + recorder := httptest.NewRecorder() + done := make(chan struct{}) + go func() { + router.ServeHTTP(recorder, req) + close(done) + }() + + // Write some data so the countingReader registers bytes. + _, err := pipeWriter.Write([]byte("wal-segment-progress-data")) + require.NoError(t, err) + + // Wait for the progress tracker to tick (1s interval + margin). + time.Sleep(1500 * time.Millisecond) + + backups, err := backups_core.GetBackupRepository().FindByDatabaseID(db.ID) + require.NoError(t, err) + + var walBackup *backups_core.Backup + for _, b := range backups { + if b.PgWalBackupType != nil && + *b.PgWalBackupType == backups_core.PgWalBackupTypeWalSegment { + walBackup = b + break + } + } + + require.NotNil(t, walBackup) + assert.Equal(t, backups_core.BackupStatusInProgress, walBackup.Status) + assert.Greater(t, walBackup.BackupDurationMs, int64(0), + "duration should be tracked in real-time during upload") + assert.Greater(t, walBackup.BackupSizeMb, float64(0), + "size should be tracked in real-time during upload") + + _ = pipeWriter.Close() + <-done +} + +func Test_WalUpload_Basebackup_ProgressUpdatedDuringStream(t *testing.T) { + router, db, storage, agentToken, _ := createWalTestSetup(t) + defer removeWalTestSetup(db, storage) + + pipeReader, pipeWriter := io.Pipe() + req, _ := http.NewRequest(http.MethodPost, "/api/v1/backups/postgres/wal/upload/full-start", pipeReader) + req.Header.Set("Authorization", agentToken) + req.Header.Set("Content-Type", "application/octet-stream") + + recorder := httptest.NewRecorder() + done := make(chan struct{}) + go func() { + router.ServeHTTP(recorder, req) + close(done) + }() + + // Write some data 
so the countingReader registers bytes. + _, err := pipeWriter.Write([]byte("basebackup-progress-data")) + require.NoError(t, err) + + // Wait for the progress tracker to tick (1s interval + margin). + time.Sleep(1500 * time.Millisecond) + + backups, err := backups_core.GetBackupRepository().FindByDatabaseID(db.ID) + require.NoError(t, err) + + var fullBackup *backups_core.Backup + for _, b := range backups { + if b.PgWalBackupType != nil && + *b.PgWalBackupType == backups_core.PgWalBackupTypeFullBackup { + fullBackup = b + break + } + } + + require.NotNil(t, fullBackup) + assert.Equal(t, backups_core.BackupStatusInProgress, fullBackup.Status) + assert.Greater(t, fullBackup.BackupDurationMs, int64(0), + "duration should be tracked in real-time during upload") + assert.Greater(t, fullBackup.BackupSizeMb, float64(0), + "size should be tracked in real-time during upload") + + _ = pipeWriter.Close() + <-done +} + func Test_DownloadRestoreFile_UploadThenDownload_ContentMatches(t *testing.T) { tests := []struct { name string diff --git a/backend/internal/features/backups/backups/core/repository.go b/backend/internal/features/backups/backups/core/repository.go index 3225601..427e1c1 100644 --- a/backend/internal/features/backups/backups/core/repository.go +++ b/backend/internal/features/backups/backups/core/repository.go @@ -349,6 +349,34 @@ func (r *BackupRepository) FindWalSegmentByName( return &backup, nil } +func (r *BackupRepository) FindLatestCompletedFullWalBackupBefore( + databaseID uuid.UUID, + before time.Time, +) (*Backup, error) { + var backup Backup + + err := storage. + GetDb(). + Where( + "database_id = ? AND pg_wal_backup_type = ? AND status = ? AND created_at <= ?", + databaseID, + PgWalBackupTypeFullBackup, + BackupStatusCompleted, + before, + ). + Order("created_at DESC"). 
+ First(&backup).Error + if err != nil { + if errors.Is(err, gorm.ErrRecordNotFound) { + return nil, nil + } + + return nil, err + } + + return &backup, nil +} + func (r *BackupRepository) FindStaleUploadedBasebackups(olderThan time.Time) ([]*Backup, error) { var backups []*Backup diff --git a/backend/internal/features/backups/backups/services/postgres_wal_service.go b/backend/internal/features/backups/backups/services/postgres_wal_service.go index c33b04d..c8730c2 100644 --- a/backend/internal/features/backups/backups/services/postgres_wal_service.go +++ b/backend/internal/features/backups/backups/services/postgres_wal_service.go @@ -5,6 +5,7 @@ import ( "fmt" "io" "log/slog" + "sync/atomic" "time" "github.com/google/uuid" @@ -38,6 +39,8 @@ func (s *PostgreWalBackupService) UploadWalSegment( walSegmentName string, body io.Reader, ) error { + uploadStart := time.Now().UTC() + if err := s.validateWalBackupType(database); err != nil { return err } @@ -72,14 +75,22 @@ func (s *PostgreWalBackupService) UploadWalSegment( return fmt.Errorf("failed to create backup record: %w", err) } - sizeBytes, streamErr := s.streamToStorage(ctx, backup, backupConfig, body) + inputCounter := &countingReader{r: body} + progressDone := make(chan struct{}) + go s.startProgressTracker(backup, inputCounter, uploadStart, progressDone) + + sizeBytes, streamErr := s.streamToStorage(ctx, backup, backupConfig, inputCounter) + close(progressDone) + if streamErr != nil { errMsg := streamErr.Error() + backup.BackupDurationMs = time.Since(uploadStart).Milliseconds() s.markFailed(backup, errMsg) return fmt.Errorf("upload failed: %w", streamErr) } + backup.BackupDurationMs = time.Since(uploadStart).Milliseconds() s.markCompleted(backup, sizeBytes) return nil @@ -93,6 +104,8 @@ func (s *PostgreWalBackupService) UploadBasebackup( database *databases.Database, body io.Reader, ) (uuid.UUID, error) { + uploadStart := time.Now().UTC() + if err := s.validateWalBackupType(database); err != nil { return 
uuid.Nil, err } @@ -117,9 +130,16 @@ func (s *PostgreWalBackupService) UploadBasebackup( return uuid.Nil, fmt.Errorf("failed to create backup record: %w", err) } - sizeBytes, streamErr := s.streamToStorage(ctx, backup, backupConfig, body) + inputCounter := &countingReader{r: body} + progressDone := make(chan struct{}) + go s.startProgressTracker(backup, inputCounter, uploadStart, progressDone) + + sizeBytes, streamErr := s.streamToStorage(ctx, backup, backupConfig, inputCounter) + close(progressDone) + if streamErr != nil { errMsg := streamErr.Error() + backup.BackupDurationMs = time.Since(uploadStart).Milliseconds() s.markFailed(backup, errMsg) return uuid.Nil, fmt.Errorf("upload failed: %w", streamErr) @@ -128,6 +148,7 @@ func (s *PostgreWalBackupService) UploadBasebackup( now := time.Now().UTC() backup.UploadCompletedAt = &now backup.BackupSizeMb = float64(sizeBytes) / (1024 * 1024) + backup.BackupDurationMs = time.Since(uploadStart).Milliseconds() if err := s.backupRepository.Save(backup); err != nil { return uuid.Nil, fmt.Errorf("failed to update backup after upload: %w", err) @@ -483,7 +504,7 @@ func (s *PostgreWalBackupService) streamDirect( return 0, err } - return cr.n, nil + return cr.n.Load(), nil } func (s *PostgreWalBackupService) streamEncrypted( @@ -544,7 +565,7 @@ func (s *PostgreWalBackupService) streamEncrypted( backup.EncryptionSalt = &encryptionSetup.SaltBase64 backup.EncryptionIV = &encryptionSetup.NonceBase64 - return cr.n, nil + return cr.n.Load(), nil } func (s *PostgreWalBackupService) markCompleted(backup *backups_core.Backup, sizeBytes int64) { @@ -562,6 +583,31 @@ func (s *PostgreWalBackupService) markCompleted(backup *backups_core.Backup, siz } } +func (s *PostgreWalBackupService) startProgressTracker( + backup *backups_core.Backup, + inputCounter *countingReader, + uploadStart time.Time, + done <-chan struct{}, +) { + ticker := time.NewTicker(1 * time.Second) + defer ticker.Stop() + + for { + select { + case <-done: + return + case 
<-ticker.C: + backup.BackupDurationMs = time.Since(uploadStart).Milliseconds() + backup.BackupSizeMb = float64(inputCounter.n.Load()) / (1024 * 1024) + + if err := s.backupRepository.Save(backup); err != nil { + s.logger.Error("failed to update backup progress", + "backupId", backup.ID, "error", err) + } + } + } +} + func (s *PostgreWalBackupService) markFailed(backup *backups_core.Backup, errMsg string) { backup.Status = backups_core.BackupStatusFailed backup.FailMessage = &errMsg @@ -575,11 +621,32 @@ func (s *PostgreWalBackupService) resolveFullBackup( databaseID uuid.UUID, backupID *uuid.UUID, ) (*backups_core.Backup, error) { - if backupID != nil { - return s.backupRepository.FindCompletedFullWalBackupByID(databaseID, *backupID) + if backupID == nil { + return s.backupRepository.FindLastCompletedFullWalBackupByDatabaseID(databaseID) } - return s.backupRepository.FindLastCompletedFullWalBackupByDatabaseID(databaseID) + fullBackup, err := s.backupRepository.FindCompletedFullWalBackupByID(databaseID, *backupID) + if err != nil { + return nil, err + } + + if fullBackup != nil { + return fullBackup, nil + } + + backup, err := s.backupRepository.FindByID(*backupID) + if err != nil { + return nil, nil + } + + if backup.DatabaseID != databaseID || + backup.Status != backups_core.BackupStatusCompleted || + backup.PgWalBackupType == nil || + *backup.PgWalBackupType != backups_core.PgWalBackupTypeWalSegment { + return nil, nil + } + + return s.backupRepository.FindLatestCompletedFullWalBackupBefore(databaseID, backup.CreatedAt) } func (s *PostgreWalBackupService) validateRestoreWalChain( @@ -667,12 +734,12 @@ func (s *PostgreWalBackupService) validateWalBackupType(database *databases.Data type countingReader struct { r io.Reader - n int64 + n atomic.Int64 } -func (cr *countingReader) Read(p []byte) (n int, err error) { - n, err = cr.r.Read(p) - cr.n += int64(n) +func (cr *countingReader) Read(p []byte) (int, error) { + n, err := cr.r.Read(p) + cr.n.Add(int64(n)) return 
n, err } diff --git a/backend/internal/features/databases/model.go b/backend/internal/features/databases/model.go index 0107dd7..0e8f012 100644 --- a/backend/internal/features/databases/model.go +++ b/backend/internal/features/databases/model.go @@ -183,6 +183,12 @@ func (d *Database) Update(incoming *Database) { } } +func (d *Database) IsAgentManagedBackup() bool { + return d.Type == DatabaseTypePostgres && + d.Postgresql != nil && + d.Postgresql.BackupType == postgresql.PostgresBackupTypeWalV1 +} + func (d *Database) getSpecificDatabase() DatabaseConnector { switch d.Type { case DatabaseTypePostgres: diff --git a/backend/internal/features/databases/testing.go b/backend/internal/features/databases/testing.go index 0b6c305..3e2084f 100644 --- a/backend/internal/features/databases/testing.go +++ b/backend/internal/features/databases/testing.go @@ -105,6 +105,76 @@ func CreateTestDatabase( return database } +func CreateTestPostgresWalDatabase( + workspaceID uuid.UUID, + notifier *notifiers.Notifier, +) *Database { + database := &Database{ + WorkspaceID: &workspaceID, + Name: "test-wal " + uuid.New().String(), + Type: DatabaseTypePostgres, + Postgresql: &postgresql.PostgresqlDatabase{ + BackupType: postgresql.PostgresBackupTypeWalV1, + Version: tools.PostgresqlVersion16, + CpuCount: 1, + }, + Notifiers: []notifiers.Notifier{ + *notifier, + }, + } + + database, err := databaseRepository.Save(database) + if err != nil { + panic(err) + } + + return database +} + +func CreateTestMariadbDatabase( + workspaceID uuid.UUID, + notifier *notifiers.Notifier, +) *Database { + database := &Database{ + WorkspaceID: &workspaceID, + Name: "test-mariadb " + uuid.New().String(), + Type: DatabaseTypeMariadb, + Mariadb: GetTestMariadbConfig(), + Notifiers: []notifiers.Notifier{ + *notifier, + }, + } + + database, err := databaseRepository.Save(database) + if err != nil { + panic(err) + } + + return database +} + +func CreateTestMongodbDatabase( + workspaceID uuid.UUID, + notifier 
*notifiers.Notifier, +) *Database { + database := &Database{ + WorkspaceID: &workspaceID, + Name: "test-mongodb " + uuid.New().String(), + Type: DatabaseTypeMongodb, + Mongodb: GetTestMongodbConfig(), + Notifiers: []notifiers.Notifier{ + *notifier, + }, + } + + database, err := databaseRepository.Save(database) + if err != nil { + panic(err) + } + + return database +} + func RemoveTestDatabase(database *Database) { // Delete backups and backup configs associated with this database // We hardcode SQL here because we cannot call backups feature due to DI inversion diff --git a/backend/internal/features/healthcheck/config/service.go b/backend/internal/features/healthcheck/config/service.go index 4b57ab3..d6b3194 100644 --- a/backend/internal/features/healthcheck/config/service.go +++ b/backend/internal/features/healthcheck/config/service.go @@ -133,9 +133,20 @@ func (s *HealthcheckConfigService) GetDatabasesWithEnabledHealthcheck() ( func (s *HealthcheckConfigService) initializeDefaultConfig( databaseID uuid.UUID, ) error { + isHealthcheckEnabled := true + + database, err := s.databaseService.GetDatabaseByID(databaseID) + if err != nil { + return err + } + + if database.IsAgentManagedBackup() { + isHealthcheckEnabled = false + } + return s.healthcheckConfigRepository.Save(&HealthcheckConfig{ DatabaseID: databaseID, - IsHealthcheckEnabled: true, + IsHealthcheckEnabled: isHealthcheckEnabled, IsSentNotificationWhenUnavailable: true, IntervalMinutes: 1, AttemptsBeforeConcideredAsDown: 3, diff --git a/frontend/src/entity/backups/index.ts b/frontend/src/entity/backups/index.ts index 96a1e28..f0c17ca 100644 --- a/frontend/src/entity/backups/index.ts +++ b/frontend/src/entity/backups/index.ts @@ -5,6 +5,7 @@ export type { Backup } from './model/Backup'; export type { BackupConfig } from './model/BackupConfig'; export { BackupNotificationType } from './model/BackupNotificationType'; export { BackupEncryption } from './model/BackupEncryption'; +export { PgWalBackupType } from 
'./model/PgWalBackupType'; export { RetentionPolicyType } from './model/RetentionPolicyType'; export type { TransferDatabaseRequest } from './model/TransferDatabaseRequest'; export type { DatabasePlan } from '../plan'; diff --git a/frontend/src/entity/backups/model/Backup.ts b/frontend/src/entity/backups/model/Backup.ts index 7cc98be..616a239 100644 --- a/frontend/src/entity/backups/model/Backup.ts +++ b/frontend/src/entity/backups/model/Backup.ts @@ -2,21 +2,17 @@ import type { Database } from '../../databases/model/Database'; import type { Storage } from '../../storages'; import { BackupEncryption } from './BackupEncryption'; import { BackupStatus } from './BackupStatus'; +import type { PgWalBackupType } from './PgWalBackupType'; export interface Backup { id: string; - database: Database; storage: Storage; - status: BackupStatus; failMessage?: string; - backupSizeMb: number; - backupDurationMs: number; - encryption: BackupEncryption; - + pgWalBackupType?: PgWalBackupType; createdAt: Date; } diff --git a/frontend/src/entity/backups/model/PgWalBackupType.ts b/frontend/src/entity/backups/model/PgWalBackupType.ts new file mode 100644 index 0000000..6cfc4a6 --- /dev/null +++ b/frontend/src/entity/backups/model/PgWalBackupType.ts @@ -0,0 +1,4 @@ +export enum PgWalBackupType { + PG_FULL_BACKUP = 'PG_FULL_BACKUP', + PG_WAL_SEGMENT = 'PG_WAL_SEGMENT', +} diff --git a/frontend/src/entity/databases/api/databaseApi.ts b/frontend/src/entity/databases/api/databaseApi.ts index f574208..4f083f8 100644 --- a/frontend/src/entity/databases/api/databaseApi.ts +++ b/frontend/src/entity/databases/api/databaseApi.ts @@ -118,4 +118,12 @@ export const databaseApi = { requestOptions, ); }, + + async regenerateAgentToken(id: string): Promise<{ token: string }> { + const requestOptions: RequestOptions = new RequestOptions(); + return apiHelper.fetchPostJson<{ token: string }>( + `${getApplicationServer()}/api/v1/databases/${id}/regenerate-token`, + requestOptions, + ); + }, }; diff --git 
a/frontend/src/entity/databases/index.ts b/frontend/src/entity/databases/index.ts index a090cf2..5ce49ae 100644 --- a/frontend/src/entity/databases/index.ts +++ b/frontend/src/entity/databases/index.ts @@ -3,6 +3,7 @@ export { type Database } from './model/Database'; export { DatabaseType } from './model/DatabaseType'; export { getDatabaseLogoFromType } from './model/getDatabaseLogoFromType'; export { Period } from './model/Period'; +export { PostgresBackupType } from './model/postgresql/PostgresBackupType'; export { type PostgresqlDatabase } from './model/postgresql/PostgresqlDatabase'; export { PostgresqlVersion } from './model/postgresql/PostgresqlVersion'; export { type MysqlDatabase } from './model/mysql/MysqlDatabase'; diff --git a/frontend/src/entity/databases/model/Database.ts b/frontend/src/entity/databases/model/Database.ts index b64dfdb..2c63d24 100644 --- a/frontend/src/entity/databases/model/Database.ts +++ b/frontend/src/entity/databases/model/Database.ts @@ -23,4 +23,6 @@ export interface Database { lastBackupErrorMessage?: string; healthStatus?: HealthStatus; + + isAgentTokenGenerated: boolean; } diff --git a/frontend/src/entity/databases/model/postgresql/PostgresBackupType.ts b/frontend/src/entity/databases/model/postgresql/PostgresBackupType.ts new file mode 100644 index 0000000..c5515c1 --- /dev/null +++ b/frontend/src/entity/databases/model/postgresql/PostgresBackupType.ts @@ -0,0 +1,4 @@ +export enum PostgresBackupType { + PG_DUMP = 'PG_DUMP', + WAL_V1 = 'WAL_V1', +} diff --git a/frontend/src/entity/databases/model/postgresql/PostgresqlDatabase.ts b/frontend/src/entity/databases/model/postgresql/PostgresqlDatabase.ts index 990fe5b..d8125b7 100644 --- a/frontend/src/entity/databases/model/postgresql/PostgresqlDatabase.ts +++ b/frontend/src/entity/databases/model/postgresql/PostgresqlDatabase.ts @@ -1,8 +1,10 @@ +import type { PostgresBackupType } from './PostgresBackupType'; import type { PostgresqlVersion } from './PostgresqlVersion'; export 
interface PostgresqlDatabase { id: string; version: PostgresqlVersion; + backupType?: PostgresBackupType; // connection data host: string; diff --git a/frontend/src/features/backups/ui/AgentRestoreComponent.tsx b/frontend/src/features/backups/ui/AgentRestoreComponent.tsx new file mode 100644 index 0000000..d72557d --- /dev/null +++ b/frontend/src/features/backups/ui/AgentRestoreComponent.tsx @@ -0,0 +1,227 @@ +import { CopyOutlined } from '@ant-design/icons'; +import { App, Tooltip } from 'antd'; +import dayjs from 'dayjs'; +import { useState } from 'react'; + +import { getApplicationServer } from '../../../constants'; +import { type Backup, PgWalBackupType } from '../../../entity/backups'; +import { type Database } from '../../../entity/databases'; +import { getUserTimeFormat } from '../../../shared/time'; + +interface Props { + database: Database; + backup: Backup; +} + +type Architecture = 'amd64' | 'arm64'; +type DeploymentType = 'host' | 'docker'; + +export const AgentRestoreComponent = ({ database, backup }: Props) => { + const { message } = App.useApp(); + const [selectedArch, setSelectedArch] = useState('amd64'); + const [deploymentType, setDeploymentType] = useState('host'); + + const databasusHost = getApplicationServer(); + const isDocker = deploymentType === 'docker'; + + const copyToClipboard = async (text: string) => { + try { + await navigator.clipboard.writeText(text); + message.success('Copied to clipboard'); + } catch { + message.error('Failed to copy'); + } + }; + + const renderCodeBlock = (code: string) => ( +
+
+        {code}
+      
+ + + +
+ ); + + const renderTabButton = (label: string, isActive: boolean, onClick: () => void) => ( + + ); + + const isWalSegment = backup.pgWalBackupType === PgWalBackupType.PG_WAL_SEGMENT; + const isFullBackup = backup.pgWalBackupType === PgWalBackupType.PG_FULL_BACKUP; + + const downloadCommand = `curl -L -o databasus-agent "${databasusHost}/api/v1/system/agent?arch=${selectedArch}" && chmod +x databasus-agent`; + + const targetDirPlaceholder = isDocker ? '' : ''; + + const restoreCommand = [ + './databasus-agent restore \\', + ` --databasus-host=${databasusHost} \\`, + ` --db-id=${database.id} \\`, + ` --token= \\`, + ` --backup-id=${backup.id} \\`, + ` --target-dir=${targetDirPlaceholder}`, + ].join('\n'); + + const restoreCommandWithPitr = [ + './databasus-agent restore \\', + ` --databasus-host=${databasusHost} \\`, + ` --db-id=${database.id} \\`, + ` --token= \\`, + ` --backup-id=${backup.id} \\`, + ` --target-dir=${targetDirPlaceholder} \\`, + ` --target-time=`, + ].join('\n'); + + const dockerVolumeExample = `# In your docker run command: +docker run ... -v :/var/lib/postgresql/data ... + +# Or in docker-compose.yml: +volumes: + - :/var/lib/postgresql/data`; + + const formatSize = (sizeMb: number) => { + if (sizeMb >= 1024) { + return `${Number((sizeMb / 1024).toFixed(2)).toLocaleString()} GB`; + } + return `${Number(sizeMb?.toFixed(2)).toLocaleString()} MB`; + }; + + return ( +
+
+
+ Backup: + {isFullBackup && ( + + FULL + + )} + {isWalSegment && ( + + WAL + + )} + + {dayjs.utc(backup.createdAt).local().format(getUserTimeFormat().format)} + + + ({formatSize(backup.backupSizeMb)}) + +
+
+ +
+
+ Architecture +
+
+ {renderTabButton('amd64', selectedArch === 'amd64', () => setSelectedArch('amd64'))} + {renderTabButton('arm64', selectedArch === 'arm64', () => setSelectedArch('arm64'))} +
+
+ +
+
+ PostgreSQL deployment +
+
+ {renderTabButton('Host', deploymentType === 'host', () => setDeploymentType('host'))} + {renderTabButton('Docker', deploymentType === 'docker', () => + setDeploymentType('docker'), + )} +
+
+ +
+
Step 1 — Download the agent
+

+ Download the agent binary on the server where you want to restore. +

+ {renderCodeBlock(downloadCommand)} +
+ +
+
Step 2 — Stop PostgreSQL
+

+ PostgreSQL must be stopped before restoring. The target directory must be empty. +

+ {isDocker + ? renderCodeBlock('docker stop ') + : renderCodeBlock('pg_ctl -D stop')} +
+ + {isDocker && ( +
+
Step 3 — Prepare volume mount
+

+ The agent runs on the host and writes directly to the filesystem.{' '} + {''} must be an empty directory on the host that will be + mounted as the container's pgdata volume. +

+ {renderCodeBlock('mkdir -p ')} +

+ Mount this directory as the PostgreSQL data volume when starting the container: +

+ {renderCodeBlock(dockerVolumeExample)} +
+ )} + +
+
+ Step {isDocker ? '4' : '3'} — Run restore +
+

+ Replace {''} with your agent token and{' '} + {targetDirPlaceholder} with the path to an empty PostgreSQL data directory + {isDocker && ' on the host'}. +

+ {renderCodeBlock(restoreCommand)} + +
+

+ For Point-in-Time Recovery (PITR), add --target-time with + an RFC 3339 timestamp (e.g. {dayjs.utc().format('YYYY-MM-DDTHH:mm:ss[Z]')} + ): +

+ {renderCodeBlock(restoreCommandWithPitr)} +
+
+ +
+
+ Step {isDocker ? '5' : '4'} — Start PostgreSQL +
+

+ Start PostgreSQL to begin WAL recovery. It will automatically replay WAL segments. +

+ {isDocker + ? renderCodeBlock('docker start ') + : renderCodeBlock('pg_ctl -D start')} +
+ +
+
Step {isDocker ? '6' : '5'} — Clean up
+

+ After recovery completes, remove the WAL restore directory: +

+ {renderCodeBlock(`rm -rf ${targetDirPlaceholder}/databasus-wal-restore/`)} +
+
+ ); +}; diff --git a/frontend/src/features/backups/ui/BackupsComponent.tsx b/frontend/src/features/backups/ui/BackupsComponent.tsx index d8701ca..d9acb08 100644 --- a/frontend/src/features/backups/ui/BackupsComponent.tsx +++ b/frontend/src/features/backups/ui/BackupsComponent.tsx @@ -19,23 +19,31 @@ import { type BackupConfig, BackupEncryption, BackupStatus, + PgWalBackupType, backupConfigApi, backupsApi, } from '../../../entity/backups'; -import { type Database, DatabaseType } from '../../../entity/databases'; +import { type Database, DatabaseType, PostgresBackupType } from '../../../entity/databases'; import { getUserTimeFormat } from '../../../shared/time'; import { ConfirmationComponent } from '../../../shared/ui'; import { RestoresComponent } from '../../restores'; +import { AgentRestoreComponent } from './AgentRestoreComponent'; const BACKUPS_PAGE_SIZE = 50; interface Props { database: Database; isCanManageDBs: boolean; + isDirectlyUnderTab?: boolean; scrollContainerRef?: React.RefObject; } -export const BackupsComponent = ({ database, isCanManageDBs, scrollContainerRef }: Props) => { +export const BackupsComponent = ({ + database, + isCanManageDBs, + isDirectlyUnderTab, + scrollContainerRef, +}: Props) => { const [isBackupsLoading, setIsBackupsLoading] = useState(false); const [backups, setBackups] = useState([]); @@ -457,7 +465,21 @@ export const BackupsComponent = ({ database, isCanManageDBs, scrollContainerRef dataIndex: 'backupSizeMb', key: 'backupSizeMb', width: 150, - render: (sizeMb: number) => formatSize(sizeMb), + render: (sizeMb: number, record: Backup) => ( +
+ {formatSize(sizeMb)} + {record.pgWalBackupType === PgWalBackupType.PG_FULL_BACKUP && ( + + FULL + + )} + {record.pgWalBackupType === PgWalBackupType.PG_WAL_SEGMENT && ( + + WAL + + )} +
+ ), }, { title: 'Duration', @@ -483,7 +505,9 @@ export const BackupsComponent = ({ database, isCanManageDBs, scrollContainerRef } return ( -
+

Backups

{!isBackupConfigLoading && !backupConfig?.isBackupsEnabled && ( @@ -494,18 +518,20 @@ export const BackupsComponent = ({ database, isCanManageDBs, scrollContainerRef
-
- -
+ {database.postgresql?.backupType !== PostgresBackupType.WAL_V1 && ( +
+ +
+ )}
{/* Mobile card view */} @@ -538,7 +564,19 @@ export const BackupsComponent = ({ database, isCanManageDBs, scrollContainerRef
Size
-
{formatSize(backup.backupSizeMb)}
+
+ {formatSize(backup.backupSizeMb)} + {backup.pgWalBackupType === PgWalBackupType.PG_FULL_BACKUP && ( + + FULL + + )} + {backup.pgWalBackupType === PgWalBackupType.PG_WAL_SEGMENT && ( + + WAL + + )} +
Duration
@@ -606,21 +644,36 @@ export const BackupsComponent = ({ database, isCanManageDBs, scrollContainerRef /> )} - {showingRestoresBackupId && ( - setShowingRestoresBackupId(undefined)} - title="Restore from backup" - footer={null} - maskClosable={false} - > - b.id === showingRestoresBackupId) as Backup} - /> - - )} + {showingRestoresBackupId && + (database.postgresql?.backupType === PostgresBackupType.WAL_V1 ? ( + setShowingRestoresBackupId(undefined)} + title="Restore from backup" + footer={null} + maskClosable={false} + > + b.id === showingRestoresBackupId) as Backup} + /> + + ) : ( + setShowingRestoresBackupId(undefined)} + title="Restore from backup" + footer={null} + maskClosable={false} + > + b.id === showingRestoresBackupId) as Backup} + /> + + ))} {showingBackupError && ( void; +} + +export const AgentInstallationComponent = ({ database, onTokenGenerated }: Props) => { + const { message } = App.useApp(); + + const [selectedArch, setSelectedArch] = useState('amd64'); + const [pgDeploymentType, setPgDeploymentType] = useState('system'); + const [isGenerating, setIsGenerating] = useState(false); + const [generatedToken, setGeneratedToken] = useState(null); + + const databasusHost = getApplicationServer(); + + const handleGenerateToken = async () => { + setIsGenerating(true); + try { + const result = await databaseApi.regenerateAgentToken(database.id); + setGeneratedToken(result.token); + } catch { + message.error('Failed to generate token'); + } finally { + setIsGenerating(false); + } + }; + + const handleTokenModalClose = () => { + setGeneratedToken(null); + onTokenGenerated(); + }; + + const copyToClipboard = async (text: string) => { + try { + await navigator.clipboard.writeText(text); + message.success('Copied to clipboard'); + } catch { + message.error('Failed to copy'); + } + }; + + const renderCodeBlock = (code: string) => ( +
+
+        {code}
+      
+ + + +
+ ); + + const renderTabButton = (label: string, isActive: boolean, onClick: () => void) => ( + + ); + + const downloadCommand = `curl -L -o databasus-agent "${databasusHost}/api/v1/system/agent?arch=${selectedArch}" && chmod +x databasus-agent`; + + const walQueuePath = pgDeploymentType === 'docker' ? '/wal-queue' : '/opt/databasus/wal-queue'; + + const postgresqlConfSettings = `wal_level = replica +archive_mode = on +archive_command = 'cp %p ${walQueuePath}/%f.tmp && mv ${walQueuePath}/%f.tmp ${walQueuePath}/%f'`; + + const pgHbaEntry = `host replication all 127.0.0.1/32 md5`; + + const grantReplicationSql = `ALTER ROLE WITH REPLICATION;`; + + const createWalDirCommand = `mkdir -p /opt/databasus/wal-queue`; + + const walDirPermissionsCommand = `chown postgres:postgres /opt/databasus/wal-queue +chmod 755 /opt/databasus/wal-queue`; + + const dockerWalDirPermissionsCommand = `# Inside the container (or via docker exec): +chown postgres:postgres /wal-queue`; + + const dockerVolumeExample = `# In your docker run command: +docker run ... -v /opt/databasus/wal-queue:/wal-queue ... 
+ +# Or in docker-compose.yml: +volumes: + - /opt/databasus/wal-queue:/wal-queue`; + + const buildStartCommand = () => { + const baseFlags = [ + ` --databasus-host=${databasusHost}`, + ` --db-id=${database.id}`, + ` --token=`, + ` --pg-host=localhost`, + ` --pg-port=5432`, + ` --pg-user=`, + ` --pg-password=`, + ]; + + const baseFlagsWithContinuation = baseFlags.map((f) => f + ' \\'); + + if (pgDeploymentType === 'system') { + return [ + './databasus-agent start \\', + ...baseFlagsWithContinuation, + ` --pg-type=host \\`, + ` --pg-wal-dir=/opt/databasus/wal-queue`, + ].join('\n'); + } + + if (pgDeploymentType === 'folder') { + return [ + './databasus-agent start \\', + ...baseFlagsWithContinuation, + ` --pg-type=host \\`, + ` --pg-host-bin-dir= \\`, + ` --pg-wal-dir=/opt/databasus/wal-queue`, + ].join('\n'); + } + + return [ + './databasus-agent start \\', + ...baseFlagsWithContinuation, + ` --pg-type=docker \\`, + ` --pg-docker-container-name= \\`, + ` --pg-wal-dir=/opt/databasus/wal-queue`, + ].join('\n'); + }; + + return ( +
+

Agent installation

+ +
+ Database ID: + + {database.id} + + + + +
+ +

+ WAL backup mode requires the Databasus agent to be installed on the server where PostgreSQL + runs. Follow the steps below to set it up. +

+ +

+ Requires PostgreSQL 15 or newer. +

+ +
+
+ Architecture +
+
+ {renderTabButton('amd64', selectedArch === 'amd64', () => setSelectedArch('amd64'))} + {renderTabButton('arm64', selectedArch === 'arm64', () => setSelectedArch('arm64'))} +
+
+ +
+
+ PostgreSQL installation type +
+
+ {renderTabButton('System-wide', pgDeploymentType === 'system', () => + setPgDeploymentType('system'), + )} + {renderTabButton('Specific folder', pgDeploymentType === 'folder', () => + setPgDeploymentType('folder'), + )} + {renderTabButton('Docker', pgDeploymentType === 'docker', () => + setPgDeploymentType('docker'), + )} +
+
+ {pgDeploymentType === 'system' && + 'pg_basebackup is available in the system PATH (default PostgreSQL install)'} + {pgDeploymentType === 'folder' && + 'pg_basebackup is in a specific directory (e.g. /usr/lib/postgresql/17/bin)'} + {pgDeploymentType === 'docker' && 'PostgreSQL runs inside a Docker container'} +
+
+ +
+
Agent token
+ {database.isAgentTokenGenerated ? ( +
+

+ A token has already been generated. Regenerating will invalidate the existing one. +

+ +
+ ) : ( +
+

+ Generate a token the agent will use to authenticate with Databasus. +

+ +
+ )} +
+ + + I've saved the token + + } + > + {renderCodeBlock(generatedToken ?? '')} +

+ This token will only be shown once. Store it securely — you won't be able to retrieve + it again. +

+
+ +
+
+
Step 1 — Download the agent
+ {renderCodeBlock(downloadCommand)} +
+ +
+
Step 2 — Configure postgresql.conf
+

+ Add or update these settings in your postgresql.conf, then{' '} + restart PostgreSQL. +

+ {pgDeploymentType === 'docker' && ( +

+ The archive_command path (/wal-queue) is the path{' '} + inside the container. It must match the volume mount target — see + Step 5. +

+ )} + {renderCodeBlock(postgresqlConfSettings)} +
+ +
+
Step 3 — Configure pg_hba.conf
+

+ Add this line to pg_hba.conf. This is required for{' '} + pg_basebackup to take full backups — not for streaming replication. Adjust + the address and auth method as needed, then reload PostgreSQL. +

+ {renderCodeBlock(pgHbaEntry)} +
+ +
+
Step 4 — Grant replication privilege
+

+ This is a PostgreSQL requirement for running pg_basebackup — it does not + set up a replica. +

+ {renderCodeBlock(grantReplicationSql)} +
+ +
+
+ Step 5 —{' '} + {pgDeploymentType === 'docker' + ? 'Set up WAL queue volume' + : 'Create WAL queue directory'} +
+ {pgDeploymentType === 'docker' ? ( + <> +

+ The WAL queue directory must be a volume mount shared between the + PostgreSQL container and the host. The agent reads WAL files from the host path, + while PostgreSQL writes to the container path via archive_command. +

+ {renderCodeBlock(createWalDirCommand)} +

+ Then mount it as a volume so both the container and the agent can access it: +

+ {renderCodeBlock(dockerVolumeExample)} +

+ Ensure the directory inside the container is owned by the postgres{' '} + user: +

+ {renderCodeBlock(dockerWalDirPermissionsCommand)} + + ) : ( + <> +

+ PostgreSQL will place WAL archive files here for the agent to upload. +

+ {renderCodeBlock(createWalDirCommand)} +

+ Ensure the directory is writable by PostgreSQL and readable by the agent: +

+ {renderCodeBlock(walDirPermissionsCommand)} + + )} +
+ +
+
Step 6 — Start the agent
+

+ Replace placeholders in {''} with your actual values. +

+ {pgDeploymentType === 'docker' && ( +

+ Use the PostgreSQL port inside the container (usually 5432), not the + host-mapped port. +

+ )} + {renderCodeBlock(buildStartCommand())} +
+ +
+
After installation
+
    +
  • + The agent runs in the background after start +
  • +
  • + Check status: ./databasus-agent status +
  • +
  • + View logs: databasus.log in the working directory +
  • +
  • + Stop the agent: ./databasus-agent stop +
  • +
+
+
+
+ ); +}; diff --git a/frontend/src/features/databases/ui/CreateDatabaseComponent.tsx b/frontend/src/features/databases/ui/CreateDatabaseComponent.tsx index 7fa70fc..6f4b5ff 100644 --- a/frontend/src/features/databases/ui/CreateDatabaseComponent.tsx +++ b/frontend/src/features/databases/ui/CreateDatabaseComponent.tsx @@ -8,6 +8,7 @@ import { type MongodbDatabase, type MysqlDatabase, Period, + PostgresBackupType, type PostgresqlDatabase, databaseApi, } from '../../../entity/databases'; @@ -38,6 +39,8 @@ const createInitialDatabase = (workspaceId: string): Database => notifiers: [], sendNotificationsOn: [], + + isAgentTokenGenerated: false, }) as Database; const initializeDatabaseTypeData = (db: Database): Database => { @@ -51,7 +54,15 @@ const initializeDatabaseTypeData = (db: Database): Database => { switch (db.type) { case DatabaseType.POSTGRES: - return { ...base, postgresql: db.postgresql ?? ({ cpuCount: 1 } as PostgresqlDatabase) }; + return { + ...base, + postgresql: + db.postgresql ?? + ({ + cpuCount: 1, + backupType: PostgresBackupType.PG_DUMP, + } as PostgresqlDatabase), + }; case DatabaseType.MYSQL: return { ...base, mysql: db.mysql ?? ({} as MysqlDatabase) }; case DatabaseType.MARIADB: @@ -81,7 +92,11 @@ export const CreateDatabaseComponent = ({ user, workspaceId, onCreated, onClose backupConfig.databaseId = createdDatabase.id; await backupConfigApi.saveBackupConfig(backupConfig); - if (backupConfig.isBackupsEnabled) { + + if ( + backupConfig.isBackupsEnabled && + createdDatabase.postgresql?.backupType !== PostgresBackupType.WAL_V1 + ) { await backupsApi.makeBackup(createdDatabase.id); } @@ -126,7 +141,12 @@ export const CreateDatabaseComponent = ({ user, workspaceId, onCreated, onClose isSaveToApi={false} onSaved={(database) => { setDatabase({ ...database }); - setStep('create-readonly-user'); + + const isWalBackup = + database.type === DatabaseType.POSTGRES && + database.postgresql?.backupType === PostgresBackupType.WAL_V1; + + setStep(isWalBackup ? 
'backup-config' : 'create-readonly-user'); }} /> ); diff --git a/frontend/src/features/databases/ui/DatabaseComponent.tsx b/frontend/src/features/databases/ui/DatabaseComponent.tsx index 823cbd9..3382b23 100644 --- a/frontend/src/features/databases/ui/DatabaseComponent.tsx +++ b/frontend/src/features/databases/ui/DatabaseComponent.tsx @@ -2,10 +2,12 @@ import { Spin } from 'antd'; import { useRef, useState } from 'react'; import { useEffect } from 'react'; -import { type Database, databaseApi } from '../../../entity/databases'; +import { backupsApi } from '../../../entity/backups'; +import { type Database, PostgresBackupType, databaseApi } from '../../../entity/databases'; import type { UserProfile } from '../../../entity/users'; import { BackupsComponent } from '../../backups'; import { HealthckeckAttemptsComponent } from '../../healthcheck'; +import { AgentInstallationComponent } from './AgentInstallationComponent'; import { DatabaseConfigComponent } from './DatabaseConfigComponent'; interface Props { @@ -25,13 +27,21 @@ export const DatabaseComponent = ({ onDatabaseDeleted, isCanManageDBs, }: Props) => { - const [currentTab, setCurrentTab] = useState<'config' | 'backups' | 'metrics'>('backups'); + const [currentTab, setCurrentTab] = useState<'config' | 'backups' | 'installation'>('backups'); const [database, setDatabase] = useState(); const [editDatabase, setEditDatabase] = useState(); const scrollContainerRef = useRef(null); + const [isHealthcheckVisible, setIsHealthcheckVisible] = useState(false); + + const handleHealthcheckVisibilityChange = (isVisible: boolean) => { + setIsHealthcheckVisible(isVisible); + }; + + const isWalDatabase = database?.postgresql?.backupType === PostgresBackupType.WAL_V1; + const loadSettings = () => { setDatabase(undefined); setEditDatabase(undefined); @@ -42,6 +52,21 @@ export const DatabaseComponent = ({ loadSettings(); }, [databaseId]); + useEffect(() => { + if (!database) return; + + if (!isWalDatabase) { + setCurrentTab((prev) 
=> (prev === 'installation' ? 'backups' : prev)); + return; + } + + backupsApi.getBackups(database.id, 1, 0).then((response) => { + if (response.total === 0) { + setCurrentTab('installation'); + } + }); + }, [database]); + if (!database) { return ; } @@ -66,6 +91,15 @@ export const DatabaseComponent = ({ > Backups
+ + {isWalDatabase && ( +
setCurrentTab('installation')} + > + Agent +
+ )}
{currentTab === 'config' && ( @@ -83,14 +117,22 @@ export const DatabaseComponent = ({ {currentTab === 'backups' && ( <> - + )} + + {currentTab === 'installation' && isWalDatabase && ( + + )}
); }; diff --git a/frontend/src/features/databases/ui/edit/EditDatabaseSpecificDataComponent.tsx b/frontend/src/features/databases/ui/edit/EditDatabaseSpecificDataComponent.tsx index 6b12b1d..f775241 100644 --- a/frontend/src/features/databases/ui/edit/EditDatabaseSpecificDataComponent.tsx +++ b/frontend/src/features/databases/ui/edit/EditDatabaseSpecificDataComponent.tsx @@ -1,7 +1,12 @@ import { Modal } from 'antd'; import { useState } from 'react'; -import { type Database, DatabaseType, databaseApi } from '../../../../entity/databases'; +import { + type Database, + DatabaseType, + PostgresBackupType, + databaseApi, +} from '../../../../entity/databases'; import { CreateReadOnlyComponent } from './CreateReadOnlyComponent'; import { EditMariaDbSpecificDataComponent } from './EditMariaDbSpecificDataComponent'; import { EditMongoDbSpecificDataComponent } from './EditMongoDbSpecificDataComponent'; @@ -51,6 +56,15 @@ export const EditDatabaseSpecificDataComponent = ({ return; } + const isWalBackup = + databaseToSave.type === DatabaseType.POSTGRES && + databaseToSave.postgresql?.backupType === PostgresBackupType.WAL_V1; + + if (isWalBackup) { + onSaved(databaseToSave); + return; + } + try { const result = await databaseApi.isUserReadOnly(databaseToSave); diff --git a/frontend/src/features/databases/ui/edit/EditPostgreSqlSpecificDataComponent.tsx b/frontend/src/features/databases/ui/edit/EditPostgreSqlSpecificDataComponent.tsx index 9382676..ff623fc 100644 --- a/frontend/src/features/databases/ui/edit/EditPostgreSqlSpecificDataComponent.tsx +++ b/frontend/src/features/databases/ui/edit/EditPostgreSqlSpecificDataComponent.tsx @@ -3,7 +3,7 @@ import { App, Button, Checkbox, Input, InputNumber, Select, Switch, Tooltip } fr import { useEffect, useState } from 'react'; import { IS_CLOUD } from '../../../../constants'; -import { type Database, databaseApi } from '../../../../entity/databases'; +import { type Database, PostgresBackupType, databaseApi } from 
'../../../../entity/databases'; import { ConnectionStringParser } from '../../../../entity/databases/model/postgresql/ConnectionStringParser'; import { ToastHelper } from '../../../../shared/toast'; @@ -185,341 +185,424 @@ export const EditPostgreSqlSpecificDataComponent = ({ if (!editingDatabase) return null; - let isAllFieldsFilled = true; - if (!editingDatabase.postgresql?.host) isAllFieldsFilled = false; - if (!editingDatabase.postgresql?.port) isAllFieldsFilled = false; - if (!editingDatabase.postgresql?.username) isAllFieldsFilled = false; - if (!editingDatabase.id && !editingDatabase.postgresql?.password) isAllFieldsFilled = false; - if (!editingDatabase.postgresql?.database) isAllFieldsFilled = false; + const backupType = editingDatabase.postgresql?.backupType; - const isLocalhostDb = - editingDatabase.postgresql?.host?.includes('localhost') || - editingDatabase.postgresql?.host?.includes('127.0.0.1'); + const renderBackupTypeSelector = () => { + if (editingDatabase.id || IS_CLOUD) return null; - const isSupabaseDb = - editingDatabase.postgresql?.host?.includes('supabase') || - editingDatabase.postgresql?.username?.includes('supabase'); - - return ( -
-
-
-
- - Parse from clipboard -
-
- -
-
Host
- { - if (!editingDatabase.postgresql) return; - - const updatedDatabase = { - ...editingDatabase, - postgresql: { - ...editingDatabase.postgresql, - host: e.target.value.trim().replace('https://', '').replace('http://', ''), - }, - }; - setEditingDatabase(autoAddPublicSchemaForSupabase(updatedDatabase)); - setIsConnectionTested(false); - }} - size="small" - className="max-w-[200px] grow" - placeholder="Enter PG host" - /> -
- - {isLocalhostDb && !IS_CLOUD && ( -
-
-
- Please{' '} - - read this document - {' '} - to study how to backup local database -
-
- )} - - {isSupabaseDb && ( -
-
-
- Please{' '} - - read this document - {' '} - to study how to backup Supabase database -
-
- )} - -
-
Port
- { - if (!editingDatabase.postgresql || e === null) return; - - setEditingDatabase({ - ...editingDatabase, - postgresql: { ...editingDatabase.postgresql, port: e }, - }); - setIsConnectionTested(false); - }} - size="small" - className="max-w-[200px] grow" - placeholder="Enter PG port" - /> -
- -
-
Username
- { - if (!editingDatabase.postgresql) return; - - const updatedDatabase = { - ...editingDatabase, - postgresql: { ...editingDatabase.postgresql, username: e.target.value.trim() }, - }; - setEditingDatabase(autoAddPublicSchemaForSupabase(updatedDatabase)); - setIsConnectionTested(false); - }} - size="small" - className="max-w-[200px] grow" - placeholder="Enter PG username" - /> -
- -
-
Password
- { + return ( +
+
Backup type
+ { if (!editingDatabase.postgresql) return; + const updatedDatabase = { + ...editingDatabase, + postgresql: { + ...editingDatabase.postgresql, + host: e.target.value.trim().replace('https://', '').replace('http://', ''), + }, + }; + setEditingDatabase(autoAddPublicSchemaForSupabase(updatedDatabase)); + setIsConnectionTested(false); + }} + size="small" + className="max-w-[200px] grow" + placeholder="Enter PG host" + /> +
+ + {isLocalhostDb && !IS_CLOUD && ( +
+
+
+ Please{' '} + + read this document + {' '} + to study how to backup local database +
+
+ )} + + {isSupabaseDb && ( +
+
+
+ Please{' '} + + read this document + {' '} + to study how to backup Supabase database +
+
+ )} + +
+
Port
+ { + if (!editingDatabase.postgresql || e === null) return; + setEditingDatabase({ ...editingDatabase, - postgresql: { ...editingDatabase.postgresql, database: e.target.value.trim() }, + postgresql: { ...editingDatabase.postgresql, port: e }, }); setIsConnectionTested(false); }} size="small" className="max-w-[200px] grow" - placeholder="Enter PG database name" + placeholder="Enter PG port" />
- )} -
-
Use HTTPS
- { - if (!editingDatabase.postgresql) return; +
+
Username
+ { + if (!editingDatabase.postgresql) return; - setEditingDatabase({ - ...editingDatabase, - postgresql: { ...editingDatabase.postgresql, isHttps: checked }, - }); - setIsConnectionTested(false); - }} - size="small" - /> -
+ const updatedDatabase = { + ...editingDatabase, + postgresql: { ...editingDatabase.postgresql, username: e.target.value.trim() }, + }; + setEditingDatabase(autoAddPublicSchemaForSupabase(updatedDatabase)); + setIsConnectionTested(false); + }} + size="small" + className="max-w-[200px] grow" + placeholder="Enter PG username" + /> +
- {isRestoreMode && !IS_CLOUD && ( -
-
CPU count
-
- { +
+
Password
+ { + if (!editingDatabase.postgresql) return; + + setEditingDatabase({ + ...editingDatabase, + postgresql: { ...editingDatabase.postgresql, password: e.target.value }, + }); + setIsConnectionTested(false); + }} + size="small" + className="max-w-[200px] grow" + placeholder="Enter PG password" + autoComplete="off" + data-1p-ignore + data-lpignore="true" + data-form-type="other" + /> +
+ + {isShowDbName && ( +
+
DB name
+ { if (!editingDatabase.postgresql) return; setEditingDatabase({ ...editingDatabase, - postgresql: { ...editingDatabase.postgresql, cpuCount: value || 1 }, + postgresql: { ...editingDatabase.postgresql, database: e.target.value.trim() }, }); setIsConnectionTested(false); }} size="small" - className="max-w-[75px] grow" + className="max-w-[200px] grow" + placeholder="Enter PG database name" /> - - - -
+ )} + +
+
Use HTTPS
+ { + if (!editingDatabase.postgresql) return; + + setEditingDatabase({ + ...editingDatabase, + postgresql: { ...editingDatabase.postgresql, isHttps: checked }, + }); + setIsConnectionTested(false); + }} + size="small" + />
- )} -
-
setShowAdvanced(!isShowAdvanced)} - > - Advanced settings - - {isShowAdvanced ? ( - - ) : ( - - )} -
-
- - {isShowAdvanced && ( - <> - {!isRestoreMode && ( -
-
Include schemas
- { if (!editingDatabase.postgresql) return; setEditingDatabase({ ...editingDatabase, - postgresql: { - ...editingDatabase.postgresql, - isExcludeExtensions: e.target.checked, - }, + postgresql: { ...editingDatabase.postgresql, includeSchemas: values }, }); }} - > - Skip extensions - - - - - + size="small" + className="max-w-[200px] grow" + placeholder="All schemas (default)" + tokenSeparators={[',']} + />
-
- )} - - )} + )} -
- {isShowCancelButton && ( - + {isRestoreMode && ( +
+
Exclude extensions
+
+ { + if (!editingDatabase.postgresql) return; + + setEditingDatabase({ + ...editingDatabase, + postgresql: { + ...editingDatabase.postgresql, + isExcludeExtensions: e.target.checked, + }, + }); + }} + > + Skip extensions + + + + + +
+
+ )} + )} - {isShowBackButton && ( - + {renderFooter( + <> + {!isConnectionTested && ( + + )} + + {isConnectionTested && ( + + )} + , )} - {!isConnectionTested && ( - + {isConnectionFailed && !IS_CLOUD && ( +
+ If your database uses IP whitelist, make sure Databasus server IP is added to the + allowed list. +
)} + + ); + }; - {isConnectionTested && ( - - )} -
- - {isConnectionFailed && !IS_CLOUD && ( -
- If your database uses IP whitelist, make sure Databasus server IP is added to the allowed - list. + const renderWalForm = () => { + return ( + <> +
+
+ Agent mode uses physical and WAL-based incremental backups. Best suited for DBs without + public access, for large databases (100 GB+) or when PITR is required +
+
+ Configuration is more complicated than remote backup and requires installing a Databasus + agent near DB +
- )} + + {renderFooter( + , + )} + + ); + }; + + const renderFormContent = () => { + switch (backupType) { + case PostgresBackupType.WAL_V1: + return renderWalForm(); + default: + return renderPgDumpForm(); + } + }; + + return ( +
+ {renderBackupTypeSelector()} + {renderFormContent()}
); }; diff --git a/frontend/src/features/databases/ui/show/ShowPostgreSqlSpecificDataComponent.tsx b/frontend/src/features/databases/ui/show/ShowPostgreSqlSpecificDataComponent.tsx index 8ea247d..0af00d6 100644 --- a/frontend/src/features/databases/ui/show/ShowPostgreSqlSpecificDataComponent.tsx +++ b/frontend/src/features/databases/ui/show/ShowPostgreSqlSpecificDataComponent.tsx @@ -1,4 +1,4 @@ -import { type Database, PostgresqlVersion } from '../../../../entity/databases'; +import { type Database, PostgresBackupType, PostgresqlVersion } from '../../../../entity/databases'; interface Props { database: Database; @@ -14,9 +14,19 @@ const postgresqlVersionLabels = { [PostgresqlVersion.PostgresqlVersion18]: '18', }; +const backupTypeLabels: Record = { + [PostgresBackupType.PG_DUMP]: 'Remote (logical)', + [PostgresBackupType.WAL_V1]: 'Agent (physical)', +}; + export const ShowPostgreSqlSpecificDataComponent = ({ database }: Props) => { - return ( -
+ const backupType = database.postgresql?.backupType; + const backupTypeLabel = backupType + ? (backupTypeLabels[backupType] ?? backupType) + : 'Remote (pg_dump)'; + + const renderPgDumpDetails = () => ( + <>
PG version
@@ -60,6 +70,37 @@ export const ShowPostgreSqlSpecificDataComponent = ({ database }: Props) => {
{database.postgresql.includeSchemas.join(', ')}
)} + + ); + + const renderWalDetails = () => ( + <> + {database.postgresql?.version && ( +
+
PG version
+
{postgresqlVersionLabels[database.postgresql.version]}
+
+ )} + + ); + + const renderDetails = () => { + switch (backupType) { + case PostgresBackupType.WAL_V1: + return renderWalDetails(); + default: + return renderPgDumpDetails(); + } + }; + + return ( +
+
+
Backup type
+
{backupTypeLabel}
+
+ + {renderDetails()}
); }; diff --git a/frontend/src/features/healthcheck/ui/HealthckeckAttemptsComponent.tsx b/frontend/src/features/healthcheck/ui/HealthckeckAttemptsComponent.tsx index 6645298..0857ebf 100644 --- a/frontend/src/features/healthcheck/ui/HealthckeckAttemptsComponent.tsx +++ b/frontend/src/features/healthcheck/ui/HealthckeckAttemptsComponent.tsx @@ -13,6 +13,7 @@ import { getUserShortTimeFormat } from '../../../shared/time/getUserTimeFormat'; interface Props { database: Database; + onVisibilityChange?: (isVisible: boolean) => void; } let lastLoadTime = 0; @@ -39,7 +40,7 @@ const getAfterDateByPeriod = (period: 'today' | '7d' | '30d' | 'all'): Date => { return afterDate; }; -export const HealthckeckAttemptsComponent = ({ database }: Props) => { +export const HealthckeckAttemptsComponent = ({ database, onVisibilityChange }: Props) => { const [isHealthcheckConfigLoading, setIsHealthcheckConfigLoading] = useState(false); const [isShowHealthcheckConfig, setIsShowHealthcheckConfig] = useState(false); @@ -87,8 +88,13 @@ export const HealthckeckAttemptsComponent = ({ database }: Props) => { setIsHealthcheckConfigLoading(false); + if (!healthcheckConfig.isHealthcheckEnabled) { + onVisibilityChange?.(false); + } + if (healthcheckConfig.isHealthcheckEnabled) { setIsShowHealthcheckConfig(true); + onVisibilityChange?.(true); loadHealthcheckAttempts(); // Set up interval only if healthcheck @@ -118,11 +124,11 @@ export const HealthckeckAttemptsComponent = ({ database }: Props) => { } if (!isShowHealthcheckConfig) { - return
; + return null; } return ( -
+

Healthcheck attempts