From b0f190595c16cea9b3e1ce5c8f471f9dd30104e1 Mon Sep 17 00:00:00 2001 From: wayneshn Date: Sat, 28 Mar 2026 13:56:43 +0100 Subject: [PATCH] feat: add preserve-original-file mode for email ingestion for GoBD compliance - Add `preserveOriginalFile` option to ingestion sources and connectors - Stream original EML/MBOX/PST emails to temp files instead of holding full buffers in memory, reducing memory allocation during ingestion - Skip attachment binary extraction and EML re-serialization when preserve mode is enabled; use raw file on disk as source of truth - Update `EmailObject` to use `tempFilePath` instead of in-memory `eml` buffer across all connectors (EML, MBOX, PST) - Add new database migration (0031) for `preserve_original_file` column - Add frontend UI toggle with tooltip (tippy.js) for the new option - Replace console.warn calls with structured pino logger in connectors --- .../migrations/0031_bouncy_boomerang.sql | 1 + .../migrations/meta/0031_snapshot.json | 1840 +++++++++++++++++ .../database/migrations/meta/_journal.json | 451 ++-- .../src/database/schema/ingestion-sources.ts | 3 +- .../src/services/ArchivedEmailService.ts | 9 +- .../src/services/EmailProviderFactory.ts | 30 +- .../backend/src/services/IndexingService.ts | 62 +- .../backend/src/services/IngestionService.ts | 79 +- .../ingestion-connectors/EMLConnector.ts | 19 +- .../GoogleWorkspaceConnector.ts | 155 +- .../ingestion-connectors/ImapConnector.ts | 23 +- .../ingestion-connectors/MboxConnector.ts | 44 +- .../MicrosoftConnector.ts | 17 +- .../ingestion-connectors/PSTConnector.ts | 25 +- .../ingestion-connectors/helpers/tempFile.ts | 15 + packages/frontend/package.json | 3 +- .../custom/IngestionSourceForm.svelte | 28 +- .../frontend/src/lib/translations/en.json | 10 +- .../archived-emails/[id]/+page.svelte | 126 ++ packages/types/src/email.types.ts | 6 +- packages/types/src/ingestion.types.ts | 5 + pnpm-lock.yaml | 15 + 22 files changed, 2615 insertions(+), 351 deletions(-) create mode 
100644 packages/backend/src/database/migrations/0031_bouncy_boomerang.sql create mode 100644 packages/backend/src/database/migrations/meta/0031_snapshot.json create mode 100644 packages/backend/src/services/ingestion-connectors/helpers/tempFile.ts diff --git a/packages/backend/src/database/migrations/0031_bouncy_boomerang.sql b/packages/backend/src/database/migrations/0031_bouncy_boomerang.sql new file mode 100644 index 0000000..3bd74e3 --- /dev/null +++ b/packages/backend/src/database/migrations/0031_bouncy_boomerang.sql @@ -0,0 +1 @@ +ALTER TABLE "ingestion_sources" ADD COLUMN "preserve_original_file" boolean DEFAULT false NOT NULL; \ No newline at end of file diff --git a/packages/backend/src/database/migrations/meta/0031_snapshot.json b/packages/backend/src/database/migrations/meta/0031_snapshot.json new file mode 100644 index 0000000..bd614fd --- /dev/null +++ b/packages/backend/src/database/migrations/meta/0031_snapshot.json @@ -0,0 +1,1840 @@ +{ + "id": "a1e1d446-db1b-4316-961b-82dcd0e1423d", + "prevId": "a9094976-87e1-4a52-b5a5-ddec968bbecd", + "version": "7", + "dialect": "postgresql", + "tables": { + "public.archived_emails": { + "name": "archived_emails", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "uuid", + "primaryKey": true, + "notNull": true, + "default": "gen_random_uuid()" + }, + "thread_id": { + "name": "thread_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "ingestion_source_id": { + "name": "ingestion_source_id", + "type": "uuid", + "primaryKey": false, + "notNull": true + }, + "user_email": { + "name": "user_email", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "message_id_header": { + "name": "message_id_header", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "provider_message_id": { + "name": "provider_message_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "sent_at": { + "name": "sent_at", + "type": "timestamp with time zone", + 
"primaryKey": false, + "notNull": true + }, + "subject": { + "name": "subject", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "sender_name": { + "name": "sender_name", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "sender_email": { + "name": "sender_email", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "recipients": { + "name": "recipients", + "type": "jsonb", + "primaryKey": false, + "notNull": false + }, + "storage_path": { + "name": "storage_path", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "storage_hash_sha256": { + "name": "storage_hash_sha256", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "size_bytes": { + "name": "size_bytes", + "type": "bigint", + "primaryKey": false, + "notNull": true + }, + "is_indexed": { + "name": "is_indexed", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + }, + "has_attachments": { + "name": "has_attachments", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + }, + "is_on_legal_hold": { + "name": "is_on_legal_hold", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + }, + "archived_at": { + "name": "archived_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "path": { + "name": "path", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "tags": { + "name": "tags", + "type": "jsonb", + "primaryKey": false, + "notNull": false + } + }, + "indexes": { + "thread_id_idx": { + "name": "thread_id_idx", + "columns": [ + { + "expression": "thread_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "provider_msg_source_idx": { + "name": "provider_msg_source_idx", + "columns": [ + { + "expression": "provider_message_id", + "isExpression": false, + "asc": true, + 
"nulls": "last" + }, + { + "expression": "ingestion_source_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "archived_emails_ingestion_source_id_ingestion_sources_id_fk": { + "name": "archived_emails_ingestion_source_id_ingestion_sources_id_fk", + "tableFrom": "archived_emails", + "tableTo": "ingestion_sources", + "columnsFrom": [ + "ingestion_source_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.attachments": { + "name": "attachments", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "uuid", + "primaryKey": true, + "notNull": true, + "default": "gen_random_uuid()" + }, + "filename": { + "name": "filename", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "mime_type": { + "name": "mime_type", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "size_bytes": { + "name": "size_bytes", + "type": "bigint", + "primaryKey": false, + "notNull": true + }, + "content_hash_sha256": { + "name": "content_hash_sha256", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "storage_path": { + "name": "storage_path", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "ingestion_source_id": { + "name": "ingestion_source_id", + "type": "uuid", + "primaryKey": false, + "notNull": false + } + }, + "indexes": { + "source_hash_idx": { + "name": "source_hash_idx", + "columns": [ + { + "expression": "ingestion_source_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "content_hash_sha256", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + 
"foreignKeys": { + "attachments_ingestion_source_id_ingestion_sources_id_fk": { + "name": "attachments_ingestion_source_id_ingestion_sources_id_fk", + "tableFrom": "attachments", + "tableTo": "ingestion_sources", + "columnsFrom": [ + "ingestion_source_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.email_attachments": { + "name": "email_attachments", + "schema": "", + "columns": { + "email_id": { + "name": "email_id", + "type": "uuid", + "primaryKey": false, + "notNull": true + }, + "attachment_id": { + "name": "attachment_id", + "type": "uuid", + "primaryKey": false, + "notNull": true + } + }, + "indexes": {}, + "foreignKeys": { + "email_attachments_email_id_archived_emails_id_fk": { + "name": "email_attachments_email_id_archived_emails_id_fk", + "tableFrom": "email_attachments", + "tableTo": "archived_emails", + "columnsFrom": [ + "email_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + }, + "email_attachments_attachment_id_attachments_id_fk": { + "name": "email_attachments_attachment_id_attachments_id_fk", + "tableFrom": "email_attachments", + "tableTo": "attachments", + "columnsFrom": [ + "attachment_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "restrict", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": { + "email_attachments_email_id_attachment_id_pk": { + "name": "email_attachments_email_id_attachment_id_pk", + "columns": [ + "email_id", + "attachment_id" + ] + } + }, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.audit_logs": { + "name": "audit_logs", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "bigserial", + "primaryKey": true, + "notNull": true + }, + "previous_hash": { + "name": "previous_hash", + "type": "varchar(64)", + 
"primaryKey": false, + "notNull": false + }, + "timestamp": { + "name": "timestamp", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "actor_identifier": { + "name": "actor_identifier", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "actor_ip": { + "name": "actor_ip", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "action_type": { + "name": "action_type", + "type": "audit_log_action", + "typeSchema": "public", + "primaryKey": false, + "notNull": true + }, + "target_type": { + "name": "target_type", + "type": "audit_log_target_type", + "typeSchema": "public", + "primaryKey": false, + "notNull": false + }, + "target_id": { + "name": "target_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "details": { + "name": "details", + "type": "jsonb", + "primaryKey": false, + "notNull": false + }, + "current_hash": { + "name": "current_hash", + "type": "varchar(64)", + "primaryKey": false, + "notNull": true + } + }, + "indexes": {}, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.ediscovery_cases": { + "name": "ediscovery_cases", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "uuid", + "primaryKey": true, + "notNull": true, + "default": "gen_random_uuid()" + }, + "name": { + "name": "name", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "description": { + "name": "description", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "status": { + "name": "status", + "type": "text", + "primaryKey": false, + "notNull": true, + "default": "'open'" + }, + "created_by_identifier": { + "name": "created_by_identifier", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + 
"default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": {}, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": { + "ediscovery_cases_name_unique": { + "name": "ediscovery_cases_name_unique", + "nullsNotDistinct": false, + "columns": [ + "name" + ] + } + }, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.email_legal_holds": { + "name": "email_legal_holds", + "schema": "", + "columns": { + "email_id": { + "name": "email_id", + "type": "uuid", + "primaryKey": false, + "notNull": true + }, + "legal_hold_id": { + "name": "legal_hold_id", + "type": "uuid", + "primaryKey": false, + "notNull": true + }, + "applied_at": { + "name": "applied_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "applied_by_user_id": { + "name": "applied_by_user_id", + "type": "uuid", + "primaryKey": false, + "notNull": false + } + }, + "indexes": {}, + "foreignKeys": { + "email_legal_holds_email_id_archived_emails_id_fk": { + "name": "email_legal_holds_email_id_archived_emails_id_fk", + "tableFrom": "email_legal_holds", + "tableTo": "archived_emails", + "columnsFrom": [ + "email_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + }, + "email_legal_holds_legal_hold_id_legal_holds_id_fk": { + "name": "email_legal_holds_legal_hold_id_legal_holds_id_fk", + "tableFrom": "email_legal_holds", + "tableTo": "legal_holds", + "columnsFrom": [ + "legal_hold_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + }, + "email_legal_holds_applied_by_user_id_users_id_fk": { + "name": "email_legal_holds_applied_by_user_id_users_id_fk", + "tableFrom": "email_legal_holds", + "tableTo": "users", + "columnsFrom": [ + "applied_by_user_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "no 
action", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": { + "email_legal_holds_email_id_legal_hold_id_pk": { + "name": "email_legal_holds_email_id_legal_hold_id_pk", + "columns": [ + "email_id", + "legal_hold_id" + ] + } + }, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.email_retention_labels": { + "name": "email_retention_labels", + "schema": "", + "columns": { + "email_id": { + "name": "email_id", + "type": "uuid", + "primaryKey": false, + "notNull": true + }, + "label_id": { + "name": "label_id", + "type": "uuid", + "primaryKey": false, + "notNull": true + }, + "applied_at": { + "name": "applied_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "applied_by_user_id": { + "name": "applied_by_user_id", + "type": "uuid", + "primaryKey": false, + "notNull": false + } + }, + "indexes": {}, + "foreignKeys": { + "email_retention_labels_email_id_archived_emails_id_fk": { + "name": "email_retention_labels_email_id_archived_emails_id_fk", + "tableFrom": "email_retention_labels", + "tableTo": "archived_emails", + "columnsFrom": [ + "email_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + }, + "email_retention_labels_label_id_retention_labels_id_fk": { + "name": "email_retention_labels_label_id_retention_labels_id_fk", + "tableFrom": "email_retention_labels", + "tableTo": "retention_labels", + "columnsFrom": [ + "label_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + }, + "email_retention_labels_applied_by_user_id_users_id_fk": { + "name": "email_retention_labels_applied_by_user_id_users_id_fk", + "tableFrom": "email_retention_labels", + "tableTo": "users", + "columnsFrom": [ + "applied_by_user_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "no action", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": { + 
"email_retention_labels_email_id_label_id_pk": { + "name": "email_retention_labels_email_id_label_id_pk", + "columns": [ + "email_id", + "label_id" + ] + } + }, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.export_jobs": { + "name": "export_jobs", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "uuid", + "primaryKey": true, + "notNull": true, + "default": "gen_random_uuid()" + }, + "case_id": { + "name": "case_id", + "type": "uuid", + "primaryKey": false, + "notNull": false + }, + "format": { + "name": "format", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "status": { + "name": "status", + "type": "text", + "primaryKey": false, + "notNull": true, + "default": "'pending'" + }, + "query": { + "name": "query", + "type": "jsonb", + "primaryKey": false, + "notNull": true + }, + "file_path": { + "name": "file_path", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "created_by_identifier": { + "name": "created_by_identifier", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "completed_at": { + "name": "completed_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + } + }, + "indexes": {}, + "foreignKeys": { + "export_jobs_case_id_ediscovery_cases_id_fk": { + "name": "export_jobs_case_id_ediscovery_cases_id_fk", + "tableFrom": "export_jobs", + "tableTo": "ediscovery_cases", + "columnsFrom": [ + "case_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "set null", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.legal_holds": { + "name": "legal_holds", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "uuid", + 
"primaryKey": true, + "notNull": true, + "default": "gen_random_uuid()" + }, + "name": { + "name": "name", + "type": "varchar(255)", + "primaryKey": false, + "notNull": true + }, + "reason": { + "name": "reason", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "is_active": { + "name": "is_active", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": true + }, + "case_id": { + "name": "case_id", + "type": "uuid", + "primaryKey": false, + "notNull": false + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": {}, + "foreignKeys": { + "legal_holds_case_id_ediscovery_cases_id_fk": { + "name": "legal_holds_case_id_ediscovery_cases_id_fk", + "tableFrom": "legal_holds", + "tableTo": "ediscovery_cases", + "columnsFrom": [ + "case_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "set null", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.retention_events": { + "name": "retention_events", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "uuid", + "primaryKey": true, + "notNull": true, + "default": "gen_random_uuid()" + }, + "event_name": { + "name": "event_name", + "type": "varchar(255)", + "primaryKey": false, + "notNull": true + }, + "event_type": { + "name": "event_type", + "type": "varchar(100)", + "primaryKey": false, + "notNull": true + }, + "event_timestamp": { + "name": "event_timestamp", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true + }, + "target_criteria": { + "name": "target_criteria", + "type": "jsonb", + "primaryKey": false, + "notNull": true + }, + "created_at": { + "name": 
"created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": {}, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.retention_labels": { + "name": "retention_labels", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "uuid", + "primaryKey": true, + "notNull": true, + "default": "gen_random_uuid()" + }, + "name": { + "name": "name", + "type": "varchar(255)", + "primaryKey": false, + "notNull": true + }, + "retention_period_days": { + "name": "retention_period_days", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "description": { + "name": "description", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "is_disabled": { + "name": "is_disabled", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": {}, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.retention_policies": { + "name": "retention_policies", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "uuid", + "primaryKey": true, + "notNull": true, + "default": "gen_random_uuid()" + }, + "name": { + "name": "name", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "description": { + "name": "description", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "priority": { + "name": "priority", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "retention_period_days": { + "name": "retention_period_days", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "action_on_expiry": { + "name": 
"action_on_expiry", + "type": "retention_action", + "typeSchema": "public", + "primaryKey": false, + "notNull": true + }, + "is_enabled": { + "name": "is_enabled", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": true + }, + "conditions": { + "name": "conditions", + "type": "jsonb", + "primaryKey": false, + "notNull": false + }, + "ingestion_scope": { + "name": "ingestion_scope", + "type": "jsonb", + "primaryKey": false, + "notNull": false, + "default": "'null'::jsonb" + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": {}, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": { + "retention_policies_name_unique": { + "name": "retention_policies_name_unique", + "nullsNotDistinct": false, + "columns": [ + "name" + ] + } + }, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.custodians": { + "name": "custodians", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "uuid", + "primaryKey": true, + "notNull": true, + "default": "gen_random_uuid()" + }, + "email": { + "name": "email", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "display_name": { + "name": "display_name", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "source_type": { + "name": "source_type", + "type": "ingestion_provider", + "typeSchema": "public", + "primaryKey": false, + "notNull": true + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": {}, + 
"foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": { + "custodians_email_unique": { + "name": "custodians_email_unique", + "nullsNotDistinct": false, + "columns": [ + "email" + ] + } + }, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.ingestion_sources": { + "name": "ingestion_sources", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "uuid", + "primaryKey": true, + "notNull": true, + "default": "gen_random_uuid()" + }, + "user_id": { + "name": "user_id", + "type": "uuid", + "primaryKey": false, + "notNull": false + }, + "name": { + "name": "name", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "provider": { + "name": "provider", + "type": "ingestion_provider", + "typeSchema": "public", + "primaryKey": false, + "notNull": true + }, + "credentials": { + "name": "credentials", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "status": { + "name": "status", + "type": "ingestion_status", + "typeSchema": "public", + "primaryKey": false, + "notNull": true, + "default": "'pending_auth'" + }, + "last_sync_started_at": { + "name": "last_sync_started_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + }, + "last_sync_finished_at": { + "name": "last_sync_finished_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + }, + "last_sync_status_message": { + "name": "last_sync_status_message", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "sync_state": { + "name": "sync_state", + "type": "jsonb", + "primaryKey": false, + "notNull": false + }, + "preserve_original_file": { + "name": "preserve_original_file", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + 
"type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": {}, + "foreignKeys": { + "ingestion_sources_user_id_users_id_fk": { + "name": "ingestion_sources_user_id_users_id_fk", + "tableFrom": "ingestion_sources", + "tableTo": "users", + "columnsFrom": [ + "user_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.roles": { + "name": "roles", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "uuid", + "primaryKey": true, + "notNull": true, + "default": "gen_random_uuid()" + }, + "name": { + "name": "name", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "policies": { + "name": "policies", + "type": "jsonb", + "primaryKey": false, + "notNull": true, + "default": "'[]'::jsonb" + }, + "slug": { + "name": "slug", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "created_at": { + "name": "created_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": {}, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": { + "roles_name_unique": { + "name": "roles_name_unique", + "nullsNotDistinct": false, + "columns": [ + "name" + ] + }, + "roles_slug_unique": { + "name": "roles_slug_unique", + "nullsNotDistinct": false, + "columns": [ + "slug" + ] + } + }, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.sessions": { + "name": "sessions", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "user_id": { + "name": "user_id", + "type": "uuid", + "primaryKey": false, + "notNull": 
true + }, + "expires_at": { + "name": "expires_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true + } + }, + "indexes": {}, + "foreignKeys": { + "sessions_user_id_users_id_fk": { + "name": "sessions_user_id_users_id_fk", + "tableFrom": "sessions", + "tableTo": "users", + "columnsFrom": [ + "user_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.user_roles": { + "name": "user_roles", + "schema": "", + "columns": { + "user_id": { + "name": "user_id", + "type": "uuid", + "primaryKey": false, + "notNull": true + }, + "role_id": { + "name": "role_id", + "type": "uuid", + "primaryKey": false, + "notNull": true + } + }, + "indexes": {}, + "foreignKeys": { + "user_roles_user_id_users_id_fk": { + "name": "user_roles_user_id_users_id_fk", + "tableFrom": "user_roles", + "tableTo": "users", + "columnsFrom": [ + "user_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + }, + "user_roles_role_id_roles_id_fk": { + "name": "user_roles_role_id_roles_id_fk", + "tableFrom": "user_roles", + "tableTo": "roles", + "columnsFrom": [ + "role_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": { + "user_roles_user_id_role_id_pk": { + "name": "user_roles_user_id_role_id_pk", + "columns": [ + "user_id", + "role_id" + ] + } + }, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.users": { + "name": "users", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "uuid", + "primaryKey": true, + "notNull": true, + "default": "gen_random_uuid()" + }, + "email": { + "name": "email", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "first_name": { + "name": "first_name", + "type": "text", 
+ "primaryKey": false, + "notNull": false + }, + "last_name": { + "name": "last_name", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "password": { + "name": "password", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "provider": { + "name": "provider", + "type": "text", + "primaryKey": false, + "notNull": false, + "default": "'local'" + }, + "provider_id": { + "name": "provider_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "created_at": { + "name": "created_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": {}, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": { + "users_email_unique": { + "name": "users_email_unique", + "nullsNotDistinct": false, + "columns": [ + "email" + ] + } + }, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.system_settings": { + "name": "system_settings", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "serial", + "primaryKey": true, + "notNull": true + }, + "config": { + "name": "config", + "type": "jsonb", + "primaryKey": false, + "notNull": true + } + }, + "indexes": {}, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.api_keys": { + "name": "api_keys", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "uuid", + "primaryKey": true, + "notNull": true, + "default": "gen_random_uuid()" + }, + "name": { + "name": "name", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "user_id": { + "name": "user_id", + "type": "uuid", + "primaryKey": false, + "notNull": true + }, + "key": { + "name": "key", + "type": "text", + "primaryKey": false, + "notNull": true + }, + 
"key_hash": { + "name": "key_hash", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "expires_at": { + "name": "expires_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true + }, + "created_at": { + "name": "created_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": {}, + "foreignKeys": { + "api_keys_user_id_users_id_fk": { + "name": "api_keys_user_id_users_id_fk", + "tableFrom": "api_keys", + "tableTo": "users", + "columnsFrom": [ + "user_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.sync_sessions": { + "name": "sync_sessions", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "uuid", + "primaryKey": true, + "notNull": true, + "default": "gen_random_uuid()" + }, + "ingestion_source_id": { + "name": "ingestion_source_id", + "type": "uuid", + "primaryKey": false, + "notNull": true + }, + "is_initial_import": { + "name": "is_initial_import", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + }, + "total_mailboxes": { + "name": "total_mailboxes", + "type": "integer", + "primaryKey": false, + "notNull": true, + "default": 0 + }, + "completed_mailboxes": { + "name": "completed_mailboxes", + "type": "integer", + "primaryKey": false, + "notNull": true, + "default": 0 + }, + "failed_mailboxes": { + "name": "failed_mailboxes", + "type": "integer", + "primaryKey": false, + "notNull": true, + "default": 0 + }, + "error_messages": { + "name": "error_messages", + "type": "text[]", + "primaryKey": false, + "notNull": true, + "default": "'{}'" + }, + "created_at": { + "name": "created_at", + "type": 
"timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "last_activity_at": { + "name": "last_activity_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": {}, + "foreignKeys": { + "sync_sessions_ingestion_source_id_ingestion_sources_id_fk": { + "name": "sync_sessions_ingestion_source_id_ingestion_sources_id_fk", + "tableFrom": "sync_sessions", + "tableTo": "ingestion_sources", + "columnsFrom": [ + "ingestion_source_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.journaling_sources": { + "name": "journaling_sources", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "uuid", + "primaryKey": true, + "notNull": true, + "default": "gen_random_uuid()" + }, + "name": { + "name": "name", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "allowed_ips": { + "name": "allowed_ips", + "type": "jsonb", + "primaryKey": false, + "notNull": true + }, + "require_tls": { + "name": "require_tls", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": true + }, + "smtp_username": { + "name": "smtp_username", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "smtp_password_hash": { + "name": "smtp_password_hash", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "status": { + "name": "status", + "type": "journaling_source_status", + "typeSchema": "public", + "primaryKey": false, + "notNull": true, + "default": "'active'" + }, + "ingestion_source_id": { + "name": "ingestion_source_id", + "type": "uuid", + "primaryKey": false, + "notNull": true + }, + "routing_address": { + "name": "routing_address", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "total_received": { + "name": 
"total_received", + "type": "integer", + "primaryKey": false, + "notNull": true, + "default": 0 + }, + "last_received_at": { + "name": "last_received_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": {}, + "foreignKeys": { + "journaling_sources_ingestion_source_id_ingestion_sources_id_fk": { + "name": "journaling_sources_ingestion_source_id_ingestion_sources_id_fk", + "tableFrom": "journaling_sources", + "tableTo": "ingestion_sources", + "columnsFrom": [ + "ingestion_source_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + } + }, + "enums": { + "public.retention_action": { + "name": "retention_action", + "schema": "public", + "values": [ + "delete_permanently", + "notify_admin" + ] + }, + "public.ingestion_provider": { + "name": "ingestion_provider", + "schema": "public", + "values": [ + "google_workspace", + "microsoft_365", + "generic_imap", + "pst_import", + "eml_import", + "mbox_import", + "smtp_journaling" + ] + }, + "public.ingestion_status": { + "name": "ingestion_status", + "schema": "public", + "values": [ + "active", + "paused", + "error", + "pending_auth", + "syncing", + "importing", + "auth_success", + "imported" + ] + }, + "public.audit_log_action": { + "name": "audit_log_action", + "schema": "public", + "values": [ + "CREATE", + "READ", + "UPDATE", + "DELETE", + "LOGIN", + "LOGOUT", + "SETUP", + "IMPORT", + "PAUSE", + "SYNC", + "UPLOAD", + "SEARCH", + "DOWNLOAD", + "GENERATE" + ] + }, + "public.audit_log_target_type": { + "name": 
"audit_log_target_type", + "schema": "public", + "values": [ + "ApiKey", + "ArchivedEmail", + "Dashboard", + "IngestionSource", + "JournalingSource", + "RetentionPolicy", + "RetentionLabel", + "LegalHold", + "Role", + "SystemEvent", + "SystemSettings", + "User", + "File" + ] + }, + "public.journaling_source_status": { + "name": "journaling_source_status", + "schema": "public", + "values": [ + "active", + "paused" + ] + } + }, + "schemas": {}, + "sequences": {}, + "roles": {}, + "policies": {}, + "views": {}, + "_meta": { + "columns": {}, + "schemas": {}, + "tables": {} + } +} \ No newline at end of file diff --git a/packages/backend/src/database/migrations/meta/_journal.json b/packages/backend/src/database/migrations/meta/_journal.json index 4d26115..56797af 100644 --- a/packages/backend/src/database/migrations/meta/_journal.json +++ b/packages/backend/src/database/migrations/meta/_journal.json @@ -1,223 +1,230 @@ { - "version": "7", - "dialect": "postgresql", - "entries": [ - { - "idx": 0, - "version": "7", - "when": 1752225352591, - "tag": "0000_amusing_namora", - "breakpoints": true - }, - { - "idx": 1, - "version": "7", - "when": 1752326803882, - "tag": "0001_odd_night_thrasher", - "breakpoints": true - }, - { - "idx": 2, - "version": "7", - "when": 1752332648392, - "tag": "0002_lethal_quentin_quire", - "breakpoints": true - }, - { - "idx": 3, - "version": "7", - "when": 1752332967084, - "tag": "0003_petite_wrecker", - "breakpoints": true - }, - { - "idx": 4, - "version": "7", - "when": 1752606108876, - "tag": "0004_sleepy_paper_doll", - "breakpoints": true - }, - { - "idx": 5, - "version": "7", - "when": 1752606327253, - "tag": "0005_chunky_sue_storm", - "breakpoints": true - }, - { - "idx": 6, - "version": "7", - "when": 1753112018514, - "tag": "0006_majestic_caretaker", - "breakpoints": true - }, - { - "idx": 7, - "version": "7", - "when": 1753190159356, - "tag": "0007_handy_archangel", - "breakpoints": true - }, - { - "idx": 8, - "version": "7", - "when": 
1753370737317, - "tag": "0008_eminent_the_spike", - "breakpoints": true - }, - { - "idx": 9, - "version": "7", - "when": 1754337938241, - "tag": "0009_late_lenny_balinger", - "breakpoints": true - }, - { - "idx": 10, - "version": "7", - "when": 1754420780849, - "tag": "0010_perpetual_lightspeed", - "breakpoints": true - }, - { - "idx": 11, - "version": "7", - "when": 1754422064158, - "tag": "0011_tan_blackheart", - "breakpoints": true - }, - { - "idx": 12, - "version": "7", - "when": 1754476962901, - "tag": "0012_warm_the_stranger", - "breakpoints": true - }, - { - "idx": 13, - "version": "7", - "when": 1754659373517, - "tag": "0013_classy_talkback", - "breakpoints": true - }, - { - "idx": 14, - "version": "7", - "when": 1754831765718, - "tag": "0014_foamy_vapor", - "breakpoints": true - }, - { - "idx": 15, - "version": "7", - "when": 1755443936046, - "tag": "0015_wakeful_norman_osborn", - "breakpoints": true - }, - { - "idx": 16, - "version": "7", - "when": 1755780572342, - "tag": "0016_lonely_mariko_yashida", - "breakpoints": true - }, - { - "idx": 17, - "version": "7", - "when": 1755961566627, - "tag": "0017_tranquil_shooting_star", - "breakpoints": true - }, - { - "idx": 18, - "version": "7", - "when": 1756911118035, - "tag": "0018_flawless_owl", - "breakpoints": true - }, - { - "idx": 19, - "version": "7", - "when": 1756937533843, - "tag": "0019_confused_scream", - "breakpoints": true - }, - { - "idx": 20, - "version": "7", - "when": 1757860242528, - "tag": "0020_panoramic_wolverine", - "breakpoints": true - }, - { - "idx": 21, - "version": "7", - "when": 1759412986134, - "tag": "0021_nosy_veda", - "breakpoints": true - }, - { - "idx": 22, - "version": "7", - "when": 1759701622932, - "tag": "0022_complete_triton", - "breakpoints": true - }, - { - "idx": 23, - "version": "7", - "when": 1760354094610, - "tag": "0023_swift_swordsman", - "breakpoints": true - }, - { - "idx": 24, - "version": "7", - "when": 1772842674479, - "tag": "0024_careful_black_panther", - 
"breakpoints": true - }, - { - "idx": 25, - "version": "7", - "when": 1773013461190, - "tag": "0025_peaceful_grim_reaper", - "breakpoints": true - }, - { - "idx": 26, - "version": "7", - "when": 1773326266420, - "tag": "0026_pink_fantastic_four", - "breakpoints": true - }, - { - "idx": 27, - "version": "7", - "when": 1773768709477, - "tag": "0027_black_morph", - "breakpoints": true - }, - { - "idx": 28, - "version": "7", - "when": 1773770326402, - "tag": "0028_youthful_kitty_pryde", - "breakpoints": true - }, - { - "idx": 29, - "version": "7", - "when": 1773927678269, - "tag": "0029_lethal_brood", - "breakpoints": true - }, - { - "idx": 30, - "version": "7", - "when": 1774440788278, - "tag": "0030_strong_ultron", - "breakpoints": true - } - ] -} + "version": "7", + "dialect": "postgresql", + "entries": [ + { + "idx": 0, + "version": "7", + "when": 1752225352591, + "tag": "0000_amusing_namora", + "breakpoints": true + }, + { + "idx": 1, + "version": "7", + "when": 1752326803882, + "tag": "0001_odd_night_thrasher", + "breakpoints": true + }, + { + "idx": 2, + "version": "7", + "when": 1752332648392, + "tag": "0002_lethal_quentin_quire", + "breakpoints": true + }, + { + "idx": 3, + "version": "7", + "when": 1752332967084, + "tag": "0003_petite_wrecker", + "breakpoints": true + }, + { + "idx": 4, + "version": "7", + "when": 1752606108876, + "tag": "0004_sleepy_paper_doll", + "breakpoints": true + }, + { + "idx": 5, + "version": "7", + "when": 1752606327253, + "tag": "0005_chunky_sue_storm", + "breakpoints": true + }, + { + "idx": 6, + "version": "7", + "when": 1753112018514, + "tag": "0006_majestic_caretaker", + "breakpoints": true + }, + { + "idx": 7, + "version": "7", + "when": 1753190159356, + "tag": "0007_handy_archangel", + "breakpoints": true + }, + { + "idx": 8, + "version": "7", + "when": 1753370737317, + "tag": "0008_eminent_the_spike", + "breakpoints": true + }, + { + "idx": 9, + "version": "7", + "when": 1754337938241, + "tag": "0009_late_lenny_balinger", + 
"breakpoints": true + }, + { + "idx": 10, + "version": "7", + "when": 1754420780849, + "tag": "0010_perpetual_lightspeed", + "breakpoints": true + }, + { + "idx": 11, + "version": "7", + "when": 1754422064158, + "tag": "0011_tan_blackheart", + "breakpoints": true + }, + { + "idx": 12, + "version": "7", + "when": 1754476962901, + "tag": "0012_warm_the_stranger", + "breakpoints": true + }, + { + "idx": 13, + "version": "7", + "when": 1754659373517, + "tag": "0013_classy_talkback", + "breakpoints": true + }, + { + "idx": 14, + "version": "7", + "when": 1754831765718, + "tag": "0014_foamy_vapor", + "breakpoints": true + }, + { + "idx": 15, + "version": "7", + "when": 1755443936046, + "tag": "0015_wakeful_norman_osborn", + "breakpoints": true + }, + { + "idx": 16, + "version": "7", + "when": 1755780572342, + "tag": "0016_lonely_mariko_yashida", + "breakpoints": true + }, + { + "idx": 17, + "version": "7", + "when": 1755961566627, + "tag": "0017_tranquil_shooting_star", + "breakpoints": true + }, + { + "idx": 18, + "version": "7", + "when": 1756911118035, + "tag": "0018_flawless_owl", + "breakpoints": true + }, + { + "idx": 19, + "version": "7", + "when": 1756937533843, + "tag": "0019_confused_scream", + "breakpoints": true + }, + { + "idx": 20, + "version": "7", + "when": 1757860242528, + "tag": "0020_panoramic_wolverine", + "breakpoints": true + }, + { + "idx": 21, + "version": "7", + "when": 1759412986134, + "tag": "0021_nosy_veda", + "breakpoints": true + }, + { + "idx": 22, + "version": "7", + "when": 1759701622932, + "tag": "0022_complete_triton", + "breakpoints": true + }, + { + "idx": 23, + "version": "7", + "when": 1760354094610, + "tag": "0023_swift_swordsman", + "breakpoints": true + }, + { + "idx": 24, + "version": "7", + "when": 1772842674479, + "tag": "0024_careful_black_panther", + "breakpoints": true + }, + { + "idx": 25, + "version": "7", + "when": 1773013461190, + "tag": "0025_peaceful_grim_reaper", + "breakpoints": true + }, + { + "idx": 26, + 
"version": "7", + "when": 1773326266420, + "tag": "0026_pink_fantastic_four", + "breakpoints": true + }, + { + "idx": 27, + "version": "7", + "when": 1773768709477, + "tag": "0027_black_morph", + "breakpoints": true + }, + { + "idx": 28, + "version": "7", + "when": 1773770326402, + "tag": "0028_youthful_kitty_pryde", + "breakpoints": true + }, + { + "idx": 29, + "version": "7", + "when": 1773927678269, + "tag": "0029_lethal_brood", + "breakpoints": true + }, + { + "idx": 30, + "version": "7", + "when": 1774440788278, + "tag": "0030_strong_ultron", + "breakpoints": true + }, + { + "idx": 31, + "version": "7", + "when": 1774623960683, + "tag": "0031_bouncy_boomerang", + "breakpoints": true + } + ] +} \ No newline at end of file diff --git a/packages/backend/src/database/schema/ingestion-sources.ts b/packages/backend/src/database/schema/ingestion-sources.ts index 5907a0c..f065809 100644 --- a/packages/backend/src/database/schema/ingestion-sources.ts +++ b/packages/backend/src/database/schema/ingestion-sources.ts @@ -1,4 +1,4 @@ -import { jsonb, pgEnum, pgTable, text, timestamp, uuid } from 'drizzle-orm/pg-core'; +import { boolean, jsonb, pgEnum, pgTable, text, timestamp, uuid } from 'drizzle-orm/pg-core'; import { users } from './users'; import { relations } from 'drizzle-orm'; @@ -34,6 +34,7 @@ export const ingestionSources = pgTable('ingestion_sources', { lastSyncFinishedAt: timestamp('last_sync_finished_at', { withTimezone: true }), lastSyncStatusMessage: text('last_sync_status_message'), syncState: jsonb('sync_state'), + preserveOriginalFile: boolean('preserve_original_file').notNull().default(false), createdAt: timestamp('created_at', { withTimezone: true }).notNull().defaultNow(), updatedAt: timestamp('updated_at', { withTimezone: true }).notNull().defaultNow(), }); diff --git a/packages/backend/src/services/ArchivedEmailService.ts b/packages/backend/src/services/ArchivedEmailService.ts index ec7518f..8d1af61 100644 --- 
a/packages/backend/src/services/ArchivedEmailService.ts +++ b/packages/backend/src/services/ArchivedEmailService.ts @@ -21,6 +21,7 @@ import { AuditService } from './AuditService'; import { User } from '@open-archiver/types'; import { checkDeletionEnabled } from '../helpers/deletionGuard'; import { RetentionHook } from '../hooks/RetentionHook'; +import { logger } from '../config/logger'; interface DbRecipients { to: { name: string; address: string }[]; @@ -263,7 +264,13 @@ export class ArchivedEmailService { } } } catch (error) { - console.error('Failed to delete email attachments', error); + logger.error( + { + emailId, + error: error instanceof Error ? error.message : String(error), + }, + 'Failed to delete email attachments' + ); throw new Error('Failed to delete email attachments'); } } diff --git a/packages/backend/src/services/EmailProviderFactory.ts b/packages/backend/src/services/EmailProviderFactory.ts index 42bd8ad..d52d592 100644 --- a/packages/backend/src/services/EmailProviderFactory.ts +++ b/packages/backend/src/services/EmailProviderFactory.ts @@ -17,6 +17,18 @@ import { PSTConnector } from './ingestion-connectors/PSTConnector'; import { EMLConnector } from './ingestion-connectors/EMLConnector'; import { MboxConnector } from './ingestion-connectors/MboxConnector'; +/** + * Options passed to connectors to control ingestion behaviour. + * Currently used to skip extracting full attachment binary content + * in preserve-original-file (GoBD) mode, where attachments are never + * stored separately and the raw EML is kept as-is. + */ +export interface ConnectorOptions { + /** When true, connectors omit attachment binary content from the + * yielded EmailObject to avoid unnecessary memory allocation. 
*/ + preserveOriginalFile: boolean; +} + // Define a common interface for all connectors export interface IEmailConnector { testConnection(): Promise; @@ -34,20 +46,26 @@ export class EmailProviderFactory { static createConnector(source: IngestionSource): IEmailConnector { // Credentials are now decrypted by the IngestionService before being passed around const credentials = source.credentials; + const options: ConnectorOptions = { + preserveOriginalFile: source.preserveOriginalFile ?? false, + }; switch (source.provider) { case 'google_workspace': - return new GoogleWorkspaceConnector(credentials as GoogleWorkspaceCredentials); + return new GoogleWorkspaceConnector( + credentials as GoogleWorkspaceCredentials, + options + ); case 'microsoft_365': - return new MicrosoftConnector(credentials as Microsoft365Credentials); + return new MicrosoftConnector(credentials as Microsoft365Credentials, options); case 'generic_imap': - return new ImapConnector(credentials as GenericImapCredentials); + return new ImapConnector(credentials as GenericImapCredentials, options); case 'pst_import': - return new PSTConnector(credentials as PSTImportCredentials); + return new PSTConnector(credentials as PSTImportCredentials, options); case 'eml_import': - return new EMLConnector(credentials as EMLImportCredentials); + return new EMLConnector(credentials as EMLImportCredentials, options); case 'mbox_import': - return new MboxConnector(credentials as MboxImportCredentials); + return new MboxConnector(credentials as MboxImportCredentials, options); default: throw new Error(`Unsupported provider: ${source.provider}`); } diff --git a/packages/backend/src/services/IndexingService.ts b/packages/backend/src/services/IndexingService.ts index a2f847a..e954f73 100644 --- a/packages/backend/src/services/IndexingService.ts +++ b/packages/backend/src/services/IndexingService.ts @@ -12,7 +12,7 @@ import { DatabaseService } from './DatabaseService'; import { archivedEmails, attachments, 
emailAttachments } from '../database/schema'; import { eq } from 'drizzle-orm'; import { streamToBuffer } from '../helpers/streamToBuffer'; -import { simpleParser } from 'mailparser'; +import { simpleParser, type Attachment as ParsedAttachment } from 'mailparser'; import { logger } from '../config/logger'; interface DbRecipients { @@ -351,9 +351,13 @@ export class IndexingService { content: textContent, }); } catch (error) { - console.error( - `Failed to extract text from attachment: ${attachment.filename}`, - error + logger.error( + { + filename: attachment.filename, + mimeType: attachment.mimeType, + error: error instanceof Error ? error.message : String(error), + }, + 'Failed to extract text from attachment' ); } } @@ -378,8 +382,6 @@ export class IndexingService { attachments: Attachment[], userEmail: string //the owner of the email inbox ): Promise { - const attachmentContents = await this.extractAttachmentContents(attachments); - const emailBodyStream = await this.storageService.get(email.storagePath); const emailBodyBuffer = await streamToBuffer(emailBodyStream); const parsedEmail = await simpleParser(emailBodyBuffer); @@ -389,6 +391,20 @@ export class IndexingService { (await extractText(emailBodyBuffer, 'text/plain')) || ''; + // If there are linked attachment records, extract text from storage (default mode). + // Otherwise, if the email has attachments but no records (preserve original file mode), + // extract attachment text directly from the parsed EML body. 
+ let attachmentContents: { filename: string; content: string }[]; + if (attachments.length > 0) { + attachmentContents = await this.extractAttachmentContents(attachments); + } else if (email.hasAttachments && parsedEmail.attachments.length > 0) { + attachmentContents = await this.extractInlineAttachmentContents( + parsedEmail.attachments + ); + } else { + attachmentContents = []; + } + const recipients = email.recipients as DbRecipients; // console.log('email.userEmail', email.userEmail); return { @@ -406,6 +422,40 @@ export class IndexingService { }; } + /** + * Extracts text content from attachments embedded in the parsed EML. + * Used in preserve-original-file (GoBD) mode where no separate attachment + * records exist — the full MIME body is stored unmodified, so we parse + * attachments directly from the in-memory parsed email. + */ + private async extractInlineAttachmentContents( + parsedAttachments: ParsedAttachment[] + ): Promise<{ filename: string; content: string }[]> { + const extracted: { filename: string; content: string }[] = []; + for (const attachment of parsedAttachments) { + try { + const textContent = await extractText( + attachment.content, + attachment.contentType || '' + ); + extracted.push({ + filename: attachment.filename || 'untitled', + content: textContent, + }); + } catch (error) { + logger.warn( + { + filename: attachment.filename, + mimeType: attachment.contentType, + error: error instanceof Error ? 
error.message : String(error), + }, + 'Failed to extract text from inline attachment in preserve-original mode' + ); + } + } + return extracted; + } + private async extractAttachmentContents( attachments: Attachment[] ): Promise<{ filename: string; content: string }[]> { diff --git a/packages/backend/src/services/IngestionService.ts b/packages/backend/src/services/IngestionService.ts index e3388af..ec0d2d8 100644 --- a/packages/backend/src/services/IngestionService.ts +++ b/packages/backend/src/services/IngestionService.ts @@ -22,6 +22,7 @@ import { emailAttachments, } from '../database/schema'; import { createHash, randomUUID } from 'crypto'; +import { readFile, unlink } from 'fs/promises'; import { logger } from '../config/logger'; import { SearchService } from './SearchService'; import { config } from '../config/index'; @@ -420,6 +421,9 @@ export class IngestionService { userEmail: string ): Promise { try { + // Read the raw bytes from the temp file written by the connector + const rawEmlBuffer = await readFile(email.tempFilePath); + // Generate a unique message ID for the email. If the email already has a message-id header, use that. // Otherwise, generate a new one based on the email's hash, source ID, and email ID. const messageIdHeader = email.headers.get('message-id'); @@ -431,7 +435,7 @@ export class IngestionService { } if (!messageId) { messageId = `generated-${createHash('sha256') - .update(email.eml ?? Buffer.from(email.body, 'utf-8')) + .update(rawEmlBuffer) .digest('hex')}-${source.id}-${email.id}`; } // Check if an email with the same message ID has already been imported for the current ingestion source. This is to prevent duplicate imports when an email is present in multiple mailboxes (e.g., "Inbox" and "All Mail"). @@ -450,13 +454,70 @@ export class IngestionService { return null; } - const rawEmlBuffer = email.eml ?? 
Buffer.from(email.body, 'utf-8'); - // Strip non-inline attachments from the .eml to avoid double-storing + const sanitizedPath = email.path ? email.path : ''; + const emailPath = `${config.storage.openArchiverFolderName}/${source.name.replaceAll(' ', '-')}-${source.id}/emails/${sanitizedPath}${email.id}.eml`; + + // GoBD / Preserve Original File mode: store the unmodified raw EML as-is. + // No attachment stripping, no attachment table records — the full MIME body + // including attachments is preserved in the single .eml file. + if (source.preserveOriginalFile) { + const emailHash = createHash('sha256').update(rawEmlBuffer).digest('hex'); + + // Message-level deduplication by file hash + const hashDuplicate = await db.query.archivedEmails.findFirst({ + where: and( + eq(archivedEmails.storageHashSha256, emailHash), + eq(archivedEmails.ingestionSourceId, source.id) + ), + columns: { id: true }, + }); + + if (hashDuplicate) { + logger.info( + { emailHash, ingestionSourceId: source.id }, + 'Skipping duplicate email (hash-level dedup, preserve original mode)' + ); + return null; + } + + // Store the unmodified raw buffer — no modifications + await storage.put(emailPath, rawEmlBuffer); + + const [archivedEmail] = await db + .insert(archivedEmails) + .values({ + ingestionSourceId: source.id, + userEmail, + threadId: email.threadId, + messageIdHeader: messageId, + providerMessageId: email.id, + sentAt: email.receivedAt, + subject: email.subject, + senderName: email.from[0]?.name, + senderEmail: email.from[0]?.address, + recipients: { + to: email.to, + cc: email.cc, + bcc: email.bcc, + }, + storagePath: emailPath, + storageHashSha256: emailHash, + sizeBytes: rawEmlBuffer.length, + hasAttachments: email.attachments.length > 0, + path: email.path, + tags: email.tags, + }) + .returning(); + + return { + archivedEmailId: archivedEmail.id, + }; + } + + // Default mode: strip non-inline attachments from the .eml to avoid double-storing // attachment data (attachments are stored 
separately). const emlBuffer = await stripAttachmentsFromEml(rawEmlBuffer); const emailHash = createHash('sha256').update(emlBuffer).digest('hex'); - const sanitizedPath = email.path ? email.path : ''; - const emailPath = `${config.storage.openArchiverFolderName}/${source.name.replaceAll(' ', '-')}-${source.id}/emails/${sanitizedPath}${email.id}.eml`; await storage.put(emailPath, emlBuffer); const [archivedEmail] = await db @@ -564,6 +625,14 @@ export class IngestionService { ingestionSourceId: source.id, }); return null; + } finally { + // Always clean up the temp file, regardless of success or failure + await unlink(email.tempFilePath).catch((err) => + logger.warn( + { err, tempFilePath: email.tempFilePath }, + 'Failed to delete temp email file' + ) + ); } } } diff --git a/packages/backend/src/services/ingestion-connectors/EMLConnector.ts b/packages/backend/src/services/ingestion-connectors/EMLConnector.ts index 81aadac..bd86c14 100644 --- a/packages/backend/src/services/ingestion-connectors/EMLConnector.ts +++ b/packages/backend/src/services/ingestion-connectors/EMLConnector.ts @@ -5,10 +5,11 @@ import type { SyncState, MailboxUser, } from '@open-archiver/types'; -import type { IEmailConnector } from '../EmailProviderFactory'; +import type { IEmailConnector, ConnectorOptions } from '../EmailProviderFactory'; import { simpleParser, ParsedMail, Attachment, AddressObject } from 'mailparser'; import { logger } from '../../config/logger'; import { getThreadId } from './helpers/utils'; +import { writeEmailToTempFile } from './helpers/tempFile'; import { StorageService } from '../StorageService'; import { Readable } from 'stream'; import { createHash } from 'crypto'; @@ -27,8 +28,13 @@ const streamToBuffer = (stream: Readable): Promise => { export class EMLConnector implements IEmailConnector { private storage: StorageService; + private options: ConnectorOptions; - constructor(private credentials: EMLImportCredentials) { + constructor( + private credentials: 
EMLImportCredentials, + options?: ConnectorOptions + ) { + this.options = options ?? { preserveOriginalFile: false }; this.storage = new StorageService(); } @@ -266,13 +272,18 @@ export class EMLConnector implements IEmailConnector { emlBuffer = await streamToBuffer(input); } + const tempFilePath = await writeEmailToTempFile(emlBuffer); const parsedEmail: ParsedMail = await simpleParser(emlBuffer); + // In preserve-original mode, skip extracting full attachment binary content + // to avoid unnecessary memory allocation — the raw EML on disk is the source of truth. const attachments = parsedEmail.attachments.map((attachment: Attachment) => ({ filename: attachment.filename || 'untitled', contentType: attachment.contentType, size: attachment.size, - content: attachment.content as Buffer, + content: this.options.preserveOriginalFile + ? Buffer.alloc(0) + : (attachment.content as Buffer), })); const mapAddresses = ( @@ -313,7 +324,7 @@ export class EMLConnector implements IEmailConnector { headers: parsedEmail.headers, attachments, receivedAt: parsedEmail.date || new Date(), - eml: emlBuffer, + tempFilePath, path, }; } diff --git a/packages/backend/src/services/ingestion-connectors/GoogleWorkspaceConnector.ts b/packages/backend/src/services/ingestion-connectors/GoogleWorkspaceConnector.ts index cd704f1..ca16f8b 100644 --- a/packages/backend/src/services/ingestion-connectors/GoogleWorkspaceConnector.ts +++ b/packages/backend/src/services/ingestion-connectors/GoogleWorkspaceConnector.ts @@ -7,10 +7,11 @@ import type { SyncState, MailboxUser, } from '@open-archiver/types'; -import type { IEmailConnector } from '../EmailProviderFactory'; +import type { IEmailConnector, ConnectorOptions } from '../EmailProviderFactory'; import { logger } from '../../config/logger'; import { simpleParser, ParsedMail, Attachment, AddressObject, Headers } from 'mailparser'; import { getThreadId } from './helpers/utils'; +import { writeEmailToTempFile } from './helpers/tempFile'; /** * A 
connector for Google Workspace that uses a service account with domain-wide delegation @@ -20,9 +21,11 @@ export class GoogleWorkspaceConnector implements IEmailConnector { private credentials: GoogleWorkspaceCredentials; private serviceAccountCreds: { client_email: string; private_key: string }; private newHistoryId: string | undefined; + private options: ConnectorOptions; - constructor(credentials: GoogleWorkspaceCredentials) { + constructor(credentials: GoogleWorkspaceCredentials, options?: ConnectorOptions) { this.credentials = credentials; + this.options = options ?? { preserveOriginalFile: false }; try { // Pre-parse the JSON key to catch errors early. const parsedKey = JSON.parse(this.credentials.serviceAccountKeyJson); @@ -201,48 +204,13 @@ export class GoogleWorkspaceConnector implements IEmailConnector { if (msgResponse.data.raw) { const rawEmail = Buffer.from(msgResponse.data.raw, 'base64url'); - const parsedEmail: ParsedMail = await simpleParser(rawEmail); - const attachments = parsedEmail.attachments.map( - (attachment: Attachment) => ({ - filename: attachment.filename || 'untitled', - contentType: attachment.contentType, - size: attachment.size, - content: attachment.content as Buffer, - }) + yield this.parseRawEmail( + rawEmail, + msgResponse.data.id!, + userEmail, + labels.path, + labels.tags ); - const mapAddresses = ( - addresses: AddressObject | AddressObject[] | undefined - ): EmailAddress[] => { - if (!addresses) return []; - const addressArray = Array.isArray(addresses) - ? 
addresses - : [addresses]; - return addressArray.flatMap((a) => - a.value.map((v) => ({ - name: v.name, - address: v.address || '', - })) - ); - }; - const threadId = getThreadId(parsedEmail.headers); - yield { - id: msgResponse.data.id!, - threadId, - userEmail: userEmail, - eml: rawEmail, - from: mapAddresses(parsedEmail.from), - to: mapAddresses(parsedEmail.to), - cc: mapAddresses(parsedEmail.cc), - bcc: mapAddresses(parsedEmail.bcc), - subject: parsedEmail.subject || '', - body: parsedEmail.text || '', - html: parsedEmail.html || '', - headers: parsedEmail.headers, - attachments, - receivedAt: parsedEmail.date || new Date(), - path: labels.path, - tags: labels.tags, - }; } } catch (error: any) { if (error.code === 404) { @@ -326,45 +294,13 @@ export class GoogleWorkspaceConnector implements IEmailConnector { if (msgResponse.data.raw) { const rawEmail = Buffer.from(msgResponse.data.raw, 'base64url'); - const parsedEmail: ParsedMail = await simpleParser(rawEmail); - const attachments = parsedEmail.attachments.map( - (attachment: Attachment) => ({ - filename: attachment.filename || 'untitled', - contentType: attachment.contentType, - size: attachment.size, - content: attachment.content as Buffer, - }) + yield this.parseRawEmail( + rawEmail, + msgResponse.data.id!, + userEmail, + labels.path, + labels.tags ); - const mapAddresses = ( - addresses: AddressObject | AddressObject[] | undefined - ): EmailAddress[] => { - if (!addresses) return []; - const addressArray = Array.isArray(addresses) - ? 
addresses - : [addresses]; - return addressArray.flatMap((a) => - a.value.map((v) => ({ name: v.name, address: v.address || '' })) - ); - }; - const threadId = getThreadId(parsedEmail.headers); - yield { - id: msgResponse.data.id!, - threadId, - userEmail: userEmail, - eml: rawEmail, - from: mapAddresses(parsedEmail.from), - to: mapAddresses(parsedEmail.to), - cc: mapAddresses(parsedEmail.cc), - bcc: mapAddresses(parsedEmail.bcc), - subject: parsedEmail.subject || '', - body: parsedEmail.text || '', - html: parsedEmail.html || '', - headers: parsedEmail.headers, - attachments, - receivedAt: parsedEmail.date || new Date(), - path: labels.path, - tags: labels.tags, - }; } } catch (error: any) { if (error.code === 404) { @@ -382,6 +318,63 @@ export class GoogleWorkspaceConnector implements IEmailConnector { } while (pageToken); } + /** + * Parses a raw email buffer into an EmailObject, extracting metadata via simpleParser. + * In preserve-original mode, attachment binary content is omitted to save memory. + */ + private async parseRawEmail( + rawEmail: Buffer, + messageId: string, + userEmail: string, + path: string, + tags: string[] + ): Promise { + const tempFilePath = await writeEmailToTempFile(rawEmail); + const parsedEmail: ParsedMail = await simpleParser(rawEmail); + + // In preserve-original mode, skip extracting full attachment binary content + // to avoid unnecessary memory allocation — the raw EML on disk is the source of truth. + const attachments = parsedEmail.attachments.map((attachment: Attachment) => ({ + filename: attachment.filename || 'untitled', + contentType: attachment.contentType, + size: attachment.size, + content: this.options.preserveOriginalFile + ? Buffer.alloc(0) + : (attachment.content as Buffer), + })); + + const mapAddresses = ( + addresses: AddressObject | AddressObject[] | undefined + ): EmailAddress[] => { + if (!addresses) return []; + const addressArray = Array.isArray(addresses) ? 
addresses : [addresses]; + return addressArray.flatMap((a) => + a.value.map((v) => ({ name: v.name, address: v.address || '' })) + ); + }; + + const threadId = getThreadId(parsedEmail.headers); + + return { + id: messageId, + threadId, + userEmail, + tempFilePath, + from: mapAddresses(parsedEmail.from), + to: mapAddresses(parsedEmail.to), + cc: mapAddresses(parsedEmail.cc), + bcc: mapAddresses(parsedEmail.bcc), + subject: parsedEmail.subject || '', + body: parsedEmail.text || '', + html: parsedEmail.html || '', + headers: parsedEmail.headers, + attachments, + receivedAt: parsedEmail.date || new Date(), + path, + tags, + }; + } + public getUpdatedSyncState(userEmail: string): SyncState { if (!this.newHistoryId) { return {}; diff --git a/packages/backend/src/services/ingestion-connectors/ImapConnector.ts b/packages/backend/src/services/ingestion-connectors/ImapConnector.ts index d843ed7..2764052 100644 --- a/packages/backend/src/services/ingestion-connectors/ImapConnector.ts +++ b/packages/backend/src/services/ingestion-connectors/ImapConnector.ts @@ -5,19 +5,25 @@ import type { SyncState, MailboxUser, } from '@open-archiver/types'; -import type { IEmailConnector } from '../EmailProviderFactory'; +import type { IEmailConnector, ConnectorOptions } from '../EmailProviderFactory'; import { ImapFlow } from 'imapflow'; import { simpleParser, ParsedMail, Attachment, AddressObject, Headers } from 'mailparser'; import { config } from '../../config'; import { logger } from '../../config/logger'; import { getThreadId } from './helpers/utils'; +import { writeEmailToTempFile } from './helpers/tempFile'; export class ImapConnector implements IEmailConnector { private client: ImapFlow; private newMaxUids: { [mailboxPath: string]: number } = {}; private statusMessage: string | undefined; + private options: ConnectorOptions; - constructor(private credentials: GenericImapCredentials) { + constructor( + private credentials: GenericImapCredentials, + options?: ConnectorOptions + ) { + 
this.options = options ?? { preserveOriginalFile: false }; this.client = this.createClient(); } @@ -298,12 +304,21 @@ export class ImapConnector implements IEmailConnector { } private async parseMessage(msg: any, mailboxPath: string): Promise { + // Write raw bytes to temp file to keep large buffers off the JS heap + const tempFilePath = await writeEmailToTempFile(msg.source); + + // Parse only for metadata extraction (read-only) const parsedEmail: ParsedMail = await simpleParser(msg.source); + + // In preserve-original mode, skip extracting full attachment binary content + // to avoid unnecessary memory allocation — the raw EML on disk is the source of truth. const attachments = parsedEmail.attachments.map((attachment: Attachment) => ({ filename: attachment.filename || 'untitled', contentType: attachment.contentType, size: attachment.size, - content: attachment.content as Buffer, + content: this.options.preserveOriginalFile + ? Buffer.alloc(0) + : (attachment.content as Buffer), })); const mapAddresses = ( @@ -331,7 +346,7 @@ export class ImapConnector implements IEmailConnector { headers: parsedEmail.headers, attachments, receivedAt: parsedEmail.date || new Date(), - eml: msg.source, + tempFilePath, path: mailboxPath, }; } diff --git a/packages/backend/src/services/ingestion-connectors/MboxConnector.ts b/packages/backend/src/services/ingestion-connectors/MboxConnector.ts index b9c8005..9d5707b 100644 --- a/packages/backend/src/services/ingestion-connectors/MboxConnector.ts +++ b/packages/backend/src/services/ingestion-connectors/MboxConnector.ts @@ -5,10 +5,11 @@ import type { SyncState, MailboxUser, } from '@open-archiver/types'; -import type { IEmailConnector } from '../EmailProviderFactory'; +import type { IEmailConnector, ConnectorOptions } from '../EmailProviderFactory'; import { simpleParser, ParsedMail, Attachment, AddressObject } from 'mailparser'; import { logger } from '../../config/logger'; import { getThreadId } from './helpers/utils'; +import { 
writeEmailToTempFile } from './helpers/tempFile'; import { StorageService } from '../StorageService'; import { Readable, Transform } from 'stream'; import { createHash } from 'crypto'; @@ -54,8 +55,13 @@ class MboxSplitter extends Transform { export class MboxConnector implements IEmailConnector { private storage: StorageService; + private options: ConnectorOptions; - constructor(private credentials: MboxImportCredentials) { + constructor( + private credentials: MboxImportCredentials, + options?: ConnectorOptions + ) { + this.options = options ?? { preserveOriginalFile: false }; this.storage = new StorageService(); } @@ -164,14 +170,42 @@ export class MboxConnector implements IEmailConnector { } } - private async parseMessage(emlBuffer: Buffer, path: string): Promise { + /** + * Strips the mbox "From " envelope line from the raw buffer. + * The mbox format prepends each message with a "From sender@... timestamp\n" + * line that is NOT part of the RFC 5322 message. Storing this line in the + * .eml would produce an invalid file and corrupt the SHA-256 hash for GoBD + * compliance purposes. + */ + private stripMboxEnvelope(buffer: Buffer): Buffer { + // The "From " line ends at the first \n — everything after is the real RFC 5322 message. + const fromPrefix = Buffer.from('From '); + if (buffer.subarray(0, fromPrefix.length).equals(fromPrefix)) { + const newlineIndex = buffer.indexOf(0x0a); // \n + if (newlineIndex !== -1) { + return buffer.subarray(newlineIndex + 1); + } + } + return buffer; + } + + private async parseMessage(rawMboxBuffer: Buffer, path: string): Promise { + // Strip the mbox "From " envelope line before writing to temp file. + // This line is an mbox transport artifact, not part of the RFC 5322 message. 
+ const emlBuffer = this.stripMboxEnvelope(rawMboxBuffer); + + const tempFilePath = await writeEmailToTempFile(emlBuffer); const parsedEmail: ParsedMail = await simpleParser(emlBuffer); + // In preserve-original mode, skip extracting full attachment binary content + // to avoid unnecessary memory allocation — the raw EML on disk is the source of truth. const attachments = parsedEmail.attachments.map((attachment: Attachment) => ({ filename: attachment.filename || 'untitled', contentType: attachment.contentType, size: attachment.size, - content: attachment.content as Buffer, + content: this.options.preserveOriginalFile + ? Buffer.alloc(0) + : (attachment.content as Buffer), })); const mapAddresses = ( @@ -226,7 +260,7 @@ export class MboxConnector implements IEmailConnector { headers: parsedEmail.headers, attachments, receivedAt: parsedEmail.date || new Date(), - eml: emlBuffer, + tempFilePath, path: finalPath, }; } diff --git a/packages/backend/src/services/ingestion-connectors/MicrosoftConnector.ts b/packages/backend/src/services/ingestion-connectors/MicrosoftConnector.ts index 83e9457..89cc371 100644 --- a/packages/backend/src/services/ingestion-connectors/MicrosoftConnector.ts +++ b/packages/backend/src/services/ingestion-connectors/MicrosoftConnector.ts @@ -6,9 +6,10 @@ import type { SyncState, MailboxUser, } from '@open-archiver/types'; -import type { IEmailConnector } from '../EmailProviderFactory'; +import type { IEmailConnector, ConnectorOptions } from '../EmailProviderFactory'; import { logger } from '../../config/logger'; import { simpleParser, ParsedMail, Attachment, AddressObject } from 'mailparser'; +import { writeEmailToTempFile } from './helpers/tempFile'; import { ConfidentialClientApplication, Configuration, LogLevel } from '@azure/msal-node'; import { Client } from '@microsoft/microsoft-graph-client'; import type { User, MailFolder } from 'microsoft-graph'; @@ -23,9 +24,11 @@ export class MicrosoftConnector implements IEmailConnector { private 
graphClient: Client; // Store delta tokens for each folder during a sync operation. private newDeltaTokens: { [folderId: string]: string }; + private options: ConnectorOptions; - constructor(credentials: Microsoft365Credentials) { + constructor(credentials: Microsoft365Credentials, options?: ConnectorOptions) { this.credentials = credentials; + this.options = options ?? { preserveOriginalFile: false }; this.newDeltaTokens = {}; // Initialize as an empty object const msalConfig: Configuration = { @@ -299,12 +302,18 @@ export class MicrosoftConnector implements IEmailConnector { userEmail: string, path: string ): Promise { + const tempFilePath = await writeEmailToTempFile(rawEmail); const parsedEmail: ParsedMail = await simpleParser(rawEmail); + + // In preserve-original mode, skip extracting full attachment binary content + // to avoid unnecessary memory allocation — the raw EML on disk is the source of truth. const attachments = parsedEmail.attachments.map((attachment: Attachment) => ({ filename: attachment.filename || 'untitled', contentType: attachment.contentType, size: attachment.size, - content: attachment.content as Buffer, + content: this.options.preserveOriginalFile + ? 
Buffer.alloc(0) + : (attachment.content as Buffer), })); const mapAddresses = ( addresses: AddressObject | AddressObject[] | undefined @@ -319,7 +328,7 @@ export class MicrosoftConnector implements IEmailConnector { return { id: messageId, userEmail: userEmail, - eml: rawEmail, + tempFilePath, from: mapAddresses(parsedEmail.from), to: mapAddresses(parsedEmail.to), cc: mapAddresses(parsedEmail.cc), diff --git a/packages/backend/src/services/ingestion-connectors/PSTConnector.ts b/packages/backend/src/services/ingestion-connectors/PSTConnector.ts index 6499d42..f3d9e88 100644 --- a/packages/backend/src/services/ingestion-connectors/PSTConnector.ts +++ b/packages/backend/src/services/ingestion-connectors/PSTConnector.ts @@ -5,13 +5,13 @@ import type { SyncState, MailboxUser, } from '@open-archiver/types'; -import type { IEmailConnector } from '../EmailProviderFactory'; +import type { IEmailConnector, ConnectorOptions } from '../EmailProviderFactory'; import { PSTFile, PSTFolder, PSTMessage } from 'pst-extractor'; import { simpleParser, ParsedMail, Attachment, AddressObject } from 'mailparser'; import { logger } from '../../config/logger'; import { getThreadId } from './helpers/utils'; +import { writeEmailToTempFile } from './helpers/tempFile'; import { StorageService } from '../StorageService'; -import { Readable } from 'stream'; import { createHash } from 'crypto'; import { join } from 'path'; import { createWriteStream, createReadStream, promises as fs } from 'fs'; @@ -106,8 +106,13 @@ const JUNK_FOLDERS = new Set([ export class PSTConnector implements IEmailConnector { private storage: StorageService; + private options: ConnectorOptions; - constructor(private credentials: PSTImportCredentials) { + constructor( + private credentials: PSTImportCredentials, + options?: ConnectorOptions + ) { + this.options = options ?? 
{ preserveOriginalFile: false }; this.storage = new StorageService(); } @@ -263,7 +268,10 @@ export class PSTConnector implements IEmailConnector { try { email = folder.getNextChild(); } catch (error) { - console.warn("Folder doesn't have child"); + logger.warn( + { folder: folder.displayName, error }, + "Folder doesn't have child or failed to read next child." + ); email = null; } } @@ -283,13 +291,18 @@ export class PSTConnector implements IEmailConnector { ): Promise { const emlContent = await this.constructEml(msg); const emlBuffer = Buffer.from(emlContent, 'utf-8'); + const tempFilePath = await writeEmailToTempFile(emlBuffer); const parsedEmail: ParsedMail = await simpleParser(emlBuffer); + // In preserve-original mode, skip extracting full attachment binary content + // to avoid unnecessary memory allocation — the raw EML on disk is the source of truth. const attachments = parsedEmail.attachments.map((attachment: Attachment) => ({ filename: attachment.filename || 'untitled', contentType: attachment.contentType, size: attachment.size, - content: attachment.content as Buffer, + content: this.options.preserveOriginalFile + ? Buffer.alloc(0) + : (attachment.content as Buffer), })); const mapAddresses = ( @@ -336,7 +349,7 @@ export class PSTConnector implements IEmailConnector { headers: parsedEmail.headers, attachments, receivedAt: parsedEmail.date || new Date(), - eml: emlBuffer, + tempFilePath, path, }; } diff --git a/packages/backend/src/services/ingestion-connectors/helpers/tempFile.ts b/packages/backend/src/services/ingestion-connectors/helpers/tempFile.ts new file mode 100644 index 0000000..a5b0368 --- /dev/null +++ b/packages/backend/src/services/ingestion-connectors/helpers/tempFile.ts @@ -0,0 +1,15 @@ +import { tmpdir } from 'os'; +import { join } from 'path'; +import { writeFile } from 'fs/promises'; +import { randomUUID } from 'crypto'; + +/** + * Writes a raw email buffer to a temporary file on disk and returns the path. 
+ * This keeps large buffers off the JS heap between connector yield and processEmail(). + * The caller (IngestionService.processEmail) is responsible for deleting the file. + */ +export async function writeEmailToTempFile(buffer: Buffer): Promise { + const tempFilePath = join(tmpdir(), `oa-email-${randomUUID()}.eml`); + await writeFile(tempFilePath, buffer); + return tempFilePath; +} diff --git a/packages/frontend/package.json b/packages/frontend/package.json index 787556d..fef99d0 100644 --- a/packages/frontend/package.json +++ b/packages/frontend/package.json @@ -28,7 +28,8 @@ "svelte-persisted-store": "^0.12.0", "sveltekit-i18n": "^2.4.2", "tailwind-merge": "^3.3.1", - "tailwind-variants": "^1.0.0" + "tailwind-variants": "^1.0.0", + "tippy.js": "^6.3.7" }, "devDependencies": { "@internationalized/date": "^3.8.2", diff --git a/packages/frontend/src/lib/components/custom/IngestionSourceForm.svelte b/packages/frontend/src/lib/components/custom/IngestionSourceForm.svelte index c6e1e14..14e96aa 100644 --- a/packages/frontend/src/lib/components/custom/IngestionSourceForm.svelte +++ b/packages/frontend/src/lib/components/custom/IngestionSourceForm.svelte @@ -11,7 +11,9 @@ import { Textarea } from '$lib/components/ui/textarea/index.js'; import { setAlert } from '$lib/components/custom/alert/alert-state.svelte'; import { api } from '$lib/api.client'; - import { Loader2 } from 'lucide-svelte'; + import { Loader2, Info } from 'lucide-svelte'; + import tippy from 'tippy.js'; + import 'tippy.js/dist/tippy.css'; import { t } from '$lib/translations'; let { source = null, @@ -56,6 +58,7 @@ secure: true, allowInsecureCert: false, }, + preserveOriginalFile: source?.preserveOriginalFile ?? false, }); $effect(() => { @@ -439,6 +442,29 @@ {/if} + +
+
+ + + + +
+ +
+ + + {/each} + + + {/if} @@ -920,6 +1014,38 @@ + + + + + + + {$t('app.archive.embedded_attachment_title')} + + + {selectedEmbeddedFilename} +

+ {$t('app.archive.embedded_attachment_description')} +
+
+ + + + + + +
+
{:else}

{$t('app.archive.not_found')}

{/if} diff --git a/packages/types/src/email.types.ts b/packages/types/src/email.types.ts index 03be5d3..16531fb 100644 --- a/packages/types/src/email.types.ts +++ b/packages/types/src/email.types.ts @@ -45,8 +45,10 @@ export interface EmailObject { attachments: EmailAttachment[]; /** The date and time when the email was received. */ receivedAt: Date; - /** An optional buffer containing the full raw EML content of the email, which is useful for archival and compliance purposes. */ - eml?: Buffer; + /** Path to a temporary file on disk containing the raw EML bytes. + * Connectors write the raw email to tmpdir() and pass only the path, + * keeping large buffers off the JS heap between yield and processEmail(). */ + tempFilePath: string; /** The email address of the user whose mailbox this email belongs to. */ userEmail?: string; /** The folder path of the email in the source mailbox. */ diff --git a/packages/types/src/ingestion.types.ts b/packages/types/src/ingestion.types.ts index 72bd836..e398286 100644 --- a/packages/types/src/ingestion.types.ts +++ b/packages/types/src/ingestion.types.ts @@ -120,6 +120,9 @@ export interface IngestionSource { lastSyncFinishedAt?: Date | null; lastSyncStatusMessage?: string | null; syncState?: SyncState | null; + /** When true, the raw EML file is stored without any modification (no attachment + * stripping). Required for GoBD / SEC 17a-4 compliance. Defaults to false. */ + preserveOriginalFile: boolean; } /** @@ -133,6 +136,8 @@ export interface CreateIngestionSourceDto { name: string; provider: IngestionProvider; providerConfig: Record; + /** Store the unmodified raw EML for GoBD compliance. Defaults to false. 
*/ + preserveOriginalFile?: boolean; } export interface UpdateIngestionSourceDto { diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index c0f56eb..7018014 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -334,6 +334,9 @@ importers: tailwind-variants: specifier: ^1.0.0 version: 1.0.0(tailwindcss@4.1.11) + tippy.js: + specifier: ^6.3.7 + version: 6.3.7 devDependencies: '@internationalized/date': specifier: ^3.8.2 @@ -1313,6 +1316,9 @@ packages: '@polka/url@1.0.0-next.29': resolution: {integrity: sha512-wwQAWhWSuHaag8c4q/KN/vCoeOJYshAIvMQwD4GpSb3OiZklFfvAgmj0VCBBImRpuF/aFgIRzllXlVX93Jevww==} + '@popperjs/core@2.11.8': + resolution: {integrity: sha512-P1st0aksCrn9sGZhp8GMYwBnQsbvAWsZAX44oXNNvLHGqAOcoVxmjZiohstwQ7SqKnbR47akdNi+uleWD8+g6A==} + '@rollup/plugin-commonjs@28.0.6': resolution: {integrity: sha512-XSQB1K7FUU5QP+3lOQmVCE3I0FcbbNvmNT4VJSj93iUjayaARrTQeoRdiYQoftAJBLrR9t2agwAd3ekaTgHNlw==} engines: {node: '>=16.0.0 || 14 >= 14.17'} @@ -4580,6 +4586,9 @@ packages: resolution: {integrity: sha512-tX5e7OM1HnYr2+a2C/4V0htOcSQcoSTH9KgJnVvNm5zm/cyEWKJ7j7YutsH9CxMdtOkkLFy2AHrMci9IM8IPZQ==} engines: {node: '>=12.0.0'} + tippy.js@6.3.7: + resolution: {integrity: sha512-E1d3oP2emgJ9dRQZdf3Kkn0qJgI6ZLpyS5z6ZkY1DF3kaQaBsGZsndEpHwx+eC+tYM41HaSNvNtLx8tU57FzTQ==} + tlds@1.259.0: resolution: {integrity: sha512-AldGGlDP0PNgwppe2quAvuBl18UcjuNtOnDuUkqhd6ipPqrYYBt3aTxK1QTsBVknk97lS2JcafWMghjGWFtunw==} hasBin: true @@ -5996,6 +6005,8 @@ snapshots: '@polka/url@1.0.0-next.29': {} + '@popperjs/core@2.11.8': {} + '@rollup/plugin-commonjs@28.0.6(rollup@4.44.2)': dependencies: '@rollup/pluginutils': 5.2.0(rollup@4.44.2) @@ -9535,6 +9546,10 @@ snapshots: fdir: 6.4.6(picomatch@4.0.2) picomatch: 4.0.2 + tippy.js@6.3.7: + dependencies: + '@popperjs/core': 2.11.8 + tlds@1.259.0: {} to-regex-range@5.0.1: