diff --git a/.env.example b/.env.example index 8c3997e..60425f8 100644 --- a/.env.example +++ b/.env.example @@ -43,7 +43,11 @@ REDIS_USER=notdefaultuser # --- Storage Settings --- # Choose your storage backend. Valid options are 'local' or 's3'. STORAGE_TYPE=local -# The maximum request body size to accept in bytes including while streaming. The body size can also be specified with a unit suffix for kilobytes (K), megabytes (M), or gigabytes (G). For example, 512K or 1M. Defaults to 512kb. Or the value of Infinity if you don't want any upload limit. +# The maximum request body size the SvelteKit frontend server will accept (including file uploads via streaming). +# Accepts a plain number of bytes, or a number followed by a unit suffix: K (kilobytes), M (megabytes), or G (gigabytes). +# Set to 'Infinity' to remove the limit entirely (recommended for archiving large PST/Mbox files). +# Examples: 512K, 100M, 5G, Infinity. Defaults to 512K if not set. +# For very large files (multi-GB), consider using the "Local Path" ingestion option, which bypasses this limit entirely. 
BODY_SIZE_LIMIT=100M # --- Local Storage Settings --- diff --git a/packages/backend/src/api/controllers/upload.controller.ts b/packages/backend/src/api/controllers/upload.controller.ts index 5ebc5d9..67f8ee6 100644 --- a/packages/backend/src/api/controllers/upload.controller.ts +++ b/packages/backend/src/api/controllers/upload.controller.ts @@ -3,24 +3,96 @@ import { StorageService } from '../../services/StorageService'; import { randomUUID } from 'crypto'; import busboy from 'busboy'; import { config } from '../../config/index'; +import { logger } from '../../config/logger'; +import i18next from 'i18next'; export const uploadFile = async (req: Request, res: Response) => { const storage = new StorageService(); - const bb = busboy({ headers: req.headers }); const uploads: Promise[] = []; let filePath = ''; let originalFilename = ''; + let headersSent = false; + const contentLength = req.headers['content-length']; - bb.on('file', (fieldname, file, filename) => { - originalFilename = filename.filename; + logger.info({ contentLength, contentType: req.headers['content-type'] }, 'File upload started'); + + const sendErrorResponse = (statusCode: number, message: string) => { + if (!headersSent) { + headersSent = true; + res.status(statusCode).json({ + status: 'error', + statusCode, + message, + errors: null, + }); + } + }; + + let bb: busboy.Busboy; + try { + bb = busboy({ headers: req.headers }); + } catch (err) { + const message = err instanceof Error ? 
err.message : i18next.t('upload.invalid_request'); + logger.error({ error: message }, 'Failed to initialize file upload parser'); + sendErrorResponse(400, i18next.t('upload.invalid_request')); + return; + } + + bb.on('file', (fieldname, file, info) => { + originalFilename = info.filename; const uuid = randomUUID(); filePath = `${config.storage.openArchiverFolderName}/tmp/${uuid}-${originalFilename}`; + + logger.info({ filename: originalFilename, fieldname }, 'Receiving file stream'); + + file.on('error', (err) => { + logger.error( + { error: err.message, filename: originalFilename }, + 'File stream error during upload' + ); + sendErrorResponse(500, i18next.t('upload.stream_error')); + }); + uploads.push(storage.put(filePath, file)); }); + bb.on('error', (err: Error) => { + logger.error({ error: err.message }, 'Upload parsing error'); + sendErrorResponse(500, i18next.t('upload.parse_error')); + }); + bb.on('finish', async () => { - await Promise.all(uploads); - res.json({ filePath }); + try { + await Promise.all(uploads); + if (!headersSent) { + headersSent = true; + logger.info( + { filePath, filename: originalFilename }, + 'File upload completed successfully' + ); + res.json({ filePath }); + } + } catch (err) { + const message = err instanceof Error ? 
err.message : 'Unknown storage error'; + logger.error( + { error: message, filename: originalFilename, filePath }, + 'Failed to write uploaded file to storage' + ); + sendErrorResponse(500, i18next.t('upload.storage_error')); + } + }); + + // Handle client disconnection mid-upload + req.on('error', (err) => { + logger.warn( + { error: err.message, filename: originalFilename }, + 'Client connection error during upload' + ); + sendErrorResponse(499, i18next.t('upload.connection_error')); + }); + + req.on('aborted', () => { + logger.warn({ filename: originalFilename }, 'Client aborted upload'); }); req.pipe(bb); diff --git a/packages/backend/src/locales/en/translation.json b/packages/backend/src/locales/en/translation.json index 9ac9c2b..b993887 100644 --- a/packages/backend/src/locales/en/translation.json +++ b/packages/backend/src/locales/en/translation.json @@ -66,5 +66,12 @@ }, "api": { "requestBodyInvalid": "Invalid request body." + }, + "upload": { + "invalid_request": "The upload request is invalid or malformed.", + "stream_error": "An error occurred while receiving the file. Please try again.", + "parse_error": "Failed to parse the uploaded file data.", + "storage_error": "Failed to save the uploaded file to storage. Please try again.", + "connection_error": "The connection was lost during the upload." } } diff --git a/packages/frontend/src/lib/components/custom/IngestionSourceForm.svelte b/packages/frontend/src/lib/components/custom/IngestionSourceForm.svelte index 2f423a7..c6e1e14 100644 --- a/packages/frontend/src/lib/components/custom/IngestionSourceForm.svelte +++ b/packages/frontend/src/lib/components/custom/IngestionSourceForm.svelte @@ -72,7 +72,9 @@ let fileUploading = $state(false); let importMethod = $state<'upload' | 'local'>( - source?.credentials && 'localFilePath' in source.credentials && source.credentials.localFilePath + source?.credentials && + 'localFilePath' in source.credentials && + source.credentials.localFilePath ? 
'local' : 'upload' ); @@ -119,16 +121,25 @@ method: 'POST', body: uploadFormData, }); - const result = await response.json(); + + // Safely parse the response body — it may not be valid JSON + // (e.g. if the proxy rejected the request with an HTML error page) + let result: Record; + try { + result = await response.json(); + } catch { + throw new Error($t('app.components.ingestion_source_form.upload_network_error')); + } + if (!response.ok) { - throw new Error(result.message || 'File upload failed'); + throw new Error( + result.message || $t('app.components.ingestion_source_form.upload_failed') + ); } formData.providerConfig.uploadedFilePath = result.filePath; formData.providerConfig.uploadedFileName = file.name; - fileUploading = false; } catch (error) { - fileUploading = false; const message = error instanceof Error ? error.message : String(error); setAlert({ type: 'error', @@ -137,6 +148,10 @@ duration: 5000, show: true, }); + // Reset file input so the user can retry with the same file + target.value = ''; + } finally { + fileUploading = false; } }; @@ -259,15 +274,21 @@ {:else if formData.provider === 'pst_import'}
- +
- +
- +
@@ -305,15 +326,21 @@ {/if} {:else if formData.provider === 'eml_import'}
- +
- +
- +
@@ -351,15 +378,21 @@ {/if} {:else if formData.provider === 'mbox_import'}
- +
- +
- +
diff --git a/packages/frontend/src/lib/translations/en.json b/packages/frontend/src/lib/translations/en.json index 0268d86..acaccb9 100644 --- a/packages/frontend/src/lib/translations/en.json +++ b/packages/frontend/src/lib/translations/en.json @@ -232,7 +232,8 @@ "mbox_file": "Mbox File", "heads_up": "Heads up!", "org_wide_warning": "Please note that this is an organization-wide operation. This kind of ingestions will import and index all email inboxes in your organization. If you want to import only specific email inboxes, use the IMAP connector.", - "upload_failed": "Upload Failed, please try again" + "upload_failed": "Upload Failed, please try again", + "upload_network_error": "The server could not process the upload. The file may exceed the configured upload size limit (BODY_SIZE_LIMIT). For very large files, use the Local Path option instead." }, "role_form": { "policies_json": "Policies (JSON)",