mirror of
https://github.com/LogicLabs-OU/OpenArchiver.git
synced 2026-04-06 08:41:57 +02:00
Compare commits
9 Commits
attachment
...
docs-ocr
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
b49d8a78ce | ||
|
|
d372ef7566 | ||
|
|
e9a65f9672 | ||
|
|
ce3f379b7a | ||
|
|
37a778cb6d | ||
|
|
26a760b232 | ||
|
|
6be0774bc4 | ||
|
|
4a23f8f29f | ||
|
|
074256ed59 |
11
.env.example
11
.env.example
@@ -19,7 +19,8 @@ DATABASE_URL="postgresql://${POSTGRES_USER}:${POSTGRES_PASSWORD}@postgres:5432/$
|
||||
# Meilisearch
|
||||
MEILI_MASTER_KEY=aSampleMasterKey
|
||||
MEILI_HOST=http://meilisearch:7700
|
||||
|
||||
# The number of emails to batch together for indexing. Defaults to 500.
|
||||
MEILI_INDEXING_BATCH=500
|
||||
|
||||
|
||||
# Redis (We use Valkey, which is Redis-compatible and open source)
|
||||
@@ -60,6 +61,8 @@ RATE_LIMIT_WINDOW_MS=60000
|
||||
# The maximum number of API requests allowed from an IP within the window. Defaults to 100.
|
||||
RATE_LIMIT_MAX_REQUESTS=100
|
||||
|
||||
|
||||
|
||||
# JWT
|
||||
# IMPORTANT: Change this to a long, random, and secret string in your .env file
|
||||
JWT_SECRET=a-very-secret-key-that-you-should-change
|
||||
@@ -70,3 +73,9 @@ JWT_EXPIRES_IN="7d"
|
||||
# IMPORTANT: Generate a secure, random 32-byte hex string for this
|
||||
# You can use `openssl rand -hex 32` to generate a key.
|
||||
ENCRYPTION_KEY=
|
||||
|
||||
# Apache Tika Integration
|
||||
# ONLY active if TIKA_URL is set
|
||||
TIKA_URL=http://tika:9998
|
||||
|
||||
|
||||
|
||||
1
.github/FUNDING.yml
vendored
Normal file
1
.github/FUNDING.yml
vendored
Normal file
@@ -0,0 +1 @@
|
||||
github: [wayneshn]
|
||||
32
.github/ISSUE_TEMPLATE/bug_report.md
vendored
Normal file
32
.github/ISSUE_TEMPLATE/bug_report.md
vendored
Normal file
@@ -0,0 +1,32 @@
|
||||
---
|
||||
name: Bug report
|
||||
about: Create a report to help us improve
|
||||
title: ''
|
||||
labels: bug
|
||||
assignees: ''
|
||||
|
||||
---
|
||||
|
||||
**Describe the bug**
|
||||
A clear and concise description of what the bug is.
|
||||
|
||||
**To Reproduce**
|
||||
Steps to reproduce the behavior:
|
||||
1. Go to '...'
|
||||
2. Click on '....'
|
||||
5. See error
|
||||
|
||||
**Expected behavior**
|
||||
A clear and concise description of what you expected to happen.
|
||||
|
||||
**Screenshots**
|
||||
If applicable, add screenshots to help explain your problem.
|
||||
|
||||
**System:**
|
||||
- Open Archiver Version:
|
||||
|
||||
**Relevant logs:**
|
||||
Any relevant logs (Redact sensitive information)
|
||||
|
||||
**Additional context**
|
||||
Add any other context about the problem here.
|
||||
20
.github/ISSUE_TEMPLATE/feature_request.md
vendored
Normal file
20
.github/ISSUE_TEMPLATE/feature_request.md
vendored
Normal file
@@ -0,0 +1,20 @@
|
||||
---
|
||||
name: Feature request
|
||||
about: Suggest an idea for this project
|
||||
title: ''
|
||||
labels: enhancement
|
||||
assignees: ''
|
||||
|
||||
---
|
||||
|
||||
**Is your feature request related to a problem? Please describe.**
|
||||
A clear and concise description of what the problem is.
|
||||
|
||||
**Describe the solution you'd like**
|
||||
A clear and concise description of what you want to happen.
|
||||
|
||||
**Describe alternatives you've considered**
|
||||
A clear and concise description of any alternative solutions or features you've considered.
|
||||
|
||||
**Additional context**
|
||||
Add any other context or screenshots about the feature request here.
|
||||
@@ -46,12 +46,14 @@ Password: openarchiver_demo
|
||||
- Microsoft 365
|
||||
- PST files
|
||||
- Zipped .eml files
|
||||
- Mbox files
|
||||
|
||||
- **Secure & Efficient Storage**: Emails are stored in the standard `.eml` format. The system uses deduplication and compression to minimize storage costs. All data is encrypted at rest.
|
||||
- **Pluggable Storage Backends**: Support both local filesystem storage and S3-compatible object storage (like AWS S3 or MinIO).
|
||||
- **Powerful Search & eDiscovery**: A high-performance search engine indexes the full text of emails and attachments (PDF, DOCX, etc.).
|
||||
- **Thread discovery**: The ability to discover if an email belongs to a thread/conversation and present the context.
|
||||
- **Compliance & Retention**: Define granular retention policies to automatically manage the lifecycle of your data. Place legal holds on communications to prevent deletion during litigation (TBD).
|
||||
- **File Hash and Encryption**: Email and attachment file hash values are stored in the meta database upon ingestion, meaning any attempt to alter the file content will be identified, ensuring legal and regulatory compliance.
|
||||
- **Comprehensive Auditing**: An immutable audit trail logs all system activities, ensuring you have a clear record of who accessed what and when (TBD).
|
||||
|
||||
## 🛠️ Tech Stack
|
||||
|
||||
@@ -52,6 +52,13 @@ services:
|
||||
networks:
|
||||
- open-archiver-net
|
||||
|
||||
tika:
|
||||
image: apache/tika:3.2.2.0-full
|
||||
container_name: tika
|
||||
restart: always
|
||||
networks:
|
||||
- open-archiver-net
|
||||
|
||||
volumes:
|
||||
pgdata:
|
||||
driver: local
|
||||
|
||||
@@ -52,6 +52,7 @@ export default defineConfig({
|
||||
},
|
||||
{ text: 'EML Import', link: '/user-guides/email-providers/eml' },
|
||||
{ text: 'PST Import', link: '/user-guides/email-providers/pst' },
|
||||
{ text: 'Mbox Import', link: '/user-guides/email-providers/mbox' },
|
||||
],
|
||||
},
|
||||
{
|
||||
@@ -64,6 +65,20 @@ export default defineConfig({
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
text: 'Upgrading and Migration',
|
||||
collapsed: true,
|
||||
items: [
|
||||
{
|
||||
text: 'Upgrading',
|
||||
link: '/user-guides/upgrade-and-migration/upgrade',
|
||||
},
|
||||
{
|
||||
text: 'Meilisearch Upgrade',
|
||||
link: '/user-guides/upgrade-and-migration/meilisearch-upgrade',
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
@@ -85,6 +100,7 @@ export default defineConfig({
|
||||
items: [
|
||||
{ text: 'Overview', link: '/services/' },
|
||||
{ text: 'Storage Service', link: '/services/storage-service' },
|
||||
{ text: 'OCR Service', link: '/services/ocr-service' },
|
||||
{
|
||||
text: 'IAM Service',
|
||||
items: [{ text: 'IAM Policies', link: '/services/iam-service/iam-policy' }],
|
||||
|
||||
@@ -1,289 +0,0 @@
|
||||
# IAM Policies
|
||||
|
||||
This document provides a guide to creating and managing IAM policies in Open Archiver. It is intended for developers and administrators who need to configure granular access control for users and roles.
|
||||
|
||||
## Policy Structure
|
||||
|
||||
IAM policies are defined as an array of JSON objects, where each object represents a single permission rule. The structure of a policy object is as follows:
|
||||
|
||||
```json
|
||||
{
|
||||
"action": "read" OR ["read", "create"],
|
||||
"subject": "ingestion" OR ["ingestion", "dashboard"],
|
||||
"conditions": {
|
||||
"field_name": "value"
|
||||
},
|
||||
"inverted": false OR true,
|
||||
}
|
||||
```
|
||||
|
||||
- `action`: The action(s) to be performed on the subject. Can be a single string or an array of strings.
|
||||
- `subject`: The resource(s) or entity on which the action is to be performed. Can be a single string or an array of strings.
|
||||
- `conditions`: (Optional) A set of conditions that must be met for the permission to be granted.
|
||||
- `inverted`: (Optional) When set to `true`, this inverts the rule, turning it from a "can" rule into a "cannot" rule. This is useful for creating exceptions to broader permissions.
|
||||
|
||||
## Actions
|
||||
|
||||
The following actions are available for use in IAM policies:
|
||||
|
||||
- `manage`: A wildcard action that grants all permissions on a subject (`create`, `read`, `update`, `delete`, `search`, `sync`).
|
||||
- `create`: Allows the user to create a new resource.
|
||||
- `read`: Allows the user to view a resource.
|
||||
- `update`: Allows the user to modify an existing resource.
|
||||
- `delete`: Allows the user to delete a resource.
|
||||
- `search`: Allows the user to search for resources.
|
||||
- `sync`: Allows the user to synchronize a resource.
|
||||
|
||||
## Subjects
|
||||
|
||||
The following subjects are available for use in IAM policies:
|
||||
|
||||
- `all`: A wildcard subject that represents all resources.
|
||||
- `archive`: Represents archived emails.
|
||||
- `ingestion`: Represents ingestion sources.
|
||||
- `settings`: Represents system settings.
|
||||
- `users`: Represents user accounts.
|
||||
- `roles`: Represents user roles.
|
||||
- `dashboard`: Represents the dashboard.
|
||||
|
||||
## Advanced Conditions with MongoDB-Style Queries
|
||||
|
||||
Conditions are the key to creating fine-grained access control rules. They are defined as a JSON object where each key represents a field on the subject, and the value defines the criteria for that field.
|
||||
|
||||
All conditions within a single rule are implicitly joined with an **AND** logic. This means that for a permission to be granted, the resource must satisfy _all_ specified conditions.
|
||||
|
||||
The power of this system comes from its use of a subset of [MongoDB's query language](https://www.mongodb.com/docs/manual/), which provides a flexible and expressive way to define complex rules. These rules are translated into native queries for both the PostgreSQL database (via Drizzle ORM) and the Meilisearch engine.
|
||||
|
||||
### Supported Operators and Examples
|
||||
|
||||
Here is a detailed breakdown of the supported operators with examples.
|
||||
|
||||
#### `$eq` (Equal)
|
||||
|
||||
This is the default operator. If you provide a simple key-value pair, it is treated as an equality check.
|
||||
|
||||
```json
|
||||
// This rule...
|
||||
{ "status": "active" }
|
||||
|
||||
// ...is equivalent to this:
|
||||
{ "status": { "$eq": "active" } }
|
||||
```
|
||||
|
||||
**Use Case**: Grant access to an ingestion source only if its status is `active`.
|
||||
|
||||
#### `$ne` (Not Equal)
|
||||
|
||||
Matches documents where the field value is not equal to the specified value.
|
||||
|
||||
```json
|
||||
{ "provider": { "$ne": "pst_import" } }
|
||||
```
|
||||
|
||||
**Use Case**: Allow a user to see all ingestion sources except for PST imports.
|
||||
|
||||
#### `$in` (In Array)
|
||||
|
||||
Matches documents where the field value is one of the values in the specified array.
|
||||
|
||||
```json
|
||||
{
|
||||
"id": {
|
||||
"$in": ["INGESTION_ID_1", "INGESTION_ID_2"]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**Use Case**: Grant an auditor access to a specific list of ingestion sources.
|
||||
|
||||
#### `$nin` (Not In Array)
|
||||
|
||||
Matches documents where the field value is not one of the values in the specified array.
|
||||
|
||||
```json
|
||||
{ "provider": { "$nin": ["pst_import", "eml_import"] } }
|
||||
```
|
||||
|
||||
**Use Case**: Hide all manual import sources from a specific user role.
|
||||
|
||||
#### `$lt` / `$lte` (Less Than / Less Than or Equal)
|
||||
|
||||
Matches documents where the field value is less than (`$lt`) or less than or equal to (`$lte`) the specified value. This is useful for numeric or date-based comparisons.
|
||||
|
||||
```json
|
||||
{ "sentAt": { "$lt": "2024-01-01T00:00:00.000Z" } }
|
||||
```
|
||||
|
||||
#### `$gt` / `$gte` (Greater Than / Greater Than or Equal)
|
||||
|
||||
Matches documents where the field value is greater than (`$gt`) or greater than or equal to (`$gte`) the specified value.
|
||||
|
||||
```json
|
||||
{ "sentAt": { "$lt": "2024-01-01T00:00:00.000Z" } }
|
||||
```
|
||||
|
||||
#### `$exists`
|
||||
|
||||
Matches documents that have (or do not have) the specified field.
|
||||
|
||||
```json
|
||||
// Grant access only if a 'lastSyncStatusMessage' exists
|
||||
{ "lastSyncStatusMessage": { "$exists": true } }
|
||||
```
|
||||
|
||||
## Inverted Rules: Creating Exceptions with `cannot`
|
||||
|
||||
By default, all rules are "can" rules, meaning they grant permissions. However, you can create a "cannot" rule by adding `"inverted": true` to a policy object. This is extremely useful for creating exceptions to broader permissions.
|
||||
|
||||
A common pattern is to grant broad access and then use an inverted rule to carve out a specific restriction.
|
||||
|
||||
**Use Case**: Grant a user access to all ingestion sources _except_ for one specific source.
|
||||
|
||||
This is achieved with two rules:
|
||||
|
||||
1. A "can" rule that grants `read` access to the `ingestion` subject.
|
||||
2. An inverted "cannot" rule that denies `read` access for the specific ingestion `id`.
|
||||
|
||||
```json
|
||||
[
|
||||
{
|
||||
"action": "read",
|
||||
"subject": "ingestion"
|
||||
},
|
||||
{
|
||||
"inverted": true,
|
||||
"action": "read",
|
||||
"subject": "ingestion",
|
||||
"conditions": {
|
||||
"id": "SPECIFIC_INGESTION_ID_TO_EXCLUDE"
|
||||
}
|
||||
}
|
||||
]
|
||||
```
|
||||
|
||||
## Policy Evaluation Logic
|
||||
|
||||
The system evaluates policies by combining all relevant rules for a user. The logic is simple:
|
||||
|
||||
- A user has permission if at least one `can` rule allows it.
|
||||
- A permission is denied if a `cannot` (`"inverted": true`) rule explicitly forbids it, even if a `can` rule allows it. `cannot` rules always take precedence.
|
||||
|
||||
### Dynamic Policies with Placeholders
|
||||
|
||||
To create dynamic policies that are specific to the current user, you can use the `${user.id}` placeholder in the `conditions` object. This placeholder will be replaced with the ID of the current user at runtime.
|
||||
|
||||
## Special Permissions for User and Role Management
|
||||
|
||||
It is important to note that while `read` access to `users` and `roles` can be granted granularly, any actions that modify these resources (`create`, `update`, `delete`) are restricted to Super Admins.
|
||||
|
||||
A user must have the `{ "action": "manage", "subject": "all" }` permission (Typically a Super Admin role) to manage users and roles. This is a security measure to prevent unauthorized changes to user accounts and permissions.
|
||||
|
||||
## Policy Examples
|
||||
|
||||
Here are several examples based on the default roles in the system, demonstrating how to combine actions, subjects, and conditions to achieve specific access control scenarios.
|
||||
|
||||
### Administrator
|
||||
|
||||
This policy grants a user full access to all resources using wildcards.
|
||||
|
||||
```json
|
||||
[
|
||||
{
|
||||
"action": "manage",
|
||||
"subject": "all"
|
||||
}
|
||||
]
|
||||
```
|
||||
|
||||
### End-User
|
||||
|
||||
This policy allows a user to view the dashboard, create new ingestion sources, and fully manage the ingestion sources they own.
|
||||
|
||||
```json
|
||||
[
|
||||
{
|
||||
"action": "read",
|
||||
"subject": "dashboard"
|
||||
},
|
||||
{
|
||||
"action": "create",
|
||||
"subject": "ingestion"
|
||||
},
|
||||
{
|
||||
"action": "manage",
|
||||
"subject": "ingestion",
|
||||
"conditions": {
|
||||
"userId": "${user.id}"
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "manage",
|
||||
"subject": "archive",
|
||||
"conditions": {
|
||||
"ingestionSource.userId": "${user.id}" // also needs to give permission to archived emails created by the user
|
||||
}
|
||||
}
|
||||
]
|
||||
```
|
||||
|
||||
### Global Read-Only Auditor
|
||||
|
||||
This policy grants read and search access across most of the application's resources, making it suitable for an auditor who needs to view data without modifying it.
|
||||
|
||||
```json
|
||||
[
|
||||
{
|
||||
"action": ["read", "search"],
|
||||
"subject": ["ingestion", "archive", "dashboard", "users", "roles"]
|
||||
}
|
||||
]
|
||||
```
|
||||
|
||||
### Ingestion Admin
|
||||
|
||||
This policy grants full control over all ingestion sources and archives, but no other resources.
|
||||
|
||||
```json
|
||||
[
|
||||
{
|
||||
"action": "manage",
|
||||
"subject": "ingestion"
|
||||
}
|
||||
]
|
||||
```
|
||||
|
||||
### Auditor for Specific Ingestion Sources
|
||||
|
||||
This policy demonstrates how to grant access to a specific list of ingestion sources using the `$in` operator.
|
||||
|
||||
```json
|
||||
[
|
||||
{
|
||||
"action": ["read", "search"],
|
||||
"subject": "ingestion",
|
||||
"conditions": {
|
||||
"id": {
|
||||
"$in": ["INGESTION_ID_1", "INGESTION_ID_2"]
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
```
|
||||
|
||||
### Limit Access to a Specific Mailbox
|
||||
|
||||
This policy grants a user access to a specific ingestion source, but only allows them to see emails belonging to a single user within that source.
|
||||
|
||||
This is achieved by defining two specific `can` rules: The rule grants `read` and `search` access to the `archive` subject, but the `userEmail` must match.
|
||||
|
||||
```json
|
||||
[
|
||||
{
|
||||
"action": ["read", "search"],
|
||||
"subject": "archive",
|
||||
"conditions": {
|
||||
"userEmail": "user1@example.com"
|
||||
}
|
||||
}
|
||||
]
|
||||
```
|
||||
96
docs/services/ocr-service.md
Normal file
96
docs/services/ocr-service.md
Normal file
@@ -0,0 +1,96 @@
|
||||
# OCR Service
|
||||
|
||||
The OCR (Optical Character Recognition) and text extraction service is responsible for extracting plain text content from various file formats, such as PDFs, Office documents, and more. This is a crucial component for making email attachments searchable.
|
||||
|
||||
## Overview
|
||||
|
||||
The system employs a two-pronged approach for text extraction:
|
||||
|
||||
1. **Primary Extractor (Apache Tika)**: A powerful and versatile toolkit that can extract text from a wide variety of file formats. It is the recommended method for its superior performance and format support.
|
||||
2. **Legacy Extractor**: A fallback mechanism that uses a combination of libraries (`pdf2json`, `mammoth`, `xlsx`) for common file types like PDF, DOCX, and XLSX. This is used when Apache Tika is not configured.
|
||||
|
||||
The main logic resides in `packages/backend/src/helpers/textExtractor.ts`, which decides which extraction method to use based on the application's configuration.
|
||||
|
||||
## Configuration
|
||||
|
||||
To enable the primary text extraction method, you must configure the URL of an Apache Tika server instance in your environment variables.
|
||||
|
||||
In your `.env` file, set the `TIKA_URL`:
|
||||
|
||||
```env
|
||||
# .env.example
|
||||
|
||||
# Apache Tika Integration
|
||||
# ONLY active if TIKA_URL is set
|
||||
TIKA_URL=http://tika:9998
|
||||
```
|
||||
|
||||
If `TIKA_URL` is not set, the system will automatically fall back to the legacy extraction methods. The service performs a health check on startup to verify connectivity with the Tika server.
|
||||
|
||||
## File Size Limits
|
||||
|
||||
To prevent excessive memory usage and processing time, the service imposes a general size limit on files submitted for text extraction. Files larger than the configured limit will be skipped.
|
||||
|
||||
- **With Apache Tika**: The maximum file size is **100MB**.
|
||||
- **With Legacy Fallback**: The maximum file size is **50MB**.
|
||||
|
||||
## Supported File Formats
|
||||
|
||||
The service's ability to extract text depends on whether it's using Apache Tika or the legacy fallback methods.
|
||||
|
||||
### With Apache Tika
|
||||
|
||||
When `TIKA_URL` is configured, the service can process a vast range of file formats. Apache Tika is designed for broad compatibility and supports hundreds of file types, including but not limited to:
|
||||
|
||||
- Portable Document Format (PDF)
|
||||
- Microsoft Office formats (DOC, DOCX, PPT, PPTX, XLS, XLSX)
|
||||
- OpenDocument Formats (ODT, ODS, ODP)
|
||||
- Rich Text Format (RTF)
|
||||
- Plain Text (TXT, CSV, JSON, XML, HTML)
|
||||
- Image formats with OCR capabilities (PNG, JPEG, TIFF)
|
||||
- Archive formats (ZIP, TAR, GZ)
|
||||
- Email formats (EML, MSG)
|
||||
|
||||
For a complete and up-to-date list, please refer to the official [Apache Tika documentation](https://tika.apache.org/3.2.3/formats.html).
|
||||
|
||||
### With Legacy Fallback
|
||||
|
||||
When Tika is not configured, text extraction is limited to the following formats:
|
||||
|
||||
- `application/pdf` (PDF)
|
||||
- `application/vnd.openxmlformats-officedocument.wordprocessingml.document` (DOCX)
|
||||
- `application/vnd.openxmlformats-officedocument.spreadsheetml.sheet` (XLSX)
|
||||
- Plain text formats such as `text/*`, `application/json`, and `application/xml`.
|
||||
|
||||
## Features of the Tika Integration (`OcrService`)
|
||||
|
||||
The `OcrService` (`packages/backend/src/services/OcrService.ts`) provides several enhancements to make text extraction efficient and robust.
|
||||
|
||||
### Caching
|
||||
|
||||
To avoid redundant processing of the same file, the service implements a simple LRU (Least Recently Used) cache.
|
||||
|
||||
- **Cache Key**: A SHA-256 hash of the file's buffer is used as the cache key.
|
||||
- **Functionality**: If a file with the same hash is processed again, the text content is served directly from the cache, saving significant processing time.
|
||||
- **Statistics**: The service keeps track of cache hits, misses, and the hit rate for performance monitoring.
|
||||
|
||||
### Concurrency Management (Semaphore)
|
||||
|
||||
Extracting text from large files can be resource-intensive. To prevent the Tika server from being overwhelmed by multiple requests for the _same file_ simultaneously (e.g., during a large import), a semaphore mechanism is used.
|
||||
|
||||
- **Functionality**: If a request for a specific file (identified by its hash) is already in progress, any subsequent requests for the same file will wait for the first one to complete and then use its result.
|
||||
- **Benefit**: This deduplicates parallel processing efforts and reduces unnecessary load on the Tika server.
|
||||
|
||||
### Health Check and DNS Fallback
|
||||
|
||||
- **Availability Check**: The service includes a `checkTikaAvailability` method to verify that the Tika server is reachable and operational. This check is performed on application startup.
|
||||
- **DNS Fallback**: For convenience in Docker environments, if the Tika URL uses the hostname `tika` (e.g., `http://tika:9998`), the service will automatically attempt a fallback to `localhost` if the initial connection fails.
|
||||
|
||||
## Legacy Fallback Methods
|
||||
|
||||
When Tika is not available, the `extractTextLegacy` function in `textExtractor.ts` handles extraction for a limited set of MIME types:
|
||||
|
||||
- `application/pdf`: Processed using `pdf2json`. Includes a 50MB size limit and a 5-second timeout to prevent memory issues.
|
||||
- `application/vnd.openxmlformats-officedocument.wordprocessingml.document` (DOCX): Processed using `mammoth`.
|
||||
- `application/vnd.openxmlformats-officedocument.spreadsheetml.sheet` (XLSX): Processed using `xlsx`.
|
||||
- Plain text formats (`text/*`, `application/json`, `application/xml`): Converted directly from the buffer.
|
||||
@@ -9,3 +9,4 @@ Choose your provider from the list below to get started:
|
||||
- [Generic IMAP Server](./imap.md)
|
||||
- [EML Import](./eml.md)
|
||||
- [PST Import](./pst.md)
|
||||
- [Mbox Import](./mbox.md)
|
||||
|
||||
29
docs/user-guides/email-providers/mbox.md
Normal file
29
docs/user-guides/email-providers/mbox.md
Normal file
@@ -0,0 +1,29 @@
|
||||
# Mbox Ingestion
|
||||
|
||||
Mbox is a common format for storing email messages. This guide will walk you through the process of ingesting mbox files into OpenArchiver.
|
||||
|
||||
## 1. Exporting from Your Email Client
|
||||
|
||||
Most email clients that support mbox exports will allow you to export a folder of emails as a single `.mbox` file. Here are the general steps:
|
||||
|
||||
- **Mozilla Thunderbird**: Right-click on a folder, select **ImportExportTools NG**, and then choose **Export folder**.
|
||||
- **Gmail**: You can use Google Takeout to export your emails in mbox format.
|
||||
- **Other Clients**: Refer to your email client's documentation for instructions on how to export emails to an mbox file.
|
||||
|
||||
## 2. Uploading to OpenArchiver
|
||||
|
||||
Once you have your `.mbox` file, you can upload it to OpenArchiver through the web interface.
|
||||
|
||||
1. Navigate to the **Ingestion** page.
|
||||
2. Click on the **New Ingestion** button.
|
||||
3. Select **Mbox** as the source type.
|
||||
4. Upload your `.mbox` file.
|
||||
|
||||
## 3. Folder Structure
|
||||
|
||||
OpenArchiver will attempt to preserve the original folder structure of your emails. This is done by inspecting the following email headers:
|
||||
|
||||
- `X-Gmail-Labels`: Used by Gmail to store labels.
|
||||
- `X-Folder`: A custom header used by some email clients like Thunderbird.
|
||||
|
||||
If neither of these headers is present, the emails will be ingested into the root of the archive.
|
||||
@@ -76,18 +76,19 @@ Here is a complete list of environment variables available for configuration:
|
||||
|
||||
These variables are used by `docker-compose.yml` to configure the services.
|
||||
|
||||
| Variable | Description | Default Value |
|
||||
| ------------------- | ----------------------------------------------- | -------------------------------------------------------- |
|
||||
| `POSTGRES_DB` | The name of the PostgreSQL database. | `open_archive` |
|
||||
| `POSTGRES_USER` | The username for the PostgreSQL database. | `admin` |
|
||||
| `POSTGRES_PASSWORD` | The password for the PostgreSQL database. | `password` |
|
||||
| `DATABASE_URL` | The connection URL for the PostgreSQL database. | `postgresql://admin:password@postgres:5432/open_archive` |
|
||||
| `MEILI_MASTER_KEY` | The master key for Meilisearch. | `aSampleMasterKey` |
|
||||
| `MEILI_HOST` | The host for the Meilisearch service. | `http://meilisearch:7700` |
|
||||
| `REDIS_HOST` | The host for the Valkey (Redis) service. | `valkey` |
|
||||
| `REDIS_PORT` | The port for the Valkey (Redis) service. | `6379` |
|
||||
| `REDIS_PASSWORD` | The password for the Valkey (Redis) service. | `defaultredispassword` |
|
||||
| `REDIS_TLS_ENABLED` | Enable or disable TLS for Redis. | `false` |
|
||||
| Variable | Description | Default Value |
|
||||
| ---------------------- | ---------------------------------------------------- | -------------------------------------------------------- |
|
||||
| `POSTGRES_DB` | The name of the PostgreSQL database. | `open_archive` |
|
||||
| `POSTGRES_USER` | The username for the PostgreSQL database. | `admin` |
|
||||
| `POSTGRES_PASSWORD` | The password for the PostgreSQL database. | `password` |
|
||||
| `DATABASE_URL` | The connection URL for the PostgreSQL database. | `postgresql://admin:password@postgres:5432/open_archive` |
|
||||
| `MEILI_MASTER_KEY` | The master key for Meilisearch. | `aSampleMasterKey` |
|
||||
| `MEILI_HOST` | The host for the Meilisearch service. | `http://meilisearch:7700` |
|
||||
| `MEILI_INDEXING_BATCH` | The number of emails to batch together for indexing. | `500` |
|
||||
| `REDIS_HOST` | The host for the Valkey (Redis) service. | `valkey` |
|
||||
| `REDIS_PORT` | The port for the Valkey (Redis) service. | `6379` |
|
||||
| `REDIS_PASSWORD` | The password for the Valkey (Redis) service. | `defaultredispassword` |
|
||||
| `REDIS_TLS_ENABLED` | Enable or disable TLS for Redis. | `false` |
|
||||
|
||||
#### Storage Settings
|
||||
|
||||
@@ -114,6 +115,12 @@ These variables are used by `docker-compose.yml` to configure the services.
|
||||
| `RATE_LIMIT_MAX_REQUESTS` | The maximum number of API requests allowed from an IP within the window. | `100` |
|
||||
| `ENCRYPTION_KEY` | A 32-byte hex string for encrypting sensitive data in the database. | |
|
||||
|
||||
#### Apache Tika Integration
|
||||
|
||||
| Variable | Description | Default Value |
|
||||
| ---------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ------------------ |
|
||||
| `TIKA_URL` | Optional. The URL of an Apache Tika server for advanced text extraction from attachments. If not set, the application falls back to built-in parsers for PDF, Word, and Excel files. | `http://tika:9998` |
|
||||
|
||||
## 3. Run the Application
|
||||
|
||||
Once you have configured your `.env` file, you can start all the services using Docker Compose:
|
||||
@@ -138,7 +145,9 @@ docker compose ps
|
||||
|
||||
Once the services are running, you can access the Open Archiver web interface by navigating to `http://localhost:3000` in your web browser.
|
||||
|
||||
You can log in with the `ADMIN_EMAIL` and `ADMIN_PASSWORD` you configured in your `.env` file.
|
||||
Upon first visit, you will be redirected to the `/setup` page where you can set up your admin account. Make sure you are the first person who accesses the instance.
|
||||
|
||||
If you are not redirected to the `/setup` page but instead see the login page, there might be something wrong with the database. Restart the service and try again.
|
||||
|
||||
## 5. Next Steps
|
||||
|
||||
@@ -212,9 +221,9 @@ If you are using local storage to store your emails, based on your `docker-compo
|
||||
|
||||
Run this command to see all the volumes on your system:
|
||||
|
||||
```bash
|
||||
docker volume ls
|
||||
```
|
||||
```bash
|
||||
docker volume ls
|
||||
```
|
||||
|
||||
2. **Identify the correct volume**:
|
||||
|
||||
@@ -224,28 +233,28 @@ Look through the list for a volume name that ends with `_archiver-data`. The par
|
||||
|
||||
Once you've identified the correct volume name, use it in the `inspect` command. For example:
|
||||
|
||||
```bash
|
||||
docker volume inspect <your_volume_name_here>
|
||||
```
|
||||
```bash
|
||||
docker volume inspect <your_volume_name_here>
|
||||
```
|
||||
|
||||
This will give you the correct `Mountpoint` path where your data is being stored. It will look something like this (the exact path will vary depending on your system):
|
||||
|
||||
```json
|
||||
{
|
||||
"CreatedAt": "2025-07-25T11:22:19Z",
|
||||
"Driver": "local",
|
||||
"Labels": {
|
||||
"com.docker.compose.config-hash": "---",
|
||||
"com.docker.compose.project": "---",
|
||||
"com.docker.compose.version": "2.38.2",
|
||||
"com.docker.compose.volume": "us8wwos0o4ok4go4gc8cog84_archiver-data"
|
||||
},
|
||||
"Mountpoint": "/var/lib/docker/volumes/us8wwos0o4ok4go4gc8cog84_archiver-data/_data",
|
||||
"Name": "us8wwos0o4ok4go4gc8cog84_archiver-data",
|
||||
"Options": null,
|
||||
"Scope": "local"
|
||||
}
|
||||
```
|
||||
```json
|
||||
{
|
||||
"CreatedAt": "2025-07-25T11:22:19Z",
|
||||
"Driver": "local",
|
||||
"Labels": {
|
||||
"com.docker.compose.config-hash": "---",
|
||||
"com.docker.compose.project": "---",
|
||||
"com.docker.compose.version": "2.38.2",
|
||||
"com.docker.compose.volume": "us8wwos0o4ok4go4gc8cog84_archiver-data"
|
||||
},
|
||||
"Mountpoint": "/var/lib/docker/volumes/us8wwos0o4ok4go4gc8cog84_archiver-data/_data",
|
||||
"Name": "us8wwos0o4ok4go4gc8cog84_archiver-data",
|
||||
"Options": null,
|
||||
"Scope": "local"
|
||||
}
|
||||
```
|
||||
|
||||
In this example, the data is located at `/var/lib/docker/volumes/us8wwos0o4ok4go4gc8cog84_archiver-data/_data`. You can then `cd` into that directory to see your files.
|
||||
|
||||
@@ -259,44 +268,44 @@ Here’s how you can do it:
|
||||
|
||||
Open the `docker-compose.yml` file and find the `open-archiver` service. You're going to change the `volumes` section.
|
||||
|
||||
**Change this:**
|
||||
**Change this:**
|
||||
|
||||
```yaml
|
||||
services:
|
||||
open-archiver:
|
||||
# ... other config
|
||||
volumes:
|
||||
- archiver-data:/var/data/open-archiver
|
||||
```
|
||||
```yaml
|
||||
services:
|
||||
open-archiver:
|
||||
# ... other config
|
||||
volumes:
|
||||
- archiver-data:/var/data/open-archiver
|
||||
```
|
||||
|
||||
**To this:**
|
||||
**To this:**
|
||||
|
||||
```yaml
|
||||
services:
|
||||
open-archiver:
|
||||
# ... other config
|
||||
volumes:
|
||||
- ./data/open-archiver:/var/data/open-archiver
|
||||
```
|
||||
```yaml
|
||||
services:
|
||||
open-archiver:
|
||||
# ... other config
|
||||
volumes:
|
||||
- ./data/open-archiver:/var/data/open-archiver
|
||||
```
|
||||
|
||||
You'll also want to remove the `archiver-data` volume definition at the bottom of the file, since it's no longer needed.
|
||||
|
||||
**Remove this whole block:**
|
||||
**Remove this whole block:**
|
||||
|
||||
```yaml
|
||||
volumes:
|
||||
# ... other volumes
|
||||
archiver-data:
|
||||
driver: local
|
||||
```
|
||||
```yaml
|
||||
volumes:
|
||||
# ... other volumes
|
||||
archiver-data:
|
||||
driver: local
|
||||
```
|
||||
|
||||
2. **Restart your containers**:
|
||||
|
||||
After you've saved the changes, run the following command in your terminal to apply them. The `--force-recreate` flag will ensure the container is recreated with the new volume settings.
|
||||
|
||||
```bash
|
||||
docker-compose up -d --force-recreate
|
||||
```
|
||||
```bash
|
||||
docker-compose up -d --force-recreate
|
||||
```
|
||||
|
||||
After this, any new data will be saved directly into the `./data/open-archiver` folder in your project directory.
|
||||
|
||||
|
||||
@@ -0,0 +1,93 @@
|
||||
# Upgrading Meilisearch
|
||||
|
||||
Meilisearch, the search engine used by Open Archiver, requires a manual data migration process when upgrading to a new version. This is because Meilisearch databases are only compatible with the specific version that created them.
|
||||
|
||||
If an Open Archiver upgrade includes a major Meilisearch version change, you will need to migrate your search index by following the process below.
|
||||
|
||||
## Migration Process Overview
|
||||
|
||||
For self-hosted instances using Docker Compose (as recommended), the migration process involves creating a data dump from your current Meilisearch instance, upgrading the Docker image, and then importing that dump into the new version.
|
||||
|
||||
### Step 1: Create a Dump
|
||||
|
||||
Before upgrading, you must create a dump of your existing Meilisearch data. You can do this by sending a POST request to the `/dumps` endpoint of the Meilisearch API.
|
||||
|
||||
1. **Find your Meilisearch container name**:
|
||||
|
||||
```bash
|
||||
docker compose ps
|
||||
```
|
||||
|
||||
Look for the service name that corresponds to Meilisearch, usually `meilisearch`.
|
||||
|
||||
2. **Execute the dump command**:
|
||||
You will need your Meilisearch Admin API key, which can be found in your `.env` file as `MEILI_MASTER_KEY`.
|
||||
|
||||
```bash
|
||||
curl -X POST 'http://localhost:7700/dumps' \
|
||||
-H "Authorization: Bearer YOUR_MEILI_MASTER_KEY"
|
||||
```
|
||||
|
||||
This will start the dump creation process. The dump file will be created inside the `meili_data` volume used by the Meilisearch container.
|
||||
|
||||
3. **Monitor the dump status**:
|
||||
The dump creation request returns a `taskUid`. You can use this to check the status of the dump.
|
||||
|
||||
For more details on dump and import, see the [official Meilisearch documentation](https://www.meilisearch.com/docs/learn/update_and_migration/updating).
|
||||
|
||||
### Step 2: Upgrade Your Open Archiver Instance
|
||||
|
||||
Once the dump is successfully created, you can proceed with the standard Open Archiver upgrade process.
|
||||
|
||||
1. **Pull the latest changes and Docker images**:
|
||||
|
||||
```bash
|
||||
git pull
|
||||
docker compose pull
|
||||
```
|
||||
|
||||
2. **Stop the running services**:
|
||||
```bash
|
||||
docker compose down
|
||||
```
|
||||
|
||||
### Step 3: Import the Dump
|
||||
|
||||
Now, you need to restart the services while telling Meilisearch to import from your dump file.
|
||||
|
||||
1. **Modify `docker-compose.yml`**:
|
||||
You need to temporarily add the `--import-dump` flag to the Meilisearch service command. Find the `meilisearch` service in your `docker-compose.yml` and modify the `command` section.
|
||||
|
||||
You will need the name of your dump file. It will be a `.dump` file located in the directory mapped to `/meili_data` inside the container.
|
||||
|
||||
```yaml
|
||||
services:
|
||||
meilisearch:
|
||||
# ... other service config
|
||||
command:
|
||||
[
|
||||
'--master-key=${MEILI_MASTER_KEY}',
|
||||
'--env=production',
|
||||
'--import-dump=/meili_data/dumps/YOUR_DUMP_FILE.dump',
|
||||
]
|
||||
```
|
||||
|
||||
2. **Restart the services**:
|
||||
```bash
|
||||
docker compose up -d
|
||||
```
|
||||
Meilisearch will now start and import the data from the dump file. This may take some time depending on the size of your index.
|
||||
|
||||
### Step 4: Clean Up
|
||||
|
||||
Once the import is complete and you have verified that your search is working correctly, you should remove the `--import-dump` flag from your `docker-compose.yml` to prevent it from running on every startup.
|
||||
|
||||
1. **Remove the `--import-dump` line** from the `command` section of the `meilisearch` service in `docker-compose.yml`.
|
||||
2. **Restart the services** one last time:
|
||||
```bash
|
||||
docker compose up -d
|
||||
```
|
||||
|
||||
Your Meilisearch instance is now upgraded and running with your migrated data.
|
||||
|
||||
For more advanced scenarios or troubleshooting, please refer to the **[official Meilisearch migration guide](https://www.meilisearch.com/docs/learn/update_and_migration/updating)**.
|
||||
42
docs/user-guides/upgrade-and-migration/upgrade.md
Normal file
42
docs/user-guides/upgrade-and-migration/upgrade.md
Normal file
@@ -0,0 +1,42 @@
|
||||
# Upgrading Your Instance
|
||||
|
||||
This guide provides instructions for upgrading your Open Archiver instance to the latest version.
|
||||
|
||||
## Checking for New Versions
|
||||
|
||||
Open Archiver automatically checks for new versions and will display a notification in the footer of the web interface when an update is available. You can find a list of all releases and their release notes on the [GitHub Releases](https://github.com/LogicLabs-OU/OpenArchiver/releases) page.
|
||||
|
||||
## Upgrading Your Instance
|
||||
|
||||
To upgrade your Open Archiver instance, follow these steps:
|
||||
|
||||
1. **Pull the latest changes from the repository**:
|
||||
|
||||
```bash
|
||||
git pull
|
||||
```
|
||||
|
||||
2. **Pull the latest Docker images**:
|
||||
|
||||
```bash
|
||||
docker compose pull
|
||||
```
|
||||
|
||||
3. **Restart the services with the new images**:
|
||||
```bash
|
||||
docker compose up -d
|
||||
```
|
||||
|
||||
This will restart your Open Archiver instance with the latest version of the application.
|
||||
|
||||
## Migrating Data
|
||||
|
||||
When you upgrade to a new version, database migrations are applied automatically when the application starts up. This ensures that your database schema is always up-to-date with the latest version of the application.
|
||||
|
||||
No manual intervention is required for database migrations.
|
||||
|
||||
## Upgrading Meilisearch
|
||||
|
||||
When an Open Archiver update includes a major version change for Meilisearch, you will need to manually migrate your search data. This process is not covered by the standard upgrade commands.
|
||||
|
||||
For detailed instructions, please see the [Meilisearch Upgrade Guide](./meilisearch-upgrade.md).
|
||||
77
open-archiver.yml
Normal file
77
open-archiver.yml
Normal file
@@ -0,0 +1,77 @@
|
||||
# documentation: https://openarchiver.com
|
||||
# slogan: A self-hosted, open-source email archiving solution with full-text search capability.
|
||||
# tags: email archiving,email,compliance,search
|
||||
# logo: svgs/openarchiver.svg
|
||||
# port: 3000
|
||||
|
||||
services:
|
||||
open-archiver:
|
||||
image: logiclabshq/open-archiver:latest
|
||||
environment:
|
||||
- SERVICE_URL_3000
|
||||
- SERVICE_URL=${SERVICE_URL_3000}
|
||||
- PORT_BACKEND=${PORT_BACKEND:-4000}
|
||||
- PORT_FRONTEND=${PORT_FRONTEND:-3000}
|
||||
- NODE_ENV=${NODE_ENV:-production}
|
||||
- SYNC_FREQUENCY=${SYNC_FREQUENCY:-* * * * *}
|
||||
- POSTGRES_DB=${POSTGRES_DB:-open_archive}
|
||||
- POSTGRES_USER=${POSTGRES_USER:-admin}
|
||||
- POSTGRES_PASSWORD=${SERVICE_PASSWORD_POSTGRES}
|
||||
- DATABASE_URL=postgresql://${POSTGRES_USER}:${POSTGRES_PASSWORD}@postgres:5432/${POSTGRES_DB}
|
||||
- MEILI_MASTER_KEY=${SERVICE_PASSWORD_MEILISEARCH}
|
||||
- MEILI_HOST=http://meilisearch:7700
|
||||
- REDIS_HOST=valkey
|
||||
- REDIS_PORT=6379
|
||||
- REDIS_PASSWORD=${SERVICE_PASSWORD_VALKEY}
|
||||
- REDIS_TLS_ENABLED=false
|
||||
- STORAGE_TYPE=${STORAGE_TYPE:-local}
|
||||
- STORAGE_LOCAL_ROOT_PATH=${STORAGE_LOCAL_ROOT_PATH:-/var/data/open-archiver}
|
||||
- BODY_SIZE_LIMIT=${BODY_SIZE_LIMIT:-100M}
|
||||
- STORAGE_S3_ENDPOINT=${STORAGE_S3_ENDPOINT}
|
||||
- STORAGE_S3_BUCKET=${STORAGE_S3_BUCKET}
|
||||
- STORAGE_S3_ACCESS_KEY_ID=${STORAGE_S3_ACCESS_KEY_ID}
|
||||
- STORAGE_S3_SECRET_ACCESS_KEY=${STORAGE_S3_SECRET_ACCESS_KEY}
|
||||
- STORAGE_S3_REGION=${STORAGE_S3_REGION}
|
||||
- STORAGE_S3_FORCE_PATH_STYLE=${STORAGE_S3_FORCE_PATH_STYLE:-false}
|
||||
- JWT_SECRET=${SERVICE_BASE64_128_JWT}
|
||||
- JWT_EXPIRES_IN=${JWT_EXPIRES_IN:-7d}
|
||||
- ENCRYPTION_KEY=${SERVICE_BASE64_64_ENCRYPTIONKEY}
|
||||
- RATE_LIMIT_WINDOW_MS=${RATE_LIMIT_WINDOW_MS:-60000}
|
||||
- RATE_LIMIT_MAX_REQUESTS=${RATE_LIMIT_MAX_REQUESTS:-100}
|
||||
volumes:
|
||||
- archiver-data:/var/data/open-archiver
|
||||
depends_on:
|
||||
postgres:
|
||||
condition: service_healthy
|
||||
valkey:
|
||||
condition: service_started
|
||||
meilisearch:
|
||||
condition: service_started
|
||||
|
||||
postgres:
|
||||
image: postgres:17-alpine
|
||||
environment:
|
||||
- POSTGRES_DB=${POSTGRES_DB}
|
||||
- POSTGRES_USER=${POSTGRES_USER}
|
||||
- POSTGRES_PASSWORD=${SERVICE_PASSWORD_POSTGRES}
|
||||
- LC_ALL=C
|
||||
volumes:
|
||||
- pgdata:/var/lib/postgresql/data
|
||||
healthcheck:
|
||||
test: ['CMD-SHELL', 'pg_isready -U $${POSTGRES_USER} -d $${POSTGRES_DB}']
|
||||
interval: 10s
|
||||
timeout: 20s
|
||||
retries: 10
|
||||
|
||||
valkey:
|
||||
image: valkey/valkey:8-alpine
|
||||
command: valkey-server --requirepass ${SERVICE_PASSWORD_VALKEY}
|
||||
volumes:
|
||||
- valkeydata:/data
|
||||
|
||||
meilisearch:
|
||||
image: getmeili/meilisearch:v1.15
|
||||
environment:
|
||||
- MEILI_MASTER_KEY=${SERVICE_PASSWORD_MEILISEARCH}
|
||||
volumes:
|
||||
- meilidata:/meili_data
|
||||
@@ -1,5 +1,6 @@
|
||||
{
|
||||
"name": "open-archiver",
|
||||
"version": "0.3.3",
|
||||
"private": true,
|
||||
"scripts": {
|
||||
"dev": "dotenv -- pnpm --filter \"./packages/*\" --parallel dev",
|
||||
|
||||
@@ -59,7 +59,7 @@
|
||||
"reflect-metadata": "^0.2.2",
|
||||
"sqlite3": "^5.1.7",
|
||||
"tsconfig-paths": "^4.2.0",
|
||||
"xlsx": "^0.18.5",
|
||||
"xlsx": "https://cdn.sheetjs.com/xlsx-0.20.3/xlsx-0.20.3.tgz",
|
||||
"yauzl": "^3.2.0",
|
||||
"zod": "^4.1.5"
|
||||
},
|
||||
@@ -74,7 +74,6 @@
|
||||
"@types/multer": "^2.0.0",
|
||||
"@types/node": "^24.0.12",
|
||||
"@types/yauzl": "^2.10.3",
|
||||
"bull-board": "^2.1.3",
|
||||
"ts-node-dev": "^2.0.0",
|
||||
"typescript": "^5.8.3"
|
||||
}
|
||||
|
||||
@@ -7,6 +7,7 @@ import { config } from '../../config/index';
|
||||
export const uploadFile = async (req: Request, res: Response) => {
|
||||
const storage = new StorageService();
|
||||
const bb = busboy({ headers: req.headers });
|
||||
const uploads: Promise<void>[] = [];
|
||||
let filePath = '';
|
||||
let originalFilename = '';
|
||||
|
||||
@@ -14,10 +15,11 @@ export const uploadFile = async (req: Request, res: Response) => {
|
||||
originalFilename = filename.filename;
|
||||
const uuid = randomUUID();
|
||||
filePath = `${config.storage.openArchiverFolderName}/tmp/${uuid}-${originalFilename}`;
|
||||
storage.put(filePath, file);
|
||||
uploads.push(storage.put(filePath, file));
|
||||
});
|
||||
|
||||
bb.on('finish', () => {
|
||||
bb.on('finish', async () => {
|
||||
await Promise.all(uploads);
|
||||
res.json({ filePath });
|
||||
});
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import { storage } from './storage';
|
||||
import { app } from './app';
|
||||
import { searchConfig } from './search';
|
||||
import { searchConfig, meiliConfig } from './search';
|
||||
import { connection as redisConfig } from './redis';
|
||||
import { apiConfig } from './api';
|
||||
|
||||
@@ -8,6 +8,7 @@ export const config = {
|
||||
storage,
|
||||
app,
|
||||
search: searchConfig,
|
||||
meili: meiliConfig,
|
||||
redis: redisConfig,
|
||||
api: apiConfig,
|
||||
};
|
||||
|
||||
@@ -2,6 +2,7 @@ import pino from 'pino';
|
||||
|
||||
export const logger = pino({
|
||||
level: process.env.LOG_LEVEL || 'info',
|
||||
redact: ['password'],
|
||||
transport: {
|
||||
target: 'pino-pretty',
|
||||
options: {
|
||||
|
||||
@@ -4,3 +4,9 @@ export const searchConfig = {
|
||||
host: process.env.MEILI_HOST || 'http://127.0.0.1:7700',
|
||||
apiKey: process.env.MEILI_MASTER_KEY || '',
|
||||
};
|
||||
|
||||
export const meiliConfig = {
|
||||
indexingBatchSize: process.env.MEILI_INDEXING_BATCH
|
||||
? parseInt(process.env.MEILI_INDEXING_BATCH)
|
||||
: 500,
|
||||
};
|
||||
|
||||
@@ -0,0 +1 @@
|
||||
ALTER TYPE "public"."ingestion_provider" ADD VALUE 'mbox_import';
|
||||
1245
packages/backend/src/database/migrations/meta/0020_snapshot.json
Normal file
1245
packages/backend/src/database/migrations/meta/0020_snapshot.json
Normal file
File diff suppressed because it is too large
Load Diff
@@ -1,146 +1,153 @@
|
||||
{
|
||||
"version": "7",
|
||||
"dialect": "postgresql",
|
||||
"entries": [
|
||||
{
|
||||
"idx": 0,
|
||||
"version": "7",
|
||||
"when": 1752225352591,
|
||||
"tag": "0000_amusing_namora",
|
||||
"breakpoints": true
|
||||
},
|
||||
{
|
||||
"idx": 1,
|
||||
"version": "7",
|
||||
"when": 1752326803882,
|
||||
"tag": "0001_odd_night_thrasher",
|
||||
"breakpoints": true
|
||||
},
|
||||
{
|
||||
"idx": 2,
|
||||
"version": "7",
|
||||
"when": 1752332648392,
|
||||
"tag": "0002_lethal_quentin_quire",
|
||||
"breakpoints": true
|
||||
},
|
||||
{
|
||||
"idx": 3,
|
||||
"version": "7",
|
||||
"when": 1752332967084,
|
||||
"tag": "0003_petite_wrecker",
|
||||
"breakpoints": true
|
||||
},
|
||||
{
|
||||
"idx": 4,
|
||||
"version": "7",
|
||||
"when": 1752606108876,
|
||||
"tag": "0004_sleepy_paper_doll",
|
||||
"breakpoints": true
|
||||
},
|
||||
{
|
||||
"idx": 5,
|
||||
"version": "7",
|
||||
"when": 1752606327253,
|
||||
"tag": "0005_chunky_sue_storm",
|
||||
"breakpoints": true
|
||||
},
|
||||
{
|
||||
"idx": 6,
|
||||
"version": "7",
|
||||
"when": 1753112018514,
|
||||
"tag": "0006_majestic_caretaker",
|
||||
"breakpoints": true
|
||||
},
|
||||
{
|
||||
"idx": 7,
|
||||
"version": "7",
|
||||
"when": 1753190159356,
|
||||
"tag": "0007_handy_archangel",
|
||||
"breakpoints": true
|
||||
},
|
||||
{
|
||||
"idx": 8,
|
||||
"version": "7",
|
||||
"when": 1753370737317,
|
||||
"tag": "0008_eminent_the_spike",
|
||||
"breakpoints": true
|
||||
},
|
||||
{
|
||||
"idx": 9,
|
||||
"version": "7",
|
||||
"when": 1754337938241,
|
||||
"tag": "0009_late_lenny_balinger",
|
||||
"breakpoints": true
|
||||
},
|
||||
{
|
||||
"idx": 10,
|
||||
"version": "7",
|
||||
"when": 1754420780849,
|
||||
"tag": "0010_perpetual_lightspeed",
|
||||
"breakpoints": true
|
||||
},
|
||||
{
|
||||
"idx": 11,
|
||||
"version": "7",
|
||||
"when": 1754422064158,
|
||||
"tag": "0011_tan_blackheart",
|
||||
"breakpoints": true
|
||||
},
|
||||
{
|
||||
"idx": 12,
|
||||
"version": "7",
|
||||
"when": 1754476962901,
|
||||
"tag": "0012_warm_the_stranger",
|
||||
"breakpoints": true
|
||||
},
|
||||
{
|
||||
"idx": 13,
|
||||
"version": "7",
|
||||
"when": 1754659373517,
|
||||
"tag": "0013_classy_talkback",
|
||||
"breakpoints": true
|
||||
},
|
||||
{
|
||||
"idx": 14,
|
||||
"version": "7",
|
||||
"when": 1754831765718,
|
||||
"tag": "0014_foamy_vapor",
|
||||
"breakpoints": true
|
||||
},
|
||||
{
|
||||
"idx": 15,
|
||||
"version": "7",
|
||||
"when": 1755443936046,
|
||||
"tag": "0015_wakeful_norman_osborn",
|
||||
"breakpoints": true
|
||||
},
|
||||
{
|
||||
"idx": 16,
|
||||
"version": "7",
|
||||
"when": 1755780572342,
|
||||
"tag": "0016_lonely_mariko_yashida",
|
||||
"breakpoints": true
|
||||
},
|
||||
{
|
||||
"idx": 17,
|
||||
"version": "7",
|
||||
"when": 1755961566627,
|
||||
"tag": "0017_tranquil_shooting_star",
|
||||
"breakpoints": true
|
||||
},
|
||||
{
|
||||
"idx": 18,
|
||||
"version": "7",
|
||||
"when": 1756911118035,
|
||||
"tag": "0018_flawless_owl",
|
||||
"breakpoints": true
|
||||
},
|
||||
{
|
||||
"idx": 19,
|
||||
"version": "7",
|
||||
"when": 1756937533843,
|
||||
"tag": "0019_confused_scream",
|
||||
"breakpoints": true
|
||||
}
|
||||
]
|
||||
}
|
||||
"version": "7",
|
||||
"dialect": "postgresql",
|
||||
"entries": [
|
||||
{
|
||||
"idx": 0,
|
||||
"version": "7",
|
||||
"when": 1752225352591,
|
||||
"tag": "0000_amusing_namora",
|
||||
"breakpoints": true
|
||||
},
|
||||
{
|
||||
"idx": 1,
|
||||
"version": "7",
|
||||
"when": 1752326803882,
|
||||
"tag": "0001_odd_night_thrasher",
|
||||
"breakpoints": true
|
||||
},
|
||||
{
|
||||
"idx": 2,
|
||||
"version": "7",
|
||||
"when": 1752332648392,
|
||||
"tag": "0002_lethal_quentin_quire",
|
||||
"breakpoints": true
|
||||
},
|
||||
{
|
||||
"idx": 3,
|
||||
"version": "7",
|
||||
"when": 1752332967084,
|
||||
"tag": "0003_petite_wrecker",
|
||||
"breakpoints": true
|
||||
},
|
||||
{
|
||||
"idx": 4,
|
||||
"version": "7",
|
||||
"when": 1752606108876,
|
||||
"tag": "0004_sleepy_paper_doll",
|
||||
"breakpoints": true
|
||||
},
|
||||
{
|
||||
"idx": 5,
|
||||
"version": "7",
|
||||
"when": 1752606327253,
|
||||
"tag": "0005_chunky_sue_storm",
|
||||
"breakpoints": true
|
||||
},
|
||||
{
|
||||
"idx": 6,
|
||||
"version": "7",
|
||||
"when": 1753112018514,
|
||||
"tag": "0006_majestic_caretaker",
|
||||
"breakpoints": true
|
||||
},
|
||||
{
|
||||
"idx": 7,
|
||||
"version": "7",
|
||||
"when": 1753190159356,
|
||||
"tag": "0007_handy_archangel",
|
||||
"breakpoints": true
|
||||
},
|
||||
{
|
||||
"idx": 8,
|
||||
"version": "7",
|
||||
"when": 1753370737317,
|
||||
"tag": "0008_eminent_the_spike",
|
||||
"breakpoints": true
|
||||
},
|
||||
{
|
||||
"idx": 9,
|
||||
"version": "7",
|
||||
"when": 1754337938241,
|
||||
"tag": "0009_late_lenny_balinger",
|
||||
"breakpoints": true
|
||||
},
|
||||
{
|
||||
"idx": 10,
|
||||
"version": "7",
|
||||
"when": 1754420780849,
|
||||
"tag": "0010_perpetual_lightspeed",
|
||||
"breakpoints": true
|
||||
},
|
||||
{
|
||||
"idx": 11,
|
||||
"version": "7",
|
||||
"when": 1754422064158,
|
||||
"tag": "0011_tan_blackheart",
|
||||
"breakpoints": true
|
||||
},
|
||||
{
|
||||
"idx": 12,
|
||||
"version": "7",
|
||||
"when": 1754476962901,
|
||||
"tag": "0012_warm_the_stranger",
|
||||
"breakpoints": true
|
||||
},
|
||||
{
|
||||
"idx": 13,
|
||||
"version": "7",
|
||||
"when": 1754659373517,
|
||||
"tag": "0013_classy_talkback",
|
||||
"breakpoints": true
|
||||
},
|
||||
{
|
||||
"idx": 14,
|
||||
"version": "7",
|
||||
"when": 1754831765718,
|
||||
"tag": "0014_foamy_vapor",
|
||||
"breakpoints": true
|
||||
},
|
||||
{
|
||||
"idx": 15,
|
||||
"version": "7",
|
||||
"when": 1755443936046,
|
||||
"tag": "0015_wakeful_norman_osborn",
|
||||
"breakpoints": true
|
||||
},
|
||||
{
|
||||
"idx": 16,
|
||||
"version": "7",
|
||||
"when": 1755780572342,
|
||||
"tag": "0016_lonely_mariko_yashida",
|
||||
"breakpoints": true
|
||||
},
|
||||
{
|
||||
"idx": 17,
|
||||
"version": "7",
|
||||
"when": 1755961566627,
|
||||
"tag": "0017_tranquil_shooting_star",
|
||||
"breakpoints": true
|
||||
},
|
||||
{
|
||||
"idx": 18,
|
||||
"version": "7",
|
||||
"when": 1756911118035,
|
||||
"tag": "0018_flawless_owl",
|
||||
"breakpoints": true
|
||||
},
|
||||
{
|
||||
"idx": 19,
|
||||
"version": "7",
|
||||
"when": 1756937533843,
|
||||
"tag": "0019_confused_scream",
|
||||
"breakpoints": true
|
||||
},
|
||||
{
|
||||
"idx": 20,
|
||||
"version": "7",
|
||||
"when": 1757860242528,
|
||||
"tag": "0020_panoramic_wolverine",
|
||||
"breakpoints": true
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -8,6 +8,7 @@ export const ingestionProviderEnum = pgEnum('ingestion_provider', [
|
||||
'generic_imap',
|
||||
'pst_import',
|
||||
'eml_import',
|
||||
'mbox_import',
|
||||
]);
|
||||
|
||||
export const ingestionStatusEnum = pgEnum('ingestion_status', [
|
||||
|
||||
@@ -1,7 +1,10 @@
|
||||
import PDFParser from 'pdf2json';
|
||||
import mammoth from 'mammoth';
|
||||
import xlsx from 'xlsx';
|
||||
import { logger } from '../config/logger';
|
||||
import { OcrService } from '../services/OcrService';
|
||||
|
||||
// Legacy PDF extraction (with improved memory management)
|
||||
function extractTextFromPdf(buffer: Buffer): Promise<string> {
|
||||
return new Promise((resolve) => {
|
||||
const pdfParser = new PDFParser(null, true);
|
||||
@@ -10,34 +13,60 @@ function extractTextFromPdf(buffer: Buffer): Promise<string> {
|
||||
const finish = (text: string) => {
|
||||
if (completed) return;
|
||||
completed = true;
|
||||
pdfParser.removeAllListeners();
|
||||
|
||||
// explicit cleanup
|
||||
try {
|
||||
pdfParser.removeAllListeners();
|
||||
} catch (e) {
|
||||
// Ignore cleanup errors
|
||||
}
|
||||
|
||||
resolve(text);
|
||||
};
|
||||
|
||||
pdfParser.on('pdfParser_dataError', () => finish(''));
|
||||
pdfParser.on('pdfParser_dataReady', () => finish(pdfParser.getRawTextContent()));
|
||||
pdfParser.on('pdfParser_dataError', (err: any) => {
|
||||
logger.warn('PDF parsing error:', err?.parserError || 'Unknown error');
|
||||
finish('');
|
||||
});
|
||||
|
||||
pdfParser.on('pdfParser_dataReady', () => {
|
||||
try {
|
||||
const text = pdfParser.getRawTextContent();
|
||||
finish(text || '');
|
||||
} catch (err) {
|
||||
logger.warn('Error getting PDF text content:', err);
|
||||
finish('');
|
||||
}
|
||||
});
|
||||
|
||||
try {
|
||||
pdfParser.parseBuffer(buffer);
|
||||
} catch (err) {
|
||||
console.error('Error parsing PDF buffer', err);
|
||||
logger.error('Error parsing PDF buffer:', err);
|
||||
finish('');
|
||||
}
|
||||
|
||||
// Prevent hanging if the parser never emits events
|
||||
setTimeout(() => finish(''), 10000);
|
||||
// reduced Timeout for better performance
|
||||
setTimeout(() => {
|
||||
logger.warn('PDF parsing timed out');
|
||||
finish('');
|
||||
}, 5000);
|
||||
});
|
||||
}
|
||||
|
||||
export async function extractText(buffer: Buffer, mimeType: string): Promise<string> {
|
||||
// Legacy text extraction for various formats
|
||||
async function extractTextLegacy(buffer: Buffer, mimeType: string): Promise<string> {
|
||||
try {
|
||||
if (mimeType === 'application/pdf') {
|
||||
// Check PDF size (memory protection)
|
||||
if (buffer.length > 50 * 1024 * 1024) { // 50MB Limit
|
||||
logger.warn('PDF too large for legacy extraction, skipping');
|
||||
return '';
|
||||
}
|
||||
return await extractTextFromPdf(buffer);
|
||||
}
|
||||
|
||||
if (
|
||||
mimeType === 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'
|
||||
) {
|
||||
if (mimeType === 'application/vnd.openxmlformats-officedocument.wordprocessingml.document') {
|
||||
const { value } = await mammoth.extractRawText({ buffer });
|
||||
return value;
|
||||
}
|
||||
@@ -50,7 +79,7 @@ export async function extractText(buffer: Buffer, mimeType: string): Promise<str
|
||||
const sheetText = xlsx.utils.sheet_to_txt(sheet);
|
||||
fullText += sheetText + '\n';
|
||||
}
|
||||
return fullText;
|
||||
return fullText.trim();
|
||||
}
|
||||
|
||||
if (
|
||||
@@ -60,11 +89,54 @@ export async function extractText(buffer: Buffer, mimeType: string): Promise<str
|
||||
) {
|
||||
return buffer.toString('utf-8');
|
||||
}
|
||||
|
||||
return '';
|
||||
} catch (error) {
|
||||
console.error(`Error extracting text from attachment with MIME type ${mimeType}:`, error);
|
||||
return ''; // Return empty string on failure
|
||||
logger.error(`Error extracting text from attachment with MIME type ${mimeType}:`, error);
|
||||
|
||||
// Force garbage collection if available
|
||||
if (global.gc) {
|
||||
global.gc();
|
||||
}
|
||||
|
||||
return '';
|
||||
}
|
||||
}
|
||||
|
||||
// Main extraction function
|
||||
export async function extractText(buffer: Buffer, mimeType: string): Promise<string> {
|
||||
// Input validation
|
||||
if (!buffer || buffer.length === 0) {
|
||||
return '';
|
||||
}
|
||||
|
||||
console.warn(`Unsupported MIME type for text extraction: ${mimeType}`);
|
||||
return ''; // Return empty string for unsupported types
|
||||
if (!mimeType) {
|
||||
logger.warn('No MIME type provided for text extraction');
|
||||
return '';
|
||||
}
|
||||
|
||||
// General size limit
|
||||
const maxSize = process.env.TIKA_URL ? 100 * 1024 * 1024 : 50 * 1024 * 1024; // 100MB for Tika, 50MB for Legacy
|
||||
if (buffer.length > maxSize) {
|
||||
logger.warn(`File too large for text extraction: ${buffer.length} bytes (limit: ${maxSize})`);
|
||||
return '';
|
||||
}
|
||||
|
||||
// Decide between Tika and legacy
|
||||
const tikaUrl = process.env.TIKA_URL;
|
||||
|
||||
if (tikaUrl) {
|
||||
// Tika decides what it can parse
|
||||
logger.debug(`Using Tika for text extraction: ${mimeType}`);
|
||||
const ocrService = new OcrService()
|
||||
try {
|
||||
return await ocrService.extractTextWithTika(buffer, mimeType);
|
||||
} catch (error) {
|
||||
logger.error({ error }, "OCR text extraction failed, returning empty string")
|
||||
return ''
|
||||
}
|
||||
} else {
|
||||
// extract using legacy mode
|
||||
return await extractTextLegacy(buffer, mimeType);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3,14 +3,15 @@ import { IndexingService } from '../../services/IndexingService';
|
||||
import { SearchService } from '../../services/SearchService';
|
||||
import { StorageService } from '../../services/StorageService';
|
||||
import { DatabaseService } from '../../services/DatabaseService';
|
||||
import { PendingEmail } from '@open-archiver/types';
|
||||
|
||||
const searchService = new SearchService();
|
||||
const storageService = new StorageService();
|
||||
const databaseService = new DatabaseService();
|
||||
const indexingService = new IndexingService(databaseService, searchService, storageService);
|
||||
|
||||
export default async function (job: Job<{ emailId: string }>) {
|
||||
const { emailId } = job.data;
|
||||
console.log(`Indexing email with ID: ${emailId}`);
|
||||
await indexingService.indexEmailById(emailId);
|
||||
export default async function (job: Job<{ emails: PendingEmail[] }>) {
|
||||
const { emails } = job.data;
|
||||
console.log(`Indexing email batch with ${emails.length} emails`);
|
||||
await indexingService.indexEmailBatch(emails);
|
||||
}
|
||||
@@ -1,9 +1,19 @@
|
||||
import { Job } from 'bullmq';
|
||||
import { IProcessMailboxJob, SyncState, ProcessMailboxError } from '@open-archiver/types';
|
||||
import {
|
||||
IProcessMailboxJob,
|
||||
SyncState,
|
||||
ProcessMailboxError,
|
||||
PendingEmail,
|
||||
} from '@open-archiver/types';
|
||||
import { IngestionService } from '../../services/IngestionService';
|
||||
import { logger } from '../../config/logger';
|
||||
import { EmailProviderFactory } from '../../services/EmailProviderFactory';
|
||||
import { StorageService } from '../../services/StorageService';
|
||||
import { IndexingService } from '../../services/IndexingService';
|
||||
import { SearchService } from '../../services/SearchService';
|
||||
import { DatabaseService } from '../../services/DatabaseService';
|
||||
import { config } from '../../config';
|
||||
|
||||
|
||||
/**
|
||||
* This processor handles the ingestion of emails for a single user's mailbox.
|
||||
@@ -15,9 +25,16 @@ import { StorageService } from '../../services/StorageService';
|
||||
*/
|
||||
export const processMailboxProcessor = async (job: Job<IProcessMailboxJob, SyncState, string>) => {
|
||||
const { ingestionSourceId, userEmail } = job.data;
|
||||
const BATCH_SIZE: number = config.meili.indexingBatchSize;
|
||||
let emailBatch: PendingEmail[] = [];
|
||||
|
||||
logger.info({ ingestionSourceId, userEmail }, `Processing mailbox for user`);
|
||||
|
||||
const searchService = new SearchService();
|
||||
const storageService = new StorageService();
|
||||
const databaseService = new DatabaseService();
|
||||
const indexingService = new IndexingService(databaseService, searchService, storageService);
|
||||
|
||||
try {
|
||||
const source = await IngestionService.findById(ingestionSourceId);
|
||||
if (!source) {
|
||||
@@ -26,22 +43,38 @@ export const processMailboxProcessor = async (job: Job<IProcessMailboxJob, SyncS
|
||||
|
||||
const connector = EmailProviderFactory.createConnector(source);
|
||||
const ingestionService = new IngestionService();
|
||||
const storageService = new StorageService();
|
||||
|
||||
// Pass the sync state for the entire source, the connector will handle per-user logic if necessary
|
||||
for await (const email of connector.fetchEmails(userEmail, source.syncState)) {
|
||||
if (email) {
|
||||
await ingestionService.processEmail(email, source, storageService, userEmail);
|
||||
const processedEmail = await ingestionService.processEmail(
|
||||
email,
|
||||
source,
|
||||
storageService,
|
||||
userEmail
|
||||
);
|
||||
if (processedEmail) {
|
||||
emailBatch.push(processedEmail);
|
||||
if (emailBatch.length >= BATCH_SIZE) {
|
||||
await indexingService.indexEmailBatch(emailBatch);
|
||||
emailBatch = [];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (emailBatch.length > 0) {
|
||||
await indexingService.indexEmailBatch(emailBatch);
|
||||
emailBatch = [];
|
||||
}
|
||||
|
||||
const newSyncState = connector.getUpdatedSyncState(userEmail);
|
||||
|
||||
logger.info({ ingestionSourceId, userEmail }, `Finished processing mailbox for user`);
|
||||
|
||||
// Return the new sync state to be aggregated by the parent flow
|
||||
return newSyncState;
|
||||
} catch (error) {
|
||||
if (emailBatch.length > 0) {
|
||||
await indexingService.indexEmailBatch(emailBatch);
|
||||
}
|
||||
|
||||
logger.error({ err: error, ingestionSourceId, userEmail }, 'Error processing mailbox');
|
||||
const errorMessage = error instanceof Error ? error.message : 'An unknown error occurred';
|
||||
const processMailboxError: ProcessMailboxError = {
|
||||
|
||||
@@ -8,6 +8,7 @@ const scheduleContinuousSync = async () => {
|
||||
'schedule-continuous-sync',
|
||||
{},
|
||||
{
|
||||
jobId: 'schedule-continuous-sync',
|
||||
repeat: {
|
||||
pattern: config.app.syncFrequency,
|
||||
},
|
||||
|
||||
@@ -5,6 +5,7 @@ import type {
|
||||
GenericImapCredentials,
|
||||
PSTImportCredentials,
|
||||
EMLImportCredentials,
|
||||
MboxImportCredentials,
|
||||
EmailObject,
|
||||
SyncState,
|
||||
MailboxUser,
|
||||
@@ -14,6 +15,7 @@ import { MicrosoftConnector } from './ingestion-connectors/MicrosoftConnector';
|
||||
import { ImapConnector } from './ingestion-connectors/ImapConnector';
|
||||
import { PSTConnector } from './ingestion-connectors/PSTConnector';
|
||||
import { EMLConnector } from './ingestion-connectors/EMLConnector';
|
||||
import { MboxConnector } from './ingestion-connectors/MboxConnector';
|
||||
|
||||
// Define a common interface for all connectors
|
||||
export interface IEmailConnector {
|
||||
@@ -43,6 +45,8 @@ export class EmailProviderFactory {
|
||||
return new PSTConnector(credentials as PSTImportCredentials);
|
||||
case 'eml_import':
|
||||
return new EMLConnector(credentials as EMLImportCredentials);
|
||||
case 'mbox_import':
|
||||
return new MboxConnector(credentials as MboxImportCredentials);
|
||||
default:
|
||||
throw new Error(`Unsupported provider: ${source.provider}`);
|
||||
}
|
||||
|
||||
@@ -1,4 +1,10 @@
|
||||
import { Attachment, EmailAddress, EmailDocument, EmailObject } from '@open-archiver/types';
|
||||
import {
|
||||
Attachment,
|
||||
EmailAddress,
|
||||
EmailDocument,
|
||||
EmailObject,
|
||||
PendingEmail,
|
||||
} from '@open-archiver/types';
|
||||
import { SearchService } from './SearchService';
|
||||
import { StorageService } from './StorageService';
|
||||
import { extractText } from '../helpers/textExtractor';
|
||||
@@ -7,6 +13,7 @@ import { archivedEmails, attachments, emailAttachments } from '../database/schem
|
||||
import { eq } from 'drizzle-orm';
|
||||
import { streamToBuffer } from '../helpers/streamToBuffer';
|
||||
import { simpleParser } from 'mailparser';
|
||||
import { logger } from '../config/logger';
|
||||
|
||||
interface DbRecipients {
|
||||
to: { name: string; address: string }[];
|
||||
@@ -20,14 +27,45 @@ type AttachmentsType = {
|
||||
mimeType: string;
|
||||
}[];
|
||||
|
||||
/**
|
||||
* Sanitizes text content by removing invalid characters that could cause JSON serialization issues
|
||||
*/
|
||||
function sanitizeText(text: string): string {
|
||||
if (!text) return '';
|
||||
|
||||
// Remove control characters and invalid UTF-8 sequences
|
||||
return text
|
||||
.replace(/\uFFFD/g, '') // Replacement character for invalid UTF-8 sequences
|
||||
.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, '') // Remove control characters
|
||||
.trim();
|
||||
}
|
||||
|
||||
/**
|
||||
* Recursively sanitize all string values in an object to prevent JSON issues
|
||||
*/
|
||||
function sanitizeObject<T>(obj: T): T {
|
||||
if (typeof obj === 'string') {
|
||||
return sanitizeText(obj) as unknown as T;
|
||||
} else if (Array.isArray(obj)) {
|
||||
return obj.map(sanitizeObject) as unknown as T;
|
||||
} else if (obj !== null && typeof obj === 'object') {
|
||||
const sanitized: any = {};
|
||||
for (const key in obj) {
|
||||
if (Object.prototype.hasOwnProperty.call(obj, key)) {
|
||||
sanitized[key] = sanitizeObject((obj as any)[key]);
|
||||
}
|
||||
}
|
||||
return sanitized;
|
||||
}
|
||||
return obj;
|
||||
}
|
||||
|
||||
|
||||
export class IndexingService {
|
||||
private dbService: DatabaseService;
|
||||
private searchService: SearchService;
|
||||
private storageService: StorageService;
|
||||
|
||||
/**
|
||||
* Initializes the service with its dependencies.
|
||||
*/
|
||||
constructor(
|
||||
dbService: DatabaseService,
|
||||
searchService: SearchService,
|
||||
@@ -39,9 +77,129 @@ export class IndexingService {
|
||||
}
|
||||
|
||||
/**
|
||||
* Fetches an email by its ID from the database, creates a search document, and indexes it.
|
||||
* Index multiple emails in a single batch operation for better performance
|
||||
*/
|
||||
public async indexEmailById(emailId: string): Promise<void> {
|
||||
public async indexEmailBatch(emails: PendingEmail[]): Promise<void> {
|
||||
if (emails.length === 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
logger.info({ batchSize: emails.length }, 'Starting batch indexing of emails');
|
||||
|
||||
try {
|
||||
const CONCURRENCY_LIMIT = 10;
|
||||
const rawDocuments: EmailDocument[] = [];
|
||||
|
||||
for (let i = 0; i < emails.length; i += CONCURRENCY_LIMIT) {
|
||||
const batch = emails.slice(i, i + CONCURRENCY_LIMIT);
|
||||
|
||||
const batchDocuments = await Promise.allSettled(
|
||||
batch.map(async ({ email, sourceId, archivedId }) => {
|
||||
try {
|
||||
return await this.createEmailDocumentFromRawForBatch(
|
||||
email,
|
||||
sourceId,
|
||||
archivedId,
|
||||
email.userEmail || ''
|
||||
);
|
||||
} catch (error) {
|
||||
logger.error(
|
||||
{
|
||||
emailId: archivedId,
|
||||
sourceId,
|
||||
userEmail: email.userEmail || '',
|
||||
rawEmailData: JSON.stringify(email, null, 2),
|
||||
error: error instanceof Error ? error.message : String(error),
|
||||
},
|
||||
'Failed to create document for email in batch'
|
||||
);
|
||||
throw error;
|
||||
}
|
||||
})
|
||||
);
|
||||
|
||||
for (const result of batchDocuments) {
|
||||
if (result.status === 'fulfilled') {
|
||||
rawDocuments.push(result.value);
|
||||
} else {
|
||||
logger.error({ error: result.reason }, 'Failed to process email in batch');
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (rawDocuments.length === 0) {
|
||||
logger.warn('No documents created from email batch');
|
||||
return;
|
||||
}
|
||||
|
||||
// Sanitize all documents
|
||||
const sanitizedDocuments = rawDocuments.map((doc) => sanitizeObject(doc));
|
||||
|
||||
// Ensure all required fields are present
|
||||
const completeDocuments = sanitizedDocuments.map((doc) =>
|
||||
this.ensureEmailDocumentFields(doc)
|
||||
);
|
||||
|
||||
// Validate each document and separate valid from invalid ones
|
||||
const validDocuments: EmailDocument[] = [];
|
||||
const invalidDocuments: { doc: any; reason: string }[] = [];
|
||||
|
||||
for (const doc of completeDocuments) {
|
||||
if (this.isValidEmailDocument(doc)) {
|
||||
validDocuments.push(doc);
|
||||
} else {
|
||||
invalidDocuments.push({ doc, reason: 'JSON.stringify failed' });
|
||||
logger.warn({ document: doc }, 'Skipping invalid EmailDocument');
|
||||
}
|
||||
}
|
||||
|
||||
// Log detailed information for invalid documents
|
||||
if (invalidDocuments.length > 0) {
|
||||
for (const { doc } of invalidDocuments) {
|
||||
logger.error(
|
||||
{
|
||||
emailId: doc.id,
|
||||
document: JSON.stringify(doc, null, 2),
|
||||
},
|
||||
'Invalid EmailDocument details'
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
if (validDocuments.length === 0) {
|
||||
logger.warn('No valid documents to index in batch.');
|
||||
return;
|
||||
}
|
||||
|
||||
logger.debug({ documentCount: validDocuments.length }, 'Sending batch to Meilisearch');
|
||||
|
||||
await this.searchService.addDocuments('emails', validDocuments, 'id');
|
||||
|
||||
logger.info(
|
||||
{
|
||||
batchSize: emails.length,
|
||||
successfulDocuments: validDocuments.length,
|
||||
failedDocuments: emails.length - validDocuments.length,
|
||||
invalidDocuments: invalidDocuments.length,
|
||||
},
|
||||
'Successfully indexed email batch'
|
||||
);
|
||||
} catch (error) {
|
||||
logger.error(
|
||||
{
|
||||
batchSize: emails.length,
|
||||
error: error instanceof Error ? error.message : String(error),
|
||||
},
|
||||
'Failed to index email batch'
|
||||
);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @deprecated
|
||||
*/
|
||||
private async indexEmailById(emailId: string): Promise<void> {
|
||||
const email = await this.dbService.db.query.archivedEmails.findFirst({
|
||||
where: eq(archivedEmails.id, emailId),
|
||||
});
|
||||
@@ -75,16 +233,14 @@ export class IndexingService {
|
||||
}
|
||||
|
||||
/**
|
||||
* Indexes an email object directly, creates a search document, and indexes it.
|
||||
* @deprecated
|
||||
*/
|
||||
public async indexByEmail(
|
||||
email: EmailObject,
|
||||
ingestionSourceId: string,
|
||||
archivedEmailId: string
|
||||
private async indexByEmail(
|
||||
pendingEmail: PendingEmail
|
||||
): Promise<void> {
|
||||
const attachments: AttachmentsType = [];
|
||||
if (email.attachments && email.attachments.length > 0) {
|
||||
for (const attachment of email.attachments) {
|
||||
if (pendingEmail.email.attachments && pendingEmail.email.attachments.length > 0) {
|
||||
for (const attachment of pendingEmail.email.attachments) {
|
||||
attachments.push({
|
||||
buffer: attachment.content,
|
||||
filename: attachment.filename,
|
||||
@@ -93,19 +249,96 @@ export class IndexingService {
|
||||
}
|
||||
}
|
||||
const document = await this.createEmailDocumentFromRaw(
|
||||
email,
|
||||
pendingEmail.email,
|
||||
attachments,
|
||||
ingestionSourceId,
|
||||
archivedEmailId,
|
||||
email.userEmail || ''
|
||||
pendingEmail.sourceId,
|
||||
pendingEmail.archivedId,
|
||||
pendingEmail.email.userEmail || ''
|
||||
);
|
||||
// console.log(document);
|
||||
await this.searchService.addDocuments('emails', [document], 'id');
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Creates a search document from a raw email object and its attachments.
|
||||
*/
|
||||
private async createEmailDocumentFromRawForBatch(
|
||||
email: EmailObject,
|
||||
ingestionSourceId: string,
|
||||
archivedEmailId: string,
|
||||
userEmail: string
|
||||
): Promise<EmailDocument> {
|
||||
const extractedAttachments: { filename: string; content: string }[] = [];
|
||||
|
||||
if (email.attachments && email.attachments.length > 0) {
|
||||
const ATTACHMENT_CONCURRENCY = 3;
|
||||
|
||||
for (let i = 0; i < email.attachments.length; i += ATTACHMENT_CONCURRENCY) {
|
||||
const attachmentBatch = email.attachments.slice(i, i + ATTACHMENT_CONCURRENCY);
|
||||
|
||||
const attachmentResults = await Promise.allSettled(
|
||||
attachmentBatch.map(async (attachment) => {
|
||||
try {
|
||||
if (!this.shouldExtractText(attachment.contentType)) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const textContent = await extractText(
|
||||
attachment.content,
|
||||
attachment.contentType || ''
|
||||
);
|
||||
|
||||
return {
|
||||
filename: attachment.filename,
|
||||
content: textContent || '',
|
||||
};
|
||||
} catch (error) {
|
||||
logger.warn(
|
||||
{
|
||||
filename: attachment.filename,
|
||||
mimeType: attachment.contentType,
|
||||
emailId: archivedEmailId,
|
||||
error: error instanceof Error ? error.message : String(error),
|
||||
},
|
||||
'Failed to extract text from attachment'
|
||||
);
|
||||
return null;
|
||||
}
|
||||
})
|
||||
);
|
||||
|
||||
for (const result of attachmentResults) {
|
||||
if (result.status === 'fulfilled' && result.value) {
|
||||
extractedAttachments.push(result.value);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const allAttachmentText = extractedAttachments
|
||||
.map((att) => sanitizeText(att.content))
|
||||
.join(' ');
|
||||
|
||||
const enhancedBody = [sanitizeText(email.body || email.html || ''), allAttachmentText]
|
||||
.filter(Boolean)
|
||||
.join('\n\n--- Attachments ---\n\n');
|
||||
|
||||
return {
|
||||
id: archivedEmailId,
|
||||
userEmail: userEmail,
|
||||
from: email.from[0]?.address || '',
|
||||
to: email.to?.map((addr: EmailAddress) => addr.address) || [],
|
||||
cc: email.cc?.map((addr: EmailAddress) => addr.address) || [],
|
||||
bcc: email.bcc?.map((addr: EmailAddress) => addr.address) || [],
|
||||
subject: email.subject || '',
|
||||
body: enhancedBody,
|
||||
attachments: extractedAttachments,
|
||||
timestamp: new Date(email.receivedAt).getTime(),
|
||||
ingestionSourceId: ingestionSourceId,
|
||||
};
|
||||
}
|
||||
|
||||
private async createEmailDocumentFromRaw(
|
||||
email: EmailObject,
|
||||
attachments: AttachmentsType,
|
||||
@@ -126,7 +359,6 @@ export class IndexingService {
|
||||
`Failed to extract text from attachment: ${attachment.filename}`,
|
||||
error
|
||||
);
|
||||
// skip attachment or fail the job
|
||||
}
|
||||
}
|
||||
// console.log('email.userEmail', userEmail);
|
||||
@@ -145,9 +377,6 @@ export class IndexingService {
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a search document from a database email record and its attachments.
|
||||
*/
|
||||
private async createEmailDocument(
|
||||
email: typeof archivedEmails.$inferSelect,
|
||||
attachments: Attachment[],
|
||||
@@ -181,9 +410,6 @@ export class IndexingService {
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Extracts text content from a list of attachments.
|
||||
*/
|
||||
private async extractAttachmentContents(
|
||||
attachments: Attachment[]
|
||||
): Promise<{ filename: string; content: string }[]> {
|
||||
@@ -202,9 +428,90 @@ export class IndexingService {
|
||||
`Failed to extract text from attachment: ${attachment.filename}`,
|
||||
error
|
||||
);
|
||||
// skip attachment or fail the job
|
||||
}
|
||||
}
|
||||
return extractedAttachments;
|
||||
}
|
||||
|
||||
private shouldExtractText(mimeType: string): boolean {
|
||||
if (process.env.TIKA_URL) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (!mimeType) return false;
|
||||
// Tika supported mime types: https://tika.apache.org/2.4.1/formats.html
|
||||
const extractableTypes = [
|
||||
'application/pdf',
|
||||
'application/msword',
|
||||
'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
|
||||
'application/vnd.ms-excel',
|
||||
'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
|
||||
'application/vnd.ms-powerpoint',
|
||||
'application/vnd.openxmlformats-officedocument.presentationml.presentation',
|
||||
'text/plain',
|
||||
'text/html',
|
||||
'application/rss+xml',
|
||||
'application/xml',
|
||||
'application/json',
|
||||
'text/rtf',
|
||||
'application/rtf',
|
||||
'text/csv',
|
||||
'text/tsv',
|
||||
'application/csv',
|
||||
'image/bpg',
|
||||
'image/png',
|
||||
'image/vnd.wap.wbmp',
|
||||
'image/x-jbig2',
|
||||
'image/bmp',
|
||||
'image/x-xcf',
|
||||
'image/gif',
|
||||
'image/x-icon',
|
||||
'image/jpeg',
|
||||
'image/x-ms-bmp',
|
||||
'image/webp',
|
||||
'image/tiff',
|
||||
'image/svg+xml',
|
||||
'application/vnd.apple.pages',
|
||||
'application/vnd.apple.numbers',
|
||||
'application/vnd.apple.keynote',
|
||||
'image/heic',
|
||||
'image/heif',
|
||||
];
|
||||
|
||||
|
||||
|
||||
return extractableTypes.some((type) => mimeType.toLowerCase().includes(type));
|
||||
}
|
||||
|
||||
/**
|
||||
* Ensures all required fields are present in EmailDocument
|
||||
*/
|
||||
private ensureEmailDocumentFields(doc: Partial<EmailDocument>): EmailDocument {
|
||||
return {
|
||||
id: doc.id || 'missing-id',
|
||||
userEmail: doc.userEmail || 'unknown',
|
||||
from: doc.from || '',
|
||||
to: Array.isArray(doc.to) ? doc.to : [],
|
||||
cc: Array.isArray(doc.cc) ? doc.cc : [],
|
||||
bcc: Array.isArray(doc.bcc) ? doc.bcc : [],
|
||||
subject: doc.subject || '',
|
||||
body: doc.body || '',
|
||||
attachments: Array.isArray(doc.attachments) ? doc.attachments : [],
|
||||
timestamp: typeof doc.timestamp === 'number' ? doc.timestamp : Date.now(),
|
||||
ingestionSourceId: doc.ingestionSourceId || 'unknown',
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Validates if the given object is a valid EmailDocument that can be serialized to JSON
|
||||
*/
|
||||
private isValidEmailDocument(doc: any): boolean {
|
||||
try {
|
||||
JSON.stringify(doc);
|
||||
return true;
|
||||
} catch (error) {
|
||||
logger.error({ doc, error: (error as Error).message }, 'Invalid EmailDocument detected');
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -6,6 +6,7 @@ import type {
|
||||
IngestionSource,
|
||||
IngestionCredentials,
|
||||
IngestionProvider,
|
||||
PendingEmail,
|
||||
} from '@open-archiver/types';
|
||||
import { and, desc, eq } from 'drizzle-orm';
|
||||
import { CryptoService } from './CryptoService';
|
||||
@@ -26,6 +27,7 @@ import { SearchService } from './SearchService';
|
||||
import { DatabaseService } from './DatabaseService';
|
||||
import { config } from '../config/index';
|
||||
import { FilterBuilder } from './FilterBuilder';
|
||||
import e from 'express';
|
||||
|
||||
export class IngestionService {
|
||||
private static decryptSource(
|
||||
@@ -47,7 +49,7 @@ export class IngestionService {
|
||||
}
|
||||
|
||||
public static returnFileBasedIngestions(): IngestionProvider[] {
|
||||
return ['pst_import', 'eml_import'];
|
||||
return ['pst_import', 'eml_import', 'mbox_import'];
|
||||
}
|
||||
|
||||
public static async create(
|
||||
@@ -76,9 +78,13 @@ export class IngestionService {
|
||||
const connector = EmailProviderFactory.createConnector(decryptedSource);
|
||||
|
||||
try {
|
||||
await connector.testConnection();
|
||||
const connectionValid = await connector.testConnection();
|
||||
// If connection succeeds, update status to auth_success, which triggers the initial import.
|
||||
return await this.update(decryptedSource.id, { status: 'auth_success' });
|
||||
if (connectionValid) {
|
||||
return await this.update(decryptedSource.id, { status: 'auth_success' });
|
||||
} else {
|
||||
throw Error('Ingestion authentication failed.')
|
||||
}
|
||||
} catch (error) {
|
||||
// If connection fails, delete the newly created source and throw the error.
|
||||
await this.delete(decryptedSource.id);
|
||||
@@ -297,7 +303,7 @@ export class IngestionService {
|
||||
source: IngestionSource,
|
||||
storage: StorageService,
|
||||
userEmail: string
|
||||
): Promise<void> {
|
||||
): Promise<PendingEmail | null> {
|
||||
try {
|
||||
// Generate a unique message ID for the email. If the email already has a message-id header, use that.
|
||||
// Otherwise, generate a new one based on the email's hash, source ID, and email ID.
|
||||
@@ -326,7 +332,7 @@ export class IngestionService {
|
||||
{ messageId, ingestionSourceId: source.id },
|
||||
'Skipping duplicate email'
|
||||
);
|
||||
return;
|
||||
return null;
|
||||
}
|
||||
|
||||
const emlBuffer = email.eml ?? Buffer.from(email.body, 'utf-8');
|
||||
@@ -393,23 +399,14 @@ export class IngestionService {
|
||||
.onConflictDoNothing();
|
||||
}
|
||||
}
|
||||
// adding to indexing queue
|
||||
//Instead: index by email (raw email object, ingestion id)
|
||||
logger.info({ emailId: archivedEmail.id }, 'Indexing email');
|
||||
// await indexingQueue.add('index-email', {
|
||||
// emailId: archivedEmail.id,
|
||||
// });
|
||||
const searchService = new SearchService();
|
||||
const storageService = new StorageService();
|
||||
const databaseService = new DatabaseService();
|
||||
const indexingService = new IndexingService(
|
||||
databaseService,
|
||||
searchService,
|
||||
storageService
|
||||
);
|
||||
//assign userEmail
|
||||
|
||||
email.userEmail = userEmail;
|
||||
await indexingService.indexByEmail(email, source.id, archivedEmail.id);
|
||||
|
||||
return {
|
||||
email,
|
||||
sourceId: source.id,
|
||||
archivedId: archivedEmail.id,
|
||||
};
|
||||
} catch (error) {
|
||||
logger.error({
|
||||
message: `Failed to process email ${email.id} for source ${source.id}`,
|
||||
@@ -417,6 +414,7 @@ export class IngestionService {
|
||||
emailId: email.id,
|
||||
ingestionSourceId: source.id,
|
||||
});
|
||||
return null;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
271
packages/backend/src/services/OcrService.ts
Normal file
271
packages/backend/src/services/OcrService.ts
Normal file
@@ -0,0 +1,271 @@
|
||||
import crypto from 'crypto';
|
||||
import { logger } from '../config/logger';
|
||||
|
||||
// Simple LRU cache for Tika results with statistics
|
||||
class TikaCache {
|
||||
private cache = new Map<string, string>();
|
||||
private maxSize = 50;
|
||||
private hits = 0;
|
||||
private misses = 0;
|
||||
|
||||
get(key: string): string | undefined {
|
||||
const value = this.cache.get(key);
|
||||
if (value !== undefined) {
|
||||
this.hits++;
|
||||
// LRU: Move element to the end
|
||||
this.cache.delete(key);
|
||||
this.cache.set(key, value);
|
||||
} else {
|
||||
this.misses++;
|
||||
}
|
||||
return value;
|
||||
}
|
||||
|
||||
set(key: string, value: string): void {
|
||||
// If already exists, delete first
|
||||
if (this.cache.has(key)) {
|
||||
this.cache.delete(key);
|
||||
}
|
||||
// If cache is full, remove oldest element
|
||||
else if (this.cache.size >= this.maxSize) {
|
||||
const firstKey = this.cache.keys().next().value;
|
||||
if (firstKey !== undefined) {
|
||||
this.cache.delete(firstKey);
|
||||
}
|
||||
}
|
||||
|
||||
this.cache.set(key, value);
|
||||
}
|
||||
|
||||
getStats(): { size: number; maxSize: number; hits: number; misses: number; hitRate: number } {
|
||||
const total = this.hits + this.misses;
|
||||
const hitRate = total > 0 ? (this.hits / total) * 100 : 0;
|
||||
return {
|
||||
size: this.cache.size,
|
||||
maxSize: this.maxSize,
|
||||
hits: this.hits,
|
||||
misses: this.misses,
|
||||
hitRate: Math.round(hitRate * 100) / 100 // 2 decimal places
|
||||
};
|
||||
}
|
||||
|
||||
reset(): void {
|
||||
this.cache.clear();
|
||||
this.hits = 0;
|
||||
this.misses = 0;
|
||||
}
|
||||
}
|
||||
|
||||
// Semaphore for running Tika requests
|
||||
class TikaSemaphore {
|
||||
private inProgress = new Map<string, Promise<string>>();
|
||||
private waitCount = 0;
|
||||
|
||||
async acquire(key: string, operation: () => Promise<string>): Promise<string> {
|
||||
// Check if a request for this key is already running
|
||||
const existingPromise = this.inProgress.get(key);
|
||||
if (existingPromise) {
|
||||
this.waitCount++;
|
||||
logger.debug(`Waiting for in-progress Tika request (${key.slice(0, 8)}...)`);
|
||||
try {
|
||||
return await existingPromise;
|
||||
} finally {
|
||||
this.waitCount--;
|
||||
}
|
||||
}
|
||||
|
||||
// Start new request
|
||||
const promise = this.executeOperation(key, operation);
|
||||
this.inProgress.set(key, promise);
|
||||
|
||||
try {
|
||||
return await promise;
|
||||
} finally {
|
||||
// Remove promise from map when finished
|
||||
this.inProgress.delete(key);
|
||||
}
|
||||
}
|
||||
|
||||
private async executeOperation(key: string, operation: () => Promise<string>): Promise<string> {
|
||||
try {
|
||||
return await operation();
|
||||
} catch (error) {
|
||||
// Remove promise from map even on errors
|
||||
logger.error(`Tika operation failed for key ${key.slice(0, 8)}...`, error);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
getStats(): { inProgress: number; waitCount: number } {
|
||||
return {
|
||||
inProgress: this.inProgress.size,
|
||||
waitCount: this.waitCount
|
||||
};
|
||||
}
|
||||
|
||||
clear(): void {
|
||||
this.inProgress.clear();
|
||||
this.waitCount = 0;
|
||||
}
|
||||
}
|
||||
|
||||
export class OcrService {
|
||||
private tikaCache = new TikaCache();
|
||||
private tikaSemaphore = new TikaSemaphore();
|
||||
|
||||
// Tika-based text extraction with cache and semaphore
|
||||
async extractTextWithTika(buffer: Buffer, mimeType: string): Promise<string> {
|
||||
const tikaUrl = process.env.TIKA_URL;
|
||||
if (!tikaUrl) {
|
||||
throw new Error('TIKA_URL environment variable not set');
|
||||
}
|
||||
|
||||
// Cache key: SHA-256 hash of the buffer
|
||||
const hash = crypto.createHash('sha256').update(buffer).digest('hex');
|
||||
|
||||
// Cache lookup (before semaphore!)
|
||||
const cachedResult = this.tikaCache.get(hash);
|
||||
if (cachedResult !== undefined) {
|
||||
logger.debug(`Tika cache hit for ${mimeType} (${buffer.length} bytes)`);
|
||||
return cachedResult;
|
||||
}
|
||||
|
||||
// Use semaphore to deduplicate parallel requests
|
||||
return await this.tikaSemaphore.acquire(hash, async () => {
|
||||
// Check cache again (might have been filled by parallel request)
|
||||
const cachedAfterWait = this.tikaCache.get(hash);
|
||||
if (cachedAfterWait !== undefined) {
|
||||
logger.debug(`Tika cache hit after wait for ${mimeType} (${buffer.length} bytes)`);
|
||||
return cachedAfterWait;
|
||||
}
|
||||
|
||||
logger.debug(`Executing Tika request for ${mimeType} (${buffer.length} bytes)`);
|
||||
|
||||
// DNS fallback: If "tika" hostname, also try localhost
|
||||
const urlsToTry = [
|
||||
`${tikaUrl}/tika`,
|
||||
// Fallback falls DNS-Problem mit "tika" hostname
|
||||
...(tikaUrl.includes('://tika:')
|
||||
? [`${tikaUrl.replace('://tika:', '://localhost:')}/tika`]
|
||||
: [])
|
||||
];
|
||||
|
||||
for (const url of urlsToTry) {
|
||||
try {
|
||||
logger.debug(`Trying Tika URL: ${url}`);
|
||||
const response = await fetch(url, {
|
||||
method: 'PUT',
|
||||
headers: {
|
||||
'Content-Type': mimeType || 'application/octet-stream',
|
||||
Accept: 'text/plain',
|
||||
Connection: 'close'
|
||||
},
|
||||
body: buffer,
|
||||
signal: AbortSignal.timeout(180000)
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
logger.warn(
|
||||
`Tika extraction failed at ${url}: ${response.status} ${response.statusText}`
|
||||
);
|
||||
continue; // Try next URL
|
||||
}
|
||||
|
||||
const text = await response.text();
|
||||
const result = text.trim();
|
||||
|
||||
// Cache result (also empty strings to avoid repeated attempts)
|
||||
this.tikaCache.set(hash, result);
|
||||
|
||||
const cacheStats = this.tikaCache.getStats();
|
||||
const semaphoreStats = this.tikaSemaphore.getStats();
|
||||
logger.debug(
|
||||
`Tika extraction successful - Cache: ${cacheStats.hits}H/${cacheStats.misses}M (${cacheStats.hitRate}%) - Semaphore: ${semaphoreStats.inProgress} active, ${semaphoreStats.waitCount} waiting`
|
||||
);
|
||||
|
||||
return result;
|
||||
} catch (error) {
|
||||
logger.warn(
|
||||
`Tika extraction error at ${url}:`,
|
||||
error instanceof Error ? error.message : 'Unknown error'
|
||||
);
|
||||
// Continue to next URL
|
||||
}
|
||||
}
|
||||
|
||||
// All URLs failed - cache this too (as empty string)
|
||||
logger.error('All Tika URLs failed');
|
||||
this.tikaCache.set(hash, '');
|
||||
return '';
|
||||
});
|
||||
}
|
||||
|
||||
// Helper function to check Tika availability
|
||||
async checkTikaAvailability(): Promise<boolean> {
|
||||
const tikaUrl = process.env.TIKA_URL;
|
||||
if (!tikaUrl) {
|
||||
return false;
|
||||
}
|
||||
|
||||
try {
|
||||
const response = await fetch(`${tikaUrl}/version`, {
|
||||
method: 'GET',
|
||||
signal: AbortSignal.timeout(5000) // 5 seconds timeout
|
||||
});
|
||||
|
||||
if (response.ok) {
|
||||
const version = await response.text();
|
||||
logger.info(`Tika server available, version: ${version.trim()}`);
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
} catch (error) {
|
||||
logger.warn(
|
||||
'Tika server not available:',
|
||||
error instanceof Error ? error.message : 'Unknown error'
|
||||
);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// Optional: Tika health check on startup
|
||||
async initializeTextExtractor(): Promise<void> {
|
||||
const tikaUrl = process.env.TIKA_URL;
|
||||
|
||||
if (tikaUrl) {
|
||||
const isAvailable = await this.checkTikaAvailability();
|
||||
if (!isAvailable) {
|
||||
logger.error(`Tika server configured but not available at: ${tikaUrl}`);
|
||||
logger.error('Text extraction will fall back to legacy methods or fail');
|
||||
}
|
||||
} else {
|
||||
logger.info('Using legacy text extraction methods (pdf2json, mammoth, xlsx)');
|
||||
logger.info('Set TIKA_URL environment variable to use Apache Tika for better extraction');
|
||||
}
|
||||
}
|
||||
|
||||
// Get cache statistics
|
||||
getTikaCacheStats(): {
|
||||
size: number;
|
||||
maxSize: number;
|
||||
hits: number;
|
||||
misses: number;
|
||||
hitRate: number;
|
||||
} {
|
||||
return this.tikaCache.getStats();
|
||||
}
|
||||
|
||||
// Get semaphore statistics
|
||||
getTikaSemaphoreStats(): { inProgress: number; waitCount: number } {
|
||||
return this.tikaSemaphore.getStats();
|
||||
}
|
||||
|
||||
// Clear cache (e.g. for tests or manual reset)
|
||||
clearTikaCache(): void {
|
||||
this.tikaCache.reset();
|
||||
this.tikaSemaphore.clear();
|
||||
logger.info('Tika cache and semaphore cleared');
|
||||
}
|
||||
}
|
||||
|
||||
@@ -69,7 +69,7 @@ export class EMLConnector implements IEmailConnector {
|
||||
syncState?: SyncState | null
|
||||
): AsyncGenerator<EmailObject | null> {
|
||||
const fileStream = await this.storage.get(this.credentials.uploadedFilePath);
|
||||
const tempDir = await fs.mkdtemp(join('/tmp', 'eml-import-'));
|
||||
const tempDir = await fs.mkdtemp(join('/tmp', `eml-import-${new Date().getTime()}`));
|
||||
const unzippedPath = join(tempDir, 'unzipped');
|
||||
await fs.mkdir(unzippedPath);
|
||||
const zipFilePath = join(tempDir, 'eml.zip');
|
||||
@@ -115,6 +115,14 @@ export class EMLConnector implements IEmailConnector {
|
||||
throw error;
|
||||
} finally {
|
||||
await fs.rm(tempDir, { recursive: true, force: true });
|
||||
try {
|
||||
await this.storage.delete(this.credentials.uploadedFilePath);
|
||||
} catch (error) {
|
||||
logger.error(
|
||||
{ error, file: this.credentials.uploadedFilePath },
|
||||
'Failed to delete EML file after processing.'
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,174 @@
|
||||
import type {
|
||||
MboxImportCredentials,
|
||||
EmailObject,
|
||||
EmailAddress,
|
||||
SyncState,
|
||||
MailboxUser,
|
||||
} from '@open-archiver/types';
|
||||
import type { IEmailConnector } from '../EmailProviderFactory';
|
||||
import { simpleParser, ParsedMail, Attachment, AddressObject } from 'mailparser';
|
||||
import { logger } from '../../config/logger';
|
||||
import { getThreadId } from './helpers/utils';
|
||||
import { StorageService } from '../StorageService';
|
||||
import { Readable } from 'stream';
|
||||
import { createHash } from 'crypto';
|
||||
import { streamToBuffer } from '../../helpers/streamToBuffer';
|
||||
|
||||
export class MboxConnector implements IEmailConnector {
|
||||
private storage: StorageService;
|
||||
|
||||
constructor(private credentials: MboxImportCredentials) {
|
||||
this.storage = new StorageService();
|
||||
}
|
||||
|
||||
public async testConnection(): Promise<boolean> {
|
||||
try {
|
||||
if (!this.credentials.uploadedFilePath) {
|
||||
throw Error('Mbox file path not provided.');
|
||||
}
|
||||
if (!this.credentials.uploadedFilePath.includes('.mbox')) {
|
||||
throw Error('Provided file is not in the MBOX format.');
|
||||
}
|
||||
const fileExist = await this.storage.exists(this.credentials.uploadedFilePath);
|
||||
if (!fileExist) {
|
||||
throw Error('Mbox file upload not finished yet, please wait.');
|
||||
}
|
||||
|
||||
return true;
|
||||
} catch (error) {
|
||||
logger.error({ error, credentials: this.credentials }, 'Mbox file validation failed.');
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
public async *listAllUsers(): AsyncGenerator<MailboxUser> {
|
||||
const displayName =
|
||||
this.credentials.uploadedFileName || `mbox-import-${new Date().getTime()}`;
|
||||
logger.info(`Found potential mailbox: ${displayName}`);
|
||||
const constructedPrimaryEmail = `${displayName.replace(/ /g, '.').toLowerCase()}@mbox.local`;
|
||||
yield {
|
||||
id: constructedPrimaryEmail,
|
||||
primaryEmail: constructedPrimaryEmail,
|
||||
displayName: displayName,
|
||||
};
|
||||
}
|
||||
|
||||
public async *fetchEmails(
|
||||
userEmail: string,
|
||||
syncState?: SyncState | null
|
||||
): AsyncGenerator<EmailObject | null> {
|
||||
try {
|
||||
const fileStream = await this.storage.get(this.credentials.uploadedFilePath);
|
||||
const fileBuffer = await streamToBuffer(fileStream as Readable);
|
||||
const mboxContent = fileBuffer.toString('utf-8');
|
||||
const emailDelimiter = '\nFrom ';
|
||||
const emails = mboxContent.split(emailDelimiter);
|
||||
|
||||
// The first split part might be empty or part of the first email's header, so we adjust.
|
||||
if (emails.length > 0 && !mboxContent.startsWith('From ')) {
|
||||
emails.shift(); // Adjust if the file doesn't start with "From "
|
||||
}
|
||||
|
||||
logger.info(`Found ${emails.length} potential emails in the mbox file.`);
|
||||
let emailCount = 0;
|
||||
|
||||
for (const email of emails) {
|
||||
try {
|
||||
// Re-add the "From " delimiter for the parser, except for the very first email
|
||||
const emailWithDelimiter =
|
||||
emailCount > 0 || mboxContent.startsWith('From ') ? `From ${email}` : email;
|
||||
const emailBuffer = Buffer.from(emailWithDelimiter, 'utf-8');
|
||||
const emailObject = await this.parseMessage(emailBuffer, '');
|
||||
yield emailObject;
|
||||
emailCount++;
|
||||
} catch (error) {
|
||||
logger.error(
|
||||
{ error, file: this.credentials.uploadedFilePath },
|
||||
'Failed to process a single message from mbox file. Skipping.'
|
||||
);
|
||||
}
|
||||
}
|
||||
logger.info(`Finished processing mbox file. Total emails processed: ${emailCount}`);
|
||||
} finally {
|
||||
try {
|
||||
await this.storage.delete(this.credentials.uploadedFilePath);
|
||||
} catch (error) {
|
||||
logger.error(
|
||||
{ error, file: this.credentials.uploadedFilePath },
|
||||
'Failed to delete mbox file after processing.'
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private async parseMessage(emlBuffer: Buffer, path: string): Promise<EmailObject> {
|
||||
const parsedEmail: ParsedMail = await simpleParser(emlBuffer);
|
||||
|
||||
const attachments = parsedEmail.attachments.map((attachment: Attachment) => ({
|
||||
filename: attachment.filename || 'untitled',
|
||||
contentType: attachment.contentType,
|
||||
size: attachment.size,
|
||||
content: attachment.content as Buffer,
|
||||
}));
|
||||
|
||||
const mapAddresses = (
|
||||
addresses: AddressObject | AddressObject[] | undefined
|
||||
): EmailAddress[] => {
|
||||
if (!addresses) return [];
|
||||
const addressArray = Array.isArray(addresses) ? addresses : [addresses];
|
||||
return addressArray.flatMap((a) =>
|
||||
a.value.map((v) => ({
|
||||
name: v.name,
|
||||
address: v.address?.replaceAll(`'`, '') || '',
|
||||
}))
|
||||
);
|
||||
};
|
||||
|
||||
const threadId = getThreadId(parsedEmail.headers);
|
||||
let messageId = parsedEmail.messageId;
|
||||
|
||||
if (!messageId) {
|
||||
messageId = `generated-${createHash('sha256').update(emlBuffer).digest('hex')}`;
|
||||
}
|
||||
|
||||
const from = mapAddresses(parsedEmail.from);
|
||||
if (from.length === 0) {
|
||||
from.push({ name: 'No Sender', address: 'No Sender' });
|
||||
}
|
||||
|
||||
// Extract folder path from headers. Mbox files don't have a standard folder structure, so we rely on custom headers added by email clients.
|
||||
// Gmail uses 'X-Gmail-Labels', and other clients like Thunderbird may use 'X-Folder'.
|
||||
const gmailLabels = parsedEmail.headers.get('x-gmail-labels');
|
||||
const folderHeader = parsedEmail.headers.get('x-folder');
|
||||
let finalPath = '';
|
||||
|
||||
if (gmailLabels && typeof gmailLabels === 'string') {
|
||||
// We take the first label as the primary folder.
|
||||
// Gmail labels can be hierarchical, but we'll simplify to the first label.
|
||||
finalPath = gmailLabels.split(',')[0];
|
||||
} else if (folderHeader && typeof folderHeader === 'string') {
|
||||
finalPath = folderHeader;
|
||||
}
|
||||
|
||||
return {
|
||||
id: messageId,
|
||||
threadId: threadId,
|
||||
from,
|
||||
to: mapAddresses(parsedEmail.to),
|
||||
cc: mapAddresses(parsedEmail.cc),
|
||||
bcc: mapAddresses(parsedEmail.bcc),
|
||||
subject: parsedEmail.subject || '',
|
||||
body: parsedEmail.text || '',
|
||||
html: parsedEmail.html || '',
|
||||
headers: parsedEmail.headers,
|
||||
attachments,
|
||||
receivedAt: parsedEmail.date || new Date(),
|
||||
eml: emlBuffer,
|
||||
path: finalPath,
|
||||
};
|
||||
}
|
||||
|
||||
public getUpdatedSyncState(): SyncState {
|
||||
return {};
|
||||
}
|
||||
}
|
||||
@@ -193,6 +193,14 @@ export class PSTConnector implements IEmailConnector {
|
||||
throw error;
|
||||
} finally {
|
||||
pstFile?.close();
|
||||
try {
|
||||
await this.storage.delete(this.credentials.uploadedFilePath);
|
||||
} catch (error) {
|
||||
logger.error(
|
||||
{ error, file: this.credentials.uploadedFilePath },
|
||||
'Failed to delete PST file after processing.'
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -273,8 +281,8 @@ export class PSTConnector implements IEmailConnector {
|
||||
emlBuffer ?? Buffer.from(parsedEmail.text || parsedEmail.html || '', 'utf-8')
|
||||
)
|
||||
.digest('hex')}-${createHash('sha256')
|
||||
.update(emlBuffer ?? Buffer.from(msg.subject || '', 'utf-8'))
|
||||
.digest('hex')}-${msg.clientSubmitTime?.getTime()}`;
|
||||
.update(emlBuffer ?? Buffer.from(msg.subject || '', 'utf-8'))
|
||||
.digest('hex')}-${msg.clientSubmitTime?.getTime()}`;
|
||||
}
|
||||
return {
|
||||
id: messageId,
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
import { Worker } from 'bullmq';
|
||||
import { connection } from '../config/redis';
|
||||
import indexEmailProcessor from '../jobs/processors/index-email.processor';
|
||||
import indexEmailBatchProcessor from '../jobs/processors/index-email-batch.processor';
|
||||
|
||||
const processor = async (job: any) => {
|
||||
switch (job.name) {
|
||||
case 'index-email':
|
||||
return indexEmailProcessor(job);
|
||||
case 'index-email-batch':
|
||||
return indexEmailBatchProcessor(job);
|
||||
default:
|
||||
throw new Error(`Unknown job name: ${job.name}`);
|
||||
}
|
||||
|
||||
@@ -15,13 +15,15 @@
|
||||
"dependencies": {
|
||||
"@iconify/svelte": "^5.0.1",
|
||||
"@open-archiver/types": "workspace:*",
|
||||
"@sveltejs/kit": "^2.16.0",
|
||||
"@sveltejs/kit": "^2.38.1",
|
||||
"bits-ui": "^2.8.10",
|
||||
"clsx": "^2.1.1",
|
||||
"d3-shape": "^3.2.0",
|
||||
"html-entities": "^2.6.0",
|
||||
"jose": "^6.0.1",
|
||||
"lucide-svelte": "^0.525.0",
|
||||
"postal-mime": "^2.4.4",
|
||||
"semver": "^7.7.2",
|
||||
"svelte-persisted-store": "^0.12.0",
|
||||
"sveltekit-i18n": "^2.4.2",
|
||||
"tailwind-merge": "^3.3.1",
|
||||
@@ -35,6 +37,7 @@
|
||||
"@sveltejs/vite-plugin-svelte": "^5.0.0",
|
||||
"@tailwindcss/vite": "^4.0.0",
|
||||
"@types/d3-shape": "^3.1.7",
|
||||
"@types/semver": "^7.7.1",
|
||||
"dotenv": "^17.2.0",
|
||||
"layerchart": "2.0.0-next.27",
|
||||
"mode-watcher": "^1.1.0",
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
import PostalMime, { type Email } from 'postal-mime';
|
||||
import type { Buffer } from 'buffer';
|
||||
import { t } from '$lib/translations';
|
||||
import { encode } from 'html-entities';
|
||||
|
||||
let {
|
||||
raw,
|
||||
@@ -18,7 +19,9 @@
|
||||
if (parsedEmail && parsedEmail.html) {
|
||||
return `<base target="_blank" />${parsedEmail.html}`;
|
||||
} else if (parsedEmail && parsedEmail.text) {
|
||||
return `<base target="_blank" />${parsedEmail.text}`;
|
||||
// display raw text email body in html
|
||||
const safeHtmlContent: string = encode(parsedEmail.text);
|
||||
return `<base target="_blank" /><div>${safeHtmlContent.replaceAll('\n', '<br>')}</div>`;
|
||||
} else if (rawHtml) {
|
||||
return `<base target="_blank" />${rawHtml}`;
|
||||
}
|
||||
@@ -52,16 +55,16 @@
|
||||
|
||||
<div class="mt-2 rounded-md border bg-white p-4">
|
||||
{#if isLoading}
|
||||
<p>{$t('components.email_preview.loading')}</p>
|
||||
{:else if emailHtml}
|
||||
<p>{$t('app.components.email_preview.loading')}</p>
|
||||
{:else if emailHtml()}
|
||||
<iframe
|
||||
title={$t('archive.email_preview')}
|
||||
title={$t('app.archive.email_preview')}
|
||||
srcdoc={emailHtml()}
|
||||
class="h-[600px] w-full border-none"
|
||||
></iframe>
|
||||
{:else if raw}
|
||||
<p>{$t('components.email_preview.render_error')}</p>
|
||||
<p>{$t('app.components.email_preview.render_error')}</p>
|
||||
{:else}
|
||||
<p class="text-gray-500">{$t('components.email_preview.not_available')}</p>
|
||||
<p class="text-gray-500">{$t('app.components.email_preview.not_available')}</p>
|
||||
{/if}
|
||||
</div>
|
||||
|
||||
@@ -1,18 +1,39 @@
|
||||
<script lang="ts">
|
||||
import { t } from '$lib/translations';
|
||||
import * as Alert from '$lib/components/ui/alert';
|
||||
import { Info } from 'lucide-svelte';
|
||||
|
||||
export let currentVersion: string;
|
||||
export let newVersionInfo: { version: string; description: string; url: string } | null = null;
|
||||
</script>
|
||||
|
||||
<footer class="bg-muted py-6 md:py-0">
|
||||
<div
|
||||
class="container mx-auto flex flex-col items-center justify-center gap-4 md:h-24 md:flex-row"
|
||||
>
|
||||
<div class="container mx-auto flex flex-col items-center justify-center gap-4 py-8 md:flex-row">
|
||||
<div class="flex flex-col items-center gap-2">
|
||||
{#if newVersionInfo}
|
||||
<Alert.Root>
|
||||
<Alert.Title class="flex items-center gap-2">
|
||||
<Info class="h-4 w-4" />
|
||||
{$t('app.components.footer.new_version_available')}
|
||||
<a
|
||||
href={newVersionInfo.url}
|
||||
target="_blank"
|
||||
class=" text-muted-foreground underline"
|
||||
>
|
||||
{newVersionInfo.description}
|
||||
</a>
|
||||
</Alert.Title>
|
||||
</Alert.Root>
|
||||
{/if}
|
||||
<p class="text-balance text-center text-xs font-medium leading-loose">
|
||||
© {new Date().getFullYear()}
|
||||
<a href="https://openarchiver.com/" target="_blank">Open Archiver</a>. {$t(
|
||||
'app.components.footer.all_rights_reserved'
|
||||
)}
|
||||
</p>
|
||||
<p class="text-balance text-center text-xs font-medium leading-loose">
|
||||
Version: {currentVersion}
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
</footer>
|
||||
|
||||
@@ -41,6 +41,10 @@
|
||||
value: 'eml_import',
|
||||
label: $t('app.components.ingestion_source_form.provider_eml_import'),
|
||||
},
|
||||
{
|
||||
value: 'mbox_import',
|
||||
label: $t('app.components.ingestion_source_form.provider_mbox_import'),
|
||||
},
|
||||
];
|
||||
|
||||
let formData: CreateIngestionSourceDto = $state({
|
||||
@@ -55,7 +59,6 @@
|
||||
|
||||
$effect(() => {
|
||||
formData.providerConfig.type = formData.provider;
|
||||
console.log(formData);
|
||||
});
|
||||
|
||||
const triggerContent = $derived(
|
||||
@@ -101,7 +104,6 @@
|
||||
|
||||
formData.providerConfig.uploadedFilePath = result.filePath;
|
||||
formData.providerConfig.uploadedFileName = file.name;
|
||||
|
||||
fileUploading = false;
|
||||
} catch (error) {
|
||||
fileUploading = false;
|
||||
@@ -209,7 +211,7 @@
|
||||
<Input id="username" bind:value={formData.providerConfig.username} class="col-span-3" />
|
||||
</div>
|
||||
<div class="grid grid-cols-4 items-center gap-4">
|
||||
<Label for="password" class="text-left">{$t('auth.password')}</Label>
|
||||
<Label for="password" class="text-left">{$t('app.auth.password')}</Label>
|
||||
<Input
|
||||
id="password"
|
||||
type="password"
|
||||
@@ -224,10 +226,13 @@
|
||||
<Checkbox id="secure" bind:checked={formData.providerConfig.secure} />
|
||||
</div>
|
||||
<div class="grid grid-cols-4 items-center gap-4">
|
||||
<Label for="secure" class="text-left"
|
||||
<Label for="allowInsecureCert" class="text-left"
|
||||
>{$t('app.components.ingestion_source_form.allow_insecure_cert')}</Label
|
||||
>
|
||||
<Checkbox id="secure" bind:checked={formData.providerConfig.allowInsecureCert} />
|
||||
<Checkbox
|
||||
id="allowInsecureCert"
|
||||
bind:checked={formData.providerConfig.allowInsecureCert}
|
||||
/>
|
||||
</div>
|
||||
{:else if formData.provider === 'pst_import'}
|
||||
<div class="grid grid-cols-4 items-center gap-4">
|
||||
@@ -265,6 +270,24 @@
|
||||
{/if}
|
||||
</div>
|
||||
</div>
|
||||
{:else if formData.provider === 'mbox_import'}
|
||||
<div class="grid grid-cols-4 items-center gap-4">
|
||||
<Label for="mbox-file" class="text-left"
|
||||
>{$t('app.components.ingestion_source_form.mbox_file')}</Label
|
||||
>
|
||||
<div class="col-span-3 flex flex-row items-center space-x-2">
|
||||
<Input
|
||||
id="mbox-file"
|
||||
type="file"
|
||||
class=""
|
||||
accept=".mbox"
|
||||
onchange={handleFileChange}
|
||||
/>
|
||||
{#if fileUploading}
|
||||
<span class=" text-primary animate-spin"><Loader2 /></span>
|
||||
{/if}
|
||||
</div>
|
||||
</div>
|
||||
{/if}
|
||||
{#if formData.provider === 'google_workspace' || formData.provider === 'microsoft_365'}
|
||||
<Alert.Root>
|
||||
|
||||
@@ -163,7 +163,8 @@
|
||||
"not_available": "Raw .eml file not available for this email."
|
||||
},
|
||||
"footer": {
|
||||
"all_rights_reserved": "All rights reserved."
|
||||
"all_rights_reserved": "All rights reserved.",
|
||||
"new_version_available": "New version available"
|
||||
},
|
||||
"ingestion_source_form": {
|
||||
"provider_generic_imap": "Generic IMAP",
|
||||
@@ -171,6 +172,7 @@
|
||||
"provider_microsoft_365": "Microsoft 365",
|
||||
"provider_pst_import": "PST Import",
|
||||
"provider_eml_import": "EML Import",
|
||||
"provider_mbox_import": "Mbox Import",
|
||||
"select_provider": "Select a provider",
|
||||
"service_account_key": "Service Account Key (JSON)",
|
||||
"service_account_key_placeholder": "Paste your service account key JSON content",
|
||||
@@ -186,6 +188,7 @@
|
||||
"allow_insecure_cert": "Allow insecure cert",
|
||||
"pst_file": "PST File",
|
||||
"eml_file": "EML File",
|
||||
"mbox_file": "Mbox File",
|
||||
"heads_up": "Heads up!",
|
||||
"org_wide_warning": "Please note that this is an organization-wide operation. This kind of ingestions will import and index <b>all</b> email inboxes in your organization. If you want to import only specific email inboxes, use the IMAP connector.",
|
||||
"upload_failed": "Upload Failed, please try again"
|
||||
|
||||
@@ -1,17 +1,17 @@
|
||||
{
|
||||
"app": {
|
||||
"auth": {
|
||||
"login": "Accesso",
|
||||
"login": "Accedi",
|
||||
"login_tip": "Inserisci la tua email qui sotto per accedere al tuo account.",
|
||||
"email": "Email",
|
||||
"password": "Password"
|
||||
},
|
||||
"common": {
|
||||
"working": "In lavorazione"
|
||||
"working": "In corso"
|
||||
},
|
||||
"archive": {
|
||||
"title": "Archivio",
|
||||
"no_subject": "Nessun oggetto",
|
||||
"no_subject": "Nessun Oggetto",
|
||||
"from": "Da",
|
||||
"sent": "Inviato",
|
||||
"recipients": "Destinatari",
|
||||
@@ -20,27 +20,27 @@
|
||||
"folder": "Cartella",
|
||||
"tags": "Tag",
|
||||
"size": "Dimensione",
|
||||
"email_preview": "Anteprima email",
|
||||
"email_preview": "Anteprima Email",
|
||||
"attachments": "Allegati",
|
||||
"download": "Scarica",
|
||||
"actions": "Azioni",
|
||||
"download_eml": "Scarica email (.eml)",
|
||||
"delete_email": "Elimina email",
|
||||
"email_thread": "Thread email",
|
||||
"download_eml": "Scarica Email (.eml)",
|
||||
"delete_email": "Elimina Email",
|
||||
"email_thread": "Thread Email",
|
||||
"delete_confirmation_title": "Sei sicuro di voler eliminare questa email?",
|
||||
"delete_confirmation_description": "Questa azione non può essere annullata ed eliminerà permanentemente l'email e i suoi allegati.",
|
||||
"delete_confirmation_description": "Questa azione non può essere annullata e rimuoverà permanentemente l'email e i suoi allegati.",
|
||||
"deleting": "Eliminazione in corso",
|
||||
"confirm": "Conferma",
|
||||
"cancel": "Annulla",
|
||||
"not_found": "Email non trovata."
|
||||
},
|
||||
"ingestions": {
|
||||
"title": "Fonti di ingestione",
|
||||
"ingestion_sources": "Fonti di ingestione",
|
||||
"bulk_actions": "Azioni di massa",
|
||||
"force_sync": "Forza sincronizzazione",
|
||||
"title": "Sorgenti di Ingestione",
|
||||
"ingestion_sources": "Sorgenti di Ingestione",
|
||||
"bulk_actions": "Azioni di Massa",
|
||||
"force_sync": "Forza Sincronizzazione",
|
||||
"delete": "Elimina",
|
||||
"create_new": "Crea nuovo",
|
||||
"create_new": "Crea Nuovo",
|
||||
"name": "Nome",
|
||||
"provider": "Provider",
|
||||
"status": "Stato",
|
||||
@@ -52,28 +52,28 @@
|
||||
"open_menu": "Apri menu",
|
||||
"edit": "Modifica",
|
||||
"create": "Crea",
|
||||
"ingestion_source": "Fonte di ingestione",
|
||||
"edit_description": "Apporta modifiche alla tua fonte di ingestione qui.",
|
||||
"create_description": "Aggiungi una nuova fonte di ingestione per iniziare ad archiviare le email.",
|
||||
"ingestion_source": "Sorgente di Ingestione",
|
||||
"edit_description": "Apporta modifiche alla tua sorgente di ingestione qui.",
|
||||
"create_description": "Aggiungi una nuova sorgente di ingestione per iniziare ad archiviare le email.",
|
||||
"read": "Leggi",
|
||||
"docs_here": "documenti qui",
|
||||
"delete_confirmation_title": "Sei sicuro di voler eliminare questa ingestione?",
|
||||
"delete_confirmation_description": "Questo eliminerà tutte le email archiviate, gli allegati, l'indicizzazione e i file associati a questa ingestione. Se desideri solo interrompere la sincronizzazione di nuove email, puoi invece mettere in pausa l'ingestione.",
|
||||
"delete_confirmation_description": "Questo cancellerà tutte le email archiviate, gli allegati, l'indicizzazione e i file associati a questa ingestione. Se vuoi solo interrompere la sincronizzazione di nuove email, puoi mettere in pausa l'ingestione.",
|
||||
"deleting": "Eliminazione in corso",
|
||||
"confirm": "Conferma",
|
||||
"cancel": "Annulla",
|
||||
"bulk_delete_confirmation_title": "Sei sicuro di voler eliminare {{count}} ingestioni selezionate?",
|
||||
"bulk_delete_confirmation_description": "Questo eliminerà tutte le email archiviate, gli allegati, l'indicizzazione e i file associati a queste ingestioni. Se desideri solo interrompere la sincronizzazione di nuove email, puoi invece mettere in pausa le ingestioni."
|
||||
"bulk_delete_confirmation_description": "Questo cancellerà tutte le email archiviate, gli allegati, l'indicizzazione e i file associati a queste ingestioni. Se vuoi solo interrompere la sincronizzazione di nuove email, puoi mettere in pausa le ingestioni."
|
||||
},
|
||||
"search": {
|
||||
"title": "Cerca",
|
||||
"description": "Cerca email archiviate.",
|
||||
"email_search": "Ricerca email",
|
||||
"title": "Ricerca",
|
||||
"description": "Ricerca email archiviate.",
|
||||
"email_search": "Ricerca Email",
|
||||
"placeholder": "Cerca per parola chiave, mittente, destinatario...",
|
||||
"search_button": "Cerca",
|
||||
"search_options": "Opzioni di ricerca",
|
||||
"strategy_fuzzy": "Fuzzy",
|
||||
"strategy_verbatim": "Verbatim",
|
||||
"strategy_fuzzy": "Approssimativa",
|
||||
"strategy_verbatim": "Esatta",
|
||||
"strategy_frequency": "Frequenza",
|
||||
"select_strategy": "Seleziona una strategia",
|
||||
"error": "Errore",
|
||||
@@ -87,18 +87,18 @@
|
||||
"next": "Succ"
|
||||
},
|
||||
"roles": {
|
||||
"title": "Gestione ruoli",
|
||||
"role_management": "Gestione ruoli",
|
||||
"create_new": "Crea nuovo",
|
||||
"title": "Gestione Ruoli",
|
||||
"role_management": "Gestione Ruoli",
|
||||
"create_new": "Crea Nuovo",
|
||||
"name": "Nome",
|
||||
"created_at": "Creato il",
|
||||
"actions": "Azioni",
|
||||
"open_menu": "Apri menu",
|
||||
"view_policy": "Visualizza policy",
|
||||
"view_policy": "Visualizza Policy",
|
||||
"edit": "Modifica",
|
||||
"delete": "Elimina",
|
||||
"no_roles_found": "Nessun ruolo trovato.",
|
||||
"role_policy": "Policy ruolo",
|
||||
"role_policy": "Policy Ruolo",
|
||||
"viewing_policy_for_role": "Visualizzazione policy per il ruolo: {{name}}",
|
||||
"create": "Crea",
|
||||
"role": "Ruolo",
|
||||
@@ -111,22 +111,22 @@
|
||||
"cancel": "Annulla"
|
||||
},
|
||||
"system_settings": {
|
||||
"title": "Impostazioni di sistema",
|
||||
"system_settings": "Impostazioni di sistema",
|
||||
"title": "Impostazioni di Sistema",
|
||||
"system_settings": "Impostazioni di Sistema",
|
||||
"description": "Gestisci le impostazioni globali dell'applicazione.",
|
||||
"language": "Lingua",
|
||||
"default_theme": "Tema predefinito",
|
||||
"light": "Chiaro",
|
||||
"dark": "Scuro",
|
||||
"system": "Sistema",
|
||||
"support_email": "Email di supporto",
|
||||
"saving": "Salvataggio",
|
||||
"save_changes": "Salva modifiche"
|
||||
"support_email": "Email di Supporto",
|
||||
"saving": "Salvataggio in corso",
|
||||
"save_changes": "Salva Modifiche"
|
||||
},
|
||||
"users": {
|
||||
"title": "Gestione utenti",
|
||||
"user_management": "Gestione utenti",
|
||||
"create_new": "Crea nuovo",
|
||||
"title": "Gestione Utenti",
|
||||
"user_management": "Gestione Utenti",
|
||||
"create_new": "Crea Nuovo",
|
||||
"name": "Nome",
|
||||
"email": "Email",
|
||||
"role": "Ruolo",
|
||||
@@ -146,33 +146,10 @@
|
||||
"confirm": "Conferma",
|
||||
"cancel": "Annulla"
|
||||
},
|
||||
"setup": {
|
||||
"title": "Configurazione",
|
||||
"description": "Configura l'account amministratore iniziale per Open Archiver.",
|
||||
"welcome": "Benvenuto",
|
||||
"create_admin_account": "Crea il primo account amministratore per iniziare.",
|
||||
"first_name": "Nome",
|
||||
"last_name": "Cognome",
|
||||
"email": "Email",
|
||||
"password": "Password",
|
||||
"creating_account": "Creazione account",
|
||||
"create_account": "Crea account"
|
||||
},
|
||||
"layout": {
|
||||
"dashboard": "Dashboard",
|
||||
"ingestions": "Ingestioni",
|
||||
"archived_emails": "Email archiviate",
|
||||
"search": "Cerca",
|
||||
"settings": "Impostazioni",
|
||||
"system": "Sistema",
|
||||
"users": "Utenti",
|
||||
"roles": "Ruoli",
|
||||
"logout": "Esci"
|
||||
},
|
||||
"components": {
|
||||
"charts": {
|
||||
"emails_ingested": "Email ingerite",
|
||||
"storage_used": "Spazio di archiviazione utilizzato",
|
||||
"emails_ingested": "Email Acquisite",
|
||||
"storage_used": "Spazio di Archiviazione Utilizzato",
|
||||
"emails": "Email"
|
||||
},
|
||||
"common": {
|
||||
@@ -182,35 +159,36 @@
|
||||
},
|
||||
"email_preview": {
|
||||
"loading": "Caricamento anteprima email...",
|
||||
"render_error": "Impossibile visualizzare l'anteprima dell'email.",
|
||||
"not_available": "File .eml non disponibile per questa email."
|
||||
"render_error": "Impossibile renderizzare l'anteprima dell'email.",
|
||||
"not_available": "File .eml grezzo non disponibile per questa email."
|
||||
},
|
||||
"footer": {
|
||||
"all_rights_reserved": "Tutti i diritti riservati."
|
||||
},
|
||||
"ingestion_source_form": {
|
||||
"provider_generic_imap": "IMAP generico",
|
||||
"provider_generic_imap": "IMAP Generico",
|
||||
"provider_google_workspace": "Google Workspace",
|
||||
"provider_microsoft_365": "Microsoft 365",
|
||||
"provider_pst_import": "Importazione PST",
|
||||
"provider_eml_import": "Importazione EML",
|
||||
"select_provider": "Seleziona un provider",
|
||||
"service_account_key": "Chiave account di servizio (JSON)",
|
||||
"service_account_key_placeholder": "Incolla il contenuto JSON della tua chiave account di servizio",
|
||||
"impersonated_admin_email": "Email amministratore impersonata",
|
||||
"client_id": "ID applicazione (client)",
|
||||
"client_secret": "Valore segreto client",
|
||||
"client_secret_placeholder": "Inserisci il valore segreto, non l'ID segreto",
|
||||
"tenant_id": "ID directory (tenant)",
|
||||
"service_account_key": "Chiave Account di Servizio (JSON)",
|
||||
"service_account_key_placeholder": "Incolla il contenuto JSON della chiave del tuo account di servizio",
|
||||
"impersonated_admin_email": "Email dell'Amministratore Impersonato",
|
||||
"client_id": "ID Applicazione (Client)",
|
||||
"client_secret": "Valore Segreto Client",
|
||||
"client_secret_placeholder": "Inserisci il Valore segreto, non l'ID Segreto",
|
||||
"tenant_id": "ID Directory (Tenant)",
|
||||
"host": "Host",
|
||||
"port": "Porta",
|
||||
"username": "Nome utente",
|
||||
"username": "Nome Utente",
|
||||
"use_tls": "Usa TLS",
|
||||
"allow_insecure_cert": "Consenti certificato non sicuro",
|
||||
"pst_file": "File PST",
|
||||
"eml_file": "File EML",
|
||||
"heads_up": "Attenzione!",
|
||||
"org_wide_warning": "Si prega di notare che questa è un'operazione a livello di organizzazione. Questo tipo di ingestione importerà e indicizzerà <b>tutte</b> le caselle di posta elettronica della tua organizzazione. Se desideri importare solo caselle di posta elettronica specifiche, utilizza il connettore IMAP.",
|
||||
"upload_failed": "Caricamento non riuscito, riprova"
|
||||
"org_wide_warning": "Si prega di notare che questa è un'operazione a livello di organizzazione. Questo tipo di ingestione importerà e indicizzerà <b>tutte</b> le caselle di posta elettronica nella tua organizzazione. Se vuoi importare solo caselle di posta elettronica specifiche, usa il connettore IMAP.",
|
||||
"upload_failed": "Caricamento Fallito, riprova"
|
||||
},
|
||||
"role_form": {
|
||||
"policies_json": "Policy (JSON)",
|
||||
@@ -223,28 +201,61 @@
|
||||
"select_role": "Seleziona un ruolo"
|
||||
}
|
||||
},
|
||||
"dashboard_page": {
|
||||
"title": "Dashboard",
|
||||
"meta_description": "Panoramica del tuo archivio email.",
|
||||
"header": "Dashboard",
|
||||
"create_ingestion": "Crea un'ingestione",
|
||||
"no_ingestion_header": "Non hai alcuna fonte di ingestione configurata.",
|
||||
"no_ingestion_text": "Aggiungi una fonte di ingestione per iniziare ad archiviare le tue caselle di posta.",
|
||||
"total_emails_archived": "Email totali archiviate",
|
||||
"total_storage_used": "Spazio di archiviazione totale utilizzato",
|
||||
"failed_ingestions": "Ingestioni non riuscite (ultimi 7 giorni)",
|
||||
"ingestion_history": "Cronologia ingestioni",
|
||||
"no_ingestion_history": "Nessuna cronologia di ingestione disponibile.",
|
||||
"storage_by_source": "Archiviazione per fonte di ingestione",
|
||||
"no_ingestion_sources": "Nessuna fonte di ingestione disponibile.",
|
||||
"indexed_insights": "Approfondimenti indicizzati",
|
||||
"top_10_senders": "Top 10 mittenti",
|
||||
"no_indexed_insights": "Nessun approfondimento indicizzato disponibile."
|
||||
"setup": {
|
||||
"title": "Configurazione",
|
||||
"description": "Configura l'account amministratore iniziale per Open Archiver.",
|
||||
"welcome": "Benvenuto",
|
||||
"create_admin_account": "Crea il primo account amministratore per iniziare.",
|
||||
"first_name": "Nome",
|
||||
"last_name": "Cognome",
|
||||
"email": "Email",
|
||||
"password": "Password",
|
||||
"creating_account": "Creazione Account",
|
||||
"create_account": "Crea Account"
|
||||
},
|
||||
"layout": {
|
||||
"dashboard": "Dashboard",
|
||||
"ingestions": "Ingestioni",
|
||||
"archived_emails": "Email archiviate",
|
||||
"search": "Ricerca",
|
||||
"settings": "Impostazioni",
|
||||
"system": "Sistema",
|
||||
"users": "Utenti",
|
||||
"roles": "Ruoli",
|
||||
"api_keys": "Chiavi API",
|
||||
"logout": "Esci"
|
||||
},
|
||||
"api_keys_page": {
|
||||
"title": "Chiavi API",
|
||||
"header": "Chiavi API",
|
||||
"generate_new_key": "Genera Nuova Chiave",
|
||||
"name": "Nome",
|
||||
"key": "Chiave",
|
||||
"expires_at": "Scade il",
|
||||
"created_at": "Creato il",
|
||||
"actions": "Azioni",
|
||||
"delete": "Elimina",
|
||||
"no_keys_found": "Nessuna chiave API trovata.",
|
||||
"generate_modal_title": "Genera Nuova Chiave API",
|
||||
"generate_modal_description": "Fornisci un nome e una scadenza per la tua nuova chiave API.",
|
||||
"expires_in": "Scade Tra",
|
||||
"select_expiration": "Seleziona una scadenza",
|
||||
"30_days": "30 Giorni",
|
||||
"60_days": "60 Giorni",
|
||||
"6_months": "6 Mesi",
|
||||
"12_months": "12 Mesi",
|
||||
"24_months": "24 Mesi",
|
||||
"generate": "Genera",
|
||||
"new_api_key": "Nuova Chiave API",
|
||||
"failed_to_delete": "Impossibile eliminare la chiave API",
|
||||
"api_key_deleted": "Chiave API eliminata",
|
||||
"generated_title": "Chiave API Generata",
|
||||
"generated_message": "La tua chiave API è stata generata, per favore copiala e salvala in un luogo sicuro. Questa chiave verrà mostrata solo una volta."
|
||||
},
|
||||
"archived_emails_page": {
|
||||
"title": "Email archiviate",
|
||||
"header": "Email archiviate",
|
||||
"select_ingestion_source": "Seleziona una fonte di ingestione",
|
||||
"header": "Email Archiviate",
|
||||
"select_ingestion_source": "Seleziona una sorgente di ingestione",
|
||||
"date": "Data",
|
||||
"subject": "Oggetto",
|
||||
"sender": "Mittente",
|
||||
@@ -255,6 +266,24 @@
|
||||
"no_emails_found": "Nessuna email archiviata trovata.",
|
||||
"prev": "Prec",
|
||||
"next": "Succ"
|
||||
},
|
||||
"dashboard_page": {
|
||||
"title": "Dashboard",
|
||||
"meta_description": "Panoramica del tuo archivio email.",
|
||||
"header": "Dashboard",
|
||||
"create_ingestion": "Crea un'ingestione",
|
||||
"no_ingestion_header": "Non hai impostato nessuna sorgente di ingestione.",
|
||||
"no_ingestion_text": "Aggiungi una sorgente di ingestione per iniziare ad archiviare le tue caselle di posta.",
|
||||
"total_emails_archived": "Totale Email Archiviate",
|
||||
"total_storage_used": "Spazio di Archiviazione Totale Utilizzato",
|
||||
"failed_ingestions": "Ingestioni Fallite (Ultimi 7 Giorni)",
|
||||
"ingestion_history": "Cronologia Ingestioni",
|
||||
"no_ingestion_history": "Nessuna cronologia delle ingestioni disponibile.",
|
||||
"storage_by_source": "Spazio di Archiviazione per Sorgente di Ingestione",
|
||||
"no_ingestion_sources": "Nessuna sorgente di ingestione disponibile.",
|
||||
"indexed_insights": "Approfondimenti indicizzati",
|
||||
"top_10_senders": "I 10 Mittenti Principali",
|
||||
"no_indexed_insights": "Nessun approfondimento indicizzato disponibile."
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3,6 +3,11 @@ import type { LayoutServerLoad } from './$types';
|
||||
import 'dotenv/config';
|
||||
import { api } from '$lib/server/api';
|
||||
import type { SystemSettings } from '@open-archiver/types';
|
||||
import { version } from '../../../../package.json';
|
||||
import semver from 'semver';
|
||||
|
||||
let newVersionInfo: { version: string; description: string; url: string } | null = null;
|
||||
let lastChecked: Date | null = null;
|
||||
|
||||
export const load: LayoutServerLoad = async (event) => {
|
||||
const { locals, url } = event;
|
||||
@@ -32,10 +37,35 @@ export const load: LayoutServerLoad = async (event) => {
|
||||
? await systemSettingsResponse.json()
|
||||
: null;
|
||||
|
||||
const now = new Date();
|
||||
if (!lastChecked || now.getTime() - lastChecked.getTime() > 1000 * 60 * 60) {
|
||||
try {
|
||||
const res = await fetch(
|
||||
'https://api.github.com/repos/LogicLabs-OU/OpenArchiver/releases/latest'
|
||||
);
|
||||
if (res.ok) {
|
||||
const latestRelease = await res.json();
|
||||
const latestVersion = latestRelease.tag_name.replace('v', '');
|
||||
if (semver.gt(latestVersion, version)) {
|
||||
newVersionInfo = {
|
||||
version: latestVersion,
|
||||
description: latestRelease.name,
|
||||
url: latestRelease.html_url,
|
||||
};
|
||||
}
|
||||
}
|
||||
lastChecked = now;
|
||||
} catch (error) {
|
||||
console.error('Failed to fetch latest version from GitHub:', error);
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
user: locals.user,
|
||||
accessToken: locals.accessToken,
|
||||
isDemo: process.env.IS_DEMO === 'true',
|
||||
systemSettings,
|
||||
currentVersion: version,
|
||||
newVersionInfo: newVersionInfo,
|
||||
};
|
||||
};
|
||||
|
||||
@@ -35,5 +35,5 @@
|
||||
<main class="flex-1">
|
||||
{@render children()}
|
||||
</main>
|
||||
<Footer />
|
||||
<Footer currentVersion={data.currentVersion} newVersionInfo={data.newVersionInfo} />
|
||||
</div>
|
||||
|
||||
@@ -435,7 +435,12 @@
|
||||
</div>
|
||||
|
||||
<Dialog.Root bind:open={isDialogOpen}>
|
||||
<Dialog.Content class="sm:max-w-120 md:max-w-180">
|
||||
<Dialog.Content
|
||||
class="sm:max-w-120 md:max-w-180"
|
||||
onInteractOutside={(e) => {
|
||||
e.preventDefault();
|
||||
}}
|
||||
>
|
||||
<Dialog.Header>
|
||||
<Dialog.Title
|
||||
>{selectedSource ? $t('app.ingestions.edit') : $t('app.ingestions.create')}{' '}
|
||||
|
||||
@@ -55,6 +55,16 @@ export interface EmailObject {
|
||||
tags?: string[];
|
||||
}
|
||||
|
||||
/**
|
||||
* Represents an email that has been processed and is ready for indexing.
|
||||
* This interface defines the shape of the data that is passed to the batch indexing function.
|
||||
*/
|
||||
export interface PendingEmail {
|
||||
email: EmailObject;
|
||||
sourceId: string;
|
||||
archivedId: string;
|
||||
}
|
||||
|
||||
// Define the structure of the document to be indexed in Meilisearch
|
||||
export interface EmailDocument {
|
||||
id: string; // The unique ID of the email
|
||||
|
||||
@@ -23,7 +23,8 @@ export type IngestionProvider =
|
||||
| 'microsoft_365'
|
||||
| 'generic_imap'
|
||||
| 'pst_import'
|
||||
| 'eml_import';
|
||||
| 'eml_import'
|
||||
| 'mbox_import';
|
||||
|
||||
export type IngestionStatus =
|
||||
| 'active'
|
||||
@@ -81,13 +82,20 @@ export interface EMLImportCredentials extends BaseIngestionCredentials {
|
||||
uploadedFilePath: string;
|
||||
}
|
||||
|
||||
export interface MboxImportCredentials extends BaseIngestionCredentials {
|
||||
type: 'mbox_import';
|
||||
uploadedFileName: string;
|
||||
uploadedFilePath: string;
|
||||
}
|
||||
|
||||
// Discriminated union for all possible credential types
|
||||
export type IngestionCredentials =
|
||||
| GenericImapCredentials
|
||||
| GoogleWorkspaceCredentials
|
||||
| Microsoft365Credentials
|
||||
| PSTImportCredentials
|
||||
| EMLImportCredentials;
|
||||
| EMLImportCredentials
|
||||
| MboxImportCredentials;
|
||||
|
||||
export interface IngestionSource {
|
||||
id: string;
|
||||
|
||||
478
pnpm-lock.yaml
generated
478
pnpm-lock.yaml
generated
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user