Search page

This commit is contained in:
Wayne
2025-07-14 23:52:03 +03:00
parent 5217954b65
commit 3cb4287dfa
15 changed files with 402 additions and 29 deletions

View File

@@ -0,0 +1,34 @@
import { Request, Response } from 'express';
import { SearchService } from '../../services/SearchService';
import type { SearchQuery } from '@open-archive/types';
/**
 * HTTP controller for the email search endpoint.
 *
 * Reads a `SearchQuery`-shaped JSON body from the request and delegates the
 * actual search to `SearchService.searchEmails`.
 */
export class SearchController {
    private readonly searchService: SearchService;

    constructor() {
        this.searchService = new SearchService();
    }

    /**
     * Handles a search request.
     *
     * Responses:
     * - 400 `{ message }` when `query` is not a non-empty string, when
     *   `filters` is present but not a plain object, or when `page`/`limit`
     *   are present but not positive integers.
     * - 200 with whatever `searchEmails` resolves to.
     * - 500 `{ message }` when anything in the handler throws.
     *
     * Declared as an arrow-function property so `this` stays bound when the
     * handler is passed by reference to a router.
     */
    public search = async (req: Request, res: Response): Promise<void> => {
        try {
            // The body is untrusted input: the SearchQuery type is only a
            // compile-time hint, so every field is checked at runtime.
            const body = (req.body ?? {}) as Partial<SearchQuery>;
            const { query, filters, page, limit } = body;

            if (typeof query !== 'string' || query.length === 0) {
                res.status(400).json({ message: 'Query is required' });
                return;
            }

            const isPlainObject =
                typeof filters === 'object' && filters !== null && !Array.isArray(filters);
            if (filters !== undefined && !isPlainObject) {
                res.status(400).json({ message: 'Filters must be an object' });
                return;
            }

            const isPositiveInteger = (value: unknown): boolean =>
                typeof value === 'number' && Number.isInteger(value) && value > 0;
            if (
                (page !== undefined && !isPositiveInteger(page)) ||
                (limit !== undefined && !isPositiveInteger(limit))
            ) {
                res.status(400).json({ message: 'Page and limit must be positive integers' });
                return;
            }

            const results = await this.searchService.searchEmails({
                query,
                filters,
                page,
                limit
            });
            res.status(200).json(results);
        } catch (error) {
            const message = error instanceof Error ? error.message : 'An unknown error occurred';
            res.status(500).json({ message });
        }
    };
}

View File

@@ -0,0 +1,17 @@
import { Router } from 'express';
import { SearchController } from '../controllers/search.controller';
import { requireAuth } from '../middleware/requireAuth';
import { IAuthService } from '../../services/AuthService';
/**
 * Builds the Express router for the search endpoint.
 *
 * The middleware returned by `requireAuth(authService)` is installed at router
 * level before the route is registered, so it runs ahead of the search handler.
 *
 * @param searchController Controller whose `search` handler serves `POST /`.
 * @param authService Service handed to `requireAuth` to build the middleware.
 * @returns A router exposing a single `POST /` route.
 */
export const createSearchRouter = (
    searchController: SearchController,
    authService: IAuthService
): Router => {
    const searchRouter = Router();
    const authGuard = requireAuth(authService);

    searchRouter.use(authGuard);
    searchRouter.post('/', searchController.search);

    return searchRouter;
};

View File

@@ -4,15 +4,18 @@ import { AuthController } from './api/controllers/auth.controller';
import { IngestionController } from './api/controllers/ingestion.controller';
import { ArchivedEmailController } from './api/controllers/archived-email.controller';
import { StorageController } from './api/controllers/storage.controller';
import { SearchController } from './api/controllers/search.controller';
import { requireAuth } from './api/middleware/requireAuth';
import { createAuthRouter } from './api/routes/auth.routes';
import { createIngestionRouter } from './api/routes/ingestion.routes';
import { createArchivedEmailRouter } from './api/routes/archived-email.routes';
import { createStorageRouter } from './api/routes/storage.routes';
import { createSearchRouter } from './api/routes/search.routes';
import testRouter from './api/routes/test.routes';
import { AuthService } from './services/AuthService';
import { AdminUserService } from './services/UserService';
import { StorageService } from './services/StorageService';
import { SearchService } from './services/SearchService';
@@ -40,6 +43,8 @@ const ingestionController = new IngestionController();
const archivedEmailController = new ArchivedEmailController();
const storageService = new StorageService();
const storageController = new StorageController(storageService);
const searchService = new SearchService();
const searchController = new SearchController();
// --- Express App Initialization ---
const app = express();
@@ -52,10 +57,12 @@ const authRouter = createAuthRouter(authController);
const ingestionRouter = createIngestionRouter(ingestionController, authService);
const archivedEmailRouter = createArchivedEmailRouter(archivedEmailController, authService);
const storageRouter = createStorageRouter(storageController, authService);
const searchRouter = createSearchRouter(searchController, authService);
app.use('/v1/auth', authRouter);
app.use('/v1/ingestion-sources', ingestionRouter);
app.use('/v1/archived-emails', archivedEmailRouter);
app.use('/v1/storage', storageRouter);
app.use('/v1/search', searchRouter);
app.use('/v1/test', testRouter);
// Example of a protected route
@@ -73,6 +80,10 @@ app.get('/', (req, res) => {
// --- Server Start ---
const startServer = async () => {
try {
// Configure the Meilisearch index on startup
console.log('Configuring email index...');
await searchService.configureEmailIndex();
app.listen(PORT_BACKEND, () => {
console.log(`Backend listening at http://localhost:${PORT_BACKEND}`);
});

View File

@@ -1,4 +1,4 @@
import { Attachment } from '@open-archive/types';
import { Attachment, EmailDocument } from '@open-archive/types';
import { SearchService } from './SearchService';
import { StorageService } from './StorageService';
import { extractText } from '../helpers/textExtractor';
@@ -6,23 +6,7 @@ import DatabaseService from './DatabaseService';
import { archivedEmails, attachments, emailAttachments } from '../database/schema';
import { eq } from 'drizzle-orm';
import { streamToBuffer } from '../helpers/streamToBuffer';
// Define the structure of the document to be indexed in Meilisearch
interface EmailDocument {
id: string; // The unique ID of the email
from: string;
to: string[];
cc: string[];
bcc: string[];
subject: string;
body: string;
attachments: {
filename: string;
content: string; // Extracted text from the attachment
}[];
timestamp: number;
// other metadata
}
import { simpleParser } from 'mailparser';
interface DbRecipients {
to: { name: string; address: string; }[];
@@ -82,7 +66,12 @@ export class IndexingService {
const emailBodyStream = await this.storageService.get(email.storagePath);
const emailBodyBuffer = await streamToBuffer(emailBodyStream);
const emailBodyText = await extractText(emailBodyBuffer, 'text/plain');
const parsedEmail = await simpleParser(emailBodyBuffer);
const emailBodyText =
parsedEmail.text ||
parsedEmail.html ||
(await extractText(emailBodyBuffer, 'text/plain')) ||
'';
const recipients = email.recipients as DbRecipients;
@@ -96,6 +85,7 @@ export class IndexingService {
body: emailBodyText,
attachments: attachmentContents,
timestamp: new Date(email.sentAt).getTime(),
ingestionSourceId: email.ingestionSourceId
};
}

View File

@@ -1,5 +1,6 @@
import { Index, MeiliSearch } from 'meilisearch';
import { Index, MeiliSearch, SearchParams } from 'meilisearch';
import { config } from '../config';
import type { SearchQuery, SearchResult, EmailDocument } from '@open-archive/types';
export class SearchService {
private client: MeiliSearch;
@@ -32,7 +33,38 @@ export class SearchService {
return index.search(query, options);
}
// Add other methods as needed (e.g., deleteDocuments, updateSettings)
/**
 * Runs a paginated search against the `emails` index, newest first.
 *
 * @param dto Search text plus optional equality filters and paging.
 *   `page` defaults to 1 and `limit` to 10 when omitted.
 * @returns The hits for the requested page together with paging metadata.
 *   `total` is Meilisearch's `estimatedTotalHits` when present, otherwise the
 *   number of hits returned.
 * @throws Error when a filter key contains characters outside
 *   `[A-Za-z0-9_.-]`; errors from the index lookup or search also propagate.
 */
public async searchEmails(dto: SearchQuery): Promise<SearchResult> {
    const { query, filters, page = 1, limit = 10 } = dto;
    const index = await this.getIndex<EmailDocument>('emails');

    const searchParams: SearchParams = {
        limit,
        offset: (page - 1) * limit,
        attributesToHighlight: ['body', 'attachments.*.content'],
        showMatchesPosition: true,
        sort: ['timestamp:desc']
    };

    if (filters) {
        // Filter keys and values originate from the request body, so neither
        // may be spliced into the filter expression verbatim.
        const attributePattern = /^[A-Za-z0-9_.-]+$/;

        const toFilterValue = (value: unknown): string => {
            if (typeof value === 'number' || typeof value === 'boolean') {
                return String(value);
            }
            // Everything else is emitted as a quoted string with backslashes
            // and single quotes escaped so the value cannot close the quote.
            // NOTE(review): confirm how the deployed Meilisearch version
            // unescapes `\\` inside quoted filter values.
            const escaped = String(value).replace(/\\/g, '\\\\').replace(/'/g, "\\'");
            return `'${escaped}'`;
        };

        const filterStrings = Object.entries(filters).map(([key, value]) => {
            if (!attributePattern.test(key)) {
                throw new Error(`Invalid filter attribute: ${key}`);
            }
            return `${key} = ${toFilterValue(value)}`;
        });

        // An empty filters object must not produce an empty filter expression.
        if (filterStrings.length > 0) {
            searchParams.filter = filterStrings.join(' AND ');
        }
    }

    const searchResults = await index.search(query, searchParams);
    const total = searchResults.estimatedTotalHits ?? searchResults.hits.length;

    return {
        hits: searchResults.hits,
        total,
        page,
        limit,
        // Guard against division by zero when a caller passes limit = 0.
        totalPages: limit > 0 ? Math.ceil(total / limit) : 0
    };
}
public async configureEmailIndex() {
const index = await this.getIndex('emails');
@@ -47,8 +79,8 @@ export class SearchService {
'attachments.filename',
'attachments.content',
],
filterableAttributes: ['from', 'to', 'cc', 'bcc', 'timestamp'],
sortableAttributes: ['timestamp'],
filterableAttributes: ['from', 'to', 'cc', 'bcc', 'timestamp', 'ingestionSourceId'],
sortableAttributes: ['timestamp']
});
}
}

View File

@@ -13,7 +13,13 @@ const processor = async (job: any) => {
const worker = new Worker('indexing', processor, {
connection,
concurrency: 5
concurrency: 5,
removeOnComplete: {
count: 1000, // keep last 1000 jobs
},
removeOnFail: {
count: 5000, // keep last 5000 failed jobs
},
});
console.log('Indexing worker started');

View File

@@ -14,7 +14,15 @@ const processor = async (job: any) => {
}
};
const worker = new Worker('ingestion', processor, { connection });
const worker = new Worker('ingestion', processor, {
connection,
removeOnComplete: {
count: 100, // keep last 100 jobs
},
removeOnFail: {
count: 500, // keep last 500 failed jobs
},
});
console.log('Ingestion worker started');

View File

@@ -2,7 +2,10 @@
import PostalMime, { type Email } from 'postal-mime';
import type { Buffer } from 'buffer';
let { raw }: { raw: Buffer | { type: 'Buffer'; data: number[] } | undefined } = $props();
let {
raw,
rawHtml
}: { raw?: Buffer | { type: 'Buffer'; data: number[] } | undefined; rawHtml?: string } = $props();
let parsedEmail: Email | null = $state(null);
let isLoading = $state(true);
@@ -10,8 +13,10 @@
// By adding a <base> tag, all relative and absolute links in the HTML document
// will open in a new tab by default.
let emailHtml = $derived(() => {
if (parsedEmail?.html) {
if (parsedEmail && parsedEmail?.html) {
return `<base target="_blank" />${parsedEmail.html}`;
} else if (rawHtml) {
return `<base target="_blank" />${rawHtml}`;
}
return null;
});

View File

@@ -8,7 +8,8 @@
const navItems = [
{ href: '/dashboard', label: 'Dashboard' },
{ href: '/dashboard/ingestions', label: 'Ingestions' },
{ href: '/dashboard/archived-emails', label: 'Archived emails' }
{ href: '/dashboard/archived-emails', label: 'Archived emails' },
{ href: '/dashboard/search', label: 'Search' }
];
let { children } = $props();
function handleLogout() {

View File

@@ -0,0 +1,42 @@
import type { PageServerLoad, Actions } from './$types';
import { api } from '$lib/server/api';
import type { SearchResult } from '@open-archive/types';
/**
 * Initial data for the search page: no result yet and an empty query.
 */
export const load: PageServerLoad = async () => ({
    searchResult: null,
    query: ''
});
/**
 * Form actions for the search page.
 *
 * `default` reads the `query` form field, posts `{ query }` as JSON to the
 * `/search` endpoint through the `api` helper and returns either the search
 * result or an error message, always echoing the submitted query back.
 */
export const actions: Actions = {
    default: async (event) => {
        const formData = await event.request.formData();

        // FormData values are `string | File | null`; anything that is not a
        // string is treated the same as an empty query.
        const rawQuery = formData.get('query');
        const query = typeof rawQuery === 'string' ? rawQuery : '';

        if (!query) {
            return { searchResult: null, query: '' };
        }

        try {
            const response = await api('/search', event, {
                method: 'POST',
                body: JSON.stringify({ query })
            });

            if (!response.ok) {
                const errorBody = await response.json();
                // Fall back to a generic message so a failed request never
                // comes back without any error text.
                const message =
                    typeof errorBody?.message === 'string'
                        ? errorBody.message
                        : `Search request failed (${response.status})`;
                return { searchResult: null, query, error: message };
            }

            const searchResult = (await response.json()) as SearchResult;
            return { searchResult, query };
        } catch (error) {
            return {
                searchResult: null,
                query,
                error: error instanceof Error ? error.message : 'Unknown error'
            };
        }
    }
};

View File

@@ -0,0 +1,185 @@
<script lang="ts">
import type { ActionData, PageData } from './$types';
import { Button } from '$lib/components/ui/button';
import { Input } from '$lib/components/ui/input';
import {
Card,
CardContent,
CardHeader,
CardTitle,
CardDescription
} from '$lib/components/ui/card';
type SearchPageProps = { data: PageData; form: ActionData };

let { data, form }: SearchPageProps = $props();

// Values from the form action result take precedence; otherwise fall back to
// the page's load data.
const error = form?.error;
const query = form?.query ?? data.query;
const searchResult = form?.searchResult ?? data.searchResult;
/**
 * Escapes the HTML-significant characters `&`, `<`, `>`, `"` and `'` so the
 * result can be embedded in markup as plain text.
 *
 * `&` is replaced first so the entities produced by the later replacements are
 * not escaped a second time. No tag stripping is done: once `<` is escaped, no
 * tag (including `<html>`) can survive in the output.
 *
 * @param text Text to escape; a falsy value yields an empty string.
 * @returns The escaped text.
 */
function escapeHTML(text: string): string {
    if (!text) return '';
    return text
        .replace(/&/g, '&amp;')
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;')
        .replace(/"/g, '&quot;')
        .replace(/'/g, '&#039;');
}
/**
 * Builds an HTML string in which the given match ranges of `text` are wrapped
 * in `<mark>` and everything (matched or not) is HTML-escaped.
 *
 * The result is intended for `{@html ...}`, so every code path must return
 * escaped text, including the path where there are no match positions.
 *
 * @param text Source text; a falsy value yields an empty string.
 * @param positions Match ranges as character offsets into `text`. May be
 *   missing at runtime when the field had no matches. The array is not
 *   modified.
 * @returns Escaped HTML with the matches highlighted.
 */
function getHighlightedHTML(
    text: string,
    positions: { start: number; length: number }[]
): string {
    if (!text) {
        return '';
    }
    if (!positions || positions.length === 0) {
        return escapeHTML(text);
    }

    // Sort a copy so the caller's array is left untouched.
    const ordered = [...positions].sort((a, b) => a.start - b.start);

    let highlighted = '';
    let lastIndex = 0;
    for (const { start, length } of ordered) {
        // Clamp each range to the part of the text not yet emitted; this keeps
        // overlapping or out-of-range matches from duplicating text.
        const from = Math.max(start, lastIndex);
        const to = Math.min(start + length, text.length);
        if (to <= from) {
            continue;
        }
        highlighted += escapeHTML(text.substring(lastIndex, from));
        highlighted += `<mark class="bg-yellow-300 dark:bg-yellow-600">${escapeHTML(
            text.substring(from, to)
        )}</mark>`;
        lastIndex = to;
    }
    highlighted += escapeHTML(text.substring(lastIndex));
    return highlighted;
}
/**
 * Produces short highlighted excerpts of `text`, one per match, each padded
 * with up to `contextLength` characters on either side.
 *
 * A match that starts inside the previous excerpt (context included) is
 * skipped, so excerpts never overlap. An excerpt that does not reach the
 * start or end of `text` gets a leading or trailing `...`.
 *
 * @param text Source text; a falsy value yields no snippets.
 * @param positions Match ranges as character offsets into `text`. The array
 *   is not modified.
 * @param contextLength Characters of context kept on each side of a match.
 * @returns HTML strings produced by `getHighlightedHTML`.
 */
function getSnippets(
    text: string,
    positions: { start: number; length: number }[],
    contextLength = 15
): string[] {
    if (!text || !positions) {
        return [];
    }

    // Sort a copy so the caller's array is left untouched.
    const ordered = [...positions].sort((a, b) => a.start - b.start);

    const snippets: string[] = [];
    let lastEnd = -1;
    for (const { start, length } of ordered) {
        if (start < lastEnd) {
            // Already covered by the previous excerpt.
            continue;
        }

        const snippetStart = Math.max(0, start - contextLength);
        const snippetEnd = Math.min(text.length, start + length + contextLength);
        lastEnd = snippetEnd;

        const snippet = text.substring(snippetStart, snippetEnd);
        // The match offset has to be re-based onto the excerpt.
        const relativePositions = [{ start: start - snippetStart, length }];

        let highlightedSnippet = getHighlightedHTML(snippet, relativePositions);
        if (snippetStart > 0) {
            highlightedSnippet = '...' + highlightedSnippet;
        }
        if (snippetEnd < text.length) {
            highlightedSnippet += '...';
        }
        snippets.push(highlightedSnippet);
    }
    return snippets;
}
</script>
<svelte:head>
<title>Search | OpenArchive</title>
<meta name="description" content="Search for archived emails." />
</svelte:head>
<div class="container mx-auto p-4 md:p-8">
    <h1 class="mb-4 text-2xl font-bold">Email Search</h1>
    <form method="POST" class="mb-8 flex items-center gap-2">
        <Input
            type="search"
            name="query"
            placeholder="Search by keyword, sender, recipient..."
            class="flex-grow"
            value={query}
        />
        <Button type="submit">Search</Button>
    </form>
    {#if error}
        <p class="text-red-500">{error}</p>
    {/if}
    {#if searchResult}
        <!-- Only the total is shown: the search result carries no timing
             information, so no elapsed time can be displayed here. -->
        <p class="text-muted-foreground mb-4">
            Found {searchResult.total} results
        </p>
        <div class="grid gap-4">
            {#each searchResult.hits as hit}
                {@const _matchesPosition = hit._matchesPosition || {}}
                <a href="/dashboard/archived-emails/{hit.id}" class="block">
                    <Card>
                        <CardHeader>
                            <CardTitle>
                                {@html getHighlightedHTML(hit.subject, _matchesPosition.subject)}
                            </CardTitle>
                            <CardDescription>
                                From: {@html getHighlightedHTML(hit.from, _matchesPosition.from)} | To:
                                {@html getHighlightedHTML(hit.to.join(', '), _matchesPosition.to)}
                                |
                                {new Date(hit.timestamp).toLocaleString()}
                            </CardDescription>
                        </CardHeader>
                        <CardContent class="space-y-2">
                            <!-- Body matches -->
                            {#if _matchesPosition.body}
                                {#each getSnippets(hit.body, _matchesPosition.body) as snippet}
                                    <div class="space-y-2 rounded-md bg-slate-100 p-2 dark:bg-slate-800">
                                        <p class="text-sm text-gray-500">In email body:</p>
                                        <p class="font-mono text-sm">
                                            {@html snippet}
                                        </p>
                                    </div>
                                {/each}
                            {/if}
                            <!-- Attachment matches -->
                            {#if _matchesPosition['attachments.content']}
                                {#each _matchesPosition['attachments.content'] as match}
                                    {#if match.indices}
                                        {@const attachmentIndex = match.indices[0]}
                                        {@const attachment = hit.attachments[attachmentIndex]}
                                        {#if attachment}
                                            {#each getSnippets(attachment.content, [match]) as snippet}
                                                <div class="space-y-2 rounded-md bg-slate-100 p-2 dark:bg-slate-800">
                                                    <p class="text-sm text-gray-500">
                                                        In attachment: {attachment.filename}
                                                    </p>
                                                    <p class="font-mono text-sm">
                                                        {@html snippet}
                                                    </p>
                                                </div>
                                            {/each}
                                        {/if}
                                    {/if}
                                {/each}
                            {/if}
                        </CardContent>
                    </Card>
                </a>
            {/each}
        </div>
    {/if}
</div>

View File

@@ -46,3 +46,21 @@ export interface EmailObject {
/** An optional buffer containing the full raw EML content of the email, which is useful for archival and compliance purposes. */
eml?: Buffer;
}
/**
 * Structure of the document indexed in Meilisearch for a single email.
 */
export interface EmailDocument {
    /** The unique ID of the email. */
    id: string;
    from: string;
    to: string[];
    cc: string[];
    bcc: string[];
    subject: string;
    body: string;
    attachments: Array<{
        filename: string;
        /** Extracted text from the attachment. */
        content: string;
    }>;
    timestamp: number;
    ingestionSourceId: string;
    // other metadata
}

View File

@@ -4,3 +4,4 @@ export * from './ingestion.types';
export * from './storage.types';
export * from './email.types';
export * from './archived-emails.types';
export * from './search.types';

View File

@@ -0,0 +1,23 @@
import type { EmailDocument } from './email.types';
/**
 * Payload describing an email search request.
 */
export interface SearchQuery {
    /** Text to search for. */
    query: string;
    /** Optional filters, keyed by attribute name. */
    filters?: { [attribute: string]: any };
    /** Optional page number. */
    page?: number;
    /** Optional page size. */
    limit?: number;
}
/**
 * A single search hit: the indexed email document plus optional match
 * metadata.
 */
export interface SearchHit extends EmailDocument {
    /**
     * Match locations keyed by attribute name. Each entry has a `start`
     * offset, a `length`, and optionally `indices`.
     */
    _matchesPosition?: Record<
        string,
        Array<{ start: number; length: number; indices?: number[] }>
    >;
    /** Optional formatted copy of some or all document fields. */
    _formatted?: Partial<EmailDocument>;
}
/**
 * One page of search results together with paging metadata.
 */
export interface SearchResult {
    /** Hits on the current page. */
    hits: Array<SearchHit>;
    /** Total number of matching documents. */
    total: number;
    /** Page number this result corresponds to. */
    page: number;
    /** Page size used for this result. */
    limit: number;
    /** Number of pages available. */
    totalPages: number;
}

File diff suppressed because one or more lines are too long