mirror of
https://github.com/LogicLabs-OU/OpenArchiver.git
synced 2026-04-06 00:31:57 +02:00
Search page
This commit is contained in:
34
packages/backend/src/api/controllers/search.controller.ts
Normal file
34
packages/backend/src/api/controllers/search.controller.ts
Normal file
@@ -0,0 +1,34 @@
|
||||
import { Request, Response } from 'express';
|
||||
import { SearchService } from '../../services/SearchService';
|
||||
import type { SearchQuery } from '@open-archive/types';
|
||||
|
||||
/**
 * HTTP controller for the email search endpoint.
 *
 * Validates the JSON body of a search request, delegates the lookup to
 * `SearchService.searchEmails`, and maps the outcome to an HTTP response.
 */
export class SearchController {
    private readonly searchService: SearchService;

    /**
     * @param searchService Service used to execute searches. Defaults to a new
     * instance so existing `new SearchController()` call sites keep working.
     */
    constructor(searchService: SearchService = new SearchService()) {
        this.searchService = searchService;
    }

    /**
     * Returns true when `value` is absent or a positive integer.
     * Used for the optional `page` and `limit` fields, which feed directly
     * into the offset computation of the search service.
     */
    private static isOptionalPositiveInteger(value: unknown): value is number | undefined {
        return (
            value === undefined ||
            (typeof value === 'number' && Number.isInteger(value) && value > 0)
        );
    }

    /**
     * Handles a search request.
     *
     * Responses:
     * - 400 when `query` is missing/not a string, `filters` is not a plain
     *   object, or `page`/`limit` are not positive integers;
     * - 200 with the search result on success;
     * - 500 with the error message when the search itself throws.
     *
     * Declared as an arrow-function property so it can be handed to the
     * router as a bare reference without losing `this`.
     */
    public search = async (req: Request, res: Response): Promise<void> => {
        try {
            // The body is untrusted client input: a type assertion alone does not
            // guarantee its shape, so every field is checked at runtime.
            const { query, filters, page, limit }: Partial<SearchQuery> = req.body ?? {};

            if (typeof query !== 'string' || query.length === 0) {
                res.status(400).json({ message: 'Query is required' });
                return;
            }

            if (
                filters !== undefined &&
                (filters === null || typeof filters !== 'object' || Array.isArray(filters))
            ) {
                res.status(400).json({ message: 'Filters must be an object' });
                return;
            }

            if (
                !SearchController.isOptionalPositiveInteger(page) ||
                !SearchController.isOptionalPositiveInteger(limit)
            ) {
                res.status(400).json({ message: 'Page and limit must be positive integers' });
                return;
            }

            const results = await this.searchService.searchEmails({
                query,
                filters,
                page,
                limit
            });

            res.status(200).json(results);
        } catch (error) {
            const message = error instanceof Error ? error.message : 'An unknown error occurred';
            res.status(500).json({ message });
        }
    };
}
|
||||
17
packages/backend/src/api/routes/search.routes.ts
Normal file
17
packages/backend/src/api/routes/search.routes.ts
Normal file
@@ -0,0 +1,17 @@
|
||||
import { Router } from 'express';
|
||||
import { SearchController } from '../controllers/search.controller';
|
||||
import { requireAuth } from '../middleware/requireAuth';
|
||||
import { IAuthService } from '../../services/AuthService';
|
||||
|
||||
/**
 * Builds the router for the search API.
 *
 * Every route registered on the returned router sits behind the
 * authentication middleware created from `authService`.
 *
 * @param searchController Controller whose `search` handler serves `POST /`.
 * @param authService Service handed to `requireAuth` to build the auth middleware.
 * @returns The configured Express router.
 */
export const createSearchRouter = (
    searchController: SearchController,
    authService: IAuthService
): Router => {
    const searchRouter = Router();
    const authGuard = requireAuth(authService);

    // Register the guard first so it runs before any route handler below.
    searchRouter.use(authGuard);
    searchRouter.post('/', searchController.search);

    return searchRouter;
};
|
||||
@@ -4,15 +4,18 @@ import { AuthController } from './api/controllers/auth.controller';
|
||||
import { IngestionController } from './api/controllers/ingestion.controller';
|
||||
import { ArchivedEmailController } from './api/controllers/archived-email.controller';
|
||||
import { StorageController } from './api/controllers/storage.controller';
|
||||
import { SearchController } from './api/controllers/search.controller';
|
||||
import { requireAuth } from './api/middleware/requireAuth';
|
||||
import { createAuthRouter } from './api/routes/auth.routes';
|
||||
import { createIngestionRouter } from './api/routes/ingestion.routes';
|
||||
import { createArchivedEmailRouter } from './api/routes/archived-email.routes';
|
||||
import { createStorageRouter } from './api/routes/storage.routes';
|
||||
import { createSearchRouter } from './api/routes/search.routes';
|
||||
import testRouter from './api/routes/test.routes';
|
||||
import { AuthService } from './services/AuthService';
|
||||
import { AdminUserService } from './services/UserService';
|
||||
import { StorageService } from './services/StorageService';
|
||||
import { SearchService } from './services/SearchService';
|
||||
|
||||
|
||||
|
||||
@@ -40,6 +43,8 @@ const ingestionController = new IngestionController();
|
||||
const archivedEmailController = new ArchivedEmailController();
|
||||
const storageService = new StorageService();
|
||||
const storageController = new StorageController(storageService);
|
||||
const searchService = new SearchService();
|
||||
const searchController = new SearchController();
|
||||
|
||||
// --- Express App Initialization ---
|
||||
const app = express();
|
||||
@@ -52,10 +57,12 @@ const authRouter = createAuthRouter(authController);
|
||||
const ingestionRouter = createIngestionRouter(ingestionController, authService);
|
||||
const archivedEmailRouter = createArchivedEmailRouter(archivedEmailController, authService);
|
||||
const storageRouter = createStorageRouter(storageController, authService);
|
||||
const searchRouter = createSearchRouter(searchController, authService);
|
||||
app.use('/v1/auth', authRouter);
|
||||
app.use('/v1/ingestion-sources', ingestionRouter);
|
||||
app.use('/v1/archived-emails', archivedEmailRouter);
|
||||
app.use('/v1/storage', storageRouter);
|
||||
app.use('/v1/search', searchRouter);
|
||||
app.use('/v1/test', testRouter);
|
||||
|
||||
// Example of a protected route
|
||||
@@ -73,6 +80,10 @@ app.get('/', (req, res) => {
|
||||
// --- Server Start ---
|
||||
const startServer = async () => {
|
||||
try {
|
||||
// Configure the Meilisearch index on startup
|
||||
console.log('Configuring email index...');
|
||||
await searchService.configureEmailIndex();
|
||||
|
||||
app.listen(PORT_BACKEND, () => {
|
||||
console.log(`Backend listening at http://localhost:${PORT_BACKEND}`);
|
||||
});
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import { Attachment } from '@open-archive/types';
|
||||
import { Attachment, EmailDocument } from '@open-archive/types';
|
||||
import { SearchService } from './SearchService';
|
||||
import { StorageService } from './StorageService';
|
||||
import { extractText } from '../helpers/textExtractor';
|
||||
@@ -6,23 +6,7 @@ import DatabaseService from './DatabaseService';
|
||||
import { archivedEmails, attachments, emailAttachments } from '../database/schema';
|
||||
import { eq } from 'drizzle-orm';
|
||||
import { streamToBuffer } from '../helpers/streamToBuffer';
|
||||
|
||||
/** Structure of the document to be indexed in Meilisearch. */
interface EmailDocument {
    /** The unique ID of the email. */
    id: string;
    from: string;
    to: string[];
    cc: string[];
    bcc: string[];
    subject: string;
    body: string;
    attachments: Array<{
        filename: string;
        /** Extracted text from the attachment. */
        content: string;
    }>;
    timestamp: number;
    // other metadata
}
|
||||
import { simpleParser } from 'mailparser';
|
||||
|
||||
interface DbRecipients {
|
||||
to: { name: string; address: string; }[];
|
||||
@@ -82,7 +66,12 @@ export class IndexingService {
|
||||
|
||||
const emailBodyStream = await this.storageService.get(email.storagePath);
|
||||
const emailBodyBuffer = await streamToBuffer(emailBodyStream);
|
||||
const emailBodyText = await extractText(emailBodyBuffer, 'text/plain');
|
||||
const parsedEmail = await simpleParser(emailBodyBuffer);
|
||||
const emailBodyText =
|
||||
parsedEmail.text ||
|
||||
parsedEmail.html ||
|
||||
(await extractText(emailBodyBuffer, 'text/plain')) ||
|
||||
'';
|
||||
|
||||
const recipients = email.recipients as DbRecipients;
|
||||
|
||||
@@ -96,6 +85,7 @@ export class IndexingService {
|
||||
body: emailBodyText,
|
||||
attachments: attachmentContents,
|
||||
timestamp: new Date(email.sentAt).getTime(),
|
||||
ingestionSourceId: email.ingestionSourceId
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import { Index, MeiliSearch } from 'meilisearch';
|
||||
import { Index, MeiliSearch, SearchParams } from 'meilisearch';
|
||||
import { config } from '../config';
|
||||
import type { SearchQuery, SearchResult, EmailDocument } from '@open-archive/types';
|
||||
|
||||
export class SearchService {
|
||||
private client: MeiliSearch;
|
||||
@@ -32,7 +33,38 @@ export class SearchService {
|
||||
return index.search(query, options);
|
||||
}
|
||||
|
||||
// Add other methods as needed (e.g., deleteDocuments, updateSettings)
|
||||
/**
 * Runs a paginated search over the `emails` index, newest first.
 *
 * Each entry of `dto.filters` becomes an equality clause and all clauses are
 * combined with `AND`. Filter input originates from API clients, so attribute
 * names are validated and string values are escaped before being placed in
 * the filter expression.
 *
 * @param dto Search text plus optional filters and pagination
 * (`page` defaults to 1, `limit` defaults to 10).
 * @returns The hits of the requested page together with pagination metadata.
 * `total` is the engine's estimated hit count when available, otherwise the
 * number of hits returned.
 * @throws Error when a filter attribute name is malformed or a filter value
 * is neither a string, a finite number, nor a boolean.
 */
public async searchEmails(dto: SearchQuery): Promise<SearchResult> {
    const { query, filters, page = 1, limit = 10 } = dto;
    const index = await this.getIndex<EmailDocument>('emails');

    const searchParams: SearchParams = {
        limit,
        offset: (page - 1) * limit,
        attributesToHighlight: ['body', 'attachments.*.content'],
        showMatchesPosition: true,
        sort: ['timestamp:desc']
    };

    if (filters) {
        const clauses: string[] = [];

        for (const [key, value] of Object.entries(filters)) {
            // An unset optional filter should not produce a clause at all.
            if (value === undefined || value === null) {
                continue;
            }

            // Only plain attribute paths are allowed; anything else could smuggle
            // operators into the filter expression.
            if (!/^[A-Za-z_][A-Za-z0-9_.-]*$/.test(key)) {
                throw new Error(`Invalid filter attribute: ${key}`);
            }

            if (typeof value === 'string') {
                // Escape backslashes first, then quotes, so the value cannot
                // terminate the quoted literal early.
                const escaped = value.replace(/\\/g, '\\\\').replace(/'/g, "\\'");
                clauses.push(`${key} = '${escaped}'`);
            } else if (
                (typeof value === 'number' && Number.isFinite(value)) ||
                typeof value === 'boolean'
            ) {
                clauses.push(`${key} = ${value}`);
            } else {
                throw new Error(`Unsupported filter value for attribute: ${key}`);
            }
        }

        // Leave `filter` unset rather than sending an empty expression.
        if (clauses.length > 0) {
            searchParams.filter = clauses.join(' AND ');
        }
    }

    const searchResults = await index.search(query, searchParams);
    const total = searchResults.estimatedTotalHits ?? searchResults.hits.length;

    return {
        hits: searchResults.hits,
        total,
        page,
        limit,
        // Guard the division so a zero limit cannot yield NaN/Infinity.
        totalPages: limit > 0 ? Math.ceil(total / limit) : 0
    };
}
|
||||
|
||||
public async configureEmailIndex() {
|
||||
const index = await this.getIndex('emails');
|
||||
@@ -47,8 +79,8 @@ export class SearchService {
|
||||
'attachments.filename',
|
||||
'attachments.content',
|
||||
],
|
||||
filterableAttributes: ['from', 'to', 'cc', 'bcc', 'timestamp'],
|
||||
sortableAttributes: ['timestamp'],
|
||||
filterableAttributes: ['from', 'to', 'cc', 'bcc', 'timestamp', 'ingestionSourceId'],
|
||||
sortableAttributes: ['timestamp']
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,7 +13,13 @@ const processor = async (job: any) => {
|
||||
|
||||
const worker = new Worker('indexing', processor, {
|
||||
connection,
|
||||
concurrency: 5
|
||||
concurrency: 5,
|
||||
removeOnComplete: {
|
||||
count: 1000, // keep last 1000 jobs
|
||||
},
|
||||
removeOnFail: {
|
||||
count: 5000, // keep last 5000 failed jobs
|
||||
},
|
||||
});
|
||||
|
||||
console.log('Indexing worker started');
|
||||
|
||||
@@ -14,7 +14,15 @@ const processor = async (job: any) => {
|
||||
}
|
||||
};
|
||||
|
||||
const worker = new Worker('ingestion', processor, { connection });
|
||||
const worker = new Worker('ingestion', processor, {
|
||||
connection,
|
||||
removeOnComplete: {
|
||||
count: 100, // keep last 100 jobs
|
||||
},
|
||||
removeOnFail: {
|
||||
count: 500, // keep last 500 failed jobs
|
||||
},
|
||||
});
|
||||
|
||||
console.log('Ingestion worker started');
|
||||
|
||||
|
||||
@@ -2,7 +2,10 @@
|
||||
import PostalMime, { type Email } from 'postal-mime';
|
||||
import type { Buffer } from 'buffer';
|
||||
|
||||
let { raw }: { raw: Buffer | { type: 'Buffer'; data: number[] } | undefined } = $props();
|
||||
let {
|
||||
raw,
|
||||
rawHtml
|
||||
}: { raw?: Buffer | { type: 'Buffer'; data: number[] } | undefined; rawHtml?: string } = $props();
|
||||
|
||||
let parsedEmail: Email | null = $state(null);
|
||||
let isLoading = $state(true);
|
||||
@@ -10,8 +13,10 @@
|
||||
// By adding a <base> tag, all relative and absolute links in the HTML document
|
||||
// will open in a new tab by default.
|
||||
let emailHtml = $derived(() => {
|
||||
if (parsedEmail?.html) {
|
||||
if (parsedEmail && parsedEmail?.html) {
|
||||
return `<base target="_blank" />${parsedEmail.html}`;
|
||||
} else if (rawHtml) {
|
||||
return `<base target="_blank" />${rawHtml}`;
|
||||
}
|
||||
return null;
|
||||
});
|
||||
|
||||
@@ -8,7 +8,8 @@
|
||||
const navItems = [
|
||||
{ href: '/dashboard', label: 'Dashboard' },
|
||||
{ href: '/dashboard/ingestions', label: 'Ingestions' },
|
||||
{ href: '/dashboard/archived-emails', label: 'Archived emails' }
|
||||
{ href: '/dashboard/archived-emails', label: 'Archived emails' },
|
||||
{ href: '/dashboard/search', label: 'Search' }
|
||||
];
|
||||
let { children } = $props();
|
||||
function handleLogout() {
|
||||
|
||||
@@ -0,0 +1,42 @@
|
||||
import type { PageServerLoad, Actions } from './$types';
|
||||
import { api } from '$lib/server/api';
|
||||
import type { SearchResult } from '@open-archive/types';
|
||||
|
||||
/**
 * Supplies the initial state of the search page: no result and an empty
 * query. Searches themselves are performed by the form action.
 */
export const load: PageServerLoad = async () => ({
    searchResult: null,
    query: ''
});
|
||||
|
||||
/**
 * Form actions of the search page.
 *
 * `default` reads the `query` form field, forwards it to the backend
 * `/search` endpoint and returns `{ searchResult, query }`, adding an `error`
 * message when the request fails.
 */
export const actions: Actions = {
    default: async (event) => {
        const formData = await event.request.formData();

        // A form field may be missing (null) or a File; only a non-blank string
        // is a usable query.
        const rawQuery = formData.get('query');
        const query = typeof rawQuery === 'string' ? rawQuery.trim() : '';

        if (!query) {
            return { searchResult: null, query: '' };
        }

        try {
            const response = await api(
                '/search',
                event,
                {
                    method: 'POST',
                    body: JSON.stringify({ query })
                }
            );

            if (!response.ok) {
                // The error body is not guaranteed to be JSON (e.g. a proxy error
                // page), so fall back to a generic message instead of surfacing a
                // JSON parse error to the user.
                let message = 'Search request failed';
                try {
                    const errorBody: unknown = await response.json();
                    if (
                        typeof errorBody === 'object' &&
                        errorBody !== null &&
                        'message' in errorBody &&
                        typeof errorBody.message === 'string'
                    ) {
                        message = errorBody.message;
                    }
                } catch {
                    // Keep the generic message.
                }
                return { searchResult: null, query, error: message };
            }

            const searchResult = (await response.json()) as SearchResult;
            return { searchResult, query };
        } catch (error) {
            return {
                searchResult: null,
                query,
                error: error instanceof Error ? error.message : 'Unknown error'
            };
        }
    }
};
|
||||
185
packages/frontend/src/routes/dashboard/search/+page.svelte
Normal file
185
packages/frontend/src/routes/dashboard/search/+page.svelte
Normal file
@@ -0,0 +1,185 @@
|
||||
<script lang="ts">
|
||||
import type { ActionData, PageData } from './$types';
|
||||
import { Button } from '$lib/components/ui/button';
|
||||
import { Input } from '$lib/components/ui/input';
|
||||
import {
|
||||
Card,
|
||||
CardContent,
|
||||
CardHeader,
|
||||
CardTitle,
|
||||
CardDescription
|
||||
} from '$lib/components/ui/card';
|
||||
|
||||
let { data, form }: { data: PageData; form: ActionData } = $props();
|
||||
const searchResult = form?.searchResult ?? data.searchResult;
|
||||
const query = form?.query ?? data.query;
|
||||
const error = form?.error;
|
||||
|
||||
/** Characters that must be entity-encoded before text is injected via `{@html}`. */
const HTML_ENTITIES: Record<string, string> = {
    '&': '&amp;',
    '<': '&lt;',
    '>': '&gt;',
    '"': '&quot;',
    "'": '&#39;'
};

/**
 * Escapes the HTML-significant characters of `text` so it can be rendered
 * with `{@html}` without being interpreted as markup.
 *
 * @param text Raw, untrusted text (email subject, address, body excerpt, ...).
 * @returns The escaped text, or an empty string for empty/missing input.
 */
function escapeHTML(text: string) {
    if (!text) return '';
    // A single pass over the string; once `<` is escaped no `<html` tag can
    // survive, so no separate tag stripping is needed.
    return text.replace(/[&<>"']/g, (ch) => HTML_ENTITIES[ch] ?? ch);
}

/**
 * Produces HTML for `text` in which every matched range is wrapped in a
 * `<mark>` element and everything else is HTML-escaped.
 *
 * The result is always safe to pass to `{@html}`: when there are no match
 * positions the text is still escaped rather than returned verbatim.
 *
 * NOTE(review): offsets are applied as JavaScript string indices. Meilisearch
 * documents `_matchesPosition` offsets in bytes, so highlights may drift on
 * non-ASCII text — confirm and convert if needed.
 *
 * @param text Raw text of the field.
 * @param positions Match ranges within `text`; may be missing or empty.
 * The array is not modified.
 * @returns Escaped HTML with highlighted matches.
 */
function getHighlightedHTML(
    text: string,
    positions?: { start: number; length: number }[]
): string {
    if (!text) {
        return '';
    }
    if (!positions || positions.length === 0) {
        return escapeHTML(text);
    }

    // Sort a copy so the caller's array keeps its order.
    const ordered = [...positions].sort((a, b) => a.start - b.start);

    let highlighted = '';
    let cursor = 0;
    for (const { start, length } of ordered) {
        // Clamp to the unconsumed part of the text so overlapping or
        // out-of-range matches never duplicate characters.
        const from = Math.max(start, cursor);
        const to = Math.min(start + length, text.length);
        if (to <= from) {
            continue;
        }
        highlighted += escapeHTML(text.substring(cursor, from));
        highlighted += `<mark class="bg-yellow-300 dark:bg-yellow-600">${escapeHTML(
            text.substring(from, to)
        )}</mark>`;
        cursor = to;
    }
    highlighted += escapeHTML(text.substring(cursor));
    return highlighted;
}

/**
 * Builds short highlighted excerpts around the matches in `text`.
 *
 * Each snippet contains one match plus up to `contextLength` characters on
 * either side, and is prefixed/suffixed with `...` when it does not reach the
 * start/end of the text. A match that begins inside the previous snippet is
 * skipped to avoid near-duplicate excerpts.
 *
 * @param text Raw text of the field.
 * @param positions Match ranges within `text`. The array is not modified.
 * @param contextLength Number of characters of context on each side.
 * @returns Escaped HTML snippets, in order of appearance.
 */
function getSnippets(
    text: string,
    positions: { start: number; length: number }[],
    contextLength = 15
) {
    if (!text || !positions) {
        return [];
    }

    // Sort a copy so the caller's array keeps its order.
    const ordered = [...positions].sort((a, b) => a.start - b.start);

    const snippets: string[] = [];
    let lastEnd = -1;

    for (const { start, length } of ordered) {
        if (start < lastEnd) {
            // Already covered by the previous snippet.
            continue;
        }

        const snippetStart = Math.max(0, start - contextLength);
        const snippetEnd = Math.min(text.length, start + length + contextLength);
        lastEnd = snippetEnd;

        const snippet = text.substring(snippetStart, snippetEnd);

        // The match offset must be expressed relative to the snippet.
        let highlightedSnippet = getHighlightedHTML(snippet, [
            { start: start - snippetStart, length }
        ]);

        if (snippetStart > 0) {
            highlightedSnippet = '...' + highlightedSnippet;
        }
        if (snippetEnd < text.length) {
            highlightedSnippet += '...';
        }

        snippets.push(highlightedSnippet);
    }

    return snippets;
}
|
||||
</script>
|
||||
|
||||
<svelte:head>
|
||||
<title>Search | OpenArchive</title>
|
||||
<meta name="description" content="Search for archived emails." />
|
||||
</svelte:head>
|
||||
|
||||
<div class="container mx-auto p-4 md:p-8">
|
||||
<h1 class="mb-4 text-2xl font-bold">Email Search</h1>
|
||||
|
||||
<form method="POST" class="mb-8 flex items-center gap-2">
|
||||
<Input
|
||||
type="search"
|
||||
name="query"
|
||||
placeholder="Search by keyword, sender, recipient..."
|
||||
class="flex-grow"
|
||||
value={query}
|
||||
/>
|
||||
<Button type="submit">Search</Button>
|
||||
</form>
|
||||
|
||||
{#if error}
|
||||
<p class="text-red-500">{error}</p>
|
||||
{/if}
|
||||
|
||||
{#if searchResult}
|
||||
<!-- The search result carries no timing information, so only the hit count is shown. -->
<p class="text-muted-foreground mb-4">
    Found {searchResult.total} results
</p>
|
||||
|
||||
<div class="grid gap-4">
|
||||
{#each searchResult.hits as hit}
|
||||
{@const _matchesPosition = hit._matchesPosition || {}}
|
||||
<a href="/dashboard/archived-emails/{hit.id}" class="block">
|
||||
<Card>
|
||||
<CardHeader>
|
||||
<CardTitle>
|
||||
{@html getHighlightedHTML(hit.subject, _matchesPosition.subject)}
|
||||
</CardTitle>
|
||||
<CardDescription>
|
||||
From: {@html getHighlightedHTML(hit.from, _matchesPosition.from)} | To:
|
||||
{@html getHighlightedHTML(hit.to.join(', '), _matchesPosition.to)}
|
||||
|
|
||||
{new Date(hit.timestamp).toLocaleString()}
|
||||
</CardDescription>
|
||||
</CardHeader>
|
||||
<CardContent class="space-y-2">
|
||||
<!-- Body matches -->
|
||||
{#if _matchesPosition.body}
|
||||
{#each getSnippets(hit.body, _matchesPosition.body) as snippet}
|
||||
<div class="space-y-2 rounded-md bg-slate-100 p-2 dark:bg-slate-800">
|
||||
<p class="text-sm text-gray-500">In email body:</p>
|
||||
<p class="font-mono text-sm">
|
||||
{@html snippet}
|
||||
</p>
|
||||
</div>
|
||||
{/each}
|
||||
{/if}
|
||||
|
||||
<!-- Attachment matches -->
|
||||
{#if _matchesPosition['attachments.content']}
|
||||
{#each _matchesPosition['attachments.content'] as match}
|
||||
{#if match.indices}
|
||||
{@const attachmentIndex = match.indices[0]}
|
||||
{@const attachment = hit.attachments[attachmentIndex]}
|
||||
{#if attachment}
|
||||
{#each getSnippets(attachment.content, [match]) as snippet}
|
||||
<div class="space-y-2 rounded-md bg-slate-100 p-2 dark:bg-slate-800">
|
||||
<p class="text-sm text-gray-500">
|
||||
In attachment: {attachment.filename}
|
||||
</p>
|
||||
<p class="font-mono text-sm">
|
||||
{@html snippet}
|
||||
</p>
|
||||
</div>
|
||||
{/each}
|
||||
{/if}
|
||||
{/if}
|
||||
{/each}
|
||||
{/if}
|
||||
</CardContent>
|
||||
</Card>
|
||||
</a>
|
||||
{/each}
|
||||
</div>
|
||||
{/if}
|
||||
</div>
|
||||
@@ -46,3 +46,21 @@ export interface EmailObject {
|
||||
/** An optional buffer containing the full raw EML content of the email, which is useful for archival and compliance purposes. */
|
||||
eml?: Buffer;
|
||||
}
|
||||
|
||||
/** Structure of the document to be indexed in Meilisearch. */
export interface EmailDocument {
    /** The unique ID of the email. */
    id: string;
    from: string;
    to: string[];
    cc: string[];
    bcc: string[];
    subject: string;
    body: string;
    attachments: Array<{
        filename: string;
        /** Extracted text from the attachment. */
        content: string;
    }>;
    timestamp: number;
    ingestionSourceId: string;
    // other metadata
}
|
||||
|
||||
@@ -4,3 +4,4 @@ export * from './ingestion.types';
|
||||
export * from './storage.types';
|
||||
export * from './email.types';
|
||||
export * from './archived-emails.types';
|
||||
export * from './search.types';
|
||||
|
||||
23
packages/types/src/search.types.ts
Normal file
23
packages/types/src/search.types.ts
Normal file
@@ -0,0 +1,23 @@
|
||||
import type { EmailDocument } from './email.types';
|
||||
|
||||
/** Request payload accepted by the email search endpoint. */
export interface SearchQuery {
    /** Text to search for. */
    query: string;
    /** Optional attribute/value pairs used to narrow the search. */
    filters?: Record<string, any>;
    /** Page number to return. */
    page?: number;
    /** Maximum number of hits per page. */
    limit?: number;
}
|
||||
|
||||
/** An indexed email as returned by a search, with optional match metadata. */
export interface SearchHit extends EmailDocument {
    /**
     * Match locations keyed by attribute name. `indices` is optional and is
     * read by the search page to locate the matching attachment.
     */
    _matchesPosition?: Record<
        string,
        Array<{ start: number; length: number; indices?: number[] }>
    >;
    /** Formatted variants of the document's fields. */
    _formatted?: Partial<EmailDocument>;
}
|
||||
|
||||
/** One page of search results together with its pagination metadata. */
export interface SearchResult {
    /** Hits contained in this page. */
    hits: SearchHit[];
    /** Total number of matching documents as reported by the search service. */
    total: number;
    /** Page number this result corresponds to. */
    page: number;
    /** Page size used for the search. */
    limit: number;
    /** Number of pages available for `total` at the given `limit`. */
    totalPages: number;
}
|
||||
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user