Preserve original email path and tags. Emails with path now will be stored within their path

This commit is contained in:
Wayne
2025-08-09 16:40:25 +03:00
parent 29db34c5d8
commit 512f0312ba
16 changed files with 1276 additions and 39 deletions

View File

@@ -0,0 +1,2 @@
ALTER TABLE "archived_emails" ADD COLUMN "path" text;--> statement-breakpoint
ALTER TABLE "archived_emails" ADD COLUMN "tags" jsonb;

File diff suppressed because it is too large Load Diff

View File

@@ -92,6 +92,13 @@
"when": 1754476962901,
"tag": "0012_warm_the_stranger",
"breakpoints": true
},
{
"idx": 13,
"version": "7",
"when": 1754659373517,
"tag": "0013_classy_talkback",
"breakpoints": true
}
]
}

View File

@@ -24,6 +24,8 @@ export const archivedEmails = pgTable(
hasAttachments: boolean('has_attachments').notNull().default(false),
isOnLegalHold: boolean('is_on_legal_hold').notNull().default(false),
archivedAt: timestamp('archived_at', { withTimezone: true }).notNull().defaultNow(),
path: text('path'),
tags: jsonb('tags'),
},
(table) => [index('thread_id_idx').on(table.threadId)]
);

View File

@@ -59,7 +59,9 @@ export class ArchivedEmailService {
return {
items: items.map((item) => ({
...item,
recipients: this.mapRecipients(item.recipients)
recipients: this.mapRecipients(item.recipients),
tags: (item.tags as string[] | null) || null,
path: item.path || null
})),
total: total.count,
page,
@@ -103,7 +105,9 @@ export class ArchivedEmailService {
...email,
recipients: this.mapRecipients(email.recipients),
raw,
thread: threadEmails
thread: threadEmails,
tags: (email.tags as string[] | null) || null,
path: email.path || null
};
if (email.hasAttachments) {

View File

@@ -293,7 +293,7 @@ export class IngestionService {
console.log('processing email, ', email.id, email.subject);
const emlBuffer = email.eml ?? Buffer.from(email.body, 'utf-8');
const emailHash = createHash('sha256').update(emlBuffer).digest('hex');
const emailPath = `${config.storage.openArchiverFolderName}/${source.name.replaceAll(' ', '-')}-${source.id}/emails/${email.id}.eml`;
const emailPath = `${config.storage.openArchiverFolderName}/${source.name.replaceAll(' ', '-')}-${source.id}/emails/${email.path ? `${email.path}/` : ''}${email.id}.eml`;
await storage.put(emailPath, emlBuffer);
const [archivedEmail] = await db
@@ -315,7 +315,9 @@ export class IngestionService {
storagePath: emailPath,
storageHashSha256: emailHash,
sizeBytes: emlBuffer.length,
hasAttachments: email.attachments.length > 0
hasAttachments: email.attachments.length > 0,
path: email.path,
tags: email.tags
})
.returning();

View File

@@ -168,9 +168,18 @@ export class GoogleWorkspaceConnector implements IEmailConnector {
for (const messageAdded of historyRecord.messagesAdded) {
if (messageAdded.message?.id) {
try {
const messageId = messageAdded.message.id;
const metadataResponse = await gmail.users.messages.get({
userId: userEmail,
id: messageId,
format: 'METADATA',
fields: 'labelIds'
});
const labels = await this.getLabelDetails(gmail, userEmail, metadataResponse.data.labelIds || []);
const msgResponse = await gmail.users.messages.get({
userId: userEmail,
id: messageAdded.message.id,
id: messageId,
format: 'RAW'
});
@@ -205,6 +214,8 @@ export class GoogleWorkspaceConnector implements IEmailConnector {
headers: parsedEmail.headers,
attachments,
receivedAt: parsedEmail.date || new Date(),
path: labels.path,
tags: labels.tags
};
}
} catch (error: any) {
@@ -243,9 +254,18 @@ export class GoogleWorkspaceConnector implements IEmailConnector {
for (const message of messages) {
if (message.id) {
try {
const messageId = message.id;
const metadataResponse = await gmail.users.messages.get({
userId: userEmail,
id: messageId,
format: 'METADATA',
fields: 'labelIds'
});
const labels = await this.getLabelDetails(gmail, userEmail, metadataResponse.data.labelIds || []);
const msgResponse = await gmail.users.messages.get({
userId: userEmail,
id: message.id,
id: messageId,
format: 'RAW'
});
@@ -280,6 +300,8 @@ export class GoogleWorkspaceConnector implements IEmailConnector {
headers: parsedEmail.headers,
attachments,
receivedAt: parsedEmail.date || new Date(),
path: labels.path,
tags: labels.tags
};
}
} catch (error: any) {
@@ -313,4 +335,29 @@ export class GoogleWorkspaceConnector implements IEmailConnector {
}
};
}
private labelCache: Map<string, gmail_v1.Schema$Label> = new Map();
private async getLabelDetails(gmail: gmail_v1.Gmail, userEmail: string, labelIds: string[]): Promise<{ path: string, tags: string[]; }> {
const tags: string[] = [];
let path = '';
for (const labelId of labelIds) {
let label = this.labelCache.get(labelId);
if (!label) {
const res = await gmail.users.labels.get({ userId: userEmail, id: labelId });
label = res.data;
this.labelCache.set(labelId, label);
}
if (label.name) {
tags.push(label.name);
if (label.type === 'user') {
path = path ? `${path}/${label.name}` : label.name;
}
}
}
return { path, tags };
}
}

View File

@@ -196,7 +196,7 @@ export class ImapConnector implements IEmailConnector {
}
if (msg.envelope && msg.source) {
yield await this.parseMessage(msg);
yield await this.parseMessage(msg, mailboxPath);
}
}
@@ -222,7 +222,7 @@ export class ImapConnector implements IEmailConnector {
}
}
private async parseMessage(msg: any): Promise<EmailObject> {
private async parseMessage(msg: any, mailboxPath: string): Promise<EmailObject> {
const parsedEmail: ParsedMail = await simpleParser(msg.source);
const attachments = parsedEmail.attachments.map((attachment: Attachment) => ({
filename: attachment.filename || 'untitled',
@@ -252,7 +252,8 @@ export class ImapConnector implements IEmailConnector {
headers: parsedEmail.headers,
attachments,
receivedAt: parsedEmail.date || new Date(),
eml: msg.source
eml: msg.source,
path: mailboxPath
};
}

View File

@@ -143,9 +143,9 @@ export class MicrosoftConnector implements IEmailConnector {
try {
const folders = this.listAllFolders(userEmail);
for await (const folder of folders) {
if (folder.id) {
if (folder.id && folder.path) {
logger.info({ userEmail, folderId: folder.id, folderName: folder.displayName }, 'Syncing folder');
yield* this.syncFolder(userEmail, folder.id, this.newDeltaTokens[folder.id]);
yield* this.syncFolder(userEmail, folder.id, folder.path, this.newDeltaTokens[folder.id]);
}
}
} catch (error) {
@@ -159,20 +159,33 @@ export class MicrosoftConnector implements IEmailConnector {
* @param userEmail The user principal name or ID.
* @returns An async generator that yields each mail folder.
*/
private async *listAllFolders(userEmail: string): AsyncGenerator<MailFolder> {
let requestUrl: string | undefined = `/users/${userEmail}/mailFolders`;
private async *listAllFolders(userEmail: string, parentFolderId?: string, currentPath = ''): AsyncGenerator<MailFolder & { path: string; }> {
const requestUrl = parentFolderId
? `/users/${userEmail}/mailFolders/${parentFolderId}/childFolders`
: `/users/${userEmail}/mailFolders`;
while (requestUrl) {
try {
const response = await this.graphClient.api(requestUrl).get();
try {
let response = await this.graphClient.api(requestUrl).get();
while (response) {
for (const folder of response.value as MailFolder[]) {
yield folder;
const newPath = currentPath ? `${currentPath}/${folder.displayName || ''}` : folder.displayName || '';
yield { ...folder, path: newPath || '' };
if (folder.childFolderCount && folder.childFolderCount > 0) {
yield* this.listAllFolders(userEmail, folder.id, newPath);
}
}
if (response['@odata.nextLink']) {
response = await this.graphClient.api(response['@odata.nextLink']).get();
} else {
break;
}
requestUrl = response['@odata.nextLink'];
} catch (error) {
logger.error({ err: error, userEmail }, 'Failed to list mail folders');
throw error; // Stop if we can't list folders
}
} catch (error) {
logger.error({ err: error, userEmail }, 'Failed to list mail folders');
throw error;
}
}
@@ -186,6 +199,7 @@ export class MicrosoftConnector implements IEmailConnector {
private async *syncFolder(
userEmail: string,
folderId: string,
path: string,
deltaToken?: string
): AsyncGenerator<EmailObject> {
let requestUrl: string | undefined;
@@ -208,7 +222,7 @@ export class MicrosoftConnector implements IEmailConnector {
if (message.id && !(message)['@removed']) {
const rawEmail = await this.getRawEmail(userEmail, message.id);
if (rawEmail) {
const emailObject = await this.parseEmail(rawEmail, message.id, userEmail);
const emailObject = await this.parseEmail(rawEmail, message.id, userEmail, path);
emailObject.threadId = message.conversationId; // Add conversationId as threadId
yield emailObject;
}
@@ -242,7 +256,7 @@ export class MicrosoftConnector implements IEmailConnector {
}
}
private async parseEmail(rawEmail: Buffer, messageId: string, userEmail: string): Promise<EmailObject> {
private async parseEmail(rawEmail: Buffer, messageId: string, userEmail: string, path: string): Promise<EmailObject> {
const parsedEmail: ParsedMail = await simpleParser(rawEmail);
const attachments = parsedEmail.attachments.map((attachment: Attachment) => ({
filename: attachment.filename || 'untitled',
@@ -270,6 +284,7 @@ export class MicrosoftConnector implements IEmailConnector {
headers: parsedEmail.headers,
attachments,
receivedAt: parsedEmail.date || new Date(),
path
};
}

View File

@@ -159,7 +159,7 @@ export class PSTConnector implements IEmailConnector {
try {
pstFile = await this.loadPstFile();
const root = pstFile.getRootFolder();
yield* this.processFolder(root);
yield* this.processFolder(root, '');
} catch (error) {
logger.error({ error }, 'Failed to fetch email.');
pstFile?.close();
@@ -171,17 +171,19 @@ export class PSTConnector implements IEmailConnector {
}
}
private async *processFolder(folder: PSTFolder): AsyncGenerator<EmailObject | null> {
private async *processFolder(folder: PSTFolder, currentPath: string): AsyncGenerator<EmailObject | null> {
const folderName = folder.displayName.toLowerCase();
if (DELETED_FOLDERS.has(folderName) || JUNK_FOLDERS.has(folderName)) {
logger.info(`Skipping folder: ${folder.displayName}`);
return;
}
const newPath = currentPath ? `${currentPath}/${folder.displayName}` : folder.displayName;
if (folder.contentCount > 0) {
let email: PSTMessage | null = folder.getNextChild();
while (email != null) {
yield await this.parseMessage(email);
yield await this.parseMessage(email, newPath);
try {
email = folder.getNextChild();
} catch (error) {
@@ -193,12 +195,12 @@ export class PSTConnector implements IEmailConnector {
if (folder.hasSubfolders) {
for (const subFolder of folder.getSubFolders()) {
yield* this.processFolder(subFolder);
yield* this.processFolder(subFolder, newPath);
}
}
}
private async parseMessage(msg: PSTMessage): Promise<EmailObject> {
private async parseMessage(msg: PSTMessage, path: string): Promise<EmailObject> {
const emlContent = await this.constructEml(msg);
const emlBuffer = Buffer.from(emlContent, 'utf-8');
const parsedEmail: ParsedMail = await simpleParser(emlBuffer);
@@ -236,7 +238,8 @@ export class PSTConnector implements IEmailConnector {
headers: parsedEmail.headers,
attachments,
receivedAt: parsedEmail.date || new Date(),
eml: emlBuffer
eml: emlBuffer,
path
};
}

View File

@@ -111,6 +111,10 @@
--color-sidebar-ring: var(--sidebar-ring);
}
.link {
@apply hover:text-primary font-medium hover:underline hover:underline-offset-2;
}
@layer base {
* {
@apply border-border outline-ring/50;

View File

@@ -99,28 +99,33 @@
<Table.Header>
<Table.Row>
<Table.Head>Date</Table.Head>
<Table.Head>Inbox</Table.Head>
<Table.Head>Subject</Table.Head>
<Table.Head>Sender</Table.Head>
<Table.Head>Attachments</Table.Head>
<Table.Head>Inbox</Table.Head>
<Table.Head>Path</Table.Head>
<Table.Head class="text-right">Actions</Table.Head>
</Table.Row>
</Table.Header>
<Table.Body>
<Table.Body class="text-sm">
{#if archivedEmails.items.length > 0}
{#each archivedEmails.items as email (email.id)}
<Table.Row>
<Table.Cell>{new Date(email.sentAt).toLocaleString()}</Table.Cell>
<Table.Cell>{email.userEmail}</Table.Cell>
<Table.Cell>
<div class="max-w-100 truncate">
<a href={`/dashboard/archived-emails/${email.id}`}>
<a class="link" href={`/dashboard/archived-emails/${email.id}`}>
{email.subject}
</a>
</div>
</Table.Cell>
<Table.Cell>{email.senderEmail}</Table.Cell>
<Table.Cell>{email.hasAttachments ? 'Yes' : 'No'}</Table.Cell>
<Table.Cell>
{email.senderEmail || email.senderName}
</Table.Cell>
<Table.Cell>{email.userEmail}</Table.Cell>
<Table.Cell>
<span class=" bg-muted truncate rounded p-1.5 text-xs">{email.path} </span>
</Table.Cell>
<Table.Cell class="text-right">
<a href={`/dashboard/archived-emails/${email.id}`}>
<Button variant="outline">View</Button>

View File

@@ -6,6 +6,7 @@
import EmailThread from '$lib/components/custom/EmailThread.svelte';
import { api } from '$lib/api.client';
import { browser } from '$app/environment';
import { formatBytes } from '$lib/utils';
let { data }: { data: PageData } = $props();
let email = $derived(data.email);
@@ -50,9 +51,38 @@
</Card.Header>
<Card.Content>
<div class="space-y-4">
<div>
<div class="space-y-1">
<h3 class="font-semibold">Recipients</h3>
<p>To: {email.recipients.map((r) => r.email || r.name).join(', ')}</p>
<Card.Description>
<p>To: {email.recipients.map((r) => r.email || r.name).join(', ')}</p>
</Card.Description>
</div>
<div class=" space-y-1">
<h3 class="font-semibold">Meta data</h3>
<Card.Description class="space-y-2">
{#if email.path}
<div class="flex flex-wrap items-center gap-2">
<span>Folder:</span>
<span class=" bg-muted truncate rounded p-1.5 text-xs"
>{email.path || '/'}</span
>
</div>
{/if}
{#if email.tags && email.tags.length > 0}
<div class="flex flex-wrap items-center gap-2">
<span> Tags: </span>
{#each email.tags as tag}
<span class=" bg-muted truncate rounded p-1.5 text-xs">{tag}</span>
{/each}
</div>
{/if}
<div class="flex flex-wrap items-center gap-2">
<span>size:</span>
<span class=" bg-muted truncate rounded p-1.5 text-xs"
>{formatBytes(email.sizeBytes)}</span
>
</div>
</Card.Description>
</div>
<div>
<h3 class="font-semibold">Email Preview</h3>

View File

@@ -330,7 +330,9 @@
/>
</Table.Cell>
<Table.Cell>
<a href="/dashboard/archived-emails?ingestionSourceId={source.id}">{source.name}</a>
<a class="link" href="/dashboard/archived-emails?ingestionSourceId={source.id}"
>{source.name}</a
>
</Table.Cell>
<Table.Cell class="capitalize">{source.provider.split('_').join(' ')}</Table.Cell>
<Table.Cell class="min-w-24">

View File

@@ -48,6 +48,8 @@ export interface ArchivedEmail {
attachments?: Attachment[];
raw?: Buffer;
thread?: ThreadEmail[];
path: string | null;
tags: string[] | null;
}
/**

View File

@@ -49,6 +49,10 @@ export interface EmailObject {
eml?: Buffer;
/** The email address of the user whose mailbox this email belongs to. */
userEmail?: string;
/** The folder path of the email in the source mailbox. */
path?: string;
/** An array of tags or labels associated with the email. */
tags?: string[];
}
// Define the structure of the document to be indexed in Meilisearch