Fix CICD AI dedupe

This commit is contained in:
SoftFever
2025-11-23 20:47:07 +08:00
parent b8cb62fb10
commit b78381e4f5
7 changed files with 357 additions and 76 deletions

View File

@@ -0,0 +1,19 @@
---
allowed-tools: Bash(git checkout -b:*), Bash(git add:*), Bash(git status:*), Bash(git push:*), Bash(git commit:*), Bash(gh pr create:*)
description: Commit, push, and open a PR
---
## Context
- Current git status: !`git status`
- Current git diff (staged and unstaged changes): !`git diff HEAD`
- Current branch: !`git branch --show-current`
## Your task
Based on the above changes:
1. Create a new branch if on main
2. Create a single commit with an appropriate message
3. Push the branch to origin
4. Create a pull request using `gh pr create`
5. You have the capability to call multiple tools in a single response. You MUST do all of the above in a single message. Do not use any other tools or do anything else. Do not send any other text or messages besides these tool calls.

View File

@@ -0,0 +1,38 @@
---
allowed-tools: Bash(gh issue view:*), Bash(gh search:*), Bash(gh issue list:*), Bash(gh api:*), Bash(gh issue comment:*)
description: Find duplicate GitHub issues
---
Find up to 3 likely duplicate issues for a given GitHub issue.
To do this, follow these steps precisely:
1. Use an agent to check if the Github issue (a) is closed, (b) does not need to be deduped (eg. because it is broad product feedback without a specific solution, or positive feedback), or (c) already has a duplicates comment that you made earlier. If so, do not proceed.
2. Use an agent to view a Github issue, and ask the agent to return a summary of the issue
3. Then, launch 5 parallel agents to search GitHub for duplicates of this issue, using diverse keywords and search approaches, using the summary from #2
4. Next, feed the results from #2 and #3 into another agent, so that it can filter out false positives that are likely not actually duplicates of the original issue. If there are no duplicates remaining, do not proceed.
5. Finally, comment back on the issue with a list of up to three duplicate issues (or zero, if there are no likely duplicates)
Notes (be sure to tell this to your agents, too):
- Use `gh` to interact with Github, rather than web fetch
- Do not use other tools, beyond `gh` (eg. don't use other MCP servers, file edit, etc.)
- Make a todo list first
- For your comment, follow the following format precisely (assuming for this example that you found 3 suspected duplicates):
---
Found 3 possible duplicate issues:
1. <link to issue>
2. <link to issue>
3. <link to issue>
This issue will be automatically closed as a duplicate in 3 days.
- If your issue is a duplicate, please close it and 👍 the existing issue instead
- To prevent auto-closure, add a comment or 👎 this comment
🤖 OrcaSlicer bot
---

View File

@@ -0,0 +1,40 @@
---
allowed-tools: Bash(gh issue list:*), Bash(gh issue view:*), Bash(gh issue edit:*), TodoWrite
description: Triage GitHub issues and label critical ones for oncall
---
You're an oncall triage assistant for GitHub issues. Your task is to identify critical issues that require immediate oncall attention and apply the "oncall" label.
Repository: OrcaSlicer/OrcaSlicer
Task overview:
1. First, get all open bugs updated in the last 3 days with at least 50 engagements:
```bash
gh issue list --repo OrcaSlicer/OrcaSlicer --state open --label bug --limit 1000 --json number,title,updatedAt,comments,reactions | jq -r '.[] | select((.updatedAt >= (now - 259200 | strftime("%Y-%m-%dT%H:%M:%SZ"))) and ((.comments | length) + ([.reactions[].content] | length) >= 50)) | "\(.number)"'
```
2. Save the list of issue numbers and create a TODO list with ALL of them. This ensures you process every single one.
3. For each issue in your TODO list:
- Use `gh issue view <number> --repo OrcaSlicer/OrcaSlicer --json title,body,labels,comments` to get full details
- Read and understand the full issue content and comments to determine actual user impact
- Evaluate: Is this truly blocking users from using OrcaSlicer?
- Consider: "crash", "stuck", "frozen", "hang", "unresponsive", "cannot use", "blocked", "broken"
- Does it prevent core functionality? Can users work around it?
- Be conservative - only flag issues that truly prevent users from getting work done
4. For issues that are truly blocking and don't already have the "oncall" label:
- Use `gh issue edit <number> --repo OrcaSlicer/OrcaSlicer --add-label "oncall"`
- Mark the issue as complete in your TODO list
5. After processing all issues, provide a summary:
- List each issue number that received the "oncall" label
- Include the issue title and brief reason why it qualified
- If no issues qualified, state that clearly
Important:
- Process ALL issues in your TODO list systematically
- Don't post any comments to issues
- Only add the "oncall" label, never remove it
- Use individual `gh issue view` commands instead of bash for loops to avoid approval prompts

View File

@@ -0,0 +1,44 @@
# Manually dispatched workflow that runs scripts/backfill-duplicate-comments.ts with Bun.
name: Backfill Duplicate Comments
description: Triggers duplicate detection for old issues that don't have duplicate comments
on:
# Only runs when started by hand; there is no schedule or event trigger.
workflow_dispatch:
inputs:
# NOTE(review): exported to the script as DAYS_BACK below, but the script in this change
# selects issues via MIN_ISSUE_NUMBER / MAX_ISSUE_NUMBER and never reads DAYS_BACK —
# confirm whether this input is meant to have an effect.
days_back:
description: 'How many days back to look for old issues'
required: false
default: '90'
type: string
# Defaults to 'true' so a manual run only logs unless dry run is explicitly turned off.
dry_run:
description: 'Dry run mode (true to only log what would be done)'
required: false
default: 'true'
type: choice
options:
- 'true'
- 'false'
jobs:
backfill-duplicate-comments:
runs-on: ubuntu-latest
timeout-minutes: 30
permissions:
contents: read
issues: read
# Write access to Actions is requested because the script dispatches another workflow.
actions: write
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Setup Bun
uses: oven-sh/setup-bun@v2
with:
bun-version: latest
- name: Backfill duplicate comments
run: bun run scripts/backfill-duplicate-comments.ts
env:
# Values handed to the script through environment variables.
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
DAYS_BACK: ${{ inputs.days_back }}
DRY_RUN: ${{ inputs.dry_run }}

View File

@@ -1,5 +1,5 @@
name: Claude Issue Dedupe
description: Automatically dedupe GitHub issues using Claude Code
name: Orca Issue Dedupe
description: Automatically dedupe GitHub issues using AI
on:
issues:
types: [opened]
@@ -11,7 +11,7 @@ on:
type: string
jobs:
claude-dedupe-issues:
dedupe-issues:
runs-on: ubuntu-latest
timeout-minutes: 10
permissions:

View File

@@ -1,73 +0,0 @@
# Stale-issue housekeeping workflow built on actions/stale.
name: Orca bot
on:
schedule:
# Runs once a day at 00:00 (cron schedule).
- cron: "0 0 * * *"
workflow_dispatch:
inputs:
# NOTE(review): this input is declared but not referenced by any step in this workflow.
logLevel:
description: 'Log level'
required: true
default: 'warning'
jobs:
stale:
runs-on: ubuntu-latest
permissions:
issues: write
pull-requests: write
contents: write # only for delete-branch option
steps:
- uses: actions/stale@v10
with:
# Token used for GitHub API authentication (the workflow's GITHUB_TOKEN secret)
repo-token: ${{ secrets.GITHUB_TOKEN }}
# Max number of operations per run
operations-per-run: 1000
# Order in which issues/PRs are fetched
ascending: true
# ISSUES
# Do not auto-close an issue if it is assigned to a milestone
exempt-all-issue-milestones: true
# Exempt all issues with assignees from stale
exempt-all-issue-assignees: true
# Exempt feature requests
exempt-issue-labels: "enhancement"
# Number of idle days before an issue is marked stale
days-before-issue-stale: 90
# Number of idle days after the stale mark before an issue is closed
days-before-issue-close: 7
# Label to apply to stale issues
stale-issue-label: "stale"
# Issue close reason
close-issue-reason: not_planned
# Remove stale label from issues on updates
remove-issue-stale-when-updated: true
# Issue stale message
stale-issue-message: "Orca bot: this issue is stale because it has been open for 90 days with no activity."
# Issue closure message
close-issue-message: "Orca bot: This issue was closed because it has been inactive for 7 days since being marked as stale."
# PRs
# Do not auto-close a PR if it is assigned to a milestone
exempt-all-pr-milestones: true
# Exempt all PRs with assignees from stale
exempt-all-pr-assignees: true
# Skip the stale action for draft PRs
exempt-draft-pr: true
# Number of idle days before a PR is marked stale
# NOTE(review): -1 presumably disables stale marking for PRs — confirm against the actions/stale docs
days-before-pr-stale: -1
# Number of idle days after the stale mark before a PR is closed
# NOTE(review): -1 presumably disables auto-closing PRs — confirm against the actions/stale docs
days-before-pr-close: -1
# Label to apply to stale PRs
stale-pr-label: "stale"
# Label to apply to closed PRs
close-pr-label: not_planned
# Remove stale label from PRs on updates
remove-pr-stale-when-updated: true
# PR stale message
# NOTE(review): the message contains a literal "XX" placeholder rather than a day count.
stale-pr-message: "Orca bot: this PR is stale because it has been open for XX days with no activity."
# PR closure message
# NOTE(review): the message contains a literal "X" placeholder rather than a day count.
close-pr-message: "Orca bot: This PR was closed because it has been inactive for X days since being marked as stale."
# Delete branch after closing a stale PR
delete-branch: true

View File

@@ -0,0 +1,213 @@
#!/usr/bin/env bun
// Minimal ambient typing for the runtime-provided `process` global.
// Only `env` is declared: a string-keyed map whose entries may be undefined.
declare global {
  var process: {
    env: { [name: string]: string | undefined };
  };
}
/** Shape of the issue objects this script reads from the GitHub issues listing. */
interface GitHubIssue {
  /** Issue number; used for range filtering and for building per-issue URLs. */
  number: number;
  /** Issue title; only used in log output. */
  title: string;
  user: { id: number };
  state: string;
  state_reason?: string;
  created_at: string;
  closed_at?: string;
}
/** Shape of the issue-comment objects this script reads from the GitHub API. */
interface GitHubComment {
  id: number;
  /** Author of the comment; `type` is compared against "Bot" when looking for dedupe comments. */
  user: { type: string; id: number };
  /** Comment text; searched for the duplicate-detection marker phrases. */
  body: string;
  created_at: string;
}
/**
 * Sends an authenticated request to the GitHub REST API and returns the decoded JSON body.
 *
 * @param endpoint - API path appended to `https://api.github.com` (for example `/repos/o/r/issues`).
 * @param token - Token sent as a `Bearer` credential in the `Authorization` header.
 * @param method - HTTP method; defaults to `GET`.
 * @param body - Optional payload; when truthy it is JSON-encoded and sent with a JSON content type.
 * @returns The parsed JSON response, or `undefined` when the response carries no body
 *   (for example a `204 No Content` reply to a workflow dispatch).
 * @throws Error when the response status is not successful (`response.ok` is false).
 */
async function githubRequest<T>(endpoint: string, token: string, method: string = 'GET', body?: any): Promise<T> {
  const response = await fetch(`https://api.github.com${endpoint}`, {
    method,
    headers: {
      Authorization: `Bearer ${token}`,
      Accept: "application/vnd.github.v3+json",
      "User-Agent": "backfill-duplicate-comments-script",
      ...(body && { "Content-Type": "application/json" }),
    },
    ...(body && { body: JSON.stringify(body) }),
  });
  if (!response.ok) {
    throw new Error(
      `GitHub API request failed: ${response.status} ${response.statusText}`
    );
  }
  // Read the body as text first: calling `response.json()` on an empty body throws,
  // which turned every successful body-less request into a reported failure.
  const raw = await response.text();
  return raw.trim() === "" ? undefined : JSON.parse(raw);
}
/**
 * Dispatches the dedupe workflow for a single issue, or only logs the intent in dry-run mode.
 *
 * @param owner - Repository owner.
 * @param repo - Repository name.
 * @param issueNumber - Issue to pass to the workflow as the `issue_number` input (sent as a string).
 * @param token - Token forwarded to `githubRequest`.
 * @param dryRun - When true (the default), nothing is sent and a `[DRY RUN]` line is logged instead.
 *
 * NOTE(review): the workflow file name (`claude-dedupe-issues.yml`) and the ref (`main`) are
 * hard-coded — confirm they match the dedupe workflow's actual file name and default branch.
 */
async function triggerDedupeWorkflow(
  owner: string,
  repo: string,
  issueNumber: number,
  token: string,
  dryRun: boolean = true
): Promise<void> {
  if (!dryRun) {
    const dispatchPath = `/repos/${owner}/${repo}/actions/workflows/claude-dedupe-issues.yml/dispatches`;
    const payload = {
      ref: 'main',
      inputs: { issue_number: String(issueNumber) },
    };
    await githubRequest(dispatchPath, token, 'POST', payload);
    return;
  }
  console.log(`[DRY RUN] Would trigger dedupe workflow for issue #${issueNumber}`);
}
/**
 * Reads an integer issue-number bound from the environment.
 * Returns `fallback` when the variable is unset or empty; throws when it is not an integer,
 * because a NaN bound would silently filter out every issue.
 */
function parseIssueNumberEnv(name: string, fallback: number): number {
  const raw = process.env[name];
  if (raw === undefined || raw === "") return fallback;
  const parsed = parseInt(raw, 10);
  if (Number.isNaN(parsed)) {
    throw new Error(`${name} must be an integer, got "${raw}"`);
  }
  return parsed;
}

/**
 * Fetches every comment on an issue, following pagination.
 * A single un-paginated request only returns the first page, so a dedupe comment
 * further down a long thread would be missed.
 */
async function fetchAllIssueComments(
  owner: string,
  repo: string,
  issueNumber: number,
  token: string
): Promise<GitHubComment[]> {
  const perPage = 100;
  const collected: GitHubComment[] = [];
  for (let page = 1; ; page++) {
    const batch = await githubRequest<GitHubComment[]>(
      `/repos/${owner}/${repo}/issues/${issueNumber}/comments?per_page=${perPage}&page=${page}`,
      token
    );
    collected.push(...batch);
    // A short page is the last page.
    if (batch.length < perPage) break;
  }
  return collected;
}

/**
 * Walks the repository's issues (newest first) and triggers the dedupe workflow for every
 * issue in the configured number range that has no duplicate-detection comment from a bot.
 *
 * Environment variables read:
 * - `GITHUB_TOKEN` (required)
 * - `DRY_RUN`: anything other than the string "false" keeps the run in dry-run mode
 * - `MAX_ISSUE_NUMBER` (exclusive upper bound, default 11000)
 * - `MIN_ISSUE_NUMBER` (inclusive lower bound, default 1)
 *
 * NOTE(review): no `DAYS_BACK` variable is read here; the range is controlled only by the
 * two issue-number bounds.
 *
 * @throws Error when `GITHUB_TOKEN` is missing, when a bound is not an integer, or when
 *   listing issues or comments fails. Failures to trigger a single workflow are logged
 *   and do not abort the run.
 */
async function backfillDuplicateComments(): Promise<void> {
  console.log("[DEBUG] Starting backfill duplicate comments script");
  const token = process.env.GITHUB_TOKEN;
  if (!token) {
    throw new Error(`GITHUB_TOKEN environment variable is required
Usage:
GITHUB_TOKEN=your_token bun run scripts/backfill-duplicate-comments.ts
Environment Variables:
GITHUB_TOKEN - GitHub personal access token with repo and actions permissions (required)
DRY_RUN - Set to "false" to actually trigger workflows (default: true for safety)
MAX_ISSUE_NUMBER - Only process issues with numbers less than this value (default: 11000)
MIN_ISSUE_NUMBER - Only process issues with numbers greater than or equal to this value (default: 1)`);
  }
  console.log("[DEBUG] GitHub token found");
  const owner = "OrcaSlicer";
  const repo = "OrcaSlicer";
  const dryRun = process.env.DRY_RUN !== "false";
  const maxIssueNumber = parseIssueNumberEnv("MAX_ISSUE_NUMBER", 11000);
  const minIssueNumber = parseIssueNumberEnv("MIN_ISSUE_NUMBER", 1);
  console.log(`[DEBUG] Repository: ${owner}/${repo}`);
  console.log(`[DEBUG] Dry run mode: ${dryRun}`);
  console.log(`[DEBUG] Looking at issues between #${minIssueNumber} and #${maxIssueNumber}`);
  console.log(`[DEBUG] Fetching issues between #${minIssueNumber} and #${maxIssueNumber}...`);
  const allIssues: GitHubIssue[] = [];
  let page = 1;
  const perPage = 100;
  while (true) {
    const pageIssues = await githubRequest<GitHubIssue[]>(
      `/repos/${owner}/${repo}/issues?state=all&per_page=${perPage}&page=${page}&sort=created&direction=desc`,
      token
    );
    if (pageIssues.length === 0) break;
    // Keep only real issues within the requested range. The issues listing also returns
    // pull requests (marked by a `pull_request` key), which must not be sent to dedupe.
    const filteredIssues = pageIssues.filter(
      (issue) =>
        !("pull_request" in issue) &&
        issue.number >= minIssueNumber &&
        issue.number < maxIssueNumber
    );
    allIssues.push(...filteredIssues);
    // Results are sorted newest-first, so once the oldest entry on a page falls below the
    // minimum there is nothing left to find on later pages.
    const oldestIssueInPage = pageIssues[pageIssues.length - 1];
    if (oldestIssueInPage && oldestIssueInPage.number >= maxIssueNumber) {
      console.log(`[DEBUG] Oldest issue in page #${page} is #${oldestIssueInPage.number}, continuing...`);
    } else if (oldestIssueInPage && oldestIssueInPage.number < minIssueNumber) {
      console.log(`[DEBUG] Oldest issue in page #${page} is #${oldestIssueInPage.number}, below minimum, stopping`);
      break;
    } else if (filteredIssues.length === 0 && pageIssues.length > 0) {
      console.log(`[DEBUG] No issues in page #${page} are in range #${minIssueNumber}-#${maxIssueNumber}, continuing...`);
    }
    page++;
    // Safety limit to avoid infinite loops
    if (page > 200) {
      console.log("[DEBUG] Reached page limit, stopping pagination");
      break;
    }
  }
  console.log(`[DEBUG] Found ${allIssues.length} issues between #${minIssueNumber} and #${maxIssueNumber}`);
  let processedCount = 0;
  let candidateCount = 0;
  let triggeredCount = 0;
  for (const issue of allIssues) {
    processedCount++;
    console.log(
      `[DEBUG] Processing issue #${issue.number} (${processedCount}/${allIssues.length}): ${issue.title}`
    );
    console.log(`[DEBUG] Fetching comments for issue #${issue.number}...`);
    const comments = await fetchAllIssueComments(owner, repo, issue.number, token);
    console.log(
      `[DEBUG] Issue #${issue.number} has ${comments.length} comments`
    );
    // Look for existing duplicate detection comments (from the dedupe bot).
    // `body` and `user` are read defensively so one odd comment cannot crash the run.
    const dupeDetectionComments = comments.filter((comment) => {
      const text = comment.body ?? "";
      return (
        text.includes("Found") &&
        text.includes("possible duplicate") &&
        comment.user?.type === "Bot"
      );
    });
    console.log(
      `[DEBUG] Issue #${issue.number} has ${dupeDetectionComments.length} duplicate detection comments`
    );
    // Skip if there's already a duplicate detection comment
    if (dupeDetectionComments.length > 0) {
      console.log(
        `[DEBUG] Issue #${issue.number} already has duplicate detection comment, skipping`
      );
      continue;
    }
    candidateCount++;
    const issueUrl = `https://github.com/${owner}/${repo}/issues/${issue.number}`;
    try {
      console.log(
        `[INFO] ${dryRun ? '[DRY RUN] ' : ''}Triggering dedupe workflow for issue #${issue.number}: ${issueUrl}`
      );
      await triggerDedupeWorkflow(owner, repo, issue.number, token, dryRun);
      if (!dryRun) {
        console.log(
          `[SUCCESS] Successfully triggered dedupe workflow for issue #${issue.number}`
        );
      }
      triggeredCount++;
    } catch (error) {
      console.error(
        `[ERROR] Failed to trigger workflow for issue #${issue.number}: ${error}`
      );
    }
    // Add a delay between workflow triggers to avoid overwhelming the system
    await new Promise(resolve => setTimeout(resolve, 1000));
  }
  console.log(
    `[DEBUG] Script completed. Processed ${processedCount} issues, found ${candidateCount} candidates without duplicate comments, ${dryRun ? 'would trigger' : 'triggered'} ${triggeredCount} workflows`
  );
}
// Run the backfill. The error is logged and then rethrown so the rejection stays unhandled:
// swallowing it with a bare `console.error` let the process exit successfully, which made a
// failed CI run look green.
backfillDuplicateComments().catch((error: unknown) => {
  console.error(error);
  throw error;
});
// Make it a module
export {};