From 9d03e9b815d28ab0e1c85e7c0c8e8358f78bc70d Mon Sep 17 00:00:00 2001 From: schobele Date: Fri, 22 Nov 2024 16:15:43 +0100 Subject: [PATCH 1/7] refactor Checkly aggregator to utilize utility functions for mapping check and check result data --- src/aggregator/checkly-aggregator.ts | 45 +++------------------------- src/checkly/utils.ts | 33 ++++++++++++++++++++ 2 files changed, 37 insertions(+), 41 deletions(-) create mode 100644 src/checkly/utils.ts diff --git a/src/aggregator/checkly-aggregator.ts b/src/aggregator/checkly-aggregator.ts index 04d5fd6..f0d6ff2 100644 --- a/src/aggregator/checkly-aggregator.ts +++ b/src/aggregator/checkly-aggregator.ts @@ -1,47 +1,10 @@ import { CheckContext, ContextKey } from "./ContextAggregator"; import { checkly } from "../checkly/client"; import { WebhookAlertDto } from "../checkly/alertDTO"; -import { Check, CheckResult } from "../checkly/models"; - -const getCheckLogs = async (checkId: string, checkResultId: string) => { - const logs = await checkly.getCheckResult(checkId, checkResultId); - console.log("logs"); - console.log(logs); - - return logs; -}; - -const mapCheckToContextValue = (check: Check) => { - return { - checkId: check.id, - type: check.checkType, - frequency: check.frequency, - frequencyOffset: check.frequencyOffset, - shouldFail: check.shouldFail, - locations: check.locations, - tags: check.tags, - maxResponseTime: check.maxResponseTime, - sslCheckDomain: check.sslCheckDomain, - retryStrategy: check.retryStrategy, - }; -}; - -const mapCheckResultToContextValue = (result: CheckResult) => { - return { - resultId: result.id, - hasErrors: result.hasErrors, - hasFailures: result.hasFailures, - runLocation: result.runLocation, - startedAt: result.startedAt, - stoppedAt: result.stoppedAt, - responseTime: result.responseTime, - checkId: result.checkId, - attempts: result.attempts, - isDegraded: result.isDegraded, - overMaxResponseTime: result.overMaxResponseTime, - resultType: result.resultType, - }; -}; +import { + mapCheckResultToContextValue, + mapCheckToContextValue, +} from "../checkly/utils"; export const checklyAggregator = { fetchContext: async (alert: WebhookAlertDto): Promise => { diff --git a/src/checkly/utils.ts b/src/checkly/utils.ts new file mode 100644 index 0000000..83d39d3 --- /dev/null +++ b/src/checkly/utils.ts @@ -0,0 +1,33 @@ +import { Check, CheckResult } from "./models"; + +export const mapCheckToContextValue = (check: Check) => { + return { + checkId: check.id, + type: check.checkType, + frequency: check.frequency, + frequencyOffset: check.frequencyOffset, + shouldFail: check.shouldFail, + locations: check.locations, + tags: check.tags, + maxResponseTime: check.maxResponseTime, + sslCheckDomain: check.sslCheckDomain, + retryStrategy: check.retryStrategy, + }; +}; + +export const mapCheckResultToContextValue = (result: CheckResult) => { + return { + resultId: result.id, + hasErrors: result.hasErrors, + hasFailures: result.hasFailures, + runLocation: result.runLocation, + startedAt: result.startedAt, + stoppedAt: result.stoppedAt, + responseTime: result.responseTime, + checkId: result.checkId, + attempts: result.attempts, + isDegraded: result.isDegraded, + overMaxResponseTime: result.overMaxResponseTime, + resultType: result.resultType, + }; +}; From f7803abc72a6219200c3350520615b396e317700 Mon Sep 17 00:00:00 2001 From: schobele Date: Fri, 22 Nov 2024 16:16:00 +0100 Subject: [PATCH 2/7] remove debug logging from formatToolOutput function in utils --- src/ai/utils.ts | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/ai/utils.ts b/src/ai/utils.ts index a405edd..e95e224 100644 --- a/src/ai/utils.ts +++ b/src/ai/utils.ts @@ -33,8 +33,6 @@ export const formatToolOutput = ( toolCallId: string, output: unknown ): RunSubmitToolOutputsParams.ToolOutput => { - console.log("output", output); - return { output: JSON.stringify(output), tool_call_id: toolCallId, From 0f2cdeb5623373118fa8d0f3e4fbe0664ebfd0b5 Mon Sep 17 00:00:00 2001 From: schobele Date: Fri, 22 Nov 2024 16:16:19 +0100 Subject: [PATCH 3/7] refactor Slack app initialization and event handling for improved configuration and error handling --- src/slackbot/app.ts | 122 +++++++++++++++++------------------------ src/slackbot/config.ts | 36 ++++++++++++ src/slackbot/utils.ts | 17 ++++++ 3 files changed, 104 insertions(+), 71 deletions(-) create mode 100644 src/slackbot/config.ts create mode 100644 src/slackbot/utils.ts diff --git a/src/slackbot/app.ts b/src/slackbot/app.ts index a972786..6118d17 100644 --- a/src/slackbot/app.ts +++ b/src/slackbot/app.ts @@ -1,87 +1,51 @@ -import { App, LogLevel } from "@slack/bolt"; +import { App } from "@slack/bolt"; import { getOpenaiClient, getOpenaiSDKClient } from "../ai/openai"; import { getRunMessages } from "../ai/utils"; import { SreAssistant } from "../sre-assistant/SreAssistant"; -import GitHubAPI from "../github/github"; -import { GithubAgent } from "../github/agent"; - -export const app = new App({ - signingSecret: process.env.SLACK_SIGNING_SECRET, - token: process.env.SLACK_AUTH_TOKEN, - appToken: process.env.SLACK_APP_TOKEN, - socketMode: true, - logLevel: - process.env.NODE_ENV !== "production" ? LogLevel.DEBUG : LogLevel.INFO, -}); - -app.command("/help123", async ({ command, ack }) => { - await ack(); - await app.client.chat.postEphemeral({ - channel: command.channel_id, - text: "hey", - user: command.user_id, - }); -}); - -app.message(`hey help`, async ({ message, context }) => { - await app.client.chat.postEphemeral({ - channel: message.channel, - text: "e", - user: context.userId!, - }); -}); - -app.message("Hey SREBot", async ({ say }) => { - await say("helloworld"); -}); - -app.message("whatismyuserid", async ({ context, say }) => { - await say(context.userId!); -}); - -let setupAgent = () => { - const CHECKLY_GITHUB_TOKEN = process.env.CHECKLY_GITHUB_TOKEN!; - - let openai = getOpenaiSDKClient(); - let github = new GitHubAPI(CHECKLY_GITHUB_TOKEN); - - return new GithubAgent(openai("gpt-4o"), github); +import { getSlackConfig, validateConfig } from "./config"; +import { getThreadMetadata } from "./utils"; + +// Initialize Slack app with validated configuration +const initializeSlackApp = () => { + const config = getSlackConfig(); + validateConfig(config); + return new App(config); }; -const githubAgent = setupAgent(); +export const app = initializeSlackApp(); +// Event handling app.event("app_mention", async ({ event, context }) => { try { - let threadId; - let alertId = "test"; + let threadId, alertId; + const threadTs = (event as any).thread_ts || event.ts; + // Handle threaded conversations if ((event as any).thread_ts) { try { const result = await app.client.conversations.replies({ channel: event.channel, ts: (event as any).thread_ts, - limit: 1, include_all_metadata: true, }); - if (result.messages && result.messages.length > 0) { - const metadata = result.messages[0].metadata?.event_payload as { - threadId: string; - alertId: string; - }; - threadId = metadata?.threadId; - alertId = metadata?.alertId; - } + const { threadId: existingThreadId, alertId: existingAlertId } = + await getThreadMetadata(result.messages || []); + + threadId = existingThreadId; + alertId = existingAlertId; } catch (error) { - console.error("Error fetching parent message:", error); + console.error("Error fetching thread replies:", error); } } + // Create new thread if needed if (!threadId) { const thread = await getOpenaiClient().beta.threads.create(); threadId = thread.id; } + // Initialize assistant and process message const assistant = new SreAssistant(threadId, alertId, { username: event.user_profile?.display_name || @@ -90,28 +54,44 @@ app.event("app_mention", async ({ event, context }) => { "Unknown User", date: new Date().toISOString(), }); - const userMessage = await assistant.addMessage(event.text); - const responseMessages = await assistant - .runSync() - .then((run) => getRunMessages(threadId, run.id)); - const send = async (msg: string) => { + await assistant.addMessage(event.text); + const run = await assistant.runSync(); + const responseMessages = await getRunMessages(threadId, run.id); + + // Send responses + const sendMessage = (msg: string) => app.client.chat.postMessage({ token: context.botToken, channel: event.channel, text: msg, - thread_ts: (event as any).thread_ts || event.ts, + thread_ts: threadTs, + ...(threadId && { + metadata: { + event_type: "alert", + event_payload: { threadId }, + }, + }), }); - }; - await responseMessages.map((msg) => - send( - msg.content - .map((c) => (c.type === "text" ? c.text.value : "")) - .join("\n") + await Promise.all( + responseMessages.map((msg) => + sendMessage( + msg.content + .filter((c) => c.type === "text") + .map((c) => (c as any).text.value) + .join("") + ) ) ); } catch (error) { - console.error("Error reacting to mention:", error); + console.error("Error processing app mention:", error); + // Send error message to channel + await app.client.chat.postMessage({ + token: context.botToken, + channel: event.channel, + text: "Sorry, I encountered an error while processing your request.", + thread_ts: (event as any).thread_ts || event.ts, + }); } }); diff --git a/src/slackbot/config.ts b/src/slackbot/config.ts new file mode 100644 index 0000000..d89bd1d --- /dev/null +++ b/src/slackbot/config.ts @@ -0,0 +1,36 @@ +import { LogLevel } from "@slack/bolt"; + +interface SlackConfig { + signingSecret: string; + token: string; + appToken: string; + socketMode: boolean; + logLevel: LogLevel; +} + +export const getSlackConfig = (): SlackConfig => ({ + signingSecret: process.env.SLACK_SIGNING_SECRET!, + token: process.env.SLACK_AUTH_TOKEN!, + appToken: process.env.SLACK_APP_TOKEN!, + socketMode: true, + logLevel: + process.env.NODE_ENV !== "production" ? LogLevel.DEBUG : LogLevel.INFO, +}); + +export const validateConfig = (config: SlackConfig): void => { + const requiredEnvVars = [ + "SLACK_SIGNING_SECRET", + "SLACK_AUTH_TOKEN", + "SLACK_APP_TOKEN", + ]; + + const missingVars = requiredEnvVars.filter( + (varName) => !process.env[varName] + ); + + if (missingVars.length > 0) { + throw new Error( + `Missing required environment variables: ${missingVars.join(", ")}` + ); + } +}; diff --git a/src/slackbot/utils.ts b/src/slackbot/utils.ts new file mode 100644 index 0000000..6fd03ac --- /dev/null +++ b/src/slackbot/utils.ts @@ -0,0 +1,17 @@ +export const getThreadMetadata = async (messages: any[]) => { + let threadId, alertId; + + if (messages && messages.length > 0) { + const firstBotMessage = messages.find((msg) => msg.bot_id); + if (firstBotMessage) { + const metadata = firstBotMessage.metadata?.event_payload as { + threadId: string; + alertId: string; + }; + threadId = metadata?.threadId; + alertId = metadata?.alertId; + } + } + + return { threadId, alertId }; +}; From 71e80966424649f5a32dc7b34e5ab723d67f50bc Mon Sep 17 00:00:00 2001 From: schobele Date: Fri, 22 Nov 2024 16:19:01 +0100 Subject: [PATCH 4/7] update Slack channel ID to use environment variable --- src/routes/checklywebhook.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/routes/checklywebhook.ts b/src/routes/checklywebhook.ts index 1472015..18ebaeb 100644 --- a/src/routes/checklywebhook.ts +++ b/src/routes/checklywebhook.ts @@ -84,7 +84,7 @@ router.post("/", async (req: Request, res: Response) => { }); await app.client.chat.postMessage({ - channel: "C07V9GNU9L6", + channel: process.env.SLACK_ALERT_CHANNEL_ID as string, metadata: { event_type: "alert", event_payload: { From 48b2f12aab1e1408e0c10ea5850fa65fe73a9d1c Mon Sep 17 00:00:00 2001 From: schobele Date: Fri, 22 Nov 2024 16:19:19 +0100 Subject: [PATCH 5/7] Add ChecklyTool for interacting with Checkly API and enhance SreAssistant --- src/sre-assistant/SreAssistant.ts | 34 +++++++- src/sre-assistant/tools/ChecklyTool.ts | 113 +++++++++++++++++++++++++ 2 files changed, 143 insertions(+), 4 deletions(-) create mode 100644 src/sre-assistant/tools/ChecklyTool.ts diff --git a/src/sre-assistant/SreAssistant.ts b/src/sre-assistant/SreAssistant.ts index 5a3d7d3..a1386c6 100644 --- a/src/sre-assistant/SreAssistant.ts +++ b/src/sre-assistant/SreAssistant.ts @@ -3,9 +3,11 @@ import { Tool } from "../ai/Tool"; import type { RunCreateParams } from "openai/resources/beta/threads"; import { SearchContextTool } from "./tools/SearchContextTool"; import { GithubAgentInteractionTool } from "./tools/GithubAgentInteractionTool"; +import { ChecklyTool } from "./tools/ChecklyTool"; +import { prisma } from "../prisma"; export class SreAssistant extends BaseAssistant { - alertId: string; + alertId: string | undefined; interactionContext: { username: string; date: string; @@ -13,7 +15,7 @@ export class SreAssistant extends BaseAssistant { constructor( threadId: string, - alertId: string, + alertId: string | undefined = undefined, interactionContext: { username: string; date: string; @@ -30,6 +32,20 @@ export class SreAssistant extends BaseAssistant { } protected async getInstructions(): Promise { + let alertSummary = ""; + if (this.alertId) { + const alert = await prisma.alert.findUniqueOrThrow({ + where: { + id: this.alertId, + }, + select: { + summary: true, + }, + }); + + alertSummary = alert.summary; + } + return `You are an AI-powered SRE Bot designed to assist in real-time incident management. Your primary goal is to reduce Mean Time To Resolution (MTTR) by automatically aggregating and analyzing contextual data, providing actionable insights, and guiding first responders effectively. Important reminders: @@ -38,18 +54,28 @@ Important reminders: - If you're unsure about any aspect, clearly state your level of confidence - Maintain a professional and calm tone throughout your responses - Focus on providing actionable information that can help reduce MTTR +- Load the check to see the script and understand the context and why the check is failing Interaction Context: Username: ${this.interactionContext["Username"]} Date: ${this.interactionContext["Date"]} +${alertSummary.length > 0 ? `Alert Summary:\n${alertSummary}` : ""} + Format your responses as slack mrkdwn messages and keep the answer concise and relevant.`; } protected async getTools(): Promise { + if (!this.alertId) { + return [new ChecklyTool(this), new GithubAgentInteractionTool(this)]; + } + const searchContextTool = new SearchContextTool(this); await searchContextTool.init(); - - return [new SearchContextTool(this), new GithubAgentInteractionTool(this)]; + return [ + searchContextTool, + new GithubAgentInteractionTool(this), + new ChecklyTool(this), + ]; } } diff --git a/src/sre-assistant/tools/ChecklyTool.ts b/src/sre-assistant/tools/ChecklyTool.ts new file mode 100644 index 0000000..c112f9d --- /dev/null +++ b/src/sre-assistant/tools/ChecklyTool.ts @@ -0,0 +1,113 @@ +import { z } from "zod"; +import { Tool, createToolParameters, createToolOutput } from "../../ai/Tool"; +import { SreAssistant } from "../SreAssistant"; +import { checkly } from "../../checkly/client"; +import { stringify } from "yaml"; +import { + mapCheckResultToContextValue, + mapCheckToContextValue, +} from "../../checkly/utils"; +import { generateObject } from "ai"; +import { getOpenaiSDKClient } from "../../ai/openai"; + +const parameters = createToolParameters( + z.object({ + action: z + .enum([ + "getCheck", + "getCheckResult", + "getAllFailingChecks", + "searchCheck", + ]) + .describe("The action to perform on the Checkly API"), + checkId: z + .string() + .describe( + "The ID of the Check to get information about. Omit this field for the 'getChecksStatus' action." + ) + .optional(), + query: z + .string() + .describe( + "A query to search for checks. Use this field only for the 'searchCheck' action." + ) + .optional(), + }) +); + +const outputSchema = createToolOutput( + z.string().describe("The response from the Checkly API") +); + +export class ChecklyTool extends Tool< + typeof parameters, + typeof outputSchema, + SreAssistant +> { + static parameters = parameters; + static outputSchema = outputSchema; + + constructor(agent: SreAssistant) { + super({ + name: "ChecklyAPI", + description: + "Interact with the Checkly API to retrieve relevant context about checks and check results.", + parameters, + agent, + }); + } + + async execute(input: z.infer) { + if (input.action === "getCheck") { + const check = await checkly.getCheck(input.checkId!); + return stringify({ + ...mapCheckToContextValue(check), + script: check.script, + }); + } else if (input.action === "getCheckResult") { + const results = await checkly + .getCheckResults(input.checkId!, undefined, 1) + .then((result) => { + return result[0]; + }); + + if (!results) { + return "No results found"; + } + + return stringify(mapCheckResultToContextValue(results)); + } else if (input.action === "getAllFailingChecks") { + const status = await checkly.getPrometheusCheckStatus(); + return stringify(status.failing); + } else if (input.action === "searchCheck") { + const checks = await checkly.getChecks(); + const search = await generateObject({ + model: getOpenaiSDKClient()("gpt-4o"), + prompt: `You are the Checkly Check Search Engine. You are given a query and a list of checks. Return the most relevant check that relates to the query. + + Available checks: ${stringify( + checks.map((c) => ({ ...mapCheckToContextValue(c) })) + )} + + Search Query: ${input.query ?? ""}`, + schema: z.object({ + checkName: z.string(), + checkId: z.string(), + }), + }); + + const relevantCheck = checks.find((c) => c.id === search.object.checkId); + + if (!relevantCheck) { + return "No relevant check found"; + } + + return stringify({ + ...mapCheckToContextValue(relevantCheck), + script: relevantCheck.script, + }); + } + + return "Invalid action"; + } +} From 1c860cd4f97b3b6396f210ee0e11d4b1c6fb20a6 Mon Sep 17 00:00:00 2001 From: schobele Date: Fri, 22 Nov 2024 16:48:36 +0100 Subject: [PATCH 6/7] merge feature/checkly-tool --- src/slackbot/app.ts | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/slackbot/app.ts b/src/slackbot/app.ts index cc3a711..baaa996 100644 --- a/src/slackbot/app.ts +++ b/src/slackbot/app.ts @@ -78,7 +78,6 @@ app.command("/srebot-releases", async ({ command, ack, respond }) => { }); }); -// Event handling app.event("app_mention", async ({ event, context }) => { try { let threadId, alertId; @@ -123,7 +122,6 @@ app.event("app_mention", async ({ event, context }) => { const run = await assistant.runSync(); const responseMessages = await getRunMessages(threadId, run.id); - // Send responses const sendMessage = (msg: string) => app.client.chat.postMessage({ token: context.botToken, @@ -150,7 +148,6 @@ app.event("app_mention", async ({ event, context }) => { ); } catch (error) { console.error("Error processing app mention:", error); - // Send error message to channel await app.client.chat.postMessage({ token: context.botToken, channel: event.channel, From 0524846e3e2cb03a7cd0839f7818be584581092e Mon Sep 17 00:00:00 2001 From: schobele Date: Fri, 22 Nov 2024 16:52:32 +0100 Subject: [PATCH 7/7] update prompt --- src/sre-assistant/SreAssistant.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/src/sre-assistant/SreAssistant.ts b/src/sre-assistant/SreAssistant.ts index a1386c6..09a9ea6 100644 --- a/src/sre-assistant/SreAssistant.ts +++ b/src/sre-assistant/SreAssistant.ts @@ -55,6 +55,7 @@ Important reminders: - Maintain a professional and calm tone throughout your responses - Focus on providing actionable information that can help reduce MTTR - Load the check to see the script and understand the context and why the check is failing +- The user is a experienced devops engineer. Don't overcomplicate it, focus on the context and provide actionable insights. They know what they are doing, don't worry about the details. Interaction Context: Username: ${this.interactionContext["Username"]}