import { CommitDiffSchema } from "@gitbeaker/rest"
import pLimit from "p-limit"
import { Logger } from "winston"
import chatTools from "../../utils/chatTools"
import tokenTools from "../../utils/tokenTools"
import { Inputs } from "./utils/inputs"
import { Prompts } from "./utils/prompts"

/** Text inserted between diffs that share the same `new_path`. */
const DIFF_SEPARATOR = "\n--- DIFF SEPARATOR ---\n"

/** Prompts whose token count exceeds this value are skipped instead of sent. */
const MAX_PROMPT_TOKENS = 100 * 1000

/** Maximum number of summarize requests in flight at the same time. */
const SUMMARY_CONCURRENCY = 5

/** Matches the triage marker that the model is asked to emit in a summary. */
const TRIAGE_REGEX = /\[TRIAGE\]:\s*(NEEDS_REVIEW|APPROVED)/

/**
 * Summarize every changed file and, optionally, triage which files need review.
 *
 * Diffs that share a `new_path` are merged (joined by `DIFF_SEPARATOR`) and
 * summarized once. Files whose prompt is too long, or whose summary request
 * fails, are logged and left out of both returned maps.
 *
 * @param files - Diffs to summarize.
 * @param needReview - When true, the summarize prompt is rendered in review
 *   mode and each summary is scanned for a `[TRIAGE]` marker.
 * @param rawInputs - Base inputs; cloned per file before being filled in.
 * @param logger - Logger receiving debug/info/error output.
 * @returns `summarizedFileMap` (path -> summary text, with the triage marker
 *   stripped when one was found) and `needReviewFileMap` (path -> boolean,
 *   empty when `needReview` is false).
 */
const summaryFiles = async (
  files: CommitDiffSchema[],
  needReview: boolean,
  rawInputs: Inputs,
  logger: Logger
) => {
  // Prompt renderer used for every file.
  const prompts: Prompts = new Prompts()

  // Merge diffs by file path so each path is summarized exactly once.
  const fileMap = new Map<string, string>()
  for (const file of files) {
    const previous = fileMap.get(file.new_path) || ""
    const merged = previous
      ? previous + DIFF_SEPARATOR + file.diff
      : file.diff
    fileMap.set(file.new_path, merged)
  }

  // path -> summary. Left untyped on purpose so the returned shape stays
  // exactly what existing callers already consume.
  const summarizedFileMap = new Map()

  /**
   * Generate the summary for one file and store it in `summarizedFileMap`.
   *
   * Never rejects: every failure (prompt rendering, token counting, model
   * creation, model invocation) is caught and logged, so one bad file cannot
   * fail silently or affect the others.
   *
   * @param diff - Merged diff text of the file.
   * @param path - File path, used as the map key and in log messages.
   */
  const doFileSummary = async (diff: string, path: string): Promise<void> => {
    try {
      const inputs = rawInputs.clone()
      inputs.fileDiff = diff
      inputs.filename = path

      const summarizePrompt = prompts.renderSummarizeFileDiff(
        inputs,
        needReview
      )
      logger.debug(`summarizePrompt for ${path}: ${summarizePrompt}`)

      const tokens = tokenTools.getTokenCount(summarizePrompt)
      logger.debug(`tokens for ${path}: ${tokens}`)
      if (tokens > MAX_PROMPT_TOKENS) {
        logger.error(
          `File diff too long for ${path} (${tokens} tokens), skipping`
        )
        return
      }

      // NOTE(review): the `0` argument is presumably the sampling
      // temperature - confirm against chatTools.
      const codeChatBot = await chatTools.getGpt4oModel(0)
      const { content: summarize } = await codeChatBot.invoke(summarizePrompt)

      // Triage below calls String.prototype.match, so only plain, non-empty
      // text is accepted as a summary.
      if (!summarize) throw new Error("Empty summarize")
      if (typeof summarize !== "string") {
        throw new Error("Unexpected non-string summarize")
      }

      logger.info(`summarize for ${path}: ${summarize}`)
      summarizedFileMap.set(path, summarize)
    } catch (error) {
      const reason = error instanceof Error ? error.message : String(error)
      logger.error(`Failed to summarize for ${path}: ${reason}`)
    }
  }

  // Summarize all files with bounded concurrency.
  const limit = pLimit(SUMMARY_CONCURRENCY)
  const promises = Array.from(fileMap.entries()).map(([path, diff]) =>
    limit(() => doFileSummary(diff, path))
  )
  await Promise.allSettled(promises)

  // path -> whether the file needs review.
  const needReviewFileMap = new Map()

  // Without triage there is nothing more to do.
  if (!needReview) {
    return {
      summarizedFileMap,
      needReviewFileMap,
    }
  }

  /**
   * Record the triage verdict for one file and strip the marker from its
   * stored summary.
   *
   * A summary without a recognizable marker is treated as needing review and
   * its text is left untouched.
   *
   * @param path - File path.
   * @param summarize - Summary text produced for the file.
   */
  const manageTriage = (path: string, summarize: string) => {
    const triageMatch = summarize.match(TRIAGE_REGEX)

    // No marker found: log it and fall back to requiring review.
    if (!triageMatch) {
      logger.error(`Failed to triage for ${path}`)
      needReviewFileMap.set(path, true)
      return
    }

    if (triageMatch[1] === "APPROVED") {
      logger.info(`Approved for ${path}`)
      needReviewFileMap.set(path, false)
    } else {
      logger.info(`Needs review for ${path}`)
      needReviewFileMap.set(path, true)
    }

    // Remove the triage marker from the stored summary.
    const newSummarize = summarize.replace(TRIAGE_REGEX, "").trim()
    summarizedFileMap.set(path, newSummarize)
  }

  // Triage every summarized file. Iterate over a snapshot because
  // manageTriage writes back into summarizedFileMap.
  for (const [path, summarize] of Array.from(summarizedFileMap.entries())) {
    manageTriage(path, summarize)
  }

  return {
    summarizedFileMap,
    needReviewFileMap,
  }
}

export default summaryFiles