import type { CommitDiffSchema } from "@gitbeaker/rest"

/**
 * Parses a decimal capture group, falling back when the group did not
 * participate in the match (e.g. the optional line count of a hunk header).
 * @param {string | undefined} raw - Captured text, or undefined.
 * @param {number} fallback - Value to use when `raw` is undefined.
 * @returns {number} The parsed integer or the fallback.
 */
const parseCount = (raw: string | undefined, fallback: number): number =>
  raw === undefined ? fallback : parseInt(raw, 10)

/**
 * Splits a unified diff into one string per hunk.
 *
 * Each returned chunk starts at its `@@ -a,b +c,d @@` header and runs up to
 * (not including) the next header. Text before the first header is discarded.
 * Headers with an omitted line count (`@@ -3 +3 @@`, meaning a count of 1)
 * are recognised as well.
 * @param {string | null | undefined} diff - Unified diff text.
 * @returns {string[]} The hunks in order; empty when `diff` is nullish or
 *   contains no hunk header.
 */
const splitDiff = (diff: string | null | undefined): string[] => {
  if (diff == null) {
    return []
  }

  // A fresh global regex per call: `exec` keeps its position in `lastIndex`.
  const headerPattern = /^@@ -\d+(?:,\d+)? \+\d+(?:,\d+)? @@.*$/gm
  const starts: number[] = []
  let match: RegExpExecArray | null
  while ((match = headerPattern.exec(diff)) !== null) {
    starts.push(match.index)
  }

  // The last hunk has no successor, so `substring(start, undefined)` runs to the end.
  return starts.map((start, i) => diff.substring(start, starts[i + 1]))
}

/**
 * Reads the first hunk header found in `diff` and converts it to inclusive
 * line ranges for the old and the new side.
 *
 * A missing line count defaults to 1, as in the unified diff format. A count
 * of 0 yields `endLine === startLine - 1`.
 * @param {string} diff - Text containing a hunk header.
 * @returns {object | null} The old/new line ranges, or null when no header is found.
 */
const getStartEndLine = (
  diff: string
): {
  oldHunk: { startLine: number; endLine: number }
  newHunk: { startLine: number; endLine: number }
} | null => {
  const match = /^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@/m.exec(diff)
  if (match == null) {
    return null
  }

  const oldBegin = parseCount(match[1], 0)
  const oldCount = parseCount(match[2], 1)
  const newBegin = parseCount(match[3], 0)
  const newCount = parseCount(match[4], 1)

  return {
    oldHunk: {
      startLine: oldBegin,
      endLine: oldBegin + oldCount - 1,
    },
    newHunk: {
      startLine: newBegin,
      endLine: newBegin + newCount - 1,
    },
  }
}

/**
 * Renders a single hunk as two texts: the old side and the new side.
 *
 * - Removed lines (`-`) go to the old side without their marker.
 * - Added lines (`+`) go to the new side prefixed with `<newLineNumber>: `.
 * - Context lines go to both sides. On the new side they get a line-number
 *   prefix unless they sit in the first or last three lines of the hunk;
 *   when the hunk contains no additions every context line is numbered.
 * - `\ No newline at end of file` markers are metadata and are ignored, so
 *   they neither appear in the output nor advance the line counter.
 * @param {string} diff - A single hunk whose first line is its `@@` header.
 * @returns {object | null} The old/new hunk texts, or null when no header is found.
 */
export const parseDiff = (
  diff: string
): { oldHunk: string; newHunk: string } | null => {
  const hunkInfo = getStartEndLine(diff)
  if (hunkInfo == null) {
    return null
  }

  // Drop the @@ header line.
  const rawLines = diff.split("\n").slice(1)
  // A trailing newline produces one empty element at the end; remove it.
  if (rawLines[rawLines.length - 1] === "") {
    rawLines.pop()
  }
  // Lines starting with a backslash are diff metadata, not file content.
  const lines = rawLines.filter((line) => !line.startsWith("\\"))

  // Context lines within this many lines of either end are left unnumbered.
  const skipStart = 3
  const skipEnd = 3
  const removalOnly = !lines.some((line) => line.startsWith("+"))

  const oldHunkLines: string[] = []
  const newHunkLines: string[] = []
  let newLine = hunkInfo.newHunk.startLine
  let currentLine = 0

  for (const line of lines) {
    currentLine++
    if (line.startsWith("-")) {
      oldHunkLines.push(line.substring(1))
      continue
    }
    if (line.startsWith("+")) {
      newHunkLines.push(`${newLine}: ${line.substring(1)}`)
      newLine++
      continue
    }

    // Context line: present on both sides.
    oldHunkLines.push(line)
    const numbered =
      removalOnly ||
      (currentLine > skipStart && currentLine <= lines.length - skipEnd)
    newHunkLines.push(numbered ? `${newLine}: ${line}` : line)
    newLine++
  }

  return {
    oldHunk: oldHunkLines.join("\n"),
    newHunk: newHunkLines.join("\n"),
  }
}

/**
 * Converts a file's diff into one `[startLine, endLine, hunkText]` tuple per
 * hunk, where the line numbers refer to the new side of the hunk.
 * Hunks that cannot be parsed are skipped.
 * @param {CommitDiffSchema} file - File entry whose `diff` field is read.
 * @returns {[number, number, string][]} One tuple per parsed hunk, in order.
 */
const parseFileDiffs = (file: CommitDiffSchema): [number, number, string][] => {
  const parsed: [number, number, string][] = []

  // A single file diff may contain several hunks.
  for (const diff of diffTools.splitDiff(file.diff)) {
    const diffLines = diffTools.getStartEndLine(diff)
    if (!diffLines) continue
    const hunks = diffTools.parseDiff(diff)
    if (!hunks) continue

    // Template text kept verbatim.
    const hunksStr = ` ---new_hunk--- \`\`\` ${hunks.newHunk} \`\`\` ---old_hunk--- \`\`\` ${hunks.oldHunk} \`\`\` `
    parsed.push([
      diffLines.newHunk.startLine,
      diffLines.newHunk.endLine,
      hunksStr,
    ])
  }

  return parsed
}

export interface Review {
  startLine: number
  endLine: number
  comment: string
}

/**
 * Removes `<number>: ` line prefixes inside every fenced code block that
 * opens with the given label (for example a `suggestion` block).
 * An unterminated block stops the scan and is left untouched.
 * @param {string} comment - Text that may contain fenced code blocks.
 * @param {string} codeBlockLabel - Label that follows the opening fence.
 * @returns {string} The text with line-number prefixes stripped inside matching blocks.
 */
const sanitizeCodeBlock = (comment: string, codeBlockLabel: string): string => {
  const codeBlockStart = `\`\`\`${codeBlockLabel}`
  const codeBlockEnd = "```"
  const lineNumberRegex = /^ *(\d+): /gm

  let codeBlockStartIndex = comment.indexOf(codeBlockStart)
  while (codeBlockStartIndex !== -1) {
    const contentStart = codeBlockStartIndex + codeBlockStart.length
    const codeBlockEndIndex = comment.indexOf(codeBlockEnd, contentStart)
    if (codeBlockEndIndex === -1) break

    const sanitizedBlock = comment
      .substring(contentStart, codeBlockEndIndex)
      .replace(lineNumberRegex, "")
    comment =
      comment.slice(0, contentStart) +
      sanitizedBlock +
      comment.slice(codeBlockEndIndex)

    // Resume after the closing fence of the block that was just rewritten.
    codeBlockStartIndex = comment.indexOf(
      codeBlockStart,
      contentStart + sanitizedBlock.length + codeBlockEnd.length
    )
  }
  return comment
}

/**
 * Anchors a review to the diff hunk it overlaps most.
 *
 * A review that lies completely inside a hunk is returned unchanged.
 * Otherwise a note with the original line range is prepended to the comment
 * and the review is moved to the hunk with the largest overlap, or to the
 * first hunk when nothing overlaps. With no hunks at all the review keeps
 * its own line numbers.
 * @param {Review} review - The review to place; not mutated.
 * @param {Array<[number, number, string]>} diffs - Hunks as `[startLine, endLine, text]`.
 * @returns {Review} The anchored review.
 */
const anchorReviewToDiffs = (
  review: Review,
  diffs: Array<[number, number, string]>
): Review => {
  const anchored: Review = { ...review }
  const reviewLength = anchored.endLine - anchored.startLine + 1

  let withinDiff = false
  let bestDiffStartLine = -1
  let bestDiffEndLine = -1
  let maxIntersection = 0

  // Find the hunk sharing the most lines with the review's range.
  for (const [startLine, endLine] of diffs) {
    const intersectionStart = Math.max(anchored.startLine, startLine)
    const intersectionEnd = Math.min(anchored.endLine, endLine)
    const intersectionLength = Math.max(
      0,
      intersectionEnd - intersectionStart + 1
    )
    if (intersectionLength > maxIntersection) {
      maxIntersection = intersectionLength
      bestDiffStartLine = startLine
      bestDiffEndLine = endLine
      withinDiff = intersectionLength === reviewLength
    }
    if (withinDiff) break
  }

  if (withinDiff) {
    return anchored
  }

  if (bestDiffStartLine !== -1 && bestDiffEndLine !== -1) {
    anchored.comment = `> 注意:此CR评论不在差异范围内,因此被映射到重叠最多的Diff。原始行号 [${anchored.startLine}-${anchored.endLine}] ${anchored.comment}`
    anchored.startLine = bestDiffStartLine
    anchored.endLine = bestDiffEndLine
    return anchored
  }

  anchored.comment = `> 注意:此CR评论不在差异范围内,但未找到与其重叠的Diff。原始行号 [${anchored.startLine}-${anchored.endLine}] ${anchored.comment}`
  // With no hunks there is nothing to map to; keep the original range
  // instead of dereferencing a missing first element.
  const fallback = diffs[0]
  if (fallback !== undefined) {
    anchored.startLine = fallback[0]
    anchored.endLine = fallback[1]
  }
  return anchored
}

/**
 * Parses a review response into individual comments.
 *
 * The response is expected to consist of sections that begin with a line
 * ending in `<start>-<end>:` and are terminated by a `---` line or by the
 * next range line. Text outside such sections is ignored. Line-number
 * prefixes inside `suggestion` and `diff` code blocks are stripped first.
 * @param {string} response - Raw review response text.
 * @param {Array<[number, number, string]>} diffs - Hunks as `[startLine, endLine, text]`.
 * @returns {Review[]} The parsed reviews, each anchored to a hunk where possible.
 */
const parseReview = (
  response: string,
  diffs: Array<[number, number, string]>
): Review[] => {
  const reviews: Review[] = []
  const lineNumberRangeRegex = /(?:^|\s)(\d+)-(\d+):\s*$/
  const commentSeparator = "---"

  let currentStartLine: number | null = null
  let currentEndLine: number | null = null
  let currentComment = ""

  /** Flushes the section being collected, if any, into `reviews`. */
  const storeReview = (): void => {
    if (currentStartLine === null || currentEndLine === null) return
    reviews.push(
      anchorReviewToDiffs(
        {
          startLine: currentStartLine,
          endLine: currentEndLine,
          comment: currentComment,
        },
        diffs
      )
    )
  }

  const cleaned = sanitizeCodeBlock(
    sanitizeCodeBlock(response.trim(), "suggestion"),
    "diff"
  )

  for (const line of cleaned.split("\n")) {
    const lineNumberRangeMatch = line.match(lineNumberRangeRegex)
    if (lineNumberRangeMatch != null) {
      // A new range header closes the previous section and opens a new one.
      storeReview()
      currentStartLine = parseCount(lineNumberRangeMatch[1], 0)
      currentEndLine = parseCount(lineNumberRangeMatch[2], 0)
      currentComment = ""
      continue
    }

    if (line.trim() === commentSeparator) {
      storeReview()
      currentStartLine = null
      currentEndLine = null
      currentComment = ""
      continue
    }

    if (currentStartLine !== null && currentEndLine !== null) {
      currentComment += `${line}\n`
    }
  }

  // Flush a final section that was not closed by a separator.
  storeReview()
  return reviews
}

const diffTools = {
  splitDiff,
  parseDiff,
  getStartEndLine,
  parseFileDiffs,
  parseReview,
}

export default diffTools