// gitlab_monitor/controllers/manageMrEvent/utils/diffTools.ts

import { CommitDiffSchema } from "@gitbeaker/rest"

/**
 * Splits a unified diff into its individual hunks.
 *
 * A hunk starts at a `@@ -a[,b] +c[,d] @@` header line and runs up to (but not
 * including) the next header, or to the end of the input. Text that precedes
 * the first header (for example `---`/`+++` file lines) is discarded.
 *
 * @param {string | null | undefined} diff - Unified diff text.
 * @returns {string[]} One string per hunk, each starting with its `@@` header.
 *   Empty when `diff` is null/undefined or contains no hunk header.
 */
const splitDiff = (diff: string | null | undefined): string[] => {
  if (diff == null) {
    return []
  }

  // The line counts are optional: the unified format omits ",<count>" when a
  // range covers exactly one line (e.g. `@@ -3 +3 @@`).
  const headerPattern = /^@@ -\d+(?:,\d+)? \+\d+(?:,\d+)? @@.*$/gm

  // Record the offset of every hunk header.
  const hunkStarts: number[] = []
  let match: RegExpExecArray | null
  while ((match = headerPattern.exec(diff)) !== null) {
    hunkStarts.push(match.index)
  }

  // Each hunk spans from its header to the next header; for the last hunk the
  // end offset is undefined, which makes substring() run to the end of input.
  return hunkStarts.map((start, i) => diff.substring(start, hunkStarts[i + 1]))
}

/**
 * Reads the first hunk header (`@@ -a[,b] +c[,d] @@`) found in `diff` and
 * converts it to inclusive line ranges for the old and the new file.
 *
 * A range whose count is omitted covers exactly one line. A count of 0
 * produces `endLine === startLine - 1`.
 *
 * @param {string} diff - Text containing a unified diff hunk header.
 * @returns {object | null} Start/end lines of the old and new hunk, or `null`
 *   when no hunk header is present.
 */
const getStartEndLine = (
  diff: string
): {
  oldHunk: { startLine: number; endLine: number }
  newHunk: { startLine: number; endLine: number }
} | null => {
  const header = /^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@/m.exec(diff)
  if (header == null) {
    return null
  }

  // Converts "<start>[,<count>]" into an inclusive range; the count defaults
  // to 1 because the unified format leaves it out for one-line ranges.
  const toRange = (
    start: string,
    count: string | undefined
  ): { startLine: number; endLine: number } => {
    const startLine = parseInt(start, 10)
    const lineCount = count === undefined ? 1 : parseInt(count, 10)
    return { startLine, endLine: startLine + lineCount - 1 }
  }

  return {
    oldHunk: toRange(header[1], header[2]),
    newHunk: toRange(header[3], header[4]),
  }
}

/**
 * Renders one hunk as separate "old" and "new" text blocks.
 *
 * - `oldHunk` contains removed lines (without the leading `-`) and context
 *   lines (kept verbatim, including their leading space).
 * - `newHunk` contains added lines (without the leading `+`) and context
 *   lines. Added lines are prefixed with `<new-file line number>: `. Context
 *   lines get the same prefix unless they sit within the first or last three
 *   lines of the hunk body; if the hunk adds nothing, every context line is
 *   numbered.
 *
 * `\ No newline at end of file` markers are ignored: they are not file
 * content and must not advance the new-file line counter.
 *
 * @param {string} diff - Text of a single hunk; its first line is expected to
 *   be the `@@` header and is dropped from the output.
 * @returns {object | null} The old and new hunk texts, or `null` when no hunk
 *   header is found in `diff`.
 */
export const parseDiff = (
  diff: string
): { oldHunk: string; newHunk: string } | null => {
  const hunkInfo = getStartEndLine(diff)
  if (hunkInfo == null) {
    return null
  }

  // Drop the `@@` header line.
  const bodyLines = diff.split("\n").slice(1)

  // A diff that ends with "\n" yields one trailing empty string; discard it.
  if (bodyLines[bodyLines.length - 1] === "") {
    bodyLines.pop()
  }

  // Marker lines such as "\ No newline at end of file" start with a backslash;
  // real content lines always start with "+", "-" or a space.
  const lines = bodyLines.filter((line) => !line.startsWith("\\"))

  // Context lines within this many lines of either edge of the hunk are
  // emitted without a line-number prefix.
  const skipStart = 3
  const skipEnd = 3

  // When nothing is added, number every context line instead.
  const removalOnly = !lines.some((line) => line.startsWith("+"))

  const oldHunkLines: string[] = []
  const newHunkLines: string[] = []

  // Line number in the new file of the next added/context line.
  let newLine = hunkInfo.newHunk.startLine
  // 1-based position of the current line within the hunk body.
  let currentLine = 0

  for (const line of lines) {
    currentLine++
    if (line.startsWith("-")) {
      // Removed line: only present in the old file.
      oldHunkLines.push(`${line.substring(1)}`)
    } else if (line.startsWith("+")) {
      // Added line: only present in the new file.
      newHunkLines.push(`${newLine}: ${line.substring(1)}`)
      newLine++
    } else {
      // Context line: present in both files.
      oldHunkLines.push(`${line}`)
      if (
        removalOnly ||
        (currentLine > skipStart && currentLine <= lines.length - skipEnd)
      ) {
        newHunkLines.push(`${newLine}: ${line}`)
      } else {
        newHunkLines.push(`${line}`)
      }
      newLine++
    }
  }

  return {
    oldHunk: oldHunkLines.join("\n"),
    newHunk: newHunkLines.join("\n"),
  }
}

/**
 * Breaks a file's diff into hunks and renders each hunk for review.
 *
 * Hunks for which either the line range or the old/new contents cannot be
 * determined are left out of the result.
 *
 * @param {CommitDiffSchema} file - Object carrying the file's diff text.
 * @returns {[number, number, string][]} One tuple per hunk: start line and end
 *   line of the hunk in the new file, followed by the rendered hunk text.
 */
const parseFileDiffs = (file: CommitDiffSchema): [number, number, string][] => {
  const parsed: [number, number, string][] = []

  // A single file diff may hold several hunks; handle each one on its own.
  for (const hunkText of diffTools.splitDiff(file.diff)) {
    const range = diffTools.getStartEndLine(hunkText)
    if (!range) continue

    const contents = diffTools.parseDiff(hunkText)
    if (!contents) continue

    const rendered = `
---new_hunk---
\`\`\`
${contents.newHunk}
\`\`\`

---old_hunk---
\`\`\`
${contents.oldHunk}
\`\`\`
        `

    parsed.push([range.newHunk.startLine, range.newHunk.endLine, rendered])
  }

  return parsed
}

/**
 * A single review comment attached to a range of lines.
 */
export interface Review {
  /** First line of the range the comment is attached to. */
  startLine: number
  /** Last line of the range the comment is attached to. */
  endLine: number
  /** Body of the comment. */
  comment: string
}

/**
 * Parses a textual review response into individual review comments.
 *
 * The response is read line by line:
 * - a line ending in `<start>-<end>:` opens a new comment for that line range;
 * - a line consisting of `---` closes the current comment;
 * - any other line is appended to the current comment (if one is open).
 *
 * Before parsing, line-number prefixes (`12: `) are stripped from the contents
 * of ```suggestion and ```diff code blocks.
 *
 * Each comment is then matched against `diffs`. If its range is not fully
 * contained in one diff range, a note with the original line numbers is
 * prepended and the comment is moved to the diff with the largest overlap, or
 * to the first diff when nothing overlaps. When `diffs` is empty the comment
 * keeps its original line range.
 *
 * @param {string} response - Raw review response text.
 * @param {Array<[number, number, string]>} diffs - Diff ranges as
 *   `[startLine, endLine, content]`; only the two line numbers are used.
 * @returns {Review[]} The parsed comments, in the order they were found.
 */
const parseReview = (
  response: string,
  diffs: Array<[number, number, string]>
): Review[] => {
  const reviews: Review[] = []

  // State of the comment currently being accumulated.
  let currentStartLine: number | null = null
  let currentEndLine: number | null = null
  let currentComment = ""

  /**
   * Finalises the comment being accumulated (if any), maps it onto a diff
   * range and appends it to `reviews`.
   */
  const storeReview = (): void => {
    if (currentStartLine === null || currentEndLine === null) {
      return
    }

    const review: Review = {
      startLine: currentStartLine,
      endLine: currentEndLine,
      comment: currentComment,
    }

    let withinDiff = false
    let bestDiffStartLine = -1
    let bestDiffEndLine = -1
    let maxIntersection = 0

    // Find the diff range sharing the most lines with the comment's range.
    for (const [startLine, endLine] of diffs) {
      const intersectionStart = Math.max(review.startLine, startLine)
      const intersectionEnd = Math.min(review.endLine, endLine)
      const intersectionLength = Math.max(
        0,
        intersectionEnd - intersectionStart + 1
      )

      if (intersectionLength > maxIntersection) {
        maxIntersection = intersectionLength
        bestDiffStartLine = startLine
        bestDiffEndLine = endLine
        // Fully contained when the overlap covers the whole comment range.
        withinDiff =
          intersectionLength === review.endLine - review.startLine + 1
      }

      if (withinDiff) break
    }

    if (!withinDiff) {
      if (bestDiffStartLine !== -1 && bestDiffEndLine !== -1) {
        // Partial overlap: move the comment to the best-matching diff.
        review.comment = `> 注意：此CR评论不在差异范围内，因此被映射到重叠最多的Diff。原始行号 [${review.startLine}-${review.endLine}]\n\n  ${review.comment}`
        review.startLine = bestDiffStartLine
        review.endLine = bestDiffEndLine
      } else {
        // No overlap at all: fall back to the first diff, if there is one.
        review.comment = `> 注意：此CR评论不在差异范围内，但未找到与其重叠的Diff。原始行号 [${review.startLine}-${review.endLine}]\n\n  ${review.comment}`
        const fallback = diffs[0]
        if (fallback !== undefined) {
          review.startLine = fallback[0]
          review.endLine = fallback[1]
        }
        // With no diffs available the original line range is kept as is.
      }
    }

    reviews.push(review)
  }

  /**
   * Strips leading `<number>: ` prefixes from every line inside the code
   * blocks that open with the given label.
   * @param {string} comment - Text to clean.
   * @param {string} codeBlockLabel - Label following the opening fence.
   * @returns {string} The text with line numbers removed from those blocks.
   */
  const sanitizeCodeBlock = (
    comment: string,
    codeBlockLabel: string
  ): string => {
    const codeBlockStart = `\`\`\`${codeBlockLabel}`
    const codeBlockEnd = "```"
    const lineNumberRegex = /^ *(\d+): /gm

    let codeBlockStartIndex = comment.indexOf(codeBlockStart)

    while (codeBlockStartIndex !== -1) {
      const contentStart = codeBlockStartIndex + codeBlockStart.length
      const codeBlockEndIndex = comment.indexOf(codeBlockEnd, contentStart)

      // Unterminated block: leave the remainder untouched.
      if (codeBlockEndIndex === -1) break

      const sanitizedBlock = comment
        .substring(contentStart, codeBlockEndIndex)
        .replace(lineNumberRegex, "")

      comment =
        comment.slice(0, contentStart) +
        sanitizedBlock +
        comment.slice(codeBlockEndIndex)

      // Continue searching after the closing fence of the block just handled.
      codeBlockStartIndex = comment.indexOf(
        codeBlockStart,
        contentStart + sanitizedBlock.length + codeBlockEnd.length
      )
    }

    return comment
  }

  /**
   * Removes line-number prefixes from `suggestion` and `diff` code blocks.
   * @param {string} comment - Text to clean.
   * @returns {string} The cleaned text.
   */
  const sanitizeResponse = (comment: string): string => {
    comment = sanitizeCodeBlock(comment, "suggestion")
    comment = sanitizeCodeBlock(comment, "diff")
    return comment
  }

  response = sanitizeResponse(response.trim())

  const lines = response.split("\n")
  const lineNumberRangeRegex = /(?:^|\s)(\d+)-(\d+):\s*$/
  const commentSeparator = "---"

  for (const line of lines) {
    const lineNumberRangeMatch = line.match(lineNumberRangeRegex)

    if (lineNumberRangeMatch != null) {
      // A new range header closes the previous comment and opens a new one.
      storeReview()
      currentStartLine = parseInt(lineNumberRangeMatch[1], 10)
      currentEndLine = parseInt(lineNumberRangeMatch[2], 10)
      currentComment = ""
      continue
    }

    if (line.trim() === commentSeparator) {
      // A separator closes the current comment without opening a new one.
      storeReview()
      currentStartLine = null
      currentEndLine = null
      currentComment = ""
      continue
    }

    if (currentStartLine !== null && currentEndLine !== null) {
      currentComment += `${line}\n`
    }
  }

  // Flush the last comment, which has no trailing separator.
  storeReview()

  return reviews
}

/**
 * Bundles this module's diff helpers into a single object, which is the
 * module's default export. `parseFileDiffs` calls its sibling helpers through
 * this object rather than referencing them directly.
 */
const diffTools = {
  splitDiff,
  parseDiff,
  getStartEndLine,
  parseFileDiffs,
  parseReview,
}

export default diffTools