import { S3Client, S3File } from "bun"
import { JSDOM } from "jsdom"

import initAppConfig, { APP_CONFIG, ConfigModel } from "../constant/config"
import pbClient from "../db/pbClient"
import { Context } from "../types"
import { genContextManually } from "../utils/genContext"
import llm from "../utils/llm"

// Top-level await: evaluation of this module pauses here until initAppConfig()
// settles, so everything defined below runs only after it has completed.
// NOTE(review): presumably this populates APP_CONFIG (read by writeHtml) — confirm.
await initAppConfig()

/**
 * Translates a batch of strings into Simplified Chinese with one `llm.invoke` call.
 *
 * The reply of the `batchTranslate` prompt is parsed as JSON and must be an
 * array of strings with exactly one entry per input, in input order. The reply
 * is validated before use, because a shorter, longer or non-array answer would
 * otherwise pair source strings with `undefined` or with the wrong translation.
 *
 * @param textList - Source strings to translate.
 * @param requestId - Request identifier forwarded to `llm.invoke`.
 * @returns A map from every source string to its translation.
 * @throws If the reply is not valid JSON, is not an array of strings, or its
 *   length differs from `textList`.
 */
const translateTextList = async (
  textList: string[],
  requestId: string
): Promise<Record<string, string>> => {
  // The trailing `1, true` arguments are forwarded unchanged; their meaning is
  // defined by `llm.invoke`, which is not visible from this file.
  const rawReply = await llm.invoke(
    "batchTranslate",
    {
      inputArray: textList,
      targetLang: "简体中文",
    },
    requestId,
    1,
    true
  )

  const parsed: unknown = JSON.parse(rawReply as string)

  if (
    !Array.isArray(parsed) ||
    !parsed.every((item) => typeof item === "string")
  ) {
    throw new Error("batchTranslate: reply is not a JSON array of strings")
  }
  if (parsed.length !== textList.length) {
    throw new Error(
      `batchTranslate: expected ${textList.length} translations, got ${parsed.length}`
    )
  }

  // Object.fromEntries defines own properties, so a source text such as
  // "__proto__" is stored like any other key instead of being dropped.
  return Object.fromEntries(
    textList.map((text, index): [string, string] => [text, parsed[index]])
  )
}

/**
 * Tells whether a string contains text worth sending to the translator.
 *
 * After trimming, a string is rejected when it is empty, when it consists
 * solely of code points carrying the Unicode `Emoji` property, or when it
 * contains no letter (`\p{L}`) of any script.
 *
 * @param str - String to inspect.
 * @returns `true` when the string should be translated, `false` otherwise.
 */
const getIsTranslatableString = (str: string): boolean => {
  const candidate = str.trim()
  const emojiOnlyPattern = /^\p{Emoji}+$/u
  const anyLetterPattern = /\p{L}/u

  return (
    // Must not be blank...
    candidate !== "" &&
    // ...nor made up exclusively of Emoji-property code points...
    !emojiOnlyPattern.test(candidate) &&
    // ...and must hold at least one letter (digits alone are not enough).
    anyLetterPattern.test(candidate)
  )
}

/**
 * Downloads one bytes.dev archive page.
 *
 * Root-relative `href="/..."` and `src="/..."` attributes are rewritten to
 * absolute `https://bytes.dev/...` URLs; protocol-relative (`//...`) values are
 * left alone.
 *
 * @param newId - Archive page ID, used as the last path segment of the URL.
 * @returns The rewritten HTML, or an empty string when the response is not OK.
 */
const getNewPage = async (newId: number) => {
  const response = await fetch(`https://bytes.dev/archives/${newId}`)

  if (response.ok) {
    const markup = await response.text()
    // The negative lookahead skips values that start with "//".
    return markup.replace(/(href|src)="\/(?!\/)/g, `$1="https://bytes.dev/`)
  }

  return ""
}

/**
 * Translates the visible text of an HTML page and returns the serialized result.
 *
 * Steps: parse the markup with JSDOM, drop the `__NEXT_DATA__` script when
 * present, collect the direct text-node children of the target elements,
 * translate the unique strings in chunks through `translateTextList`, write
 * the translations back, and make every `<a>` open in a new tab.
 *
 * @param ctx - Context object; only its `logger` is used here.
 * @param rawHtml - Original HTML markup.
 * @returns The serialized HTML document with translated text.
 */
const translateHTML = async (ctx: Context, rawHtml: string) => {
  const { logger } = ctx
  const dom = new JSDOM(rawHtml)
  const document = dom.window.document

  // Remove the __NEXT_DATA__ script. querySelector returns null when the page
  // has no such element, so the call is guarded instead of assumed.
  document.querySelector("script#__NEXT_DATA__")?.remove()

  // Tags whose text is translated (add new tags here). Each one is excluded
  // when it sits inside a <code> element.
  const translatableTags = [
    "p",
    "h1",
    "h2",
    "h3",
    "h4",
    "h5",
    "h6",
    "span",
    "a",
    "li",
    "td",
    "th",
    "caption",
    "button",
    "label",
    "title",
  ]
  const selector = translatableTags
    .map((tag) => `${tag}:not(code *)`)
    .join(", ")

  // Only direct text-node children (nodeType 3) are handled; text of nested
  // elements is reached through those elements' own selector match.
  const textNodes = Array.from(document.querySelectorAll(selector)).flatMap(
    (ele) => Array.from(ele.childNodes).filter((node) => node.nodeType === 3)
  )

  // Collect the texts that need translation.
  const needTranslateText: string[] = []
  for (const textNode of textNodes) {
    const text = textNode.textContent?.trim()
    if (text && getIsTranslatableString(text)) {
      needTranslateText.push(text)
    }
  }

  const uniqueTexts = [...new Set(needTranslateText)]

  logger.debug(`uniqueTexts: ${uniqueTexts}`)

  // Translate in several requests so that no single request grows too long.
  const chunkSize = 20
  const chunks: string[][] = []
  for (let i = 0; i < uniqueTexts.length; i += chunkSize) {
    chunks.push(uniqueTexts.slice(i, i + chunkSize))
  }
  const translatedMaps = await Promise.all(
    chunks.map((chunk) => translateTextList(chunk, "translateHTML"))
  )

  // Merge into a Map rather than a plain object: a page text such as
  // "__proto__" or "toString" must not resolve to an inherited Object member.
  const translations = new Map<string, string>()
  for (const translatedMap of translatedMaps) {
    for (const [source, translated] of Object.entries(translatedMap)) {
      translations.set(source, translated)
    }
  }

  // Write the translations back into the document.
  for (const textNode of textNodes) {
    const text = textNode.textContent?.trim()
    if (!text) continue
    const translated = translations.get(text)
    if (translated) {
      textNode.textContent = translated
    }
  }

  // Make every <a> open its link in a new page.
  document.querySelectorAll("a").forEach((anchor) => {
    anchor.setAttribute("target", "_blank")
  })

  return dom.serialize()
}

/**
 * Reads the latest page ID.
 *
 * Loads record `5l8a8u85p5v4aid` from the `env` collection through `pbClient`
 * and returns its `value` field.
 *
 * @returns The `value` stored on that record.
 */
const getLatestId = async () => {
  const { value } = await pbClient
    .collection<ConfigModel>("env")
    .getOne("5l8a8u85p5v4aid")
  return value
}

/**
 * Stores the latest page ID.
 *
 * Updates record `5l8a8u85p5v4aid` of the `env` collection through `pbClient`,
 * setting its `value` field to `id`.
 *
 * @param id - Page ID to store.
 */
const setLatestId = async (id: number) => {
  const envCollection = pbClient.collection<ConfigModel>("env")
  await envCollection.update("5l8a8u85p5v4aid", { value: id })
}

/**
 * Uploads an HTML document to the `mi-chat-fe` bucket.
 *
 * A new `S3Client` is created on each call with the credentials from
 * `APP_CONFIG`, and the document is written under the key
 * `bytes/<version>.html`.
 *
 * @param html - HTML content to write.
 * @param version - Number used as the file name of the object.
 */
const writeHtml = async (html: string, version: number) => {
  const s3 = new S3Client({
    accessKeyId: APP_CONFIG.S3_MICHAT_AK,
    secretAccessKey: APP_CONFIG.S3_MICHAT_SK,
    region: "cnbj1",
    endpoint: "https://s3-cnbj1.mi-fds.net",
    bucket: "mi-chat-fe",
  })
  const objectKey = `bytes/${version}.html`

  const target: S3File = s3.file(objectKey)
  await target.write(html)
}

/**
 * Checks for the next archive page and, when it exists, translates and publishes it.
 *
 * Flow: read the stored ID, fetch the page for ID + 1, translate it, upload
 * it, store the new ID, then send the page link to the chat identified by
 * `appInfo.errChatId`. Any error thrown inside the flow is logged and not
 * rethrown; a "finished" line is logged in every case.
 */
const byteMonitor = async () => {
  const ctx = await genContextManually()
  const { logger: log, larkService: lark, appInfo: app } = ctx
  log.info("byteMonitor start")

  try {
    const storedId = await getLatestId()
    if (!storedId) {
      throw new Error("getLatestId empty")
    }

    const nextId = Number(storedId) + 1
    const rawPage = await getNewPage(nextId)
    if (!rawPage) {
      throw new Error("getNewPage empty")
    }

    const translated = await translateHTML(ctx, rawPage)
    await writeHtml(translated, nextId)
    // The stored ID advances only after the upload has succeeded.
    await setLatestId(nextId)

    await lark.message.sendText2Chat(
      app.errChatId,
      `页面链接：https://mi-chat-fe.cnbj1.mi-fds.com/mi-chat-fe/bytes/${nextId}.html`,
      "byteMonitor 更新"
    )
  } catch (error) {
    log.error(`byteMonitor error: ${error}`)
  } finally {
    log.info("byteMonitor finished")
  }
}

export default byteMonitor