feat: 支持清洗历史数据
All checks were successful
Egg Server CI/CD / build-image (push) Successful in 45s
Egg Server CI/CD / refresh-image (push) Successful in 12s
Egg Server CI/CD / fast-deploy (push) Successful in 2s

This commit is contained in:
zhaoyingbo 2024-10-15 01:20:21 +00:00
parent e98a93c943
commit 9545670a51
17 changed files with 1402 additions and 33 deletions

View File

@ -19,6 +19,7 @@
"langchain",
"langfuse",
"metas",
"michat",
"mina",
"mindnote",
"openai",

1237
assets/chatHistory.json Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,141 @@
import { parseJsonString } from "@egg/hooks"
import logger from "@egg/logger"
import { Context, LarkServer } from "../../../types"
/** One cleaned chat message as produced by getChatHistory. */
interface Message {
// Name looked up for the message sender (from mention data or the user lookup)
user: string
// Plain text extracted from the message body
content: string
// Creation time rendered with the zh-CN locale in the Asia/Shanghai time zone
time: string
}
/**
 * Recursively collects every string stored under a `text` key of a parsed
 * JSON value and concatenates them in traversal order.
 *
 * Arrays are walked element by element and objects are walked over their own
 * enumerable keys. A `text` key holding a string contributes its value; every
 * other value is searched recursively. Primitives and `null` contribute
 * nothing.
 *
 * @param data - Any parsed JSON value.
 * @returns The concatenated text, or an empty string when none is found.
 */
const extractTextFromJson = (data: unknown): string => {
  if (Array.isArray(data)) {
    return data
      .map((element: unknown) => extractTextFromJson(element))
      .join("")
  }
  if (typeof data !== "object" || data === null) return ""

  let result = ""
  // Object.entries visits own enumerable keys only, so properties inherited
  // through the prototype chain are never mistaken for message content.
  for (const [key, value] of Object.entries(data)) {
    result +=
      key === "text" && typeof value === "string"
        ? value
        : extractTextFromJson(value)
  }
  return result
}
/**
 * Fetches the recent history of a chat and reduces it to plain-text messages.
 *
 * Steps:
 * 1. Request the messages of the last `timeScope` days for `chatId`.
 * 2. Keep only `text` and `post` messages.
 * 3. Resolve sender names, first from the mention data carried by the
 *    messages, then through a batch user lookup for the remaining senders.
 * 4. Convert each message body to plain text and format its creation time in
 *    the Asia/Shanghai time zone.
 *
 * @param ctx - Request context; only `larkService` and
 *   `larkBody.actionValue.{chatId, timeScope}` are read. `timeScope` is the
 *   number of days to look back.
 * @returns The cleaned messages, or an empty array when the chat has no
 *   history in the requested window.
 */
const getChatHistory = async ({
  larkService,
  larkBody: {
    actionValue: { chatId, timeScope },
  },
}: Context.Data): Promise<Message[]> => {
  // Epoch timestamps are absolute (time-zone independent), so the window is
  // computed directly from the current instant. Shifting it by a time-zone
  // offset would move the window away from "now" on any host that is not
  // configured for UTC+8 and silently drop the most recent messages.
  const secondsPerDay = 24 * 60 * 60
  const endTimeTimestamp = Math.round(Date.now() / 1000)
  const startTimeTimestamp = endTimeTimestamp - Number(timeScope) * secondsPerDay

  // Fetch the chat history for the window
  const { data: chatHistory } = await larkService.message.getHistory(
    chatId,
    String(startTimeTimestamp),
    String(endTimeTimestamp)
  )
  if (chatHistory.length === 0) return []

  // id -> display name, seeded from mention data found in the messages
  const mentions: Map<string, string> = new Map()
  // ids of every sender whose sender_type is "user"
  const senders: Set<string> = new Set()
  // Only text and post messages are kept
  const allowedMsgTypes = ["text", "post"]
  const filteredMsg: typeof chatHistory = []

  for (const chat of chatHistory) {
    if (chat.mentions) {
      for (const mention of chat.mentions) {
        mentions.set(mention.id, mention.name)
      }
    }
    if (chat.sender && chat.sender.sender_type === "user") {
      senders.add(chat.sender.id)
    }
    if (allowedMsgTypes.includes(chat.msg_type)) {
      filteredMsg.push(chat)
    }
  }

  // Senders whose name is still unknown because nobody mentioned them
  const noMentionSenders = new Set(
    [...senders].filter((sender) => !mentions.has(sender))
  )
  // Map and Set serialize to "{}" with JSON.stringify, so spread them into
  // arrays first to make the debug output meaningful.
  logger.debug(`Mentions: ${JSON.stringify([...mentions])}`)
  logger.debug(`Senders: ${JSON.stringify([...senders])}`)
  logger.debug(`No mention senders: ${JSON.stringify([...noMentionSenders])}`)

  // Resolve the remaining names through the user lookup
  if (noMentionSenders.size !== 0) {
    const {
      data: { items },
    } = await larkService.user.batchGet([...noMentionSenders])
    logger.debug(`Get user info: ${JSON.stringify(items)}`)
    for (const item of items) {
      mentions.set(item.open_id, item.name)
    }
  }

  // Plain text of a "text" message with mention placeholders expanded
  const getText = (chat: LarkServer.MessageData) => {
    let { text } = parseJsonString(chat.body.content, { text: "" }) as {
      text: string
    }
    if (!text) return ""
    // Swap each mention placeholder for "@name"
    if (chat.mentions) {
      for (const mention of chat.mentions) {
        const mentionName = `@${mention.name}`
        // A replacer function inserts the name literally; a plain string
        // would have "$&", "$1", "$$" ... interpreted as replacement patterns.
        text = text.replace(mention.key, () => mentionName)
      }
    }
    // Strip any markup tags that may be present
    return text.replace(/<[^>]+>/g, "")
  }

  // Plain text of a "post" message: every nested `text` value, concatenated
  const getPost = (chat: LarkServer.MessageData) => {
    const content = parseJsonString(chat.body.content, null)
    if (!content) return ""
    return extractTextFromJson(content)
  }

  const messages: Message[] = []
  for (const chat of filteredMsg) {
    // The sender may be absent or not a user (never resolved above); fall
    // back to the raw id instead of emitting `undefined` as the user name.
    const senderId = chat.sender?.id ?? ""
    messages.push({
      user: mentions.get(senderId) ?? senderId,
      content: chat.msg_type === "text" ? getText(chat) : getPost(chat),
      time: new Date(Number(chat.create_time)).toLocaleString("zh-CN", {
        timeZone: "Asia/Shanghai",
      }),
    })
  }
  return messages
}
export default getChatHistory

View File

@ -2,6 +2,7 @@ import { genCardOptions } from "@egg/lark-msg-tool"
import { Context } from "../../../types"
import llm from "../../../utils/llm"
import getChatHistory from "./chatHistory"
/**
*
@ -86,35 +87,21 @@ const sendTimeScopeSelector = async ({
})
}
const sendGroupReport = async ({
larkService,
logger,
requestId,
larkCard,
larkBody: { actionValue, messageId },
}: Context.Data) => {
const sendGroupReport = async (ctx: Context.Data) => {
const {
larkService,
logger,
requestId,
larkCard,
larkBody: { actionValue, messageId },
} = ctx
const cardGender = larkCard.child("groupAgent")
const { chatId, chatName, functionId, functionName, timeScope } = actionValue
const { chatName, functionId, functionName, timeScope } = actionValue
// 记录发送loading消息后的时间戳
const startTime = Date.now()
// 获取历史消息timeScope为1、3、7分别代表1天、3天、7天
// 获取服务器的时区偏移量(以分钟为单位)
const serverTimezoneOffset = new Date().getTimezoneOffset()
// 上海时区的偏移量UTC+8以分钟为单位
const shanghaiTimezoneOffset = -8 * 60
// 计算时间戳,调整为上海时区
const endTimeTimestamp =
Math.round(new Date().getTime() / 1000) +
(shanghaiTimezoneOffset - serverTimezoneOffset) * 60
const startTimeTimestamp = endTimeTimestamp - Number(timeScope) * 24 * 60 * 60
// 获取群聊中的历史记录
const { data: chatHistory } = await larkService.message.getHistory(
chatId,
String(startTimeTimestamp),
String(endTimeTimestamp)
)
// 获取聊天记录
const chatHistory = await getChatHistory(ctx)
// 如果没有历史记录则返回错误消息
if (chatHistory.length === 0) {
logger.error("Chat history is empty")

View File

@ -31,7 +31,10 @@ class LarkUserService extends LarkBaseService {
* @param userIdType ID类型
* @returns
*/
async batchGet(userIds: string[], userIdType: "open_id" | "user_id") {
async batchGet(
userIds: string[],
userIdType: "open_id" | "user_id" = "open_id"
) {
const path = `/contact/v3/users/batch`
// 如果user_id长度超出50需要分批请求,

View File

@ -1,4 +1,4 @@
import { LarkService } from "../services"
import { LarkService } from "../../services"
const service = new LarkService("egg", "")

View File

@ -1,4 +1,4 @@
import LarkDriveService from "../services/lark/drive"
import LarkDriveService from "../../services/lark/drive"
const service = new LarkDriveService("egg", "")

View File

@ -1,4 +1,4 @@
import db from "../db"
import db from "../../db"
const res = await db.apiKey.getOne("uwnpzb9hvoft28h")

View File

@ -1,4 +1,4 @@
import { LarkService } from "../services"
import { LarkService } from "../../services"
const service = new LarkService("egg", "")

View File

@ -1,4 +1,4 @@
import { LarkService } from "../services"
import { LarkService } from "../../services"
const service = new LarkService("egg", "")

View File

@ -1,4 +1,4 @@
import llm from "../utils/llm"
import llm from "../../utils/llm"
const chatHistory = [
{

View File

@ -1,6 +1,6 @@
import { test } from "bun:test"
import LarkSheetService from "../services/lark/sheet"
import LarkSheetService from "../../services/lark/sheet"
const service = new LarkSheetService("egg", "")