feat: 网页爬虫&总结改成使用MIFY支持

This commit is contained in:
zhaoyingbo 2025-02-05 07:47:44 +00:00
parent 45efe9c662
commit f8ab0f791b
4 changed files with 62 additions and 6 deletions

View File

@ -24,6 +24,7 @@
"metas",
"MIAI",
"michat",
"mify",
"mina",
"mindnote",
"openai",

View File

@ -11,7 +11,10 @@ import { extractSheetIds, validateLink } from "../../utils/string"
* @returns {Promise<any>} -
* @throws {Error} -
*/
const crawlWebPage = async (ctx: Context, link: string): Promise<any> => {
export const crawlWebPage = async (
ctx: Context,
link: string
): Promise<any> => {
const { attachService } = ctx
const crawRes = await attachService.crawlWeb(link)
if (!crawRes || crawRes?.code) throw new Error("网页抓取失败")
@ -26,7 +29,7 @@ const crawlWebPage = async (ctx: Context, link: string): Promise<any> => {
* @returns {Promise<string>} -
* @throws {Error} -
*/
const generateSummary = async (
export const generateSummary = async (
ctx: Context,
userDescription: string,
content: any
@ -141,6 +144,7 @@ const agent = async (ctx: Context, link: string, userDescription: string) => {
larkService: { message },
larkCard,
logger,
attachService,
} = ctx
const cardGender = larkCard.child("webAgent")
try {
@ -150,10 +154,12 @@ const agent = async (ctx: Context, link: string, userDescription: string) => {
await message.updateOrReply(
cardGender.genSuccessCard("正在为您收集简报,请稍等片刻~")
)
// 抓取网页
const crawRes = await crawlWebPage(ctx, link)
// 调用模型生成简报
const llmRes = await generateSummary(ctx, userDescription, crawRes)
// // 抓取网页
// const crawRes = await crawlWebPage(ctx, link)
// // 调用模型生成简报
// const llmRes = await generateSummary(ctx, userDescription, crawRes)
// 调用mify服务生成简报
const llmRes = await attachService.mifyCrawler(link, userDescription)
// 插入到表格
const sheetLink = await insert2Sheet(ctx, link, userDescription, llmRes)
// 发送简报卡片

View File

@ -1,6 +1,7 @@
import type { LarkEvent } from "@egg/lark-msg-tool"
import { NetToolBase } from "@egg/net-tool"
import { APP_CONFIG } from "../../constant/config"
import { LarkServer } from "../../types"
interface Chat2SoupParams {
@ -96,6 +97,43 @@ class AttachService extends NetToolBase {
const URL = "https://lark-egg.ai.xiaomi.com/tools/web/crawler"
return this.get<LarkServer.BaseRes<string>>(URL, { url }).catch(() => null)
}
/**
 * Runs the MIFY workflow that crawls a web page and summarizes it.
 *
 * Sends a blocking POST to the MIFY workflow endpoint with `link` and
 * `userDescription` as workflow inputs, then reads the summary text from
 * `data.outputs.content` of the response.
 *
 * @param link - URL of the page, forwarded as the `link` workflow input
 * @param userDescription - Forwarded as the `userDescription` workflow input
 * @returns The non-empty string found in `data.outputs.content`
 * @throws {Error} "MIFY爬虫请求失败" when the request rejects or the response
 *   carries no `data.outputs`
 * @throws {Error} "模型总结失败" when the content is empty or not a string
 * @throws {Error} "网页抓取失败" when the content equals the "crawlerErr" sentinel
 */
async mifyCrawler(link: string, userDescription: string): Promise<string> {
  const URL = "https://mify-be.pt.xiaomi.com/api/v1/workflows/run"
  // Collapse every rejection (including non-Error values such as null) into
  // a null response, so the failure is reported uniformly below instead of
  // relying on `error.message` being readable.
  const res = await this.post(
    URL,
    {
      inputs: {
        link,
        userDescription,
      },
      response_mode: "blocking",
      user: "egg-server",
    },
    // NOTE(review): third argument is passed empty and the fourth carries the
    // auth header — presumably (params, headers); confirm against NetToolBase.post.
    {},
    {
      Authorization: `Bearer ${APP_CONFIG.MIFY_CRAWLER_TOKEN}`,
    }
  ).catch(() => null)

  // A missing response or missing `data.outputs` is a transport/shape failure,
  // not a summarization failure.
  const outputs = res?.data?.outputs
  if (outputs == null) throw new Error("MIFY爬虫请求失败")

  const llmRes: unknown = outputs.content
  // Validate at runtime instead of asserting the type with `as string`.
  if (typeof llmRes !== "string" || !llmRes) throw new Error("模型总结失败")
  // NOTE(review): "crawlerErr" is presumably what the workflow emits when the
  // crawl step fails — confirm against the MIFY workflow definition.
  if (llmRes === "crawlerErr") throw new Error("网页抓取失败")
  return llmRes
}
}
export default AttachService

11
test/llm/mifyCrawler.ts Normal file
View File

@ -0,0 +1,11 @@
import initAppConfig from "../../constant/config"
import { AttachService } from "../../services"
// Manual smoke run: load the app config, call AttachService.mifyCrawler once
// against a fixed link/description, and print either the result or the error.
await initAppConfig()

const attach = new AttachService()
try {
  const summary = await attach.mifyCrawler(
    "https://lacus.site",
    "详细介绍alpine"
  )
  console.log(summary)
} catch (err) {
  console.error(err)
}