feat: 网页爬虫&总结改成使用MIFY支持
This commit is contained in:
parent
45efe9c662
commit
f8ab0f791b
1
.vscode/settings.json
vendored
1
.vscode/settings.json
vendored
@ -24,6 +24,7 @@
|
|||||||
"metas",
|
"metas",
|
||||||
"MIAI",
|
"MIAI",
|
||||||
"michat",
|
"michat",
|
||||||
|
"mify",
|
||||||
"mina",
|
"mina",
|
||||||
"mindnote",
|
"mindnote",
|
||||||
"openai",
|
"openai",
|
||||||
|
@ -11,7 +11,10 @@ import { extractSheetIds, validateLink } from "../../utils/string"
|
|||||||
* @returns {Promise<any>} - 返回爬取结果
|
* @returns {Promise<any>} - 返回爬取结果
|
||||||
* @throws {Error} - 当爬取失败时抛出错误
|
* @throws {Error} - 当爬取失败时抛出错误
|
||||||
*/
|
*/
|
||||||
const crawlWebPage = async (ctx: Context, link: string): Promise<any> => {
|
export const crawlWebPage = async (
|
||||||
|
ctx: Context,
|
||||||
|
link: string
|
||||||
|
): Promise<any> => {
|
||||||
const { attachService } = ctx
|
const { attachService } = ctx
|
||||||
const crawRes = await attachService.crawlWeb(link)
|
const crawRes = await attachService.crawlWeb(link)
|
||||||
if (!crawRes || crawRes?.code) throw new Error("网页抓取失败")
|
if (!crawRes || crawRes?.code) throw new Error("网页抓取失败")
|
||||||
@ -26,7 +29,7 @@ const crawlWebPage = async (ctx: Context, link: string): Promise<any> => {
|
|||||||
* @returns {Promise<string>} - 返回简报内容
|
* @returns {Promise<string>} - 返回简报内容
|
||||||
* @throws {Error} - 当生成简报失败时抛出错误
|
* @throws {Error} - 当生成简报失败时抛出错误
|
||||||
*/
|
*/
|
||||||
const generateSummary = async (
|
export const generateSummary = async (
|
||||||
ctx: Context,
|
ctx: Context,
|
||||||
userDescription: string,
|
userDescription: string,
|
||||||
content: any
|
content: any
|
||||||
@ -141,6 +144,7 @@ const agent = async (ctx: Context, link: string, userDescription: string) => {
|
|||||||
larkService: { message },
|
larkService: { message },
|
||||||
larkCard,
|
larkCard,
|
||||||
logger,
|
logger,
|
||||||
|
attachService,
|
||||||
} = ctx
|
} = ctx
|
||||||
const cardGender = larkCard.child("webAgent")
|
const cardGender = larkCard.child("webAgent")
|
||||||
try {
|
try {
|
||||||
@ -150,10 +154,12 @@ const agent = async (ctx: Context, link: string, userDescription: string) => {
|
|||||||
await message.updateOrReply(
|
await message.updateOrReply(
|
||||||
cardGender.genSuccessCard("正在为您收集简报,请稍等片刻~")
|
cardGender.genSuccessCard("正在为您收集简报,请稍等片刻~")
|
||||||
)
|
)
|
||||||
// 抓取网页
|
// // 抓取网页
|
||||||
const crawRes = await crawlWebPage(ctx, link)
|
// const crawRes = await crawlWebPage(ctx, link)
|
||||||
// 调用模型生成简报
|
// // 调用模型生成简报
|
||||||
const llmRes = await generateSummary(ctx, userDescription, crawRes)
|
// const llmRes = await generateSummary(ctx, userDescription, crawRes)
|
||||||
|
// 调用mify服务生成简报
|
||||||
|
const llmRes = await attachService.mifyCrawler(link, userDescription)
|
||||||
// 插入到表格
|
// 插入到表格
|
||||||
const sheetLink = await insert2Sheet(ctx, link, userDescription, llmRes)
|
const sheetLink = await insert2Sheet(ctx, link, userDescription, llmRes)
|
||||||
// 发送简报卡片
|
// 发送简报卡片
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
import type { LarkEvent } from "@egg/lark-msg-tool"
|
import type { LarkEvent } from "@egg/lark-msg-tool"
|
||||||
import { NetToolBase } from "@egg/net-tool"
|
import { NetToolBase } from "@egg/net-tool"
|
||||||
|
|
||||||
|
import { APP_CONFIG } from "../../constant/config"
|
||||||
import { LarkServer } from "../../types"
|
import { LarkServer } from "../../types"
|
||||||
|
|
||||||
interface Chat2SoupParams {
|
interface Chat2SoupParams {
|
||||||
@ -96,6 +97,43 @@ class AttachService extends NetToolBase {
|
|||||||
const URL = "https://lark-egg.ai.xiaomi.com/tools/web/crawler"
|
const URL = "https://lark-egg.ai.xiaomi.com/tools/web/crawler"
|
||||||
return this.get<LarkServer.BaseRes<string>>(URL, { url }).catch(() => null)
|
return this.get<LarkServer.BaseRes<string>>(URL, { url }).catch(() => null)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Crawl a web page and summarise it through the MIFY workflow endpoint.
 *
 * Sends one blocking POST to the workflow-run URL with `link` and
 * `userDescription` as workflow inputs, then reads the summary from
 * `data.outputs.content` of the response.
 *
 * @param link - URL of the web page to crawl.
 * @param userDescription - The user's description, forwarded as a workflow input.
 * @returns The summary text produced by the workflow.
 * @throws {Error} "MIFY爬虫请求失败" when the request rejects or the response
 *   does not have the expected `data.outputs` shape.
 * @throws {Error} "模型总结失败" when the workflow returns empty content.
 * @throws {Error} "网页抓取失败" when the workflow returns the "crawlerErr" sentinel.
 */
async mifyCrawler(link: string, userDescription: string): Promise<string> {
  const URL = "https://mify-be.pt.xiaomi.com/api/v1/workflows/run"

  let content: unknown
  try {
    const res = await this.post(
      URL,
      {
        inputs: {
          link,
          userDescription,
        },
        response_mode: "blocking",
        user: "egg-server",
      },
      // NOTE(review): third argument is passed empty, as before; presumably
      // query params and the fourth is request headers — confirm against NetToolBase.post.
      {},
      {
        Authorization: `Bearer ${APP_CONFIG.MIFY_CRAWLER_TOKEN}`,
      }
    )
    // Kept inside the try so that a malformed response is still reported as
    // a request failure, as it was before.
    content = res.data.outputs.content
  } catch {
    // Only transport / response-shape failures reach this point, so there is
    // no need to inspect the caught value (which may not even be an Error).
    throw new Error("MIFY爬虫请求失败")
  }

  // Domain-level failures are raised outside the try, so they can no longer
  // be swallowed by the generic handler and need no message-string matching.
  if (!content) throw new Error("模型总结失败")
  if (content === "crawlerErr") throw new Error("网页抓取失败")
  return content as string
}
|
||||||
}
|
}
|
||||||
|
|
||||||
export default AttachService
|
export default AttachService
|
||||||
|
11
test/llm/mifyCrawler.ts
Normal file
11
test/llm/mifyCrawler.ts
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
import initAppConfig from "../../constant/config"
|
||||||
|
import { AttachService } from "../../services"
|
||||||
|
|
||||||
|
// Manual smoke test: initialise the app config, then run one MIFY crawl and
// print either the returned summary or the error.
await initAppConfig()

const attachService = new AttachService()

try {
  const summary = await attachService.mifyCrawler(
    "https://lacus.site",
    "详细介绍alpine"
  )
  console.log(summary)
} catch (err) {
  console.error(err)
}
|
Loading…
x
Reference in New Issue
Block a user