gitlab_monitor/utils/tokenTools.ts
zhaoyingbo 92fa30ef3d
All checks were successful
CI Monitor MIflow / build-image (push) Successful in 2m42s
feat: 支持初步的CR
2024-08-12 12:24:45 +00:00

20 lines
422 B
TypeScript

import { get_encoding as getEncoding } from "@dqbd/tiktoken"
// Module-level tokenizer for the "cl100k_base" encoding, created once when
// this module is loaded and reused by every call to `encode` below.
const tokenizer = getEncoding("cl100k_base")
/**
 * Encodes a string into token ids using the module-level tokenizer.
 *
 * The input is passed to the tokenizer unchanged; no special-token handling
 * is done here.
 *
 * @param input - Text to tokenize.
 * @returns The token ids produced by the tokenizer.
 */
const encode = (input: string): Uint32Array => tokenizer.encode(input)
/**
 * Counts the tokens in a string.
 *
 * Every literal `<|endoftext|>` marker is removed from the text before it is
 * encoded, so those markers do not contribute to the count.
 * NOTE(review): presumably stripped because the tokenizer rejects special
 * tokens in plain input by default — confirm against the tiktoken docs.
 *
 * @param input - Text whose tokens should be counted.
 * @returns Number of tokens in the text after the markers are removed.
 */
const getTokenCount = (input: string): number => {
  const tokens = encode(input.replace(/<\|endoftext\|>/g, ""))
  return tokens.length
}
/**
 * Token helpers grouped into a single object, exposed as the module's
 * default export.
 */
const tokenTools = {
  getTokenCount: getTokenCount,
  encode: encode,
}

export default tokenTools