All checks were successful
CI Monitor MIflow / build-image (push) Successful in 2m42s
20 lines
422 B
TypeScript
20 lines
422 B
TypeScript
import { get_encoding as getEncoding } from "@dqbd/tiktoken"
|
|
|
|
// Shared tokenizer instance, created once when this module is loaded, using
// the "cl100k_base" encoding. Every helper below encodes through it.
const tokenizer = getEncoding("cl100k_base")
|
|
|
|
/**
 * Converts text into token ids using the module-level tokenizer.
 *
 * The input is handed to the tokenizer unchanged; no cleaning or validation
 * happens here.
 *
 * @param input - Text to tokenize.
 * @returns The token ids produced by the tokenizer for `input`.
 */
const encode = (input: string): Uint32Array => tokenizer.encode(input)
|
|
|
|
/**
 * Counts how many tokens `input` encodes to.
 *
 * Special-token markers of the cl100k_base encoding are removed before
 * encoding, so they contribute zero tokens. With its default settings the
 * tokenizer rejects text containing such markers, so leaving any of them in
 * place would make counting fail instead of returning a number.
 *
 * @param input - Text to measure.
 * @returns Number of tokens in the text after special-token markers are removed.
 */
const getTokenCount = (input: string): number => {
  // All special tokens defined by cl100k_base, not only <|endoftext|>.
  const specialTokenPattern =
    /<\|(?:endoftext|fim_prefix|fim_middle|fim_suffix|endofprompt)\|>/g

  // Removing one marker can splice a new one together
  // (e.g. "<|endof<|endoftext|>text|>"), so repeat until nothing changes.
  let cleanedInput = input
  let previous: string
  do {
    previous = cleanedInput
    cleanedInput = cleanedInput.replace(specialTokenPattern, "")
  } while (cleanedInput !== previous)

  return encode(cleanedInput).length
}
|
|
|
|
/**
 * Public surface of this module: the token helpers grouped into one object.
 *
 * - `getTokenCount` returns the number of tokens for a string.
 * - `encode` returns the token ids for a string.
 */
const tokenTools = { getTokenCount, encode }

export default tokenTools
|