import { type ChatCompletionInputMessage } from "@huggingface/tasks";
import type { Conversation, ModelEntryWithTokenizer } from "$lib/types";
import { HfInference } from "@huggingface/inference";

/**
 * Creates an inference client bound to the given access token.
 *
 * @param token - Access token handed to the `HfInference` constructor.
 * @returns A new `HfInference` instance.
 */
export function createHfInference(token: string): HfInference {
	return new HfInference(token);
}

/**
 * Assembles the message list sent to the model: the optional system message
 * first (only when the model supports system prompts and the message has
 * non-empty content), followed by the conversation's own messages.
 */
function buildMessages(conversation: Conversation, systemMessage?: ChatCompletionInputMessage) {
	return [
		...(isSystemPromptSupported(conversation.model) && systemMessage?.content?.length ? [systemMessage] : []),
		...conversation.messages,
	];
}

/**
 * Streams a chat completion for `conversation` and reports progress through
 * `onChunk`.
 *
 * @param hf - Inference client used to issue the request.
 * @param conversation - Supplies the model id, messages, temperature and max token count.
 * @param onChunk - Invoked after every chunk that carries content. It receives the
 *   text accumulated so far (not just the latest delta).
 * @param abortController - Its `signal` is forwarded to the request options.
 * @param systemMessage - Optional system message; prepended only when the model
 *   supports system prompts and the message content is non-empty.
 * @returns A promise that resolves once the stream has been fully consumed.
 */
export async function handleStreamingResponse(
	hf: HfInference,
	conversation: Conversation,
	onChunk: (content: string) => void,
	abortController: AbortController,
	systemMessage?: ChatCompletionInputMessage
): Promise<void> {
	const messages = buildMessages(conversation, systemMessage);
	const stream = hf.chatCompletionStream(
		{
			model: conversation.model.id,
			messages,
			temperature: conversation.config.temperature,
			max_tokens: conversation.config.maxTokens,
		},
		{ signal: abortController.signal }
	);

	let out = "";
	for await (const chunk of stream) {
		// Chunks without choices or with an empty/missing delta are skipped.
		const delta = chunk.choices?.[0]?.delta?.content;
		if (delta) {
			out += delta;
			onChunk(out);
		}
	}
}

/**
 * Requests a complete (non-streamed) chat completion for `conversation`.
 *
 * @param hf - Inference client used to issue the request.
 * @param conversation - Supplies the model id, messages, temperature and max token count.
 * @param systemMessage - Optional system message; prepended only when the model
 *   supports system prompts and the message content is non-empty.
 * @returns The message of the first choice in the response.
 * @throws Error with message "No response from the model" when the response
 *   contains no usable choice.
 */
export async function handleNonStreamingResponse(
	hf: HfInference,
	conversation: Conversation,
	systemMessage?: ChatCompletionInputMessage
): Promise<ChatCompletionInputMessage> {
	const messages = buildMessages(conversation, systemMessage);

	const response = await hf.chatCompletion({
		// Pass the model identifier, matching the streaming path, rather than
		// the whole model entry object.
		model: conversation.model.id,
		messages,
		temperature: conversation.config.temperature,
		max_tokens: conversation.config.maxTokens,
	});

	const message = response.choices?.[0]?.message;
	if (!message) {
		throw new Error("No response from the model");
	}
	return message;
}

/**
 * Reports whether the model's chat template mentions "system".
 *
 * @param model - Model entry whose `tokenizerConfig.chat_template` is inspected.
 * @returns `true` when the chat template contains the substring "system";
 *   `false` when it does not or when no tokenizer config / chat template is present.
 */
export function isSystemPromptSupported(model: ModelEntryWithTokenizer): boolean {
	return model.tokenizerConfig?.chat_template?.includes("system") ?? false;
}