import { type ChatCompletionInputMessage, type ChatCompletionOutputMessage } from '@huggingface/tasks';
import { HfInference } from '@huggingface/inference';

export function createHfInference(token: string): HfInference {
	return new HfInference(token);
}

export function prepareRequestMessages(
	systemMessage: ChatCompletionInputMessage,
	messages: ChatCompletionInputMessage[]
): ChatCompletionInputMessage[] {
	// Prepend the system message only when it has non-empty content.
	return [...(systemMessage.content?.length ? [systemMessage] : []), ...messages];
}

export async function handleStreamingResponse(
	hf: HfInference,
	model: string,
	messages: ChatCompletionInputMessage[],
	temperature: number,
	maxTokens: number,
	jsonMode: boolean,
	onChunk: (content: string) => void,
	abortController: AbortController
): Promise<void> {
	let out = '';
	try {
		for await (const chunk of hf.chatCompletionStream(
			{
				model: model,
				messages: messages,
				temperature: temperature,
				max_tokens: maxTokens,
				json_mode: jsonMode
			},
			{ signal: abortController.signal }
		)) {
			if (chunk.choices && chunk.choices.length > 0 && chunk.choices[0]?.delta?.content) {
				// Accumulate the delta and hand the full text so far to the caller.
				out += chunk.choices[0].delta.content;
				onChunk(out);
			}
		}
	} catch (error) {
		// An abort is expected when the caller cancels the request; anything else is re-thrown.
		if (error instanceof Error && error.name === 'AbortError') {
			console.log('Stream aborted');
		} else {
			throw error;
		}
	}
}

export async function handleNonStreamingResponse(
	hf: HfInference,
	model: string,
	messages: ChatCompletionInputMessage[],
	temperature: number,
	maxTokens: number,
	jsonMode: boolean
): Promise<ChatCompletionOutputMessage> {
	const response = await hf.chatCompletion({
		model: model,
		messages: messages,
		temperature: temperature,
		max_tokens: maxTokens,
		json_mode: jsonMode
	});

	if (response.choices && response.choices.length > 0) {
		return response.choices[0].message;
	}
	throw new Error('No response from the model');
}
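
/*
 * Illustrative usage sketch, not part of the original module: it wires the helpers
 * above together for a single streaming request. The function name, model id, and
 * sampling values below are assumptions chosen for the example, not values this
 * module requires.
 */
export async function exampleStreamingChat(token: string): Promise<void> {
	const hf = createHfInference(token);
	const requestMessages = prepareRequestMessages(
		{ role: 'system', content: 'You are a helpful assistant.' },
		[{ role: 'user', content: 'Hello!' }]
	);
	const abortController = new AbortController();
	await handleStreamingResponse(
		hf,
		'meta-llama/Meta-Llama-3-8B-Instruct', // placeholder model id
		requestMessages,
		0.7, // temperature
		512, // maxTokens
		false, // jsonMode
		(text) => console.log(text), // onChunk receives the full accumulated text
		abortController
	);
}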