|
// Request parameters merged into every completion call; values supplied by
// the caller take precedence over these.
const paramDefaults = {
  stream: true,
  temperature: 0.2,
};

// Module-level cache of the server's generation settings. Stays `null` until
// some code in this module assigns it.
let generation_settings = null;
|
|
|
/**
 * Error raised when a completion request fails.
 *
 * @param {string} message - Human-readable description of the failure.
 * @param {string} [name='CompletionError'] - Value for `error.name`, so callers
 *   can branch on the failure category (e.g. 'ServerError').
 * @param {*} [data] - Optional extra payload describing the failure; exposed
 *   as `error.data` (previously accepted but silently discarded).
 */
export class CompletionError extends Error {
  constructor(message, name, data) {
    super(message);
    // Fall back to the class name so `error.name` is never undefined.
    this.name = name ?? 'CompletionError';
    this.data = data;
  }
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Streams a completion from the server as an async generator.
 *
 * POSTs `{ ...paramDefaults, ...params, prompt }` as JSON to
 * `config.api_url + (config.endpoint || '/completion')` and parses the
 * response body as a line-oriented `field: value` event stream.
 *
 * @param {string} prompt - Prompt text sent as the `prompt` field.
 * @param {object} [params] - Extra request parameters; `params.api_key`, when
 *   set, is also sent as a `Bearer` Authorization header.
 * @param {object} [config] - `controller` (AbortController to use),
 *   `api_url` (base URL, trailing slashes stripped) and `endpoint`.
 * @yields {object} The stream-read result object with the matched fields
 *   attached; `data` holds the JSON-parsed payload of the current `data:` line.
 * @returns {string} Concatenation of every yielded `data.content`.
 * @throws {CompletionError} Named 'ServerError' when the HTTP status is not 200.
 * @throws {Error} 'slot unavailable' when the stream reports that error; any
 *   read/parse error is logged (unless it is an AbortError) and rethrown.
 */
export async function* llama(prompt, params = {}, config = {}) {
  const controller = config.controller || new AbortController();
  const baseUrl = config.api_url?.replace(/\/+$/, '') || "";

  const completionParams = { ...paramDefaults, ...params, prompt };

  const response = await fetch(`${baseUrl}${config.endpoint || '/completion'}`, {
    method: 'POST',
    body: JSON.stringify(completionParams),
    headers: {
      'Connection': 'keep-alive',
      'Content-Type': 'application/json',
      'Accept': 'text/event-stream',
      ...(params.api_key ? { 'Authorization': `Bearer ${params.api_key}` } : {}),
    },
    signal: controller.signal,
  });

  if (response.status !== 200) {
    // Always fail on a non-200 status: the body has been consumed by json(),
    // so there is nothing left to stream even when no error message is given.
    let message;
    try {
      const body = await response.json();
      message = body?.error?.message;
    } catch (err) {
      message = err.message;
    }
    throw new CompletionError(message || `HTTP error ${response.status}`, 'ServerError');
  }

  const reader = response.body.getReader();
  const decoder = new TextDecoder();

  // No `g` flag: a global regex keeps `lastIndex` between exec() calls, which
  // makes it skip a field line that directly follows another one. The `m`
  // flag is kept so `$` still matches before a trailing '\r'.
  const fieldPattern = /^(\S+):\s(.*)$/m;

  let content = "";
  let leftover = "";

  try {
    let cont = true;

    while (cont) {
      const result = await reader.read();
      if (result.done) {
        break;
      }

      // `stream: true` buffers an incomplete multi-byte sequence at the end
      // of a chunk instead of decoding it to U+FFFD.
      const text = leftover + decoder.decode(result.value, { stream: true });

      // The last piece is either an unfinished line (carried over to the next
      // read) or the empty string that follows a trailing line break.
      const lines = text.split('\n');
      leftover = lines.pop();

      for (const line of lines) {
        const match = fieldPattern.exec(line);
        if (!match) {
          continue;
        }

        const [, field, value] = match;
        result[field] = value;

        // Act only on the field this line carries, so a value left on
        // `result` by an earlier line of the same chunk is never re-parsed.
        if (field === 'data') {
          if (value === '[DONE]') {
            cont = false;
            break;
          }

          if (value) {
            result.data = JSON.parse(value);
            content += result.data.content;

            yield result;

            if (result.data.stop) {
              if (result.data.generation_settings) {
                generation_settings = result.data.generation_settings;
              }
              cont = false;
              break;
            }
          }
        } else if (field === 'error' && value) {
          let parsed;
          try {
            parsed = JSON.parse(value);
          } catch (e) {
            console.error(`llama.cpp error ${value}`);
            continue;
          }
          result.error = parsed;

          const message = parsed?.message;
          if (typeof message !== 'string') {
            console.error(`llama.cpp error ${value}`);
          } else if (message.includes('slot unavailable')) {
            // Thrown outside the JSON-parse try/catch so it reaches callers.
            throw new Error('slot unavailable');
          } else {
            console.error(`llama.cpp error [${parsed.code} - ${parsed.type}]: ${message}`);
          }
        }
      }
    }
  } catch (e) {
    if (e.name !== 'AbortError') {
      console.error("llama error: ", e);
    }
    throw e;
  } finally {
    // Runs on normal completion, on error, and when the consumer stops
    // iterating early.
    controller.abort();
  }

  return content;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Runs `llama()` in the background and reports progress through events.
 *
 * Events dispatched on the returned target (payload in `event.detail`):
 *  - "message": the `data` object of each chunk that has one
 *  - "generation_settings": `data.generation_settings`, when present
 *  - "timings": `data.timings`, when present
 *  - "done": `{ content }` with the concatenated `data.content` values
 *  - "error": the error thrown by `llama()`; "done" is not sent in that case
 *
 * @param {string} prompt - Forwarded to `llama()`.
 * @param {object} [params] - Forwarded to `llama()`.
 * @param {object} [config] - Forwarded to `llama()`.
 * @returns {EventTarget} Target on which the events above are dispatched.
 */
export const llamaEventTarget = (prompt, params = {}, config = {}) => {
  const eventTarget = new EventTarget();
  const emit = (type, detail) => eventTarget.dispatchEvent(new CustomEvent(type, { detail }));

  (async () => {
    let content = "";
    try {
      for await (const chunk of llama(prompt, params, config)) {
        const data = chunk.data;
        // Every access below needs `data`; skip chunks without it instead of
        // dereferencing undefined.
        if (!data) {
          continue;
        }

        content += data.content;
        emit("message", data);

        if (data.generation_settings) {
          emit("generation_settings", data.generation_settings);
        }
        if (data.timings) {
          emit("timings", data.timings);
        }
      }
    } catch (error) {
      // Surface the failure as an event rather than leaving an unhandled
      // rejection on this detached promise.
      emit("error", error);
      return;
    }
    emit("done", { content });
  })();

  return eventTarget;
};
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Runs `llama()` to completion and resolves with the generated text.
 *
 * Written as a plain async function instead of wrapping an async executor in
 * `new Promise(...)`: errors thrown by `llama()` reject the returned promise
 * directly.
 *
 * @param {string} prompt - Forwarded to `llama()`.
 * @param {object} [params] - Forwarded to `llama()`.
 * @param {object} [config] - Forwarded to `llama()`.
 * @returns {Promise<string>} Concatenation of every chunk's `data.content`.
 */
export const llamaPromise = async (prompt, params = {}, config = {}) => {
  let content = "";
  for await (const chunk of llama(prompt, params, config)) {
    content += chunk.data.content;
  }
  return content;
};
|
|
|
|
|
|
|
|
|
/**
 * Callback-style wrapper around `llama()`.
 *
 * @param {object} params - Request parameters; `params.prompt` is used as the prompt.
 * @param {AbortController} controller - Passed to `llama()` as `config.controller`.
 * @param {Function} callback - Invoked with each chunk yielded by `llama()`.
 * @returns {Promise<void>} Settles once the stream has been fully consumed.
 */
export const llamaComplete = async (params, controller, callback) => {
  const stream = llama(params.prompt, params, { controller });
  for await (const piece of stream) {
    callback(piece);
  }
};
|
|
|
|
|
/**
 * Returns the generation settings, fetching them from the server on first use.
 *
 * When the module-level `generation_settings` cache is empty, requests
 * `${config.api_url}/props` (trailing slashes stripped from the base URL) and
 * stores its `default_generation_settings` in the cache.
 *
 * @param {object} [config] - Optional `api_url` base URL.
 * @returns {Promise<object>} The cached generation settings.
 */
export const llamaModelInfo = async (config = {}) => {
  // Already known: no request needed.
  if (generation_settings) {
    return generation_settings;
  }

  const baseUrl = config.api_url?.replace(/\/+$/, '') || "";
  const response = await fetch(`${baseUrl}/props`);
  const props = await response.json();

  generation_settings = props.default_generation_settings;
  return generation_settings;
};
|
|