import { existsSync, mkdirSync, writeFileSync } from 'fs';
import { resolve } from 'path';

import { d as private_env } from './shared-server-49TKSBDM.js';
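
// Thin client for a llama.cpp HTTP server: health checks, tokenization
// helpers, and streaming completions over server-sent events.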
class LlamaCppService {
  url = "";

  constructor(url) {
    this.url = url;
  }
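
  // Queries GET /health and returns the server's reported status when it is
  // one of the known values; anything else maps to "unavailable".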
  async health() {
    try {
      const r = await fetch(`${this.url}/health`, {
        method: "GET",
        headers: {
          "Content-Type": "application/json"
        }
      });
      const data = await r.json();
      if (data.status === "ok" || data.status === "error" || data.status === "loading model") {
        return data.status;
      }
    } catch (e) {
      // Network failure or non-JSON response: treat the server as unreachable.
    }
    return "unavailable";
  }
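
  // POSTs the prompt to /tokenize and returns the server's token ids,
  // or null if the endpoint is missing or the call fails.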
  async tokenize(prompt, abortController) {
    const response = await fetch(`${this.url}/tokenize`, {
      method: "POST",
      headers: {
        "Content-Type": "application/json"
      },
      body: JSON.stringify({ content: prompt }),
      signal: abortController.signal
    });
    if (response.ok) {
      const data = await response.json();
      if (data.tokens) {
        return data.tokens;
      }
    } else if (response.status === 404) {
      console.error("Tokenization endpoint not found (404).");
    } else {
      console.error(`Failed to tokenize: ${await response.text()}`);
    }
    return null;
  }
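
  // POSTs token ids to /detokenize and returns the reconstructed text,
  // trimmed, or null if the endpoint is missing or the call fails.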
  async detokenize(tokens, abortController) {
    const response = await fetch(`${this.url}/detokenize`, {
      method: "POST",
      headers: {
        "Content-Type": "application/json"
      },
      body: JSON.stringify({ tokens }),
      signal: abortController.signal
    });
    if (response.ok) {
      const data = await response.json();
      if (data.content) {
        return data.content.trim();
      }
    } else if (response.status === 404) {
      console.error("Detokenization endpoint not found (404).");
    } else {
      console.error(`Failed to detokenize: ${await response.text()}`);
    }
    return null;
  }
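
  // Builds a one-shot completion closure: caps the prompt at 30,700 tokens
  // (with a character-based fallback), wraps it in [INST] tags, and streams
  // tokens from /completion as an async generator.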
  predict(prompt, { abortController }) {
    return async ({ prompt: prompt2 }) => {
      console.log(prompt2.length); // prompt length in characters, pre-truncation
      // Truncate via tokenize/detokenize when the server supports it;
      // otherwise fall back to a rough character cut.
      let tokens = await this.tokenize(prompt2, abortController);
      if (tokens !== null) {
        console.log("tokens: " + tokens.length);
        tokens = tokens.slice(0, 30700);
        const detokenizedPrompt = await this.detokenize(tokens, abortController);
        if (detokenizedPrompt !== null) {
          prompt2 = detokenizedPrompt;
        } else {
          prompt2 = prompt2.substring(0, 32768);
        }
      } else {
        prompt2 = prompt2.substring(0, 32768);
      }
      console.log(prompt2.length); // prompt length in characters, post-truncation
      const r = await fetch(`${this.url}/completion`, {
        method: "POST",
        headers: {
          "Content-Type": "application/json",
          "Accept": "text/event-stream"
        },
        body: JSON.stringify({
          stream: true,
          n_predict: 2000,
          temperature: 0.2,
          stop: ["</s>", "bot:", "user:"],
          repeat_penalty: 1,
          top_k: 40,
          top_p: 0.95,
          min_p: 0.05,
          seed: 42,
          n_keep: 0,
          cache_prompt: false,
          prompt: "[INST]" + prompt2 + "[/INST]"
        }),
        signal: abortController.signal
      });
      if (!r.ok) {
        throw new Error(`Failed to generate text: ${await r.text()}`);
      }
      const decoder = new TextDecoderStream();
      const reader = r.body?.pipeThrough(decoder).getReader();
      const t = this; // capture the service; `this` is not bound inside the generator
      return async function* () {
        let generatedText = "";
        let tokenId = 0;
        while (true) {
          const out = await reader?.read() ?? { done: false, value: undefined };
          if (out.done) {
            reader?.cancel();
            t.createLogFile(JSON.stringify({
              request: prompt2,
              response: generatedText
            }), "llm-service");
            return;
          }
          if (!out.value) {
            reader?.cancel();
            return;
          }
          let tokenValue = "";
          if (out.value.startsWith("data: ")) {
            try {
              const data2 = JSON.parse(out.value.slice(6));
              tokenValue = data2.content ?? "";
            } catch (e) {
              // Ignore partial or malformed SSE chunks.
            }
          } else if (private_env.LLM_API_VERSION === "v1") {
            tokenValue = out.value;
          }
          const text = tokenValue.replace("</s>", "");
          generatedText += text;
          yield {
            token: {
              id: tokenId++,
              text,
              logprob: 0,
              special: false
            },
            generated_text: null,
            details: null
          };
        }
      }();
    };
  }
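
  // Builds a chat closure: flattens the turn history into a single
  // [INST]-formatted prompt, keeps the most recent 30,700 tokens, and
  // streams tokens from /completion as an async generator.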
  conversation(history, { abortController }) {
    return async ({ history: history2 }) => {
      let prompt = this.formatPrompt(history2);
      console.log(prompt.length); // prompt length in characters, pre-truncation
      // Keep the most recent part of the conversation: truncate from the
      // start via tokenize/detokenize, with a character-based fallback.
      let tokens = await this.tokenize(prompt, abortController);
      if (tokens !== null) {
        tokens = tokens.slice(-30700);
        const detokenizedPrompt = await this.detokenize(tokens, abortController);
        if (detokenizedPrompt !== null) {
          prompt = detokenizedPrompt;
        } else {
          prompt = prompt.substring(Math.max(0, prompt.length - 30700));
        }
      } else {
        prompt = prompt.substring(Math.max(0, prompt.length - 30700));
      }
      const r = await fetch(`${this.url}/completion`, {
        method: "POST",
        headers: {
          "Content-Type": "application/json",
          "Accept": "text/event-stream"
        },
        body: JSON.stringify({
          stream: true,
          n_predict: 2000,
          temperature: 0.2,
          stop: ["</s>", "bot:", "user:"],
          repeat_penalty: 1,
          top_k: 40,
          top_p: 0.95,
          min_p: 0.05,
          seed: 42,
          n_keep: 0,
          cache_prompt: false,
          prompt
        }),
        signal: abortController.signal
      });
      if (!r.ok) {
        throw new Error(`Failed to generate text: ${await r.text()}`);
      }
      const decoder = new TextDecoderStream();
      const reader = r.body?.pipeThrough(decoder).getReader();
      return async function* () {
        let generatedText = "";
        let tokenId = 0;
        while (true) {
          const out = await reader?.read() ?? { done: false, value: undefined };
          if (out.done) {
            reader?.cancel();
            return;
          }
          if (!out.value) {
            reader?.cancel();
            return;
          }
          let tokenValue = "";
          if (out.value.startsWith("data: ")) {
            try {
              const data2 = JSON.parse(out.value.slice(6));
              tokenValue = data2.content ?? "";
            } catch (e) {
              // Ignore partial or malformed SSE chunks.
            }
          } else if (private_env.LLM_API_VERSION === "v1") {
            tokenValue = out.value;
          }
          const text = tokenValue.replace("</s>", "");
          generatedText += text;
          yield {
            token: {
              id: tokenId++,
              text,
              logprob: 0,
              special: false
            },
            generated_text: null,
            details: null
          };
        }
      }();
    };
  }
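
  // Serializes [user, bot] turn pairs into the Llama/Mistral instruction
  // format: <s>[INST] user [/INST] bot</s> ...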
  formatPrompt(history) {
    let prompt = "<s>";
    for (const [userPrompt, botResponse] of history) {
      prompt += `[INST] ${userPrompt} [/INST]`;
      if (botResponse) {
        prompt += ` ${botResponse}</s> `;
      }
    }
    return prompt;
  }
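
  // Writes the given text to a timestamped JSON file under
  // LOGS_ROOT_FOLDER/llama; failures are logged and swallowed so that
  // logging never breaks generation.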
  createLogFile(text, namePrefix = "") {
    try {
      const logsDirectory = resolve(private_env.LOGS_ROOT_FOLDER + "/llama");
      if (!existsSync(logsDirectory)) {
        mkdirSync(logsDirectory, { recursive: true });
      }
      const timestamp = new Date().toISOString().replace(/[:.]/g, "");
      const logFilePath = resolve(logsDirectory, `${namePrefix}${timestamp}.json`);
      writeFileSync(logFilePath, text);
      console.log(`Log file created: ${logFilePath}`);
    } catch (e) {
      console.error("Failed to create log file in llama service");
      console.error(e);
    }
  }
}

export { LlamaCppService as L };
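
// Usage sketch (assumptions: a llama.cpp server at localhost:8080 and a
// Node runtime with global fetch); not part of the bundle itself:
//
//   const llm = new LlamaCppService("http://localhost:8080");
//   if (await llm.health() === "ok") {
//     const abortController = new AbortController();
//     const generate = llm.predict("", { abortController });
//     for await (const out of await generate({ prompt: "Hello" })) {
//       process.stdout.write(out.token.text);
//     }
//   }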