neetnestor's picture
feat: clone with llama 3.2 models
2819fb4
import * as webllm from "@mlc-ai/web-llm";
import rehypeStringify from "rehype-stringify";
import remarkFrontmatter from "remark-frontmatter";
import remarkGfm from "remark-gfm";
import RemarkBreaks from "remark-breaks";
import remarkParse from "remark-parse";
import remarkRehype from "remark-rehype";
import RehypeKatex from "rehype-katex";
import { unified } from "unified";
import remarkMath from "remark-math";
import rehypeHighlight from "rehype-highlight";
/*************** WebLLM logic ***************/
const messageFormatter = unified()
.use(remarkParse)
.use(remarkFrontmatter)
.use(remarkMath)
.use(remarkGfm)
.use(RemarkBreaks)
.use(remarkRehype)
.use(rehypeStringify)
.use(RehypeKatex)
.use(rehypeHighlight, {
detect: true,
ignoreMissing: true,
});
const messages = [
{
content: "You are a helpful AI agent helping users.",
role: "system",
},
];
// Callback function for initializing progress
function updateEngineInitProgressCallback(report) {
console.log("initialize", report.progress);
document.getElementById("download-status").textContent = report.text;
}
// Create engine instance
let modelLoaded = false;
const engine = new webllm.MLCEngine();
engine.setLogLevel("INFO");
engine.setInitProgressCallback(updateEngineInitProgressCallback);
async function initializeWebLLMEngine() {
const model_size = document.getElementById("model_size").value;
const quantization = document.getElementById("quantization").value;
const context_window_size = parseInt(document.getElementById("context").value);
const temperature = parseFloat(document.getElementById("temperature").value);
const top_p = parseFloat(document.getElementById("top_p").value);
const presence_penalty = parseFloat(document.getElementById("presence_penalty").value);
const frequency_penalty = parseFloat(document.getElementById("frequency_penalty").value);
document.getElementById("download-status").classList.remove("hidden");
const selectedModel = `Llama-3.2-${model_size}-Instruct-${quantization}-MLC`;
const config = {
temperature,
top_p,
frequency_penalty,
presence_penalty,
context_window_size,
};
console.log(`Loading Model: ${selectedModel}`);
console.log(`Config: ${JSON.stringify(config)}`);
await engine.reload(selectedModel, config);
modelLoaded = true;
}
async function streamingGenerating(messages, onUpdate, onFinish, onError) {
try {
let curMessage = "";
let usage;
const completion = await engine.chat.completions.create({
stream: true,
messages,
stream_options: { include_usage: true },
});
for await (const chunk of completion) {
const curDelta = chunk.choices[0]?.delta.content;
if (curDelta) {
curMessage += curDelta;
}
if (chunk.usage) {
usage = chunk.usage;
}
onUpdate(curMessage);
}
const finalMessage = await engine.getMessage();
onFinish(finalMessage, usage);
} catch (err) {
onError(err);
}
}
/*************** UI logic ***************/
function onMessageSend() {
if (!modelLoaded) {
return;
}
const input = document.getElementById("user-input").value.trim();
const message = {
content: input,
role: "user",
};
if (input.length === 0) {
return;
}
document.getElementById("send").disabled = true;
messages.push(message);
appendMessage(message);
document.getElementById("user-input").value = "";
document
.getElementById("user-input")
.setAttribute("placeholder", "Generating...");
const aiMessage = {
content: "typing...",
role: "assistant",
};
appendMessage(aiMessage);
const onFinishGenerating = async (finalMessage, usage) => {
updateLastMessage(finalMessage);
document.getElementById("send").disabled = false;
const usageText =
`prompt_tokens: ${usage.prompt_tokens}, ` +
`completion_tokens: ${usage.completion_tokens}, ` +
`prefill: ${usage.extra.prefill_tokens_per_s.toFixed(4)} tokens/sec, ` +
`decoding: ${usage.extra.decode_tokens_per_s.toFixed(4)} tokens/sec`;
document.getElementById("chat-stats").classList.remove("hidden");
document.getElementById("chat-stats").textContent = usageText;
document
.getElementById("user-input")
.setAttribute("placeholder", "Type a message...");
};
streamingGenerating(
messages,
updateLastMessage,
onFinishGenerating,
console.error
);
}
function appendMessage(message) {
const chatBox = document.getElementById("chat-box");
const container = document.createElement("div");
container.classList.add("message-container");
const newMessage = document.createElement("div");
newMessage.classList.add("message");
newMessage.textContent = message.content;
if (message.role === "user") {
container.classList.add("user");
} else {
container.classList.add("assistant");
}
container.appendChild(newMessage);
chatBox.appendChild(container);
chatBox.scrollTop = chatBox.scrollHeight; // Scroll to the latest message
}
async function updateLastMessage(content) {
const formattedMessage = await messageFormatter.process(content);
const messageDoms = document
.getElementById("chat-box")
.querySelectorAll(".message");
const lastMessageDom = messageDoms[messageDoms.length - 1];
lastMessageDom.innerHTML = formattedMessage;
}
/*************** UI binding ***************/
document.addEventListener('DOMContentLoaded', function() {
document.getElementById("download").addEventListener("click", function () {
document.getElementById("send").disabled = true;
initializeWebLLMEngine().then(() => {
document.getElementById("send").disabled = false;
});
});
document.getElementById("send").addEventListener("click", function () {
onMessageSend();
});
document.getElementById("user-input").addEventListener("keydown", (event) => {
if (event.key === "Enter") {
onMessageSend();
}
});
document.getElementById('model_size').addEventListener('change', function() {
const quantizationSelect = document.getElementById('quantization');
const selectedSize = document.getElementById('model_size').value;
const q0Options = Array.from(quantizationSelect.options).filter(option =>
option.value === 'q0f32' || option.value === 'q0f16'
);
if (selectedSize === '3B') {
q0Options.forEach(option => option.style.display = 'none');
} else {
q0Options.forEach(option => option.style.display = '');
}
if (quantizationSelect.selectedOptions[0].style.display === 'none') {
quantizationSelect.value = quantizationSelect.options[0].value;
}
});
});
window.onload = function () {
document.getElementById("download").textContent = "Download";
document.getElementById("download").disabled = false;
}