<!doctype html>
<html>
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Candle Llama2.c Rust/WASM</title>
<style>
@import url("https://fonts.googleapis.com/css2?family=Source+Code+Pro:wght@200;300;400&family=Source+Sans+3:wght@100;200;300;400;500;600;700;800;900&display=swap");
html,
body {
font-family: "Source Sans 3", sans-serif;
}
code,
output,
select,
pre {
font-family: "Source Code Pro", monospace;
}
</style>
<script src="https://cdn.tailwindcss.com"></script>
<script type="module">
// base url for the model weights
const MODELS_BASE_URL =
"https://huggingface.co/karpathy/tinyllamas/resolve/main";
// available models and their maximum sequence lengths
const MODELS = {
stories15M: {
url: "stories15M.bin",
seq_len: 256,
},
stories42M: {
url: "stories42M.bin",
seq_len: 1024,
},
stories110M: {
url: "stories110M.bin",
seq_len: 1024,
},
};
const llamaWorker = new Worker("./llama2cWorker.js", {
type: "module",
});
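// Message protocol with llama2cWorker.js as exercised by this page (the
// worker source is authoritative; the shapes below are inferred from the
// postMessage calls and handlers in this file):
//   to the worker:
//     { command: "start", weightsURL, modelID, tokenizerURL, prompt,
//       temp, repeatPenalty, seed, maxSeqLen }
//     { command: "abort" }
//   from the worker:
//     { status: "loading", message }
//     { status: "generating", message, prompt, sentence, tokensSec, totalTime }
//     { status: "complete", ... }
//     { error }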
async function generateSequence(controller) {
const getValue = (id) => document.querySelector(`#${id}`).value;
const modelID = getValue("model");
const model = MODELS[modelID];
const weightsURL = `${MODELS_BASE_URL}/${model.url}`;
const prompt = getValue("prompt");
const temperature = getValue("temperature");
const repeatPenalty = getValue("repeat_penalty");
const seed = getValue("seed");
const maxSeqLen = getValue("max-seq");
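// Reflect worker status updates in the status, generation and counter elements.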
function updateStatus(data) {
const outStatus = document.querySelector("#output-status");
const outGen = document.querySelector("#output-generation");
const outCounter = document.querySelector("#output-counter");
switch (data.status) {
case "loading":
outStatus.hidden = false;
outStatus.textContent = data.message;
outGen.hidden = true;
outCounter.hidden = true;
break;
case "generating":
const { message, prompt, sentence, tokensSec, totalTime } = data;
outStatus.hidden = true;
outCounter.hidden = false;
outGen.hidden = false;
outGen.innerHTML = `<span class="font-semibold">${prompt}</span>${sentence.replace(
/\<s\>|\<\/s\>/g,
""
)}`;
outCounter.innerHTML = `${(totalTime / 1000).toFixed(
2
)}s (${tokensSec.toFixed(2)} tok/s)`;
break;
case "complete":
outStatus.hidden = true;
outGen.hidden = false;
break;
}
}
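// Ask the worker to start generating; resolve once it reports "complete",
// reject if it reports an error. Aborting the controller forwards an
// "abort" command to the worker.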
return new Promise((resolve, reject) => {
llamaWorker.postMessage({
weightsURL,
modelID,
tokenizerURL: "tokenizer.json",
prompt,
temp: temperature,
repeatPenalty,
seed: BigInt(seed),
maxSeqLen,
command: "start",
});
const handleAbort = () => {
llamaWorker.postMessage({ command: "abort" });
};
const handleMessage = (event) => {
const { status, error } = event.data;
if (status) updateStatus(event.data);
if (error) {
llamaWorker.removeEventListener("message", handleMessage);
reject(new Error(error));
}
if (status === "complete") {
llamaWorker.removeEventListener("message", handleMessage);
resolve(event.data);
}
};
controller.signal.addEventListener("abort", handleAbort);
llamaWorker.addEventListener("message", handleMessage);
});
}
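// Wire up the form, prompt box, buttons and model selector.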
const form = document.querySelector("#form");
const prompt = document.querySelector("#prompt");
const clearBtn = document.querySelector("#clear-btn");
const runBtn = document.querySelector("#run");
const modelSelect = document.querySelector("#model");
let runController = new AbortController();
let isRunning = false;
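// Changing the model caps the max-seq slider at that model's context length.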
modelSelect.addEventListener("change", (e) => {
const model = MODELS[e.target.value];
const maxSeqInput = document.querySelector("#max-seq");
maxSeqInput.max = model.seq_len;
maxSeqInput.value = model.seq_len;
maxSeqInput.nextElementSibling.value = model.seq_len;
});
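// Submit toggles between starting a generation and aborting the one in flight.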
form.addEventListener("submit", async (e) => {
e.preventDefault();
if (isRunning) {
stopRunning();
} else {
startRunning();
await generateSequence(runController);
stopRunning();
}
});
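// Toggle the Run/Stop button; stopRunning aborts the current controller and
// creates a fresh one for the next run.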
function startRunning() {
isRunning = true;
runBtn.textContent = "Stop";
}
function stopRunning() {
runController.abort();
runController = new AbortController();
runBtn.textContent = "Run";
isRunning = false;
}
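// Clearing the prompt also aborts any in-flight generation.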
clearBtn.addEventListener("click", (e) => {
e.preventDefault();
prompt.value = "";
clearBtn.classList.add("invisible");
runBtn.disabled = true;
stopRunning();
});
prompt.addEventListener("input", (e) => {
runBtn.disabled = false;
if (e.target.value.length > 0) {
clearBtn.classList.remove("invisible");
} else {
clearBtn.classList.add("invisible");
}
});
</script>
</head>
<body class="container max-w-4xl mx-auto p-4 text-gray-800">
<main class="grid grid-cols-1 gap-8 relative">
<span class="absolute text-5xl -ml-[1em]"> 🕯️ </span>
<div>
<h1 class="text-5xl font-bold">Candle Llama2.c</h1>
<h2 class="text-2xl font-bold">Rust/WASM Demo</h2>
<p class="max-w-lg">
<a
href="https://github.com/karpathy/llama2.c"
target="_blank"
class="underline hover:text-blue-500 hover:no-underline"
>Llama2.c</a
>
is Andrej Karpathy's C implementation of the Llama 2 LLM.
This demo uses
<a
href="https://github.com/huggingface/candle/"
target="_blank"
class="underline hover:text-blue-500 hover:no-underline"
>Candle
</a>
to run Llama2.c in the browser using Rust/WASM.
</p>
</div>
<div>
<label for="model" class="font-medium">Models Options: </label>
<select
id="model"
class="border-2 border-gray-500 rounded-md font-light"
>
<option value="stories15M" selected>stories 15M (60.8 MB)</option>
<option value="stories42M">stories 42M (167 MB)</option>
<option value="stories110M">stories 110M (438 MB)</option>
</select>
</div>
<form
id="form"
class="flex text-normal px-1 py-1 border border-gray-700 rounded-md items-center"
>
<input type="submit" hidden />
<input
type="text"
id="prompt"
class="font-light w-full px-3 py-2 mx-1 resize-none outline-none"
placeholder="Add your prompt here..."
value="Once upon a time"
/>
<button id="clear-btn">
<svg
fill="none"
xmlns="http://www.w3.org/2000/svg"
width="40"
viewBox="0 0 70 40"
>
<path opacity=".5" d="M39 .2v40.2" stroke="#1F2937" />
<path
d="M1.5 11.5 19 29.1m0-17.6L1.5 29.1"
opacity=".5"
stroke="#1F2937"
stroke-width="2"
/>
</svg>
</button>
<button
id="run"
class="bg-gray-700 hover:bg-gray-800 text-white font-normal py-2 w-16 rounded disabled:bg-gray-300 disabled:cursor-not-allowed"
>
Run
</button>
</form>
<div class="grid grid-cols-3 max-w-md items-center gap-3">
<label class="text-sm font-medium" for="max-seq">Maximum length </label>
<input
type="range"
id="max-seq"
name="temperature"
min="1"
max="256"
step="1"
value="200"
oninput="this.nextElementSibling.value = Number(this.value)"
/>
<output
class="text-xs w-[50px] text-center font-light px-1 py-1 border border-gray-700 rounded-md"
>
200</output
>
<label class="text-sm font-medium" for="temperature">Temperature</label>
<input
type="range"
id="temperature"
name="temperature"
min="0"
max="2"
step="0.01"
value="0.50"
oninput="this.nextElementSibling.value = Number(this.value).toFixed(2)"
/>
<output
class="text-xs w-[50px] text-center font-light px-1 py-1 border border-gray-700 rounded-md"
>
0.50</output
>
<label class="text-sm font-medium" for="repeat_penalty"
>Repeat Penalty</label
>
<input
type="range"
id="repeat_penalty"
name="repeat_penalty"
min="-2"
max="2"
step="0.01"
value="1.10"
oninput="this.nextElementSibling.value = Number(this.value).toFixed(2)"
/>
<output
class="text-xs w-[50px] text-center font-light px-1 py-1 border border-gray-700 rounded-md"
>1.10</output
>
<label class="text-sm font-medium" for="seed">Seed</label>
<input
type="number"
id="seed"
name="seed"
value="299792458"
class="font-light border border-gray-700 text-right rounded-md p-2"
/>
<button
id="run"
onclick="document.querySelector('#seed').value = BigInt(Math.floor(Math.random() * 2**64-1))"
class="bg-gray-700 hover:bg-gray-800 text-white font-normal py-1 w-[50px] rounded disabled:bg-gray-300 disabled:cursor-not-allowed text-sm"
>
Rand
</button>
</div>
<div>
<h3 class="font-medium">Generation:</h3>
<div
class="min-h-[250px] bg-slate-100 text-gray-500 p-4 rounded-md flex flex-col gap-2"
>
<div
id="output-counter"
hidden
class="ml-auto font-semibold grid-rows-1 text-sm"
></div>
<p hidden id="output-generation" class="grid-rows-2"></p>
<span id="output-status" class="m-auto font-light"
>No output yet</span
>
</div>
</div>
</main>
</body>
</html>