Implemented streaming chat completion with progress updates.
src/lib/components/Playground/Playground.svelte
CHANGED
```diff
@@ -23,6 +23,7 @@
 	let maxTokens = 32000;
 
 	let loading = false;
+	let streamingMessage: Message | null = null;
 
 	function addMessage() {
 		messages = [
@@ -56,22 +57,34 @@
 		}
 		(document.activeElement as HTMLElement).blur();
 		loading = true;
+		streamingMessage = { role: 'assistant', content: '' };
+		messages = [...messages, streamingMessage];
+
 		try {
 			const hf = new HfInference(hfToken);
 
-			const …
+			const stream = await hf.textGenerationStream({
 				model: currentModel,
-				…
-				…
-				…
-				…
+				inputs: messages.map(m => m.content).join('\n'),
+				parameters: {
+					max_new_tokens: maxTokens,
+					temperature: temperature,
+					return_full_text: false
+				}
 			});
 
-			…
+			for await (const response of stream) {
+				if (streamingMessage) {
+					streamingMessage.content += response.token.text;
+					messages = [...messages];
+				}
+			}
 		} catch (error) {
 			alert('error: ' + error.message);
+		} finally {
+			loading = false;
+			streamingMessage = null;
 		}
 	}
 
 	$: console.log(messages);
```
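The key to the incremental rendering is the `messages = [...messages]` reassignment inside the token loop: Svelte (v3/v4) reactivity fires on assignment, not on mutation, so appending to `streamingMessage.content` alone would update the data without re-rendering the chat. A minimal sketch of the pattern, assuming a hypothetical `Message` shape (the real type is defined elsewhere in the component and not shown in this diff):

```ts
// Assumed shape of a chat message; the actual `Message` type lives
// elsewhere in the app and may differ.
type Message = { role: 'user' | 'assistant' | 'system'; content: string };

let messages: Message[] = [];
let streamingMessage: Message | null = null;

function appendToken(text: string) {
	if (!streamingMessage) return;
	// Mutation alone: the object changes, but Svelte is not notified.
	streamingMessage.content += text;
	// Reassignment: triggers reactivity, so the message list re-renders.
	messages = [...messages];
}
```

Pushing `streamingMessage` into `messages` before the stream starts means the UI shows an empty assistant message immediately and fills it in token by token, and the `finally` block guarantees `loading` and `streamingMessage` are reset even if the stream throws mid-generation.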
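For reference, the same streaming call can be exercised on its own. A standalone sketch using `@huggingface/inference` (the model id, prompt, and `HF_TOKEN` environment variable are placeholders, not values from this diff):

```ts
import { HfInference } from '@huggingface/inference';

async function demo() {
	// Placeholder token; any valid Hugging Face access token works here.
	const hf = new HfInference(process.env.HF_TOKEN);

	let text = '';
	for await (const output of hf.textGenerationStream({
		model: 'mistralai/Mistral-7B-Instruct-v0.2', // placeholder model id
		inputs: 'Explain token streaming in one sentence.',
		parameters: { max_new_tokens: 64, temperature: 0.7, return_full_text: false }
	})) {
		// Each yielded chunk carries one generated token.
		text += output.token.text;
	}
	console.log(text);
}

demo().catch(console.error);
```

One caveat about the diff itself: `inputs: messages.map(m => m.content).join('\n')` flattens the conversation into a newline-joined prompt and drops role information, which is fine for plain text-generation models, though chat-tuned models generally expect their own prompt template.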