victor (HF staff) committed
Commit e77d876 · 1 Parent(s): 1546a46

Implemented streaming chat completion with progress updates.

src/lib/components/Playground/Playground.svelte CHANGED
@@ -23,6 +23,7 @@
 	let maxTokens = 32000;
 
 	let loading = false;
+	let streamingMessage: Message | null = null;
 
 	function addMessage() {
 		messages = [
@@ -56,22 +57,34 @@
 		}
 		(document.activeElement as HTMLElement).blur();
 		loading = true;
+		streamingMessage = { role: 'assistant', content: '' };
+		messages = [...messages, streamingMessage];
+
 		try {
 			const hf = new HfInference(hfToken);
 
-			const out = await hf.chatCompletion({
+			const stream = await hf.textGenerationStream({
 				model: currentModel,
-				messages: systemMessage.content ? [systemMessage, ...messages] : messages,
-				max_tokens: maxTokens,
-				temperature: temperature,
-				seed: 0
+				inputs: messages.map(m => m.content).join('\n'),
+				parameters: {
+					max_new_tokens: maxTokens,
+					temperature: temperature,
+					return_full_text: false
+				}
 			});
 
-			messages = [...messages, ...out.choices.map((o) => o.message)];
+			for await (const response of stream) {
+				if (streamingMessage) {
+					streamingMessage.content += response.token.text;
+					messages = [...messages];
+				}
+			}
 		} catch (error) {
 			alert('error: ' + error.message);
+		} finally {
+			loading = false;
+			streamingMessage = null;
 		}
-		loading = false;
 	}
 
 	$: console.log(messages);
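The key change is swapping the single-shot hf.chatCompletion call for hf.textGenerationStream, which returns an async iterable of token events, and appending a placeholder assistant message that is filled in as tokens arrive. For reference, a minimal standalone sketch of the same consumption pattern, assuming the @huggingface/inference package; the function name, model name, and parameter values here are illustrative placeholders, not from the commit:

import { HfInference } from '@huggingface/inference';

// Hypothetical helper for illustration: accumulates a streamed completion.
async function streamCompletion(hfToken: string, prompt: string): Promise<string> {
	const hf = new HfInference(hfToken);
	let output = '';
	// textGenerationStream yields one event per generated token;
	// event.token.text carries the newly decoded text fragment.
	for await (const event of hf.textGenerationStream({
		model: 'mistralai/Mistral-7B-Instruct-v0.2', // illustrative model, not from the commit
		inputs: prompt,
		parameters: { max_new_tokens: 250, return_full_text: false }
	})) {
		output += event.token.text;
	}
	return output;
}

Note the messages = [...messages]; reassignment inside the loop in the diff: Svelte reactivity is triggered by assignment, so mutating streamingMessage.content alone would not re-render the chat. Reassigning the array forces an update on every token, which is what produces the progress updates named in the commit message.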