KingNish committed
Commit 8f2e652
1 Parent(s): 384e5e7

Update script1.js

Files changed (1): script1.js +46 -59
script1.js CHANGED
@@ -1,12 +1,12 @@
  // Constants and Configuration
  const USER_SPEECH_INTERRUPT_DELAY = 500;
- const TEXT_TO_SPEECH_API_ENDPOINT = "https://api.streamelements.com/kappa/v2/speech";
+ const TEXT_TO_SPEECH_API_ENDPOINT = "https://api.streamelements.com/kappa/v2/speech";
  const CHUNK_SIZE = 300;
- const MAX_PREFETCH_REQUESTS = 5; // Reduced to avoid overloading
+ const MAX_PREFETCH_REQUESTS = 5;
  const PREFETCH_CACHE_EXPIRATION = 60000; // 1 minute
  const AUDIO_CACHE_EXPIRATION = 3600000; // 1 hour
  const WEBCAM_INTERVAL = 5000;
- const MAX_HISTORY_LENGTH = 6; // Limit history for better performance
+ const MAX_HISTORY_LENGTH = 6;

  // DOM Elements
  const startStopButton = document.getElementById('startStopButton');
@@ -17,6 +17,7 @@ const responseTimeDisplay = document.getElementById('responseTime');
  const userActivityIndicator = document.getElementById('userIndicator');
  const aiActivityIndicator = document.getElementById('aiIndicator');
  const transcriptDiv = document.getElementById('transcript');
+ const video = document.getElementById('webcam');

  // Speech Recognition
  let speechRecognizer;
@@ -46,6 +47,22 @@ let conversationHistory = [];
  // Audio Caching
  const audioCache = new Map();

+ // Image Captioning State
+ let isCaptioningEnabled = false;
+ let lastCaption = "";
+
+ // Webcam Integration
+ import { client, handle_file } from 'https://cdn.jsdelivr.net/npm/@gradio/client/+esm';
+ const clients = [
+   "multimodalart/Florence-2-l4",
+   "gokaygokay/Florence-2",
+   "multimodalart/Florence-2-l4-2",
+   "gokaygokay/Florence-2",
+ ];
+ let app;
+ let webcamInterval;
+
+
  // Utility Functions

  // Normalize query text
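Note: the Spaces in `clients` are Florence-2 captioning endpoints ("gokaygokay/Florence-2" appears twice, presumably to weight load toward it). How a Space is picked and called is not shown in this diff; a minimal sketch with @gradio/client, where the random pick, the "/process_image" endpoint name, and the argument shape are assumptions, not code from this commit:

    // Hypothetical sketch only; the real processWithGradio is outside this diff.
    async function connectToCaptioner() {
      // Pick one of the configured Spaces at random to spread load.
      const space = clients[Math.floor(Math.random() * clients.length)];
      app = await client(space);
    }

    async function captionImage(imageBlob) {
      if (!app) await connectToCaptioner();
      // handle_file wraps the Blob in the payload format the Space expects.
      const result = await app.predict("/process_image", [handle_file(imageBlob)]);
      return result.data[0];
    }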
@@ -58,7 +75,7 @@ const generateCacheKey = (normalizedQuery, voice, history, modelName) =>
  // Update activity indicators
  const updateActivityIndicators = (state = null) => {
    userActivityIndicator.textContent = isUserSpeaking ? "User: Speaking" : "User: Idle";
-
+
    if (isRequestInProgress && !currentAudio) {
      aiActivityIndicator.textContent = "AI: Processing...";
    } else if (currentAudio && !isUserSpeaking) {
@@ -194,7 +211,6 @@ const cancelPrefetchRequests = (query) => {

  // Send a query to the AI
  async function sendQueryToAI(query) {
-   console.log("Sending query to AI:", query);
    isRequestInProgress = true;
    updateActivityIndicators();
    firstResponseTextTimestamp = null;
@@ -210,7 +226,6 @@ async function sendQueryToAI(query) {
    combinedQuery += `, {USER: "${query}"}`;

    await streamAndHandleAudioResponse(combinedQuery, voiceSelectionDropdown.value, requestAbortController.signal);
-
  } catch (error) {
    if (error.name !== 'AbortError') {
      console.error("Error sending query to AI:", error);
@@ -226,7 +241,7 @@ const processSpeechTranscript = (transcript) => {
    const trimmedTranscript = transcript.trimStart();
    if (trimmedTranscript !== '' && !isRequestInProgress) {
      activeQuery = trimmedTranscript;
-     addToConversationHistory('user', activeQuery); // Add history before sending
+     addToConversationHistory('user', activeQuery);
      sendQueryToAI(activeQuery);
    }
  };
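Note: `addToConversationHistory` itself is not part of this diff. Given `MAX_HISTORY_LENGTH = 6` in the constants above, it presumably caps the stored turns; a sketch under that assumption:

    // Assumed shape of the helper; the real implementation is outside this diff.
    function addToConversationHistory(role, content) {
      conversationHistory.push({ role, content });
      // Keep only the most recent MAX_HISTORY_LENGTH turns.
      if (conversationHistory.length > MAX_HISTORY_LENGTH) {
        conversationHistory.splice(0, conversationHistory.length - MAX_HISTORY_LENGTH);
      }
    }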
@@ -330,7 +345,6 @@ const handleStreamingResponse = async (responseStream, voice, abortSignal) => {
    let fullResponseText2 = "";
    let textChunk = "";

-
    try {
      while (true) {
        const { done, value } = await reader.read();
@@ -346,34 +360,38 @@ const handleStreamingResponse = async (responseStream, voice, abortSignal) => {
        buffer += chunk;
        const lines = buffer.split('\n');

-       for (const line of lines) { // Simplified loop
+       for (const line of lines) {
          if (line.startsWith('data: ')) {
            const textContent = line.substring(6).trim();
            if (textContent) {
              if (!firstResponseTextTimestamp) firstResponseTextTimestamp = Date.now();
-             fullResponseText += textContent + " "; // Accumulate full response
+
+             fullResponseText += textContent + " ";
              fullResponseText2 += textContent + " ";
              textChunk += textContent + " ";
              transcriptDiv.textContent = fullResponseText2;


              if (textChunk.length >= CHUNK_SIZE) {
-               const audioUrl = await generateTextToSpeechAudio(textChunk, voice);
-               if (audioUrl) {
-                 audioPlaybackQueue.push({ url: audioUrl });
-                 if (!currentAudio) playNextAudio();
-               }
-               textChunk = ""; // Clear after sending
+               const audioUrl = await generateTextToSpeechAudio(textChunk, voice);
+               if (audioUrl) {
+                 audioPlaybackQueue.push({ url: audioUrl });
+                 if (!currentAudio) playNextAudio();
+               }
+               textChunk = "";
              }
            }
          }
        }
+
+       buffer = lines[lines.length - 1];
      }
    } catch (error) {
      console.error("Error in handleStreamingResponse:", error);
    } finally {
-     // ... (Send any remaining textChunk)
-     if (textChunk !== "") {
+     reader.releaseLock();
+
+     if (textChunk !== "") { // Send any remaining text
        const audioUrl = await generateTextToSpeechAudio(textChunk, voice);
        if (audioUrl) {
          audioPlaybackQueue.push({ url: audioUrl });
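Note: the `buffer = lines[lines.length - 1];` added at the end of the read loop is the standard guard for `data:` events arriving split across network chunks: everything before the last newline is a complete line, while the trailing fragment is carried into the next read. A self-contained sketch of the pattern, in a slightly stricter variant that parses only complete lines (`onLine` is a hypothetical stand-in for the `data: ` handling above):

    async function drainStream(reader, onLine) {
      const decoder = new TextDecoder();
      let buffer = "";
      while (true) {
        const { done, value } = await reader.read();
        if (done) break;
        buffer += decoder.decode(value, { stream: true });
        const lines = buffer.split('\n');
        buffer = lines.pop(); // retain the possibly incomplete trailing line
        for (const line of lines) onLine(line); // only complete lines reach the parser
      }
      if (buffer) onLine(buffer); // flush whatever remains at end of stream
    }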
@@ -381,12 +399,9 @@ const handleStreamingResponse = async (responseStream, voice, abortSignal) => {
      }
    }

-   addToConversationHistory('assistant', fullResponseText2);
-   fullResponseText = "";
-   fullResponseText2 = "";
-
-   reader.releaseLock();
-
+   addToConversationHistory('assistant', fullResponseText2);
+   fullResponseText = "";
+   fullResponseText2 = "";
  }
};

@@ -484,20 +499,14 @@ if ('webkitSpeechRecognition' in window) {
      speechRecognizer.stop();
      isSpeechRecognitionActive = false;
      startStopButton.innerHTML = '<svg id="microphoneIcon" xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M12 1a3 3 0 0 0-3 3v8a3 3 0 0 0 6 0V4a3 3 0 0 0-3-3z"></path><path d="M19 10v2a7 7 0 0 1-14 0v-2"></path><line x1="12" y1="19" x2="12" y2="23"></line><line x1="8" y1="23" x2="16" y2="23"></line></svg> Start Listening';
-
-     // Stop webcam capture when speech recognition stops
-     clearInterval(webcamInterval);
-     video.srcObject = null;
-     lastCaption = "";
-     isCaptioningEnabled = false;
-
+     clearInterval(webcamInterval);
+     video.srcObject = null;
+     lastCaption = "";
    } else {
      speechRecognizer.start();
      isSpeechRecognitionActive = true;
      startStopButton.innerHTML = '<svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M9 9h6v6h-6z"></path><path d="M12 1a3 3 0 0 0-3 3v8a3 3 0 0 0 6 0V4a3 3 0 0 0-3-3z"></path><path d="M19 10v2a7 7 0 0 1-14 0v-2"></path><line x1="12" y1="19" x2="12" y2="23"></line><line x1="8" y1="23" x2="16" y2="23"></line></svg> Stop Listening';
-
-     // Start webcam capture when speech recognition starts
-     isCaptioningEnabled = true;
+     isCaptioningEnabled = true;
      startWebcam();
    }
  });
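Note: setting `video.srcObject = null` detaches the stream from the element, but in most browsers the camera stays live (indicator light on) until its tracks are stopped. This commit does not do that; if a full release were wanted, the usual addition would be:

    // Assumption: not in this commit; shown only as the standard full teardown.
    if (video.srcObject) {
      video.srcObject.getTracks().forEach((track) => track.stop());
    }
    video.srcObject = null;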
@@ -508,28 +517,13 @@ if ('webkitSpeechRecognition' in window) {
  setInterval(updateLatency, 100);


-
- // Webcam Integration
- import { client, handle_file } from 'https://cdn.jsdelivr.net/npm/@gradio/client/+esm';
-
- const video = document.getElementById('webcam');
- let app;
- let lastCaption = "";
-
- const clients = [
-   "multimodalart/Florence-2-l4",
-   "gokaygokay/Florence-2",
-   "multimodalart/Florence-2-l4-2",
-   "gokaygokay/Florence-2",
- ];
-
- let webcamInterval; // Store the interval ID
+ // Webcam Functions

  async function startWebcam() {
    try {
      const stream = await navigator.mediaDevices.getUserMedia({ video: true });
      video.srcObject = stream;
-     webcamInterval = setInterval(captureAndProcessImage, WEBCAM_INTERVAL); // Set interval only once
+     webcamInterval = setInterval(captureAndProcessImage, WEBCAM_INTERVAL);
    } catch (error) {
      console.error("Error accessing webcam: ", error);
    }
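Note: `captureAndProcessImage` is referenced here but defined outside the hunks shown. A plausible sketch of grabbing a frame from the `<video>` element as a Blob (the canvas approach and the JPEG choice are assumptions, not code from this commit):

    // Hypothetical reconstruction; the real function is not part of this diff.
    async function captureAndProcessImage() {
      if (!video.videoWidth) return; // webcam not delivering frames yet
      const canvas = document.createElement('canvas');
      canvas.width = video.videoWidth;
      canvas.height = video.videoHeight;
      canvas.getContext('2d').drawImage(video, 0, 0);
      const blob = await new Promise((resolve) => canvas.toBlob(resolve, 'image/jpeg'));
      if (blob) await processWithGradio(blob);
    }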
@@ -559,11 +553,4 @@ async function processWithGradio(imageBlob) {
    } catch (error) {
      console.error("Error processing with Gradio:", error);
    }
-  }
-
-  window.onload = () => {
-    // Start webcam only if speech recognition is active
-    if (isCaptioningEnabled) {
-      startWebcam();
-    }
-  };
+  }