Spaces:
Running
Running
Update script1.js
Browse files- script1.js +41 -26
script1.js
CHANGED
@@ -46,6 +46,20 @@ let conversationHistory = [];
|
|
46 |
// Audio Caching
|
47 |
const audioCache = new Map();
|
48 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
49 |
// Utility Functions
|
50 |
|
51 |
// Normalize query text
|
@@ -58,7 +72,7 @@ const generateCacheKey = (normalizedQuery, voice, history, modelName) =>
|
|
58 |
// Update activity indicators
|
59 |
const updateActivityIndicators = (state = null) => {
|
60 |
userActivityIndicator.textContent = isUserSpeaking ? "User: Speaking" : "User: Idle";
|
61 |
-
|
62 |
if (isRequestInProgress && !currentAudio) {
|
63 |
aiActivityIndicator.textContent = "AI: Processing...";
|
64 |
} else if (currentAudio && !isUserSpeaking) {
|
@@ -223,7 +237,7 @@ async function sendQueryToAI(query) {
|
|
223 |
requestAbortController = new AbortController();
|
224 |
|
225 |
try {
|
226 |
-
const combinedQuery = `{USER: "${query}"}, ${lastCaption}, {USER: "${query}"}
|
227 |
await streamAndHandleAudioResponse(combinedQuery, voiceSelectionDropdown.value, requestAbortController.signal);
|
228 |
} catch (error) {
|
229 |
if (error.name !== 'AbortError') {
|
@@ -473,6 +487,12 @@ if ('webkitSpeechRecognition' in window) {
|
|
473 |
lastUserSpeechTimestamp = Date.now();
|
474 |
updateActivityIndicators();
|
475 |
startStopButton.innerHTML = '<svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M9 9h6v6h-6z"></path><path d="M12 1a3 3 0 0 0-3 3v8a3 3 0 0 0 6 0V4a3 3 0 0 0-3-3z"></path><path d="M19 10v2a7 7 0 0 1-14 0v-2"></path><line x1="12" y1="19" x2="12" y2="23"></line><line x1="8" y1="23" x2="16" y2="23"></line></svg> Stop Listening';
|
|
|
|
|
|
|
|
|
|
|
|
|
476 |
};
|
477 |
|
478 |
speechRecognizer.onresult = (event) => {
|
@@ -521,6 +541,9 @@ if ('webkitSpeechRecognition' in window) {
|
|
521 |
speechRecognizer.stop();
|
522 |
isSpeechRecognitionActive = false;
|
523 |
startStopButton.innerHTML = '<svg id="microphoneIcon" xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M12 1a3 3 0 0 0-3 3v8a3 3 0 0 0 6 0V4a3 3 0 0 0-3-3z"></path><path d="M19 10v2a7 7 0 0 1-14 0v-2"></path><line x1="12" y1="19" x2="12" y2="23"></line><line x1="8" y1="23" x2="16" y2="23"></line></svg> Start Listening';
|
|
|
|
|
|
|
524 |
} else {
|
525 |
speechRecognizer.start();
|
526 |
isSpeechRecognitionActive = true;
|
@@ -531,29 +554,21 @@ if ('webkitSpeechRecognition' in window) {
|
|
531 |
alert('Your browser does not support the Web Speech API.');
|
532 |
}
|
533 |
|
534 |
-
setInterval(updateLatency, 100);
|
535 |
-
|
536 |
-
|
537 |
-
|
538 |
-
// Webcam Integration
|
539 |
-
import { client, handle_file } from 'https://cdn.jsdelivr.net/npm/@gradio/client/+esm';
|
540 |
-
|
541 |
-
const video = document.getElementById('webcam');
|
542 |
-
let app;
|
543 |
-
let lastCaption = "";
|
544 |
-
|
545 |
-
const clients = [
|
546 |
-
"multimodalart/Florence-2-l4",
|
547 |
-
"gokaygokay/Florence-2",
|
548 |
-
"multimodalart/Florence-2-l4-2",
|
549 |
-
"gokaygokay/Florence-2",
|
550 |
-
];
|
551 |
|
|
|
552 |
async function startWebcam() {
|
553 |
try {
|
554 |
const stream = await navigator.mediaDevices.getUserMedia({ video: true });
|
555 |
video.srcObject = stream;
|
556 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
557 |
} catch (error) {
|
558 |
console.error("Error accessing webcam: ", error);
|
559 |
}
|
@@ -566,25 +581,25 @@ async function captureAndProcessImage() {
|
|
566 |
const context = canvas.getContext('2d');
|
567 |
context.drawImage(video, 0, 0, canvas.width, canvas.height);
|
568 |
|
569 |
-
const blob = await new Promise(resolve => canvas.toBlob(resolve, 'image/
|
570 |
await processWithGradio(blob);
|
571 |
}
|
572 |
|
|
|
573 |
async function processWithGradio(imageBlob) {
|
574 |
try {
|
575 |
const randomClient = clients[Math.floor(Math.random() * clients.length)];
|
576 |
app = await client(randomClient);
|
577 |
-
const handledFile = await handle_file(imageBlob);
|
578 |
|
|
|
579 |
const result = await app.predict("/process_image", [handledFile, "More Detailed Caption"]);
|
580 |
|
581 |
const dataString = result.data[0];
|
582 |
-
lastCaption = dataString || lastCaption;
|
583 |
} catch (error) {
|
584 |
console.error("Error processing with Gradio:", error);
|
585 |
}
|
586 |
}
|
587 |
|
588 |
-
|
589 |
-
|
590 |
-
};
|
|
|
46 |
// Audio Caching
|
47 |
const audioCache = new Map();
|
48 |
|
49 |
+
// ---- Webcam Integration: shared state ----
import { client, handle_file } from 'https://cdn.jsdelivr.net/npm/@gradio/client/+esm';

// <video> element that mirrors the live webcam stream.
const video = document.getElementById('webcam');

// Most recently connected Gradio client (reassigned on each caption request).
let app;

// Latest caption returned by the Florence-2 captioning space.
let lastCaption = "";

// Whether the webcam capture loop is currently running.
let isWebcamActive = false;

// Candidate Gradio spaces; one is chosen at random per caption request.
// NOTE(review): "gokaygokay/Florence-2" appears twice — presumably to weight
// the random pick toward that space; confirm the duplication is intentional.
const clients = [
  "multimodalart/Florence-2-l4",
  "gokaygokay/Florence-2",
  "multimodalart/Florence-2-l4-2",
  "gokaygokay/Florence-2",
];
|
62 |
+
|
63 |
// Utility Functions
|
64 |
|
65 |
// Normalize query text
|
|
|
72 |
// Update activity indicators
|
73 |
const updateActivityIndicators = (state = null) => {
|
74 |
userActivityIndicator.textContent = isUserSpeaking ? "User: Speaking" : "User: Idle";
|
75 |
+
|
76 |
if (isRequestInProgress && !currentAudio) {
|
77 |
aiActivityIndicator.textContent = "AI: Processing...";
|
78 |
} else if (currentAudio && !isUserSpeaking) {
|
|
|
237 |
requestAbortController = new AbortController();
|
238 |
|
239 |
try {
|
240 |
+
const combinedQuery = isWebcamActive ? `{USER: "${query}"}, ${lastCaption}, {USER: "${query}"}` : query;
|
241 |
await streamAndHandleAudioResponse(combinedQuery, voiceSelectionDropdown.value, requestAbortController.signal);
|
242 |
} catch (error) {
|
243 |
if (error.name !== 'AbortError') {
|
|
|
487 |
lastUserSpeechTimestamp = Date.now();
|
488 |
updateActivityIndicators();
|
489 |
startStopButton.innerHTML = '<svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M9 9h6v6h-6z"></path><path d="M12 1a3 3 0 0 0-3 3v8a3 3 0 0 0 6 0V4a3 3 0 0 0-3-3z"></path><path d="M19 10v2a7 7 0 0 1-14 0v-2"></path><line x1="12" y1="19" x2="12" y2="23"></line><line x1="8" y1="23" x2="16" y2="23"></line></svg> Stop Listening';
|
490 |
+
|
491 |
+
// Start webcam processing if not already active
|
492 |
+
if (!isWebcamActive) {
|
493 |
+
startWebcam();
|
494 |
+
isWebcamActive = true;
|
495 |
+
}
|
496 |
};
|
497 |
|
498 |
speechRecognizer.onresult = (event) => {
|
|
|
541 |
speechRecognizer.stop();
|
542 |
isSpeechRecognitionActive = false;
|
543 |
startStopButton.innerHTML = '<svg id="microphoneIcon" xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M12 1a3 3 0 0 0-3 3v8a3 3 0 0 0 6 0V4a3 3 0 0 0-3-3z"></path><path d="M19 10v2a7 7 0 0 1-14 0v-2"></path><line x1="12" y1="19" x2="12" y2="23"></line><line x1="8" y1="23" x2="16" y2="23"></line></svg> Start Listening';
|
544 |
+
|
545 |
+
// Stop webcam processing
|
546 |
+
isWebcamActive = false;
|
547 |
} else {
|
548 |
speechRecognizer.start();
|
549 |
isSpeechRecognitionActive = true;
|
|
|
554 |
alert('Your browser does not support the Web Speech API.');
|
555 |
}
|
556 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
557 |
|
558 |
+
// Webcam Functions (optimized)
|
559 |
async function startWebcam() {
|
560 |
try {
|
561 |
const stream = await navigator.mediaDevices.getUserMedia({ video: true });
|
562 |
video.srcObject = stream;
|
563 |
+
|
564 |
+
// Capture and process image every 5 seconds while webcam is active
|
565 |
+
const webcamInterval = setInterval(async () => {
|
566 |
+
if (!isWebcamActive) {
|
567 |
+
clearInterval(webcamInterval);
|
568 |
+
return;
|
569 |
+
}
|
570 |
+
await captureAndProcessImage();
|
571 |
+
}, 5000);
|
572 |
} catch (error) {
|
573 |
console.error("Error accessing webcam: ", error);
|
574 |
}
|
|
|
581 |
const context = canvas.getContext('2d');
|
582 |
context.drawImage(video, 0, 0, canvas.width, canvas.height);
|
583 |
|
584 |
+
const blob = await new Promise(resolve => canvas.toBlob(resolve, 'image/jpeg', 0.8)); // Use JPEG for smaller size
|
585 |
await processWithGradio(blob);
|
586 |
}
|
587 |
|
588 |
+
|
589 |
/**
 * Send one captured webcam frame to a Florence-2 Gradio space and refresh
 * the module-level `lastCaption` with the returned description.
 *
 * A space is picked at random from `clients` on every call, the blob is
 * uploaded via `handle_file`, and the "/process_image" endpoint is asked
 * for a "More Detailed Caption". A falsy result keeps the previous caption.
 * Any failure is logged and otherwise swallowed so the periodic capture
 * loop keeps running.
 *
 * @param {Blob} imageBlob - Frame captured from the webcam canvas.
 */
async function processWithGradio(imageBlob) {
  try {
    const pick = Math.floor(Math.random() * clients.length);
    app = await client(clients[pick]);

    const uploaded = await handle_file(imageBlob);
    const result = await app.predict("/process_image", [uploaded, "More Detailed Caption"]);

    const caption = result.data[0];
    if (caption) {
      lastCaption = caption;
    }
  } catch (error) {
    console.error("Error processing with Gradio:", error);
  }
}
|
603 |
|
604 |
+
|
605 |
+
setInterval(updateLatency, 100);
|
|