Live-Video-Chat

Running

App Files Community

KingNish committed on Sep 28, 2024

Commit

8580e13

verified ·

1 Parent(s): 194daa5

Update script1.js

Browse files

Files changed (1) hide show

script1.js +68 -384

script1.js CHANGED Viewed

@@ -1,11 +1,9 @@
-// script1.js
 // Constants and Configuration
 const USER_SPEECH_INTERRUPT_DELAY = 500;
 const TEXT_TO_SPEECH_API_ENDPOINT = "https://api.streamelements.com/kappa/v2/speech";
 const CHUNK_SIZE = 300;
-const MAX_PREFETCH_REQUESTS = 5;
-const PREFETCH_CACHE_EXPIRATION = 30000; // 30 seconds
 const AUDIO_CACHE_EXPIRATION = 3600000; // 1 hour
 // DOM Elements
@@ -17,6 +15,7 @@ const responseTimeDisplay = document.getElementById('responseTime');
 const userActivityIndicator = document.getElementById('userIndicator');
 const aiActivityIndicator = document.getElementById('aiIndicator');
 const transcriptDiv = document.getElementById('transcript');
 // Speech Recognition
 let speechRecognizer;
@@ -36,7 +35,8 @@ let audioPlaybackQueue = [];
 // Prefetching and Caching
 const prefetchCache = new Map();
-const pendingPrefetchRequests = new Set();
 let prefetchTextQuery = "";
 // Conversation History
@@ -45,9 +45,21 @@ let conversationHistory = [];
 // Audio Caching
 const audioCache = new Map();
 // Utility Functions
-// Normalize query text
 const normalizeQueryText = query => query.trim().toLowerCase().replace(/[^\w\s]/g, '');
 // Generate a cache key
@@ -57,7 +69,6 @@ const generateCacheKey = (normalizedQuery, voice, history, modelName) =>
 // Update activity indicators
 const updateActivityIndicators = (state = null) => {
     userActivityIndicator.textContent = isUserSpeaking ? "User: Speaking" : "User: Idle";
     if (isRequestInProgress && !currentAudio) {
         aiActivityIndicator.textContent = "AI: Processing...";
     } else if (currentAudio && !isUserSpeaking) {
@@ -92,11 +103,6 @@ const addToConversationHistory = (role, content) => {
     if (conversationHistory.length > 6) conversationHistory.splice(0, 2);
 };
-// Check if audio playback should be interrupted
-const shouldInterruptAudioPlayback = (interimTranscript) =>
-    Date.now() - lastUserSpeechTimestamp > USER_SPEECH_INTERRUPT_DELAY || interimTranscript.length > 5;
 // Audio Management Functions
 // Play audio from the queue
@@ -124,319 +130,97 @@ const playNextAudio = async () => {
     }
 };
-// Interrupt audio playback
-const interruptAudioPlayback = (reason = 'unknown') => {
-    console.log(`Interrupting audio (reason: ${reason})...`);
-    if (currentAudio) {
-        currentAudio.pause();
-        currentAudio.currentTime = 0;
-        currentAudio = null;
-    }
-    audioPlaybackQueue.length = 0;
-    isRequestInProgress = false;
-    if (requestAbortController) {
-        requestAbortController.abort();
-        requestAbortController = null;
-    }
-    prefetchCache.clear();
-    updateActivityIndicators();
-};
 // Prefetching and Caching Functions
 // Prefetch and cache the first TTS audio chunk
-const prefetchFirstAudioChunk = async (query, voice) => {
     const normalizedQuery = normalizeQueryText(query);
     const cacheKey = generateCacheKey(normalizedQuery, voice, conversationHistory, modelSelectionDropdown.value);
     if (pendingPrefetchRequests.has(cacheKey) || prefetchCache.has(cacheKey)) return;
-    pendingPrefetchRequests.add(cacheKey);
-    try {
-        const firstAudioUrl = await streamAndPrefetchAudio(query, voice);
-        if (firstAudioUrl) prefetchCache.set(cacheKey, { url: firstAudioUrl, timestamp: Date.now() });
-    } catch (error) {
-        if (error.name !== 'AbortError') console.error("Error prefetching audio:", error);
-    } finally {
-        pendingPrefetchRequests.delete(cacheKey);
-    }
-};
-// Cancel pending prefetch requests
-const cancelPrefetchRequests = (query) => {
-    const normalizedQuery = normalizeQueryText(query);
-    for (const key of pendingPrefetchRequests) {
-        if (key.startsWith(normalizedQuery)) {
-            pendingPrefetchRequests.delete(key);
-            // Implement abort logic if needed for your fetch implementation
-        }
-    }
 };
-// AI Interaction Functions
-// Send a query to the AI
-async function sendQueryToAI(query) {
-    console.log("Sending query to AI:", query);
-    isRequestInProgress = true;
-    updateActivityIndicators();
-    firstResponseTextTimestamp = null;
-    const normalizedQuery = normalizeQueryText(query);
-    const cacheKey = generateCacheKey(normalizedQuery, voiceSelectionDropdown.value, conversationHistory, modelSelectionDropdown.value);
-    queryStartTime = Date.now();
-    // Check prefetch cache
-    if (prefetchCache.has(cacheKey)) {
-        const cachedData = prefetchCache.get(cacheKey);
-        if (Date.now() - cachedData.timestamp < PREFETCH_CACHE_EXPIRATION) {
-            audioPlaybackQueue.push({ url: cachedData.url, isPrefetched: true });
-            playNextAudio();
-        } else {
-            prefetchCache.delete(cacheKey);
-        }
-    }
-    requestAbortController = new AbortController();
     try {
-        const combinedQuery = `{USER: "${query}"}, ${lastCaption}, {USER: "${query}"}`;
-        await streamAndHandleAudioResponse(combinedQuery, voiceSelectionDropdown.value, requestAbortController.signal);
     } catch (error) {
-        if (error.name !== 'AbortError') {
-            console.error("Error sending query to AI:", error);
-        }
-    } finally {
-        isRequestInProgress = false;
-        updateActivityIndicators();
     }
 };
-// Process the final speech transcript
-const processSpeechTranscript = (transcript) => {
-    const trimmedTranscript = transcript.trimStart();
-    if (trimmedTranscript !== '' && !isRequestInProgress) {
-        activeQuery = trimmedTranscript;
-        sendQueryToAI(activeQuery);
-        addToConversationHistory('user', activeQuery);
     }
 };
-// Network and Streaming Functions
-// Stream AI response and handle audio
-const streamAndHandleAudioResponse = async (query, voice, abortSignal) => {
-    const response = await fetchAIResponse(query, abortSignal);
-    if (!response.ok) {
-        if (response.status === 429) {
-            console.log("Rate limit hit, retrying in 1 second...");
-            await new Promise(resolve => setTimeout(resolve, 1000));
-            await sendQueryToAI(query);
-            return;
-        }
-        throw new Error(`Network response was not ok: ${response.status}`);
-    }
-    console.log("Streaming audio response received");
-    await handleStreamingResponse(response.body, voice, abortSignal);
-};
-// Stream AI response for prefetching
-const streamAndPrefetchAudio = async (query, voice) => {
-    const response = await fetchAIResponse(query, undefined);
-    if (!response.ok) throw new Error('Network response was not ok');
-    return handleStreamingResponseForPrefetch(response.body, voice);
-};
-// Fetch AI response
-const fetchAIResponse = async (query, abortSignal) => {
-    const userSambanovaKey = document.getElementById('apiKey').value.trim() !== '' ? document.getElementById('apiKey').value.trim() : 'none';
-    const url = '/stream_text';
-    const requestBody = {
-        query: query,
-        history: JSON.stringify(conversationHistory),
-        model: modelSelectionDropdown.value,
-        api_key: userSambanovaKey
-    };
-    return fetch(url, {
-        method: 'POST',
-        headers: {
-            'Accept': 'text/event-stream',
-            'Content-Type': 'application/json'
-        },
-        body: JSON.stringify(requestBody),
-        signal: abortSignal
-    });
-};
-// Handle the streaming response for prefetching
-const handleStreamingResponseForPrefetch = async (responseStream, voice) => {
-    const reader = responseStream.getReader();
-    const decoder = new TextDecoder("utf-8");
-    let buffer = "";
-    try {
-        while (true) {
-            const { done, value } = await reader.read();
-            if (done) break;
-            const chunk = decoder.decode(value, { stream: true });
-            buffer += chunk;
-            const lines = buffer.split('\n');
-            for (let i = 0; i < lines.length - 1; i++) {
-                const line = lines[i];
-                if (line.startsWith('data: ')) {
-                    const textContent = line.substring(6).trim();
-                    if (textContent) {
-                        return await generateTextToSpeechAudio(textContent, voice);
-                    }
-                }
-            }
-            buffer = lines[lines.length - 1];
-        }
-    } catch (error) {
-        console.error("Error in handleStreamingResponseForPrefetch:", error);
-    } finally {
-        reader.releaseLock();
-    }
-    return null;
 };
-// Handle the streaming audio response
-const handleStreamingResponse = async (responseStream, voice, abortSignal) => {
-    const reader = responseStream.getReader();
-    const decoder = new TextDecoder("utf-8");
-    let buffer = "";
-    let fullResponseText = "";
-    let fullResponseText2 = "";
-    let textChunk = "";
-    let sentText = "";
     try {
-        while (true) {
-            const { done, value } = await reader.read();
-            if (done) break;
-            if (abortSignal.aborted) throw new DOMException('Request aborted', 'AbortError');
-            if (isUserSpeaking) {
-                interruptAudioPlayback('user is speaking');
-                break;
-            }
-            const chunk = decoder.decode(value, { stream: true });
-            buffer += chunk;
-            const lines = buffer.split('\n');
-            for (let i = 0; i < lines.length - 1; i++) {
-                const line = lines[i];
-                if (line.startsWith('data: ')) {
-                    const textContent = line.substring(6).trim();
-                    if (textContent) {
-                        if (!firstResponseTextTimestamp) firstResponseTextTimestamp = Date.now();
-                        fullResponseText += textContent + " ";
-                        fullResponseText2 += textContent + " ";
-                        textChunk += textContent + " ";
-                        transcriptDiv.textContent = fullResponseText2;
-                        const audioUrl = await generateTextToSpeechAudio(textContent, voice); // Call TTS immediately
-                        if (audioUrl) {
-                            audioPlaybackQueue.push({ url: audioUrl, isPrefetched: false });
-                            if (!currentAudio) playNextAudio();
-                        }
-                        if (fullResponseText !== '') {
-                            fullResponseText = '';
-                        }
-                    }
-                }
-            }
-            buffer = lines[lines.length - 1];
-        }
     } catch (error) {
-        console.error("Error in handleStreamingResponse:", error);
-    } finally {
-        reader.releaseLock();
-        if (fullResponseText2 !== '') {
-            addToConversationHistory('assistant', fullResponseText2);
-            fullResponseText2 = '';
-        }
     }
 };
-// Generate Text-to-Speech audio with caching
-const generateTextToSpeechAudio = async (text, voice) => {
-    const normalizedText = normalizeQueryText(text);
-    const cacheKey = `${normalizedText}-${voice}`;
-    if (audioCache.has(cacheKey)) {
-        const cachedData = audioCache.get(cacheKey);
-        if (Date.now() - cachedData.timestamp < AUDIO_CACHE_EXPIRATION) {
-            return cachedData.url;
-        } else {
-            audioCache.delete(cacheKey);
-        }
     }
-    try {
-        const response = await fetch(`${TEXT_TO_SPEECH_API_ENDPOINT}?voice=${voice}&text=${encodeURIComponent(text)}`, { method: 'GET' });
-        if (!response.ok) throw new Error('Network response was not ok');
-        const audioBlob = await response.blob();
-        const audioUrl = URL.createObjectURL(audioBlob);
-        audioCache.set(cacheKey, { url: audioUrl, timestamp: Date.now() });
-        return audioUrl;
-    } catch (error) {
-        console.error("Error generating TTS audio:", error);
-        return null;
     }
-};
 // Speech Recognition Initialization
 if ('webkitSpeechRecognition' in window) {
     speechRecognizer = new webkitSpeechRecognition();
-    Object.assign(speechRecognizer, {
-        continuous: true,
-        interimResults: true,
-        language: 'en-US',
-        maxAlternatives: 3
-    });
-    speechRecognizer.onstart = () => {
-        console.log("Speech recognition started");
-        isUserSpeaking = true;
-        lastUserSpeechTimestamp = Date.now();
-        updateActivityIndicators();
-        startStopButton.innerHTML = '<svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M9 9h6v6h-6z"></path><path d="M12 1a3 3 0 0 0-3 3v8a3 3 0 0 0 6 0V4a3 3 0 0 0-3-3z"></path><path d="M19 10v2a7 7 0 0 1-14 0v-2"></path><line x1="12" y1="19" x2="12" y2="23"></line><line x1="8" y1="23" x2="16" y2="23"></line></svg> Stop Listening';
-    };
     speechRecognizer.onresult = (event) => {
         let interimTranscript = '';
         for (let i = event.resultIndex; i < event.results.length; i++) {
             const transcript = event.results[i][0].transcript;
             if (event.results[i].isFinal) {
-                interruptAudioPlayback('final');
                 processSpeechTranscript(transcript);
                 isUserSpeaking = false;
                 updateActivityIndicators();
@@ -444,114 +228,14 @@ if ('webkitSpeechRecognition' in window) {
             } else {
                 interimTranscript += transcript;
                 isUserSpeaking = true;
-                lastUserSpeechTimestamp = Date.now();
                 updateActivityIndicators();
-                if (interimTranscript.length > prefetchTextQuery.length + 5) {
-                    cancelPrefetchRequests(prefetchTextQuery);
-                }
-                prefetchTextQuery = interimTranscript;
-                prefetchFirstAudioChunk(interimTranscript, voiceSelectionDropdown.value);
-                if (isRequestInProgress && shouldInterruptAudioPlayback(interimTranscript)) {
-                    interruptAudioPlayback('interim');
-                }
             }
         }
     };
-    speechRecognizer.onerror = (event) => {
-        console.error('Speech recognition error:', event.error);
-        if (isSpeechRecognitionActive) speechRecognizer.start();
-    };
-    speechRecognizer.onend = () => {
-        isUserSpeaking = false;
-        updateActivityIndicators();
-        if (isSpeechRecognitionActive) speechRecognizer.start();
-    };
-    startStopButton.addEventListener('click', () => {
-        if (isSpeechRecognitionActive) {
-            speechRecognizer.stop();
-            isSpeechRecognitionActive = false;
-            startStopButton.innerHTML = '<svg id="microphoneIcon" xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M12 1a3 3 0 0 0-3 3v8a3 3 0 0 0 6 0V4a3 3 0 0 0-3-3z"></path><path d="M19 10v2a7 7 0 0 1-14 0v-2"></path><line x1="12" y1="19" x2="12" y2="23"></line><line x1="8" y1="23" x2="16" y2="23"></line></svg> Start Listening';
-            clearInterval(imageCaptureInterval); // Stop webcam processing
-        } else {
-            speechRecognizer.start();
-            isSpeechRecognitionActive = true;
-            startStopButton.innerHTML = '<svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M9 9h6v6h-6z"></path><path d="M12 1a3 3 0 0 0-3 3v8a3 3 0 0 0 6 0V4a3 3 0 0 0-3-3z"></path><path d="M19 10v2a7 7 0 0 1-14 0v-2"></path><line x1="12" y1="19" x2="12" y2="23"></line><line x1="8" y1="23" x2="16" y2="23"></line></svg> Stop Listening';
-            imageCaptureInterval = setInterval(captureAndProcessImage, 5000); // Start webcam processing
-        }
-    });
-} else {
-    alert('Your browser does not support the Web Speech API.');
 }
 setInterval(updateLatency, 100);
-// Webcam Integration
-import { client, handle_file } from 'https://cdn.jsdelivr.net/npm/@gradio/client/+esm';
-let app;
-let lastCaption = "";
-const clients = [
-    "multimodalart/Florence-2-l4",
-    "gokaygokay/Florence-2",
-    "multimodalart/Florence-2-l4-2",
-    "gokaygokay/Florence-2",
-];
-async function startWebcam() {
-    try {
-        const stream = await navigator.mediaDevices.getUserMedia({ video: true });
-        webcamVideo.srcObject = stream;
-    } catch (error) {
-        console.error("Error accessing webcam: ", error);
-    }
-}
-async function captureAndProcessImage() {
-    const canvas = document.createElement('canvas');
-    canvas.width = webcamVideo.videoWidth;
-    canvas.height = webcamVideo.videoHeight;
-    const context = canvas.getContext('2d');
-    context.drawImage(webcamVideo, 0, 0, canvas.width, canvas.height);
-    const blob = await new Promise(resolve => canvas.toBlob(resolve, 'image/png'));
-    await processWithGradio(blob);
-}
-async function processWithGradio(imageBlob) {
-    try {
-        const randomClient = clients[Math.floor(Math.random() * clients.length)];
-        app = await client(randomClient);
-        const handledFile = await handle_file(imageBlob);
-        const result = await app.predict("/process_image", [handledFile, "Detailed Caption"]);
-        const dataString = result.data[0];
-        lastCaption = dataString || lastCaption;
-    } catch (error) {
-        console.error("Error processing with Gradio:", error);
-    }
-}
-let imageCaptureInterval; // Declare interval outside the event listener
 window.onload = () => {
     startWebcam();
-    startStopButton.addEventListener('click', () => {
-        // ... (start/stop speech recognition and webcam captioning)
-        if (isSpeechRecognitionActive) {
-            clearInterval(imageCaptureInterval); // Stop webcam processing
-        } else {
-            imageCaptureInterval = setInterval(captureAndProcessImage, 5000); // Start webcam processing
-        }
-    });
-};

 // Constants and Configuration
 const USER_SPEECH_INTERRUPT_DELAY = 500;
 const TEXT_TO_SPEECH_API_ENDPOINT = "https://api.streamelements.com/kappa/v2/speech";
 const CHUNK_SIZE = 300;
+const MAX_PREFETCH_REQUESTS = 10;
+const PREFETCH_CACHE_EXPIRATION = 60000; // 1 minute
 const AUDIO_CACHE_EXPIRATION = 3600000; // 1 hour
 // DOM Elements
 const userActivityIndicator = document.getElementById('userIndicator');
 const aiActivityIndicator = document.getElementById('aiIndicator');
 const transcriptDiv = document.getElementById('transcript');
+const webcamToggleButton = document.getElementById('webcamToggle');
 // Speech Recognition
 let speechRecognizer;
 // Prefetching and Caching
 const prefetchCache = new Map();
+const pendingPrefetchRequests = new Map();
+const prefetchQueue = [];
 let prefetchTextQuery = "";
 // Conversation History
 // Audio Caching
 const audioCache = new Map();
+// Webcam
+let isWebcamActive = false;
+let app;
+let lastCaption = "";
+const clients = [
+    "multimodalart/Florence-2-l4",
+    "gokaygokay/Florence-2",
+    "multimodalart/Florence-2-l4-2",
+    "gokaygokay/Florence-2",
+];
 // Utility Functions
+// Normalize query text
 const normalizeQueryText = query => query.trim().toLowerCase().replace(/[^\w\s]/g, '');
 // Generate a cache key
 // Update activity indicators
 const updateActivityIndicators = (state = null) => {
     userActivityIndicator.textContent = isUserSpeaking ? "User: Speaking" : "User: Idle";
     if (isRequestInProgress && !currentAudio) {
         aiActivityIndicator.textContent = "AI: Processing...";
     } else if (currentAudio && !isUserSpeaking) {
     if (conversationHistory.length > 6) conversationHistory.splice(0, 2);
 };
 // Audio Management Functions
 // Play audio from the queue
     }
 };
 // Prefetching and Caching Functions
 // Prefetch and cache the first TTS audio chunk
+const prefetchFirstAudioChunk = (query, voice) => {
     const normalizedQuery = normalizeQueryText(query);
     const cacheKey = generateCacheKey(normalizedQuery, voice, conversationHistory, modelSelectionDropdown.value);
     if (pendingPrefetchRequests.has(cacheKey) || prefetchCache.has(cacheKey)) return;
+    prefetchQueue.push({ query: query.trim(), voice, cacheKey });
+    processPrefetchQueue();
 };
+// Webcam Integration Functions
+const startWebcam = async () => {
     try {
+        const stream = await navigator.mediaDevices.getUserMedia({ video: true });
+        document.getElementById('webcam').srcObject = stream;
+        setInterval(captureAndProcessImage, 5000);
     } catch (error) {
+        console.error("Error accessing webcam: ", error);
     }
 };
+const stopWebcam = () => {
+    const stream = document.getElementById('webcam').srcObject;
+    if (stream) {
+        const tracks = stream.getTracks();
+        tracks.forEach(track => track.stop());
     }
 };
+const captureAndProcessImage = async () => {
+    if (!isWebcamActive) return;
+    const canvas = document.createElement('canvas');
+    const video = document.getElementById('webcam');
+    canvas.width = video.videoWidth;
+    canvas.height = video.videoHeight;
+    const context = canvas.getContext('2d');
+    context.drawImage(video, 0, 0, canvas.width, canvas.height);
+    const blob = await new Promise(resolve => canvas.toBlob(resolve, 'image/png'));
+    await processWithGradio(blob);
 };
+const processWithGradio = async (imageBlob) => {
     try {
+        const randomClient = clients[Math.floor(Math.random() * clients.length)];
+        app = await client(randomClient);
+        const handledFile = await handle_file(imageBlob);
+        const result = await app.predict("/process_image", [handledFile, "Detailed Caption"]);
+        const dataString = result.data[0];
+        lastCaption = dataString || lastCaption;
     } catch (error) {
+        console.error("Error processing with Gradio:", error);
     }
 };
+// Event Listeners
+startStopButton.addEventListener('click', () => {
+    isSpeechRecognitionActive = !isSpeechRecognitionActive;
+    if (isSpeechRecognitionActive) {
+        speechRecognizer.start();
+    } else {
+        speechRecognizer.stop();
     }
+});
+webcamToggleButton.addEventListener('click', () => {
+    isWebcamActive = !isWebcamActive;
+    if (isWebcamActive) {
+        startWebcam();
+    } else {
+        stopWebcam();
     }
+});
 // Speech Recognition Initialization
 if ('webkitSpeechRecognition' in window) {
     speechRecognizer = new webkitSpeechRecognition();
+    speechRecognizer.continuous = true;
+    speechRecognizer.interimResults = true;
     speechRecognizer.onresult = (event) => {
         let interimTranscript = '';
         for (let i = event.resultIndex; i < event.results.length; i++) {
             const transcript = event.results[i][0].transcript;
             if (event.results[i].isFinal) {
                 processSpeechTranscript(transcript);
                 isUserSpeaking = false;
                 updateActivityIndicators();
             } else {
                 interimTranscript += transcript;
                 isUserSpeaking = true;
                 updateActivityIndicators();
             }
         }
     };
 }
 setInterval(updateLatency, 100);
 window.onload = () => {
     startWebcam();
+};