Update script1.js

script1.js  CHANGED  (+25 -68)

@@ -1,6 +1,8 @@
+// script1.js
+
 // Constants and Configuration
 const USER_SPEECH_INTERRUPT_DELAY = 500;
-const TEXT_TO_SPEECH_API_ENDPOINT = "https://api.streamelements.com/kappa/v2/speech";
+const TEXT_TO_SPEECH_API_ENDPOINT = "https://api.streamelements.com/kappa/v2/speech";
 const CHUNK_SIZE = 300;
 const MAX_PREFETCH_REQUESTS = 10;
 const PREFETCH_CACHE_EXPIRATION = 60000; // 1 minute
@@ -10,7 +12,7 @@ const AUDIO_CACHE_EXPIRATION = 3600000; // 1 hour
 const startStopButton = document.getElementById('startStopButton');
 const voiceSelectionDropdown = document.getElementById('voiceSelect');
 const modelSelectionDropdown = document.getElementById('modelSelect');
-const noiseSuppressionCheckbox = document.getElementById('noiseSuppression');
+const noiseSuppressionCheckbox = document.getElementById('noiseSuppression');
 const responseTimeDisplay = document.getElementById('responseTime');
 const userActivityIndicator = document.getElementById('userIndicator');
 const aiActivityIndicator = document.getElementById('aiIndicator');
@@ -44,19 +46,6 @@ let conversationHistory = [];
 // Audio Caching
 const audioCache = new Map();
 
-// Webcam and Gradio Integration
-import { client, handle_file } from 'https://cdn.jsdelivr.net/npm/@gradio/client/+esm';
-const video = document.getElementById('webcam');
-const clients = [
-  "multimodalart/Florence-2-l4",
-  "gokaygokay/Florence-2",
-  "multimodalart/Florence-2-l4-2",
-  "gokaygokay/Florence-2",
-]; // Or your preferred Gradio models
-let app;
-let lastCaption = "";
-
-
 // Utility Functions
 
 // Normalize query text
@@ -209,7 +198,7 @@ const cancelPrefetchRequests = (query) => {
 // AI Interaction Functions
 
 // Send a query to the AI
-const sendQueryToAI = async (query) => {
+async function sendQueryToAI(query) {
   console.log("Sending query to AI:", query);
   isRequestInProgress = true;
   updateActivityIndicators();
@@ -234,7 +223,8 @@ const sendQueryToAI = async (query) => {
   requestAbortController = new AbortController();
 
   try {
-
+    const combinedQuery = `{USER: "${query}"}, ${lastCaption}, {USER: "${query}"}`;
+    await streamAndHandleAudioResponse(combinedQuery, voiceSelectionDropdown.value, requestAbortController.signal);
   } catch (error) {
     if (error.name !== 'AbortError') {
       console.error("Error sending query to AI:", error);
@@ -543,20 +533,32 @@ if ('webkitSpeechRecognition' in window) {
 
 setInterval(updateLatency, 100);
 
-
+
+
+// Webcam Integration
+import { client, handle_file } from 'https://cdn.jsdelivr.net/npm/@gradio/client/+esm';
+
+const video = document.getElementById('webcam');
+let app;
+let lastCaption = "";
+
+const clients = [
+  "multimodalart/Florence-2-l4",
+  "gokaygokay/Florence-2",
+  "multimodalart/Florence-2-l4-2",
+  "gokaygokay/Florence-2",
+];
 
 async function startWebcam() {
   try {
     const stream = await navigator.mediaDevices.getUserMedia({ video: true });
     video.srcObject = stream;
-    setInterval(captureAndProcessImage, 5000);
+    setInterval(captureAndProcessImage, 5000);
   } catch (error) {
     console.error("Error accessing webcam: ", error);
-    // Consider adding user feedback here, e.g., alert or display a message.
   }
 }
 
-
 async function captureAndProcessImage() {
   const canvas = document.createElement('canvas');
   canvas.width = video.videoWidth;
@@ -568,7 +570,6 @@ async function captureAndProcessImage() {
   await processWithGradio(blob);
 }
 
-
 async function processWithGradio(imageBlob) {
   try {
     const randomClient = clients[Math.floor(Math.random() * clients.length)];
@@ -577,57 +578,13 @@ async function processWithGradio(imageBlob) {
 
     const result = await app.predict("/process_image", [handledFile, "Detailed Caption"]);
 
-    const dataString = result.data[0];
-    lastCaption = dataString ||
+    const dataString = result.data[0];
+    lastCaption = dataString || lastCaption;
   } catch (error) {
     console.error("Error processing with Gradio:", error);
-    // Add error handling here (e.g., display a message to the user).
-    lastCaption = ""; // Reset caption if there's an error.
   }
 }
 
-
-
-// Modify sendQueryToAI to include the caption
-async function sendQueryToAI(query) {
-  console.log("Sending query to AI:", query);
-  isRequestInProgress = true;
-  updateActivityIndicators();
-  firstResponseTextTimestamp = null;
-
-  const normalizedQuery = normalizeQueryText(query);
-  const cacheKey = generateCacheKey(normalizedQuery, modelSelectionDropdown.value, conversationHistory, modelSelectionDropdown.value);
-
-  queryStartTime = Date.now();
-
-  // Check prefetch cache
-  if (prefetchCache.has(cacheKey)) {
-    const cachedData = prefetchCache.get(cacheKey);
-    if (Date.now() - cachedData.timestamp < PREFETCH_CACHE_EXPIRATION) {
-      audioPlaybackQueue.push({ url: cachedData.url, isPrefetched: true });
-      playNextAudio();
-    } else {
-      prefetchCache.delete(cacheKey);
-    }
-  }
-
-  requestAbortController = new AbortController();
-
-  try {
-    const combinedQuery = `{USER: "${query}"}, ${lastCaption}, {USER: "${query}"}`;
-    await streamAndHandleAudioResponse(combinedQuery, voiceSelectionDropdown.value, requestAbortController.signal);
-  } catch (error) {
-    if (error.name !== 'AbortError') {
-      console.error("Error sending query to AI:", error);
-    }
-  } finally {
-    isRequestInProgress = false;
-    updateActivityIndicators();
-  }
-};
-
-
-// Initialize Webcam and Speech Recognition on Load
 window.onload = () => {
   startWebcam();
 };
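
For reference, a minimal sketch of the webcam-to-caption flow that this revision relocates to the end of script1.js. This is an illustrative approximation, not the file's exact code: the client/handle_file import, the Florence-2 Space names, the /process_image endpoint, and the 'webcam' element id come from the diff above, while the connection call, the canvas-capture glue, and the condensed function name captureAndCaption are assumptions for illustration.

// Sketch only: approximates the capture -> caption loop shown in the diff above.
import { client, handle_file } from 'https://cdn.jsdelivr.net/npm/@gradio/client/+esm';

const video = document.getElementById('webcam');
const spaces = ["multimodalart/Florence-2-l4", "gokaygokay/Florence-2"]; // Space names taken from the diff
let lastCaption = "";

async function captureAndCaption() {
  // Draw the current video frame onto an offscreen canvas.
  const canvas = document.createElement('canvas');
  canvas.width = video.videoWidth;
  canvas.height = video.videoHeight;
  canvas.getContext('2d').drawImage(video, 0, 0);

  // Encode the frame as a JPEG blob.
  const blob = await new Promise((resolve) => canvas.toBlob(resolve, 'image/jpeg'));

  // Connect to a randomly chosen Space and request a detailed caption (connection step assumed).
  const space = spaces[Math.floor(Math.random() * spaces.length)];
  const app = await client(space);
  const result = await app.predict("/process_image", [handle_file(blob), "Detailed Caption"]);

  // Keep the previous caption if the Space returns nothing.
  lastCaption = result.data[0] || lastCaption;
}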