KingNish committed
Commit 6b9530d · verified · 1 Parent(s): 1fa2d43

Update script1.js

Files changed (1)
  1. script1.js +25 -68
script1.js CHANGED
@@ -1,6 +1,8 @@
+// script1.js
+
 // Constants and Configuration
 const USER_SPEECH_INTERRUPT_DELAY = 500;
-const TEXT_TO_SPEECH_API_ENDPOINT = "https://api.streamelements.com/kappa/v2/speech"; // Replace with your TTS endpoint
+const TEXT_TO_SPEECH_API_ENDPOINT = "https://api.streamelements.com/kappa/v2/speech";
 const CHUNK_SIZE = 300;
 const MAX_PREFETCH_REQUESTS = 10;
 const PREFETCH_CACHE_EXPIRATION = 60000; // 1 minute
@@ -10,7 +12,7 @@ const AUDIO_CACHE_EXPIRATION = 3600000; // 1 hour
 const startStopButton = document.getElementById('startStopButton');
 const voiceSelectionDropdown = document.getElementById('voiceSelect');
 const modelSelectionDropdown = document.getElementById('modelSelect');
-const noiseSuppressionCheckbox = document.getElementById('noiseSuppression'); // Assuming you have this in your HTML
+const noiseSuppressionCheckbox = document.getElementById('noiseSuppression');
 const responseTimeDisplay = document.getElementById('responseTime');
 const userActivityIndicator = document.getElementById('userIndicator');
 const aiActivityIndicator = document.getElementById('aiIndicator');
@@ -44,19 +46,6 @@ let conversationHistory = [];
 // Audio Caching
 const audioCache = new Map();
 
-// Webcam and Gradio Integration
-import { client, handle_file } from 'https://cdn.jsdelivr.net/npm/@gradio/client/+esm';
-const video = document.getElementById('webcam');
-const clients = [
-  "multimodalart/Florence-2-l4",
-  "gokaygokay/Florence-2",
-  "multimodalart/Florence-2-l4-2",
-  "gokaygokay/Florence-2",
-]; // Or your preferred Gradio models
-let app;
-let lastCaption = "";
-
-
 // Utility Functions
 
 // Normalize query text
@@ -209,7 +198,7 @@ const cancelPrefetchRequests = (query) => {
 // AI Interaction Functions
 
 // Send a query to the AI
-const sendQueryToAI = async (query) => {
+async function sendQueryToAI(query) {
   console.log("Sending query to AI:", query);
   isRequestInProgress = true;
   updateActivityIndicators();
@@ -234,7 +223,8 @@ const sendQueryToAI = async (query) => {
   requestAbortController = new AbortController();
 
   try {
-    await streamAndHandleAudioResponse(query, voiceSelectionDropdown.value, requestAbortController.signal);
+    const combinedQuery = `{USER: "${query}"}, ${lastCaption}, {USER: "${query}"}`;
+    await streamAndHandleAudioResponse(combinedQuery, voiceSelectionDropdown.value, requestAbortController.signal);
   } catch (error) {
     if (error.name !== 'AbortError') {
       console.error("Error sending query to AI:", error);
@@ -543,20 +533,32 @@ if ('webkitSpeechRecognition' in window) {
 
 setInterval(updateLatency, 100);
 
-// Webcam Functions
+
+
+// Webcam Integration
+import { client, handle_file } from 'https://cdn.jsdelivr.net/npm/@gradio/client/+esm';
+
+const video = document.getElementById('webcam');
+let app;
+let lastCaption = "";
+
+const clients = [
+  "multimodalart/Florence-2-l4",
+  "gokaygokay/Florence-2",
+  "multimodalart/Florence-2-l4-2",
+  "gokaygokay/Florence-2",
+];
 
 async function startWebcam() {
   try {
     const stream = await navigator.mediaDevices.getUserMedia({ video: true });
     video.srcObject = stream;
-    setInterval(captureAndProcessImage, 5000); // Adjust interval as needed
+    setInterval(captureAndProcessImage, 5000);
   } catch (error) {
     console.error("Error accessing webcam: ", error);
-    // Consider adding user feedback here, e.g., alert or display a message.
   }
 }
 
-
 async function captureAndProcessImage() {
   const canvas = document.createElement('canvas');
   canvas.width = video.videoWidth;
@@ -568,7 +570,6 @@ async function captureAndProcessImage() {
   await processWithGradio(blob);
 }
 
-
 async function processWithGradio(imageBlob) {
   try {
     const randomClient = clients[Math.floor(Math.random() * clients.length)];
@@ -577,57 +578,13 @@ async function processWithGradio(imageBlob) {
 
     const result = await app.predict("/process_image", [handledFile, "Detailed Caption"]);
 
-    const dataString = result.data[0]; // Assuming the caption is the first element in the response
-    lastCaption = dataString || ""; // Handle potential errors
+    const dataString = result.data[0];
+    lastCaption = dataString || lastCaption;
   } catch (error) {
     console.error("Error processing with Gradio:", error);
-    // Add error handling here (e.g., display a message to the user).
-    lastCaption = ""; // Reset caption if there's an error.
   }
 }
 
-
-
-// Modify sendQueryToAI to include the caption
-async function sendQueryToAI(query) {
-  console.log("Sending query to AI:", query);
-  isRequestInProgress = true;
-  updateActivityIndicators();
-  firstResponseTextTimestamp = null;
-
-  const normalizedQuery = normalizeQueryText(query);
-  const cacheKey = generateCacheKey(normalizedQuery, modelSelectionDropdown.value, conversationHistory, modelSelectionDropdown.value);
-
-  queryStartTime = Date.now();
-
-  // Check prefetch cache
-  if (prefetchCache.has(cacheKey)) {
-    const cachedData = prefetchCache.get(cacheKey);
-    if (Date.now() - cachedData.timestamp < PREFETCH_CACHE_EXPIRATION) {
-      audioPlaybackQueue.push({ url: cachedData.url, isPrefetched: true });
-      playNextAudio();
-    } else {
-      prefetchCache.delete(cacheKey);
-    }
-  }
-
-  requestAbortController = new AbortController();
-
-  try {
-    const combinedQuery = `{USER: "${query}"}, ${lastCaption}, {USER: "${query}"}`;
-    await streamAndHandleAudioResponse(combinedQuery, voiceSelectionDropdown.value, requestAbortController.signal);
-  } catch (error) {
-    if (error.name !== 'AbortError') {
-      console.error("Error sending query to AI:", error);
-    }
-  } finally {
-    isRequestInProgress = false;
-    updateActivityIndicators();
-  }
-};
-
-
-// Initialize Webcam and Speech Recognition on Load
 window.onload = () => {
   startWebcam();
 };
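
The net effect of the commit: the duplicate sendQueryToAI definition is removed, every query is now wrapped with the most recent webcam caption produced by processWithGradio, and a failed captioning call keeps the previous caption instead of clearing it. Below is a minimal sketch of that prompt construction with hypothetical example values; buildCombinedQuery and the strings are illustrative only and not part of script1.js (there, lastCaption comes from result.data[0] of the Florence-2 Space call):

// Sketch only: buildCombinedQuery and the example strings are illustrative.
let lastCaption = "A person sitting at a desk holding a red coffee mug.";

function buildCombinedQuery(query) {
  // Same template the commit adds inside the try block of sendQueryToAI.
  return `{USER: "${query}"}, ${lastCaption}, {USER: "${query}"}`;
}

// On a Gradio error the commit now keeps the old caption rather than resetting it:
// lastCaption = dataString || lastCaption;

console.log(buildCombinedQuery("What am I holding?"));
// {USER: "What am I holding?"}, A person sitting at a desk holding a red coffee mug., {USER: "What am I holding?"}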