KingNish committed on
Commit
5c2edba
1 Parent(s): 31fc4c1

Update script1.js

Browse files
Files changed (1) hide show
  1. script1.js +41 -26
script1.js CHANGED
@@ -46,6 +46,20 @@ let conversationHistory = [];
46
  // Audio Caching
47
  const audioCache = new Map();
48
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
  // Utility Functions
50
 
51
  // Normalize query text
@@ -58,7 +72,7 @@ const generateCacheKey = (normalizedQuery, voice, history, modelName) =>
58
  // Update activity indicators
59
  const updateActivityIndicators = (state = null) => {
60
  userActivityIndicator.textContent = isUserSpeaking ? "User: Speaking" : "User: Idle";
61
-
62
  if (isRequestInProgress && !currentAudio) {
63
  aiActivityIndicator.textContent = "AI: Processing...";
64
  } else if (currentAudio && !isUserSpeaking) {
@@ -223,7 +237,7 @@ async function sendQueryToAI(query) {
223
  requestAbortController = new AbortController();
224
 
225
  try {
226
- const combinedQuery = `{USER: "${query}"}, ${lastCaption}, {USER: "${query}"}`;
227
  await streamAndHandleAudioResponse(combinedQuery, voiceSelectionDropdown.value, requestAbortController.signal);
228
  } catch (error) {
229
  if (error.name !== 'AbortError') {
@@ -473,6 +487,12 @@ if ('webkitSpeechRecognition' in window) {
473
  lastUserSpeechTimestamp = Date.now();
474
  updateActivityIndicators();
475
  startStopButton.innerHTML = '<svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M9 9h6v6h-6z"></path><path d="M12 1a3 3 0 0 0-3 3v8a3 3 0 0 0 6 0V4a3 3 0 0 0-3-3z"></path><path d="M19 10v2a7 7 0 0 1-14 0v-2"></path><line x1="12" y1="19" x2="12" y2="23"></line><line x1="8" y1="23" x2="16" y2="23"></line></svg> Stop Listening';
 
 
 
 
 
 
476
  };
477
 
478
  speechRecognizer.onresult = (event) => {
@@ -521,6 +541,9 @@ if ('webkitSpeechRecognition' in window) {
521
  speechRecognizer.stop();
522
  isSpeechRecognitionActive = false;
523
  startStopButton.innerHTML = '<svg id="microphoneIcon" xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M12 1a3 3 0 0 0-3 3v8a3 3 0 0 0 6 0V4a3 3 0 0 0-3-3z"></path><path d="M19 10v2a7 7 0 0 1-14 0v-2"></path><line x1="12" y1="19" x2="12" y2="23"></line><line x1="8" y1="23" x2="16" y2="23"></line></svg> Start Listening';
 
 
 
524
  } else {
525
  speechRecognizer.start();
526
  isSpeechRecognitionActive = true;
@@ -531,29 +554,21 @@ if ('webkitSpeechRecognition' in window) {
531
  alert('Your browser does not support the Web Speech API.');
532
  }
533
 
534
- setInterval(updateLatency, 100);
535
-
536
-
537
-
538
- // Webcam Integration
539
- import { client, handle_file } from 'https://cdn.jsdelivr.net/npm/@gradio/client/+esm';
540
-
541
- const video = document.getElementById('webcam');
542
- let app;
543
- let lastCaption = "";
544
-
545
- const clients = [
546
- "multimodalart/Florence-2-l4",
547
- "gokaygokay/Florence-2",
548
- "multimodalart/Florence-2-l4-2",
549
- "gokaygokay/Florence-2",
550
- ];
551
 
 
552
  async function startWebcam() {
553
  try {
554
  const stream = await navigator.mediaDevices.getUserMedia({ video: true });
555
  video.srcObject = stream;
556
- setInterval(captureAndProcessImage, 5000);
 
 
 
 
 
 
 
 
557
  } catch (error) {
558
  console.error("Error accessing webcam: ", error);
559
  }
@@ -566,25 +581,25 @@ async function captureAndProcessImage() {
566
  const context = canvas.getContext('2d');
567
  context.drawImage(video, 0, 0, canvas.width, canvas.height);
568
 
569
- const blob = await new Promise(resolve => canvas.toBlob(resolve, 'image/png'));
570
  await processWithGradio(blob);
571
  }
572
 
 
573
  async function processWithGradio(imageBlob) {
574
  try {
575
  const randomClient = clients[Math.floor(Math.random() * clients.length)];
576
  app = await client(randomClient);
577
- const handledFile = await handle_file(imageBlob);
578
 
 
579
  const result = await app.predict("/process_image", [handledFile, "More Detailed Caption"]);
580
 
581
  const dataString = result.data[0];
582
- lastCaption = dataString || lastCaption;
583
  } catch (error) {
584
  console.error("Error processing with Gradio:", error);
585
  }
586
  }
587
 
588
- window.onload = () => {
589
- startWebcam();
590
- };
 
46
  // Audio Caching
47
  const audioCache = new Map();
48
 
49
+ // Webcam Integration
50
+ import { client, handle_file } from 'https://cdn.jsdelivr.net/npm/@gradio/client/+esm';
51
+ const video = document.getElementById('webcam');
52
+ let app;
53
+ let lastCaption = "";
54
+ let isWebcamActive = false; // Flag for webcam state
55
+
56
+ const clients = [
57
+ "multimodalart/Florence-2-l4",
58
+ "gokaygokay/Florence-2",
59
+ "multimodalart/Florence-2-l4-2",
60
+ "gokaygokay/Florence-2",
61
+ ];
62
+
63
  // Utility Functions
64
 
65
  // Normalize query text
 
72
  // Update activity indicators
73
  const updateActivityIndicators = (state = null) => {
74
  userActivityIndicator.textContent = isUserSpeaking ? "User: Speaking" : "User: Idle";
75
+
76
  if (isRequestInProgress && !currentAudio) {
77
  aiActivityIndicator.textContent = "AI: Processing...";
78
  } else if (currentAudio && !isUserSpeaking) {
 
237
  requestAbortController = new AbortController();
238
 
239
  try {
240
+ const combinedQuery = isWebcamActive ? `{USER: "${query}"}, ${lastCaption}, {USER: "${query}"}` : query;
241
  await streamAndHandleAudioResponse(combinedQuery, voiceSelectionDropdown.value, requestAbortController.signal);
242
  } catch (error) {
243
  if (error.name !== 'AbortError') {
 
487
  lastUserSpeechTimestamp = Date.now();
488
  updateActivityIndicators();
489
  startStopButton.innerHTML = '<svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M9 9h6v6h-6z"></path><path d="M12 1a3 3 0 0 0-3 3v8a3 3 0 0 0 6 0V4a3 3 0 0 0-3-3z"></path><path d="M19 10v2a7 7 0 0 1-14 0v-2"></path><line x1="12" y1="19" x2="12" y2="23"></line><line x1="8" y1="23" x2="16" y2="23"></line></svg> Stop Listening';
490
+
491
+ // Start webcam processing if not already active
492
+ if (!isWebcamActive) {
493
+ startWebcam();
494
+ isWebcamActive = true;
495
+ }
496
  };
497
 
498
  speechRecognizer.onresult = (event) => {
 
541
  speechRecognizer.stop();
542
  isSpeechRecognitionActive = false;
543
  startStopButton.innerHTML = '<svg id="microphoneIcon" xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M12 1a3 3 0 0 0-3 3v8a3 3 0 0 0 6 0V4a3 3 0 0 0-3-3z"></path><path d="M19 10v2a7 7 0 0 1-14 0v-2"></path><line x1="12" y1="19" x2="12" y2="23"></line><line x1="8" y1="23" x2="16" y2="23"></line></svg> Start Listening';
544
+
545
+ // Stop webcam processing
546
+ isWebcamActive = false;
547
  } else {
548
  speechRecognizer.start();
549
  isSpeechRecognitionActive = true;
 
554
  alert('Your browser does not support the Web Speech API.');
555
  }
556
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
557
 
558
+ // Webcam Functions (optimized)
559
  async function startWebcam() {
560
  try {
561
  const stream = await navigator.mediaDevices.getUserMedia({ video: true });
562
  video.srcObject = stream;
563
+
564
+ // Capture and process image every 5 seconds while webcam is active
565
+ const webcamInterval = setInterval(async () => {
566
+ if (!isWebcamActive) {
567
+ clearInterval(webcamInterval);
568
+ return;
569
+ }
570
+ await captureAndProcessImage();
571
+ }, 5000);
572
  } catch (error) {
573
  console.error("Error accessing webcam: ", error);
574
  }
 
581
  const context = canvas.getContext('2d');
582
  context.drawImage(video, 0, 0, canvas.width, canvas.height);
583
 
584
+ const blob = await new Promise(resolve => canvas.toBlob(resolve, 'image/jpeg', 0.8)); // Use JPEG for smaller size
585
  await processWithGradio(blob);
586
  }
587
 
588
+
589
  async function processWithGradio(imageBlob) {
590
  try {
591
  const randomClient = clients[Math.floor(Math.random() * clients.length)];
592
  app = await client(randomClient);
 
593
 
594
+ const handledFile = await handle_file(imageBlob);
595
  const result = await app.predict("/process_image", [handledFile, "More Detailed Caption"]);
596
 
597
  const dataString = result.data[0];
598
+ lastCaption = dataString || lastCaption;
599
  } catch (error) {
600
  console.error("Error processing with Gradio:", error);
601
  }
602
  }
603
 
604
+
605
+ setInterval(updateLatency, 100);