andito HF staff committed on
Commit
8745348
1 Parent(s): ec7729d

Upload folder using huggingface_hub

Browse files
TTS/melo_handler.py CHANGED
@@ -107,3 +107,4 @@ class MeloTTSHandler(BaseHandler):
107
  )
108
 
109
  self.should_listen.set()
 
 
107
  )
108
 
109
  self.should_listen.set()
110
+ yield b"END"
audio_streaming_client.py CHANGED
@@ -65,12 +65,12 @@ class AudioStreamingClient:
65
  time.sleep(0.1)
66
 
67
  def send_request(self, audio_data=None):
68
- payload = {"inputs": ""}
 
69
 
70
  if audio_data is not None:
71
  print("Sending audio data")
72
  payload["inputs"] = base64.b64encode(audio_data).decode('utf-8')
73
- payload["input_type"] = "speech"
74
 
75
  if self.session_id:
76
  payload["session_id"] = self.session_id
@@ -88,11 +88,6 @@ class AudioStreamingClient:
88
  if "status" in response_data and response_data["status"] == "processing":
89
  print("Processing audio data")
90
  self.session_state = "processing"
91
- elif "status" in response_data and response_data["status"] == "completed":
92
- print("Completed audio processing")
93
- self.session_state = None
94
- self.session_id = None
95
- _ = self.send_queue.get() # Clear the queue
96
 
97
  if "output" in response_data and response_data["output"]:
98
  print("Received audio data")
@@ -104,6 +99,15 @@ class AudioStreamingClient:
104
  chunk = audio_np[i:i+self.args.chunk_size]
105
  self.recv_queue.put(chunk)
106
 
 
 
 
 
 
 
 
 
 
107
  except Exception as e:
108
  print(f"Error sending request: {e}")
109
  self.session_state = "idle" # Reset state to idle in case of error
 
65
  time.sleep(0.1)
66
 
67
  def send_request(self, audio_data=None):
68
+ payload = {"input_type": "speech",
69
+ "inputs": ""}
70
 
71
  if audio_data is not None:
72
  print("Sending audio data")
73
  payload["inputs"] = base64.b64encode(audio_data).decode('utf-8')
 
74
 
75
  if self.session_id:
76
  payload["session_id"] = self.session_id
 
88
  if "status" in response_data and response_data["status"] == "processing":
89
  print("Processing audio data")
90
  self.session_state = "processing"
 
 
 
 
 
91
 
92
  if "output" in response_data and response_data["output"]:
93
  print("Received audio data")
 
99
  chunk = audio_np[i:i+self.args.chunk_size]
100
  self.recv_queue.put(chunk)
101
 
102
+ if "status" in response_data and response_data["status"] == "completed":
103
+ print("Completed audio processing")
104
+ self.session_state = None
105
+ self.session_id = None
106
+ while not self.recv_queue.empty():
107
+ time.sleep(0.01) # wait for the queue to empty
108
+ while not self.send_queue.empty():
109
+ _ = self.send_queue.get() # Clear the queue
110
+
111
  except Exception as e:
112
  print(f"Error sending request: {e}")
113
  self.session_state = "idle" # Reset state to idle in case of error
audio_streaming_test.py CHANGED
@@ -66,12 +66,12 @@ class AudioStreamingClient:
66
  time.sleep(0.1)
67
 
68
  def send_request(self, audio_data=None):
69
- payload = {}
 
70
 
71
  if audio_data is not None:
72
  print("Sending audio data")
73
  payload["inputs"] = base64.b64encode(audio_data).decode('utf-8')
74
- payload["input_type"] = "speech"
75
 
76
  if self.session_id:
77
  payload["session_id"] = self.session_id
@@ -88,11 +88,6 @@ class AudioStreamingClient:
88
  if "status" in response_data and response_data["status"] == "processing":
89
  print("Processing audio data")
90
  self.session_state = "processing"
91
- elif "status" in response_data and response_data["status"] == "completed":
92
- print("Completed audio processing")
93
- self.session_state = None
94
- self.session_id = None
95
- _ = self.send_queue.get() # Clear the queue
96
 
97
  if "output" in response_data and response_data["output"]:
98
  print("Received audio data")
@@ -104,6 +99,15 @@ class AudioStreamingClient:
104
  chunk = audio_np[i:i+self.args.chunk_size]
105
  self.recv_queue.put(chunk)
106
 
 
 
 
 
 
 
 
 
 
107
  except Exception as e:
108
  print(f"Error sending request: {e}")
109
  self.session_state = "idle" # Reset state to idle in case of error
 
66
  time.sleep(0.1)
67
 
68
  def send_request(self, audio_data=None):
69
+ payload = {"input_type": "speech",
70
+ "inputs": ""}
71
 
72
  if audio_data is not None:
73
  print("Sending audio data")
74
  payload["inputs"] = base64.b64encode(audio_data).decode('utf-8')
 
75
 
76
  if self.session_id:
77
  payload["session_id"] = self.session_id
 
88
  if "status" in response_data and response_data["status"] == "processing":
89
  print("Processing audio data")
90
  self.session_state = "processing"
 
 
 
 
 
91
 
92
  if "output" in response_data and response_data["output"]:
93
  print("Received audio data")
 
99
  chunk = audio_np[i:i+self.args.chunk_size]
100
  self.recv_queue.put(chunk)
101
 
102
+ if "status" in response_data and response_data["status"] == "completed":
103
+ print("Completed audio processing")
104
+ self.session_state = None
105
+ self.session_id = None
106
+ while not self.recv_queue.empty():
107
+ time.sleep(0.01) # wait for the queue to empty
108
+ while not self.send_queue.empty():
109
+ _ = self.send_queue.get() # Clear the queue
110
+
111
  except Exception as e:
112
  print(f"Error sending request: {e}")
113
  self.session_state = "idle" # Reset state to idle in case of error
handler.py CHANGED
@@ -1,6 +1,3 @@
1
- import subprocess
2
- subprocess.run("pip install flash-attn --no-build-isolation", shell=True, check=True)
3
-
4
  from typing import Dict, Any, List, Generator
5
  import torch
6
  import os
@@ -26,7 +23,8 @@ class EndpointHandler:
26
  self.parler_tts_handler_kwargs,
27
  self.melo_tts_handler_kwargs,
28
  self.chat_tts_handler_kwargs,
29
- ) = get_default_arguments(mode='none', log_level='DEBUG', lm_model_name='meta-llama/Meta-Llama-3.1-8B-Instruct')
 
30
  setup_logger(self.module_kwargs.log_level)
31
 
32
  prepare_all_args(
@@ -104,6 +102,7 @@ class EndpointHandler:
104
  raise ValueError(f"Unsupported request type: {request_type}")
105
 
106
  def _handle_start_request(self, data: Dict[str, Any]) -> Dict[str, Any]:
 
107
  session_id = str(uuid.uuid4())
108
  self.sessions[session_id] = {
109
  'status': 'new',
 
 
 
 
1
  from typing import Dict, Any, List, Generator
2
  import torch
3
  import os
 
23
  self.parler_tts_handler_kwargs,
24
  self.melo_tts_handler_kwargs,
25
  self.chat_tts_handler_kwargs,
26
+ ) = get_default_arguments(mode='none', lm_model_name='meta-llama/Meta-Llama-3.1-8B-Instruct', tts='melo')
27
+
28
  setup_logger(self.module_kwargs.log_level)
29
 
30
  prepare_all_args(
 
102
  raise ValueError(f"Unsupported request type: {request_type}")
103
 
104
  def _handle_start_request(self, data: Dict[str, Any]) -> Dict[str, Any]:
105
+ print("Starting new session")
106
  session_id = str(uuid.uuid4())
107
  self.sessions[session_id] = {
108
  'status': 'new',