andito HF staff commited on
Commit
0d00307
1 Parent(s): 3abafc4

Upload folder using huggingface_hub

Browse files
Files changed (3) hide show
  1. TTS/parler_handler.py +1 -0
  2. audio_streaming_client.py +149 -0
  3. handler.py +12 -4
TTS/parler_handler.py CHANGED
@@ -189,3 +189,4 @@ class ParlerTTSHandler(BaseHandler):
189
  )
190
 
191
  self.should_listen.set()
 
 
189
  )
190
 
191
  self.should_listen.set()
192
+ yield b"END"
audio_streaming_client.py ADDED
@@ -0,0 +1,149 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import threading
2
+ from queue import Queue
3
+ import sounddevice as sd
4
+ import numpy as np
5
+ import requests
6
+ import base64
7
+ import time
8
+ from dataclasses import dataclass, field
9
+
10
+ @dataclass
11
+ class AudioStreamingClientArguments:
12
+ sample_rate: int = field(default=16000, metadata={"help": "Audio sample rate in Hz. Default is 16000."})
13
+ chunk_size: int = field(default=1024, metadata={"help": "The size of audio chunks in samples. Default is 1024."})
14
+ api_url: str = field(default="https://yxfmjcvuzgi123sw.us-east-1.aws.endpoints.huggingface.cloud", metadata={"help": "The URL of the API endpoint."})
15
+ auth_token: str = field(default="your_auth_token", metadata={"help": "Authentication token for the API."})
16
+
17
+ class AudioStreamingClient:
18
+ def __init__(self, args: AudioStreamingClientArguments):
19
+ self.args = args
20
+ self.stop_event = threading.Event()
21
+ self.send_queue = Queue()
22
+ self.recv_queue = Queue()
23
+ self.session_id = None
24
+ self.headers = {
25
+ "Accept": "application/json",
26
+ "Authorization": f"Bearer {self.args.auth_token}",
27
+ "Content-Type": "application/json"
28
+ }
29
+
30
+ def start(self):
31
+ print("Starting audio streaming...")
32
+
33
+ send_thread = threading.Thread(target=self.send_audio)
34
+ recv_thread = threading.Thread(target=self.receive_audio)
35
+ play_thread = threading.Thread(target=self.play_audio)
36
+
37
+ with sd.InputStream(samplerate=self.args.sample_rate, channels=1, dtype='int16', callback=self.audio_callback):
38
+ send_thread.start()
39
+ recv_thread.start()
40
+ play_thread.start()
41
+
42
+ try:
43
+ input("Press Enter to stop streaming...")
44
+ except KeyboardInterrupt:
45
+ print("\nStreaming interrupted by user.")
46
+ finally:
47
+ self.stop_event.set()
48
+ send_thread.join()
49
+ recv_thread.join()
50
+ play_thread.join()
51
+ print("Audio streaming stopped.")
52
+
53
+ def audio_callback(self, indata, frames, time, status):
54
+ self.send_queue.put(indata.copy())
55
+
56
+ def send_audio(self):
57
+ buffer = b''
58
+ while not self.stop_event.is_set():
59
+ if not self.send_queue.empty():
60
+ chunk = self.send_queue.get().tobytes()
61
+ buffer += chunk
62
+ if len(buffer) >= self.args.chunk_size * 2: # * 2 because of int16
63
+ self.send_request(buffer)
64
+ buffer = b''
65
+ else:
66
+ time.sleep(0.01)
67
+
68
+ def send_request(self, audio_data):
69
+ if not self.session_id:
70
+ payload = {
71
+ "request_type": "start",
72
+ "inputs": base64.b64encode(audio_data).decode('utf-8'),
73
+ "input_type": "speech",
74
+ }
75
+ else:
76
+ payload = {
77
+ "request_type": "continue",
78
+ "session_id": self.session_id,
79
+ "inputs": base64.b64encode(audio_data).decode('utf-8'),
80
+ }
81
+
82
+ try:
83
+ response = requests.post(self.args.api_url, headers=self.headers, json=payload)
84
+ response_data = response.json()
85
+
86
+ if "session_id" in response_data:
87
+ self.session_id = response_data["session_id"]
88
+
89
+ if "output" in response_data and response_data["output"]:
90
+ audio_bytes = base64.b64decode(response_data["output"])
91
+ audio_np = np.frombuffer(audio_bytes, dtype=np.int16)
92
+ self.recv_queue.put(audio_np)
93
+
94
+ except Exception as e:
95
+ print(f"Error sending request: {e}")
96
+
97
+ def receive_audio(self):
98
+ while not self.stop_event.is_set():
99
+ if self.session_id:
100
+ payload = {
101
+ "request_type": "continue",
102
+ "session_id": self.session_id
103
+ }
104
+ try:
105
+ response = requests.post(self.args.api_url, headers=self.headers, json=payload)
106
+ response_data = response.json()
107
+
108
+ if response_data["status"] == "completed" and not response_data["output"]:
109
+ break
110
+
111
+ if response_data["output"]:
112
+ audio_bytes = base64.b64decode(response_data["output"])
113
+ audio_np = np.frombuffer(audio_bytes, dtype=np.int16)
114
+ self.recv_queue.put(audio_np)
115
+
116
+ except Exception as e:
117
+ print(f"Error receiving audio: {e}")
118
+
119
+ time.sleep(0.1)
120
+
121
+ def play_audio(self):
122
+ def audio_callback(outdata, frames, time, status):
123
+ if not self.recv_queue.empty():
124
+ chunk = self.recv_queue.get()
125
+ if len(chunk) < len(outdata):
126
+ outdata[:len(chunk)] = chunk.reshape(-1, 1)
127
+ outdata[len(chunk):] = 0
128
+ else:
129
+ outdata[:] = chunk[:len(outdata)].reshape(-1, 1)
130
+ else:
131
+ outdata[:] = 0
132
+
133
+ with sd.OutputStream(samplerate=self.args.sample_rate, channels=1, callback=audio_callback):
134
+ while not self.stop_event.is_set():
135
+ time.sleep(0.1)
136
+
137
+ if __name__ == "__main__":
138
+ import argparse
139
+
140
+ parser = argparse.ArgumentParser(description="Audio Streaming Client")
141
+ parser.add_argument("--sample_rate", type=int, default=16000, help="Audio sample rate in Hz. Default is 16000.")
142
+ parser.add_argument("--chunk_size", type=int, default=1024, help="The size of audio chunks in samples. Default is 1024.")
143
+ parser.add_argument("--api_url", type=str, required=True, help="The URL of the API endpoint.")
144
+ parser.add_argument("--auth_token", type=str, required=True, help="Authentication token for the API.")
145
+
146
+ args = parser.parse_args()
147
+ client_args = AudioStreamingClientArguments(**vars(args))
148
+ client = AudioStreamingClient(client_args)
149
+ client.start()
handler.py CHANGED
@@ -72,8 +72,7 @@ class EndpointHandler:
72
  else:
73
  self.sessions[session_id]['chunks'].append(output)
74
  except Empty:
75
- self.sessions[session_id]['status'] = 'completed'
76
- break
77
 
78
  def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
79
  request_type = data.get("request_type", "start")
@@ -97,11 +96,12 @@ class EndpointHandler:
97
  input_data = data.get("inputs", "")
98
 
99
  if input_type == "speech":
100
- audio_array = np.frombuffer(input_data, dtype=np.int16)
 
101
  self.queues_and_events['recv_audio_chunks_queue'].put(audio_array.tobytes())
102
  elif input_type == "text":
103
  self.queues_and_events['text_prompt_queue'].put(input_data)
104
- else:
105
  raise ValueError(f"Unsupported input type: {input_type}")
106
 
107
  # Start output collection in a separate thread
@@ -115,6 +115,14 @@ class EndpointHandler:
115
  raise ValueError("Invalid or missing session_id")
116
 
117
  session = self.sessions[session_id]
 
 
 
 
 
 
 
 
118
  chunks_to_send = session['chunks'][session['last_sent_index']:]
119
  session['last_sent_index'] = len(session['chunks'])
120
 
 
72
  else:
73
  self.sessions[session_id]['chunks'].append(output)
74
  except Empty:
75
+ continue
 
76
 
77
  def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
78
  request_type = data.get("request_type", "start")
 
96
  input_data = data.get("inputs", "")
97
 
98
  if input_type == "speech":
99
+ audio_bytes = base64.b64decode(input_data)
100
+ audio_array = np.frombuffer(audio_bytes, dtype=np.int16)
101
  self.queues_and_events['recv_audio_chunks_queue'].put(audio_array.tobytes())
102
  elif input_type == "text":
103
  self.queues_and_events['text_prompt_queue'].put(input_data)
104
+ else:
105
  raise ValueError(f"Unsupported input type: {input_type}")
106
 
107
  # Start output collection in a separate thread
 
115
  raise ValueError("Invalid or missing session_id")
116
 
117
  session = self.sessions[session_id]
118
+
119
+ # Handle additional input if provided
120
+ if "inputs" in data:
121
+ input_data = data["inputs"]
122
+ audio_bytes = base64.b64decode(input_data)
123
+ audio_array = np.frombuffer(audio_bytes, dtype=np.int16)
124
+ self.queues_and_events['recv_audio_chunks_queue'].put(audio_array.tobytes())
125
+
126
  chunks_to_send = session['chunks'][session['last_sent_index']:]
127
  session['last_sent_index'] = len(session['chunks'])
128