sergey.agapov committed on
Commit
951be40
1 Parent(s): ee6428d

initial commit

Dockerfile ADDED
@@ -0,0 +1,16 @@
+ FROM python:3.9
+
+ # Install FFmpeg
+ RUN apt-get update && apt-get install -y ffmpeg
+
+ WORKDIR /code
+
+ # Create the directory and set permissions
+ RUN mkdir -p /tmp/dash/test_stream && chmod 777 /tmp/dash/test_stream
+
+ COPY ./requirements.txt /code/requirements.txt
+ RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
+
+ COPY . /code
+
+ CMD ["python", "validator.py"]
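
For local testing, the image can presumably be built and run along these lines (the image tag is illustrative; port 7860 matches the Flask app in validator.py below):

docker build -t video-translator .
docker run -p 7860:7860 video-translator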
requirements.txt ADDED
@@ -0,0 +1,10 @@
+ # Pip-installable packages only; stdlib modules (subprocess, json, time, signal, os, threading, argparse, fcntl, select, queue, collections) need no install
+ flask
+ flask-cors
+ psutil
+ openai-whisper
+ ffmpeg-python
+ numpy
+ webrtcvad
+ transformers
+ sentencepiece
static/images/logo.png ADDED
templates/tiktok_player.html ADDED
@@ -0,0 +1,366 @@
+ <!DOCTYPE html>
+ <html lang="en">
+ <head>
+     <meta charset="UTF-8">
+     <meta name="viewport" content="width=device-width, initial-scale=1.0">
+     <title>Multilanguage Player</title>
+     <script src="https://cdn.dashjs.org/latest/dash.all.min.js"></script>
+     <script src="https://cdn.jsdelivr.net/npm/webvtt-parser@2.1.2/dist/parser.min.js"></script>
+     <script src="https://code.jquery.com/jquery-3.6.0.min.js"></script>
+     <style>
+         body {
+             font-family: Arial, sans-serif;
+             display: flex;
+             flex-direction: column;
+             align-items: center;
+             margin: 0;
+             background-color: #ffffff;
+         }
+         .top-div {
+             width: 100%;
+             height: 100px;
+             background-color: #3C8DF9;
+             margin-bottom: 20px;
+         }
+         .container {
+             display: flex;
+             justify-content: space-between;
+             width: 90%;
+             max-width: 1200px;
+         }
+         .left-panel {
+             width: 30%;
+             padding: 20px;
+             box-sizing: border-box;
+         }
+         .right-panel {
+             width: 65%;
+             padding: 20px;
+             box-sizing: border-box;
+         }
+         .frame {
+             width: 402px;
+             height: 720px;
+             border: 2px solid #ccc;
+             display: flex;
+             justify-content: center;
+             align-items: center;
+             margin-bottom: 10px;
+         }
+         video {
+             max-width: 100%;
+             max-height: 100%;
+             box-shadow: 0 0 10px rgba(0,0,0,0.1);
+         }
+         select, input, button {
+             margin: 10px 0;
+             padding: 5px;
+             width: 100%;
+         }
+         #result {
+             margin-top: 10px;
+             font-weight: bold;
+         }
+     </style>
+ </head>
+ <body>
+     <div class="top-div"><img src="{{ url_for('static', filename='images/logo.png') }}" alt="Bytedance" height="100px"></div>
+     <div class="container">
+         <div class="left-panel">
+             <h2>FLV Stream Url</h2>
+             <form id="flvForm">
+                 <input type="text" id="flvInput" placeholder="http://example.com/stream.flv" required>
+                 <button type="submit">Play Stream</button>
+             </form>
+             <div id="result"></div>
+             <form id="terminateStreamForm">
+                 <button type="submit">Stop Stream</button>
+             </form>
+             <h3>Language Selection</h3>
+             <select id="captionSelect">
+                 <option value="original">Original</option>
+                 <option value="es">Spanish</option>
+                 <option value="ru">Russian</option>
+                 <option value="en">English</option>
+                 <option value="zh">Chinese</option>
+             </select>
+             <h3>Model Selection</h3>
+             <select id="models">
+                 <option value="base">Base</option>
+                 <option value="small">Small</option>
+                 <option value="medium">Medium</option>
+                 <option value="large">Large</option>
+                 <option value="large-v2">Large-V2</option>
+             </select>
+         </div>
+         <div class="right-panel">
+             <div class="frame">
+                 <div id="waitingMessage">Waiting for the stream...</div>
+                 <video id="videoPlayer" controls style="display: none;"></video>
+             </div>
+         </div>
+     </div>
+
+     <script>
+         (function () {
+             var url = "{{ url_for('serve_file', filename='manifest.mpd') }}";
+             var player = dashjs.MediaPlayer().create();
+             var video = document.querySelector("#videoPlayer");
+             var waitingMessage = document.querySelector("#waitingMessage");
+             var captionSelect = document.querySelector("#captionSelect");
+             var currentLanguage = "original";
+             var refreshInterval = 10000;
+             var desiredDelay = 45;
+             var checkInterval = 5000; // Check every 5 seconds
+
+             function initializePlayer() {
+                 // Reset the player if it's already initialized
+                 if (player.isReady()) {
+                     player.reset();
+                 }
+                 console.log("Initializing the player with %s", url);
+                 player.updateSettings({
+                     streaming: {
+                         delay: {
+                             liveDelay: 50,
+                         },
+                         buffer: {
+                             bufferToKeep: 40,
+                             bufferTimeAtTopQuality: 50,
+                             bufferTimeAtTopQualityLongForm: 50,
+                             initialBufferLevel: 50,
+                         },
+                     }
+                 });
+
+                 fetch(url, {method: 'GET', cache: "no-store"})
+                     .then(response => {
+                         if (response.ok) {
+                             response.text().then(body => console.log("Response:", body));
+                         } else {
+                             console.log("Response not ok:", response.status);
+                         }
+                     })
+                     .catch(() => {
+                         console.log("Error fetching manifest");
+                     });
+
+                 player.initialize(video, url, false);
+                 // player.attachView(video);
+                 player.setMute(true);
+
+                 player.enableForcedTextStreaming(true);
+
+                 player.on(dashjs.MediaPlayer.events.STREAM_INITIALIZED, onStreamInitialized);
+                 player.on(dashjs.MediaPlayer.events.ERROR, onPlayerError);
+             }
+
+             function checkStreamAvailability() {
+                 fetch(url, {method: 'GET', cache: "no-store"})
+                     .then(response => {
+                         if (response.ok) {
+                             console.log("Stream is ready");
+                             // manifest is there, wait a moment for init segments
+                             setTimeout(initializePlayer, 15000);
+                         } else {
+                             setTimeout(checkStreamAvailability, checkInterval);
+                         }
+                     })
+                     .catch(() => {
+                         setTimeout(checkStreamAvailability, checkInterval);
+                     });
+             }
+
+             function onStreamInitialized() {
+                 console.log("Stream initialized, setting up captions");
+                 setupCaptions();
+                 setInterval(refreshCaptions, refreshInterval);
+                 waitForInitialData();
+                 player.play();
+             }
+
+             function onPlayerError(e) {
+                 console.log("Player error:", e);
+                 let errorCode = e.code || e.error?.code || e.error?.error?.code;
+                 console.log("Extracted error code:", errorCode);
+                 if (errorCode === 25) {
+                     console.log("Rescheduling...");
+                     waitingMessage.style.display = "block";
+                     video.style.display = "none";
+                     checkStreamAvailability();
+                 }
+                 console.log("None...");
+                 //waitingMessage.style.display = "block";
+                 //video.style.display = "none";
+                 //checkStreamAvailability();
+             }
+
+             function waitForInitialData() {
+                 console.log("Waiting for initial data");
+                 if (player.getBufferLength() > 0 && video.readyState >= 2) {
+                     console.log("Initial data buffered, starting playback");
+                     waitingMessage.style.display = "none";
+                     video.style.display = "block";
+                     //player.play();
+                 } else {
+                     setTimeout(waitForInitialData, 100);
+                 }
+             }
+
+             function parseVTT(vttContent) {
+                 const lines = vttContent.trim().split('\n');
+                 let cues = [];
+                 let cue = {};
+
+                 for (let i = 0; i < lines.length; i++) {
+                     if (lines[i].includes('-->')) {
+                         const [start, end] = lines[i].split('-->').map(timeString => {
+                             const [hours, minutes, seconds] = timeString.trim().split(':');
+                             return parseFloat(hours) * 3600 + parseFloat(minutes) * 60 + parseFloat(seconds);
+                         });
+                         cue = {start, end, text: ''};
+                     } else if (lines[i].trim() !== '' && cue.start !== undefined) {
+                         cue.text += lines[i] + '\n';
+                     } else if (lines[i].trim() === '' && cue.text) {
+                         cues.push(cue);
+                         cue = {};
+                     }
+                 }
+                 if (cue.text) {
+                     cues.push(cue);
+                 }
+                 return cues;
+             }
+
+             function loadCaptions(lang) {
+                 console.log("Loading captions for language: " + lang);
+                 var baseUrl = "{{ url_for('serve_file', filename='') }}"; // This will give the base URL for the 'serve_file' endpoint.
+                 var fileName = "captions_" + lang + ".vtt";
+                 var captionUrl = baseUrl + fileName;
+
+                 for (var i = 0; i < video.textTracks.length; i++) {
+                     video.textTracks[i].mode = 'disabled';
+                 }
+
+                 var track = Array.from(video.textTracks).find(t => t.language === lang);
+
+                 if (!track) {
+                     track = video.addTextTrack("captions", lang, lang);
+                 }
+
+                 track.mode = 'showing';
+
+                 updateTrackCues(track, captionUrl);
+
+                 currentLanguage = lang;
+                 console.log("Captions loaded for language: " + lang);
+             }
+
+             function updateTrackCues(track, url) {
+                 fetch(url)
+                     .then(response => response.text())
+                     .then(vttContent => {
+                         const cues = parseVTT(vttContent);
+
+                         while (track.cues.length > 0) {
+                             track.removeCue(track.cues[0]);
+                         }
+
+                         cues.forEach(cue => {
+                             const vttCue = new VTTCue(cue.start, cue.end, cue.text.trim());
+                             track.addCue(vttCue);
+                         });
+                     })
+                     .catch(error => console.error('Error updating captions:', error));
+             }
+
+             function refreshCaptions() {
+                 if (currentLanguage) {
+                     var track = Array.from(video.textTracks).find(t => t.language === currentLanguage);
+                     if (track) {
+                         var baseUrl = "{{ url_for('serve_file', filename='') }}"; // This will give the base URL for the 'serve_file' endpoint.
+                         var fileName = "captions_" + currentLanguage + ".vtt";
+                         var captionUrl = baseUrl + fileName;
+                         updateTrackCues(track, captionUrl);
+                     }
+                 }
+             }
+
+             function setupCaptions() {
+                 var tracks = player.getTracksFor('text');
+                 console.log("Available text tracks:", tracks);
+
+                 if (tracks.length > 0) {
+                     captionSelect.innerHTML = '';
+                     tracks.forEach(function (track) {
+                         var option = document.createElement('option');
+                         option.value = track.lang;
+                         option.text = track.lang;
+                         captionSelect.appendChild(option);
+                     });
+                     loadCaptions(tracks[0].lang);
+                 } else {
+                     loadCaptions(currentLanguage);
+                 }
+             }
+
+             captionSelect.addEventListener("change", function () {
+                 loadCaptions(this.value);
+             });
+
+             // FLV Stream Checker
+             $('#flvForm').submit(function(e) {
+                 $('#waitingMessage').text("Checking the url...");
+                 e.preventDefault();
+                 $.ajax({
+                     url: '/terminate',
+                     method: 'POST',
+                     data: null,
+                 });
+                 if (player.isReady()) {
+                     player.pause();
+                 }
+                 $.ajax({
+                     url: '/check_flv',
+                     method: 'POST',
+                     data: { url: $('#flvInput').val(), model: $('#models').val() },
+                     success: function(response) {
+                         $('#waitingMessage').text(response.message);
+                         if (response.status === 'success') {
+                             waitingMessage.style.display = "block";
+                             video.style.display = "none";
+                             //initializePlayer();
+                             //checkStreamAvailability();
+                         }
+                     },
+                     error: function() {
+                         $('#result').text('An error occurred');
+                     }
+                 });
+             });
+             // Stream terminator
+             $('#terminateStreamForm').submit(function(e) {
+                 e.preventDefault();
+                 $.ajax({
+                     url: '/terminate',
+                     method: 'POST',
+                     data: null,
+                 });
+                 if (player.isReady()) {
+                     player.pause();
+                 }
+             });
+
+             // Start checking for stream availability
+             //initializePlayer();
+             checkStreamAvailability();
+
+             // Log current live delay every 5 seconds
+             // setInterval(() => {
+             //     var currentLiveDelay = player.duration() - player.time();
+             //     console.log("Current live delay:", currentLiveDelay);
+             //}, 5000);
+         })();
+     </script>
+ </body>
+ </html>
translator.py ADDED
@@ -0,0 +1,346 @@
+ import subprocess
+ import threading
+ import argparse
+ import fcntl
+ import select
+ import whisper
+ import ffmpeg
+ import signal
+ import numpy as np
+ import queue
+ import time
+ import webrtcvad
+ import collections
+ import os
+ from transformers import MarianMTModel, MarianTokenizer
+
+ # Global variables
+ rtmp_url = ""
+ dash_output_path = ""
+ segment_duration = 2
+ last_activity_time = 0.0
+ cleanup_threshold = 10  # seconds of inactivity before cleanup
+ start_time = 0.0
+
+ # Languages for translation (ISO 639-1 codes)
+ target_languages = ["es", "zh", "ru"]  # Example: Spanish, Chinese, Russian
+
+ # Whisper model placeholder; the real model is loaded in the __main__ block
+ whisper_model = None
+
+ # Define Frame class
+ class Frame:
+     def __init__(self, data, timestamp, duration):
+         self.data = data
+         self.timestamp = timestamp
+         self.duration = duration
+
+ # Audio buffer and caption queues
+ audio_buffer = queue.Queue()
+ caption_queues = {lang: queue.Queue() for lang in target_languages + ["original", "en"]}
+ language_model_names = {
+     "es": "Helsinki-NLP/opus-mt-en-es",
+     "zh": "Helsinki-NLP/opus-mt-en-zh",
+     "ru": "Helsinki-NLP/opus-mt-en-ru",
+ }
+ translation_models = {}
+ tokenizers = {}
+
+ # Initialize VAD
+ vad = webrtcvad.Vad(3)  # Aggressiveness mode 3 (most aggressive)
+
+ # Event to signal threads to stop
+ stop_event = threading.Event()
+
+
+ def transcode_rtmp_to_dash():
+     ffmpeg_command = [
+         "ffmpeg",  # resolved via PATH (the image installs FFmpeg with apt-get)
+         "-i", rtmp_url,
+         "-map", "0:v:0", "-map", "0:a:0",
+         "-c:v", "libx264", "-preset", "slow",
+         "-c:a", "aac", "-b:a", "128k",
+         "-f", "dash",
+         "-seg_duration", str(segment_duration),
+         "-use_timeline", "1",
+         "-use_template", "1",
+         "-init_seg_name", "init_$RepresentationID$.m4s",
+         "-media_seg_name", "chunk_$RepresentationID$_$Number%05d$.m4s",
+         "-adaptation_sets", "id=0,streams=v id=1,streams=a",
+         f"{dash_output_path}/manifest.mpd"
+     ]
+     process = subprocess.Popen(ffmpeg_command)
+     while not stop_event.is_set():
+         time.sleep(1)
+     process.kill()
+
+
+ def capture_audio():
+     global last_activity_time
+     command = [
+         'ffmpeg',  # resolved via PATH inside the container
+         '-i', rtmp_url,
+         '-acodec', 'pcm_s16le',
+         '-ar', '16000',
+         '-ac', '1',
+         '-f', 's16le',
+         '-'
+     ]
+
+     sample_rate = 16000
+     frame_duration_ms = 30
+     sample_width = 2  # Only 16-bit audio supported
+     process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
+     # Set stdout to non-blocking mode
+     fd = process.stdout.fileno()
+     fl = fcntl.fcntl(fd, fcntl.F_GETFL)
+     fcntl.fcntl(fd, fcntl.F_SETFL, fl | os.O_NONBLOCK)
+
+     frame_size = int(sample_rate * frame_duration_ms / 1000) * sample_width
+     frame_count = 0
+     while not stop_event.is_set():
+         ready, _, _ = select.select([process.stdout], [], [], 0.1)
+         if ready:
+             try:
+                 in_bytes = os.read(fd, frame_size)
+                 if not in_bytes:
+                     break
+                 if len(in_bytes) < frame_size:
+                     in_bytes += b'\x00' * (frame_size - len(in_bytes))
+                 last_activity_time = time.time()
+                 timestamp = frame_count * frame_duration_ms * 0.85
+                 frame = Frame(np.frombuffer(in_bytes, np.int16), timestamp, frame_duration_ms)
+                 audio_buffer.put(frame)
+                 frame_count += 1
+             except BlockingIOError:
+                 continue
+         else:
+             time.sleep(0.01)
+     process.kill()
+
+ def frames_to_numpy(frames):
+     all_frames = np.concatenate([f.data for f in frames])
+     float_samples = all_frames.astype(np.float32) / np.iinfo(np.int16).max
+     return float_samples
+
+ def vad_collector(sample_rate, frame_duration_ms, padding_duration_ms, vad, frames):
+     num_padding_frames = int(padding_duration_ms / frame_duration_ms)
+     ring_buffer = collections.deque(maxlen=num_padding_frames)
+     triggered = False
+
+     for frame in frames:
+         if len(frame.data) != int(sample_rate * (frame_duration_ms / 1000.0)):
+             print(f"Skipping frame with incorrect size: {len(frame.data)} samples", flush=True)
+             continue
+
+         is_speech = vad.is_speech(frame.data.tobytes(), sample_rate)
+
+         if not triggered:
+             ring_buffer.append((frame, is_speech))
+             num_voiced = len([f for f, speech in ring_buffer if speech])
+             if num_voiced > 0.8 * ring_buffer.maxlen:
+                 triggered = True
+                 for f, s in ring_buffer:
+                     yield f
+                 ring_buffer.clear()
+         else:
+             yield frame
+             ring_buffer.append((frame, is_speech))
+             num_unvoiced = len([f for f, speech in ring_buffer if not speech])
+             if num_unvoiced > 0.8 * ring_buffer.maxlen:
+                 triggered = False
+                 yield None
+                 ring_buffer.clear()
+     for f, s in ring_buffer:
+         yield f
+     ring_buffer.clear()
+
+ def process_audio():
+     global last_activity_time
+     frames = []
+     buffer_duration_ms = 1500  # About 1.5 seconds of audio
+
+     while not stop_event.is_set():
+         while not audio_buffer.empty():
+             frame = audio_buffer.get(timeout=5.0)
+             frames.append(frame)
+
+         if frames and sum(f.duration for f in frames) >= buffer_duration_ms:
+             vad_frames = list(vad_collector(16000, 30, 300, vad, frames))
+
+             if vad_frames:
+                 audio_segment = [f for f in vad_frames if f is not None]
+                 if audio_segment:
+                     # Transcribe the original audio
+                     result = whisper_model.transcribe(frames_to_numpy(audio_segment))
+
+                     if result["text"]:
+                         timestamp = audio_segment[0].timestamp
+                         caption_queues["original"].put((timestamp, result["text"]))
+
+                         english_translation = whisper_model.transcribe(frames_to_numpy(audio_segment), task="translate")
+                         caption_queues["en"].put((timestamp, english_translation["text"]))
+
+                         # Translate to target languages
+                         for lang in target_languages:
+                             tokenizer = tokenizers[lang]
+                             translation_model = translation_models[lang]
+
+                             inputs = tokenizer.encode(english_translation["text"], return_tensors="pt", padding=True, truncation=True)
+                             translated_tokens = translation_model.generate(inputs)
+                             translated_text = tokenizer.decode(translated_tokens[0], skip_special_tokens=True)
+                             caption_queues[lang].put((timestamp, translated_text))
+
+             frames = []
+
+         time.sleep(0.01)
+
+ def write_captions(lang):
+     os.makedirs(dash_output_path, exist_ok=True)
+     filename = f"{dash_output_path}/captions_{lang}.vtt"
+
+     with open(filename, "w", encoding="utf-8") as f:
+         f.write("WEBVTT\n\n")
+
+     last_end_time = None
+
+     while not stop_event.is_set():
+         if not caption_queues[lang].empty():
+             timestamp, text = caption_queues[lang].get()
+             start_time = format_time(timestamp / 1000)  # Convert ms to seconds
+             end_time = format_time((timestamp + 5000) / 1000)  # Assume 5-second duration for each caption
+
+             # Adjust the previous caption's end time if necessary
+             if last_end_time and start_time != last_end_time:
+                 adjust_previous_caption(filename, last_end_time, start_time)
+
+             # Write the new caption
+             with open(filename, "a", encoding="utf-8") as f:
+                 f.write(f"{start_time} --> {end_time}\n")
+                 f.write(f"{text}\n\n")
+                 f.flush()
+
+             last_end_time = end_time
+
+         time.sleep(0.1)
+
+ def adjust_previous_caption(filename, old_end_time, new_end_time):
+     with open(filename, "r", encoding="utf-8") as f:
+         lines = f.readlines()
+
+     for i in range(len(lines) - 1, -1, -1):
+         if "-->" in lines[i]:
+             parts = lines[i].split("-->")
+             if parts[1].strip() == old_end_time:
+                 lines[i] = f"{parts[0].strip()} --> {new_end_time}\n"
+             break
+
+     with open(filename, "w", encoding="utf-8") as f:
+         f.writelines(lines)
+
+ def format_time(seconds):
+     hours, remainder = divmod(seconds, 3600)
+     minutes, seconds = divmod(remainder, 60)
+     return f"{int(hours):02d}:{int(minutes):02d}:{seconds:06.3f}"
+
+ def signal_handler(signum, frame):
+     print(f"Received signal {signum}. Cleaning up and exiting...")
+     # Signal all threads to stop
+     stop_event.set()
+
+ def cleanup():
+     global last_activity_time
+     while not stop_event.is_set():
+         current_time = time.time()
+         if last_activity_time != 0.0 and current_time - last_activity_time > cleanup_threshold:
+             print("No activity detected for 10 seconds. Cleaning up...", flush=True)
+
+             # Signal all threads to stop
+             stop_event.set()
+             break
+
+         time.sleep(1)  # Check for inactivity every second
+     # Clear caption queues
+     for lang in target_languages + ["original", "en"]:
+         while not caption_queues[lang].empty():
+             caption_queues[lang].get()
+
+     # Delete DASH output files
+     for root, dirs, files in os.walk(dash_output_path, topdown=False):
+         for name in files:
+             os.remove(os.path.join(root, name))
+         for name in dirs:
+             os.rmdir(os.path.join(root, name))
+
+     print("Cleanup completed.", flush=True)
+
+
+ if __name__ == "__main__":
+     # Get RTMP URL and DASH output path from user input
+     signal.signal(signal.SIGTERM, signal_handler)
+     parser = argparse.ArgumentParser(description="Process audio for translation.")
+     parser.add_argument('--rtmp_url', help='rtmp url')
+     parser.add_argument('--output_directory', help='Dash directory')
+     parser.add_argument('--model', help='Whisper model size: base|small|medium|large|large-v2')
+     start_time = time.time()
+
+     args = parser.parse_args()
+     rtmp_url = args.rtmp_url
+     dash_output_path = args.output_directory
+     model_size = args.model
+
+     print(f"RTMP URL: {rtmp_url}")
+     print(f"DASH output path: {dash_output_path}")
+     print(f"Model: {model_size}")
+
+     print("Downloading models\n")
+     print("Whisper\n")
+     whisper_model = whisper.load_model(model_size, download_root="/tmp/model/")  # Adjust model size as necessary
+
+     for lang, model_name in language_model_names.items():
+         print(f"Lang: {lang}, model: {model_name}\n")
+         tokenizers[lang] = MarianTokenizer.from_pretrained(model_name)
+         translation_models[lang] = MarianMTModel.from_pretrained(model_name)
+
+     # Start RTMP to DASH transcoding in a separate thread
+     transcode_thread = threading.Thread(target=transcode_rtmp_to_dash)
+     transcode_thread.start()
+
+     # Start audio capture in a separate thread
+     audio_capture_thread = threading.Thread(target=capture_audio)
+     audio_capture_thread.start()
+
+     # Start audio processing in a separate thread
+     audio_processing_thread = threading.Thread(target=process_audio)
+     audio_processing_thread.start()
+
+     # Start caption writing threads for original and all target languages
+     caption_threads = []
+     for lang in target_languages + ["original", "en"]:
+         caption_thread = threading.Thread(target=write_captions, args=(lang,))
+         caption_threads.append(caption_thread)
+         caption_thread.start()
+
+     # Start the cleanup thread
+     cleanup_thread = threading.Thread(target=cleanup)
+     cleanup_thread.start()
+
+     # Wait for all threads to complete
+     print("Join transcode", flush=True)
+     if transcode_thread.is_alive():
+         transcode_thread.join()
+     print("Join audio capture", flush=True)
+     if audio_capture_thread.is_alive():
+         audio_capture_thread.join()
+     print("Join audio processing", flush=True)
+     if audio_processing_thread.is_alive():
+         audio_processing_thread.join()
+     for thread in caption_threads:
+         if thread.is_alive():
+             thread.join()
+     print("Join cleanup", flush=True)
+     if cleanup_thread.is_alive():
+         cleanup_thread.join()
+
+     print("All threads have been stopped and cleaned up.")
+     exit(0)
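
For standalone debugging, translator.py can also be launched directly; a minimal sketch, assuming a reachable FLV/RTMP source (the URL below is a placeholder):

python translator.py --rtmp_url http://example.com/stream.flv --output_directory /tmp/dash/test_stream/ --model base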
validator.py ADDED
@@ -0,0 +1,102 @@
+ from flask import Flask, render_template, request, jsonify, send_from_directory
+ import subprocess
+ import json
+ import psutil
+ import time
+ import signal
+ import os
+ from flask_cors import CORS
+
+ app = Flask(__name__)
+ CORS(app)  # This will allow all domains to make requests to your server
+
+ script_process = None
+ SCRIPT_NAME = 'translator.py'
+
+ def find_process_by_name(name):
+     for process in psutil.process_iter(['pid', 'name', 'cmdline']):
+         cmdline = process.info['cmdline']
+         if cmdline and any(name in arg for arg in cmdline):
+             return process
+     return None
+
+ def terminate_script():
+     global script_process
+     process = find_process_by_name(SCRIPT_NAME)
+
+     if process:
+         print(f"Terminating existing script process (PID: {process.pid})")
+         process.send_signal(signal.SIGTERM)
+         try:
+             process.wait(timeout=20)  # Wait up to 20 seconds for the process to terminate
+         except psutil.TimeoutExpired:
+             print(f"Process {process.pid} did not terminate in time, forcing...")
+             process.kill()  # Force kill if it doesn't terminate
+
+         # Double-check if the process is really terminated
+         if not find_process_by_name(SCRIPT_NAME):
+             print(f"Process {SCRIPT_NAME} successfully terminated")
+         else:
+             print(f"Warning: Process {SCRIPT_NAME} could not be terminated")
+     else:
+         print(f"No running process found with name: {SCRIPT_NAME}")
+
+     script_process = None
+
+ @app.route('/', methods=['GET'])
+ def index():
+     return render_template('tiktok_player.html')
+
+ @app.route('/terminate', methods=['POST'])
+ def terminate():
+     terminate_script()
+     return jsonify({'status': 'success', 'message': 'Stream stopped'})
+
+ @app.route('/stream/<path:filename>')
+ def serve_file(filename):
+     return send_from_directory('/tmp/dash/test_stream', filename)
+
+ @app.route('/check_flv', methods=['POST'])
+ def check_flv():
+     global script_process
+     flv_url = request.form['url']
+     model = request.form['model']
+     try:
+         # Use ffprobe to check the FLV stream
+         result = subprocess.run([
+             'ffprobe',
+             '-v', 'quiet',
+             '-print_format', 'json',
+             '-show_streams',
+             flv_url
+         ], capture_output=True, text=True, timeout=10)
+
+         if result.returncode == 0:
+             # Parse the JSON output
+             probe_data = json.loads(result.stdout)
+
+             # Check if there are any streams in the output
+             if 'streams' in probe_data and len(probe_data['streams']) > 0:
+                 # Stream is valid
+                 # Terminate existing script if running
+                 terminate_script()
+
+                 # Start new script
+                 script_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), SCRIPT_NAME)
+                 new_process = subprocess.Popen(['python', script_path, '--rtmp_url', flv_url,
+                                                 '--output_directory', '/tmp/dash/test_stream/', '--model', model])
+                 script_process = psutil.Process(new_process.pid)
+
+                 return jsonify({'status': 'success', 'message': 'Buffering...'})
+             else:
+                 return jsonify({'status': 'error', 'message': 'No valid streams found in the FLV'})
+         else:
+             # Stream is invalid
+             return jsonify({'status': 'error', 'message': 'Invalid FLV stream'})
+     except subprocess.TimeoutExpired:
+         return jsonify({'status': 'error', 'message': 'Timeout while checking FLV stream'})
+     except Exception as e:
+         return jsonify({'status': 'error', 'message': f'Error: {str(e)}'})
+
+ if __name__ == '__main__':
+     app.run(host='0.0.0.0', port=7860)
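
For reference, a minimal sketch of exercising the Flask endpoints from Python. It assumes the server is running locally on port 7860 and that the requests package is installed (requests is not part of requirements.txt):

import requests

BASE = "http://localhost:7860"

# Validate a stream URL and start the translator pipeline (the URL is a placeholder)
resp = requests.post(f"{BASE}/check_flv",
                     data={"url": "http://example.com/stream.flv", "model": "base"})
print(resp.json())  # {'status': 'success', 'message': 'Buffering...'} for a valid stream

# Poll the DASH manifest that the player consumes
print(requests.get(f"{BASE}/stream/manifest.mpd").status_code)

# Stop the stream and terminate translator.py
print(requests.post(f"{BASE}/terminate").json())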