sergey.agapov committed • Commit 12b350f
1 Parent(s): ee6428d

initial commit

Files changed:
- Dockerfile +16 -0
- requirements.txt +8 -0
- static/images/logo.png +0 -0
- templates/tiktok_player.html +366 -0
- translator.py +346 -0
- validator.py +102 -0
Dockerfile
ADDED
@@ -0,0 +1,16 @@
FROM python:3.9

# Install FFmpeg
RUN apt-get update && apt-get install -y ffmpeg

WORKDIR /code

# Create the directory and set permissions
RUN mkdir -p /tmp/dash/test_stream && chmod 777 /tmp/dash/test_stream

COPY ./requirements.txt /code/requirements.txt
RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt

COPY . /code

CMD ["python", "validator.py"]
requirements.txt
ADDED
@@ -0,0 +1,8 @@
flask
flask-cors
psutil
numpy
transformers
sentencepiece
webrtcvad
openai-whisper
static/images/logo.png
ADDED
templates/tiktok_player.html
ADDED
@@ -0,0 +1,366 @@
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Multilanguage Player</title>
    <script src="https://cdn.dashjs.org/latest/dash.all.min.js"></script>
    <script src="https://cdn.jsdelivr.net/npm/webvtt-parser@2.1.2/dist/parser.min.js"></script>
    <script src="https://code.jquery.com/jquery-3.6.0.min.js"></script>
    <style>
        body {
            font-family: Arial, sans-serif;
            display: flex;
            flex-direction: column;
            align-items: center;
            margin: 0;
            background-color: #ffffff;
        }
        .top-div {
            width: 100%;
            height: 100px;
            background-color: #3C8DF9;
            margin-bottom: 20px;
        }
        .container {
            display: flex;
            justify-content: space-between;
            width: 90%;
            max-width: 1200px;
        }
        .left-panel {
            width: 30%;
            padding: 20px;
            box-sizing: border-box;
        }
        .right-panel {
            width: 65%;
            padding: 20px;
            box-sizing: border-box;
        }
        .frame {
            width: 402px;
            height: 720px;
            border: 2px solid #ccc;
            display: flex;
            justify-content: center;
            align-items: center;
            margin-bottom: 10px;
        }
        video {
            max-width: 100%;
            max-height: 100%;
            box-shadow: 0 0 10px rgba(0,0,0,0.1);
        }
        select, input, button {
            margin: 10px 0;
            padding: 5px;
            width: 100%;
        }
        #result {
            margin-top: 10px;
            font-weight: bold;
        }
    </style>
</head>
<body>
    <div class="top-div"><img src="{{ url_for('static', filename='images/logo.png') }}" alt="Bytedance" height="100px"></div>
    <div class="container">
        <div class="left-panel">
            <h2>FLV Stream URL</h2>
            <form id="flvForm">
                <input type="text" id="flvInput" placeholder="http://example.com/stream.flv" required>
                <button type="submit">Play Stream</button>
            </form>
            <div id="result"></div>
            <form id="terminateStreamForm">
                <button type="submit">Stop Stream</button>
            </form>
            <h3>Language Selection</h3>
            <select id="captionSelect">
                <option value="original">Original</option>
                <option value="es">Spanish</option>
                <option value="ru">Russian</option>
                <option value="en">English</option>
                <option value="zh">Chinese</option>
            </select>
            <h3>Model Selection</h3>
            <select id="models">
                <option value="base">Base</option>
                <option value="small">Small</option>
                <option value="medium">Medium</option>
                <option value="large">Large</option>
                <option value="large-v2">Large-V2</option>
            </select>
        </div>
        <div class="right-panel">
            <div class="frame">
                <div id="waitingMessage">Waiting for the stream...</div>
                <video id="videoPlayer" controls style="display: none;"></video>
            </div>
        </div>
    </div>

    <script>
        (function () {
            var url = "{{ url_for('serve_file', filename='manifest.mpd') }}";
            var player = dashjs.MediaPlayer().create();
            var video = document.querySelector("#videoPlayer");
            var waitingMessage = document.querySelector("#waitingMessage");
            var captionSelect = document.querySelector("#captionSelect");
            var currentLanguage = "original";
            var refreshInterval = 10000; // Re-fetch the caption file every 10 seconds
            var desiredDelay = 45;
            var checkInterval = 5000; // Check every 5 seconds

            function initializePlayer() {
                // Reset the player if it's already initialized
                if (player.isReady()) {
                    player.reset();
                }
                console.log("Initializing the player with %s", url);
                player.updateSettings({
                    streaming: {
                        delay: {
                            liveDelay: 50,
                        },
                        buffer: {
                            bufferToKeep: 40,
                            bufferTimeAtTopQuality: 50,
                            bufferTimeAtTopQualityLongForm: 50,
                            initialBufferLevel: 50,
                        },
                    }
                });

                fetch(url, {method: 'GET', cache: "no-store"})
                    .then(response => {
                        if (response.ok) {
                            response.text().then(text => console.log("Response: ", text));
                        } else {
                            console.log("Response not ok: ", response.body);
                        }
                    })
                    .catch(() => {
                        console.log("Error");
                    });

                player.initialize(video, url, false);
                // player.attachView(video);
                player.setMute(true);

                player.enableForcedTextStreaming(true);

                player.on(dashjs.MediaPlayer.events.STREAM_INITIALIZED, onStreamInitialized);
                player.on(dashjs.MediaPlayer.events.ERROR, onPlayerError);
            }

            function checkStreamAvailability() {
                fetch(url, {method: 'GET', cache: "no-store"})
                    .then(response => {
                        if (response.ok) {
                            console.log("Stream is ready");
                            // The manifest is there; give the init segments a moment to appear
                            setTimeout(initializePlayer, 15000);
                        } else {
                            setTimeout(checkStreamAvailability, checkInterval);
                        }
                    })
                    .catch(() => {
                        setTimeout(checkStreamAvailability, checkInterval);
                    });
            }

            function onStreamInitialized() {
                console.log("Stream initialized, setting up captions");
                setupCaptions();
                setInterval(refreshCaptions, refreshInterval);
                waitForInitialData();
                player.play();
            }

            function onPlayerError(e) {
                console.log("Player error:", e);
                let errorCode = e.code || e.error?.code || e.error?.error?.code;
                console.log("Extracted error code:", errorCode);
                if (errorCode === 25) { // Manifest could not be fetched yet; keep polling
                    console.log("Rescheduling...");
                    waitingMessage.style.display = "block";
                    video.style.display = "none";
                    checkStreamAvailability();
                }
                console.log("None...");
                //waitingMessage.style.display = "block";
                //video.style.display = "none";
                //checkStreamAvailability();
            }

            function waitForInitialData() {
                console.log("Waiting for initial data");
                if (player.getBufferLength() > 0 && video.readyState >= 2) {
                    console.log("Initial data buffered, starting playback");
                    waitingMessage.style.display = "none";
                    video.style.display = "block";
                    //player.play();
                } else {
                    setTimeout(waitForInitialData, 100);
                }
            }

            function parseVTT(vttContent) {
                const lines = vttContent.trim().split('\n');
                let cues = [];
                let cue = {};

                for (let i = 0; i < lines.length; i++) {
                    if (lines[i].includes('-->')) {
                        const [start, end] = lines[i].split('-->').map(timeString => {
                            const [hours, minutes, seconds] = timeString.trim().split(':');
                            return parseFloat(hours) * 3600 + parseFloat(minutes) * 60 + parseFloat(seconds);
                        });
                        cue = {start, end, text: ''};
                    } else if (lines[i].trim() !== '' && cue.start !== undefined) {
                        cue.text += lines[i] + '\n';
                    } else if (lines[i].trim() === '' && cue.text) {
                        cues.push(cue);
                        cue = {};
                    }
                }
                if (cue.text) {
                    cues.push(cue);
                }
                return cues;
            }

            function loadCaptions(lang) {
                console.log("Loading captions for language: " + lang);
                var baseUrl = "{{ url_for('serve_file', filename='') }}"; // Base URL of the 'serve_file' endpoint
                var fileName = "captions_" + lang + ".vtt";
                var captionUrl = baseUrl + fileName;

                for (var i = 0; i < video.textTracks.length; i++) {
                    video.textTracks[i].mode = 'disabled';
                }

                var track = Array.from(video.textTracks).find(t => t.language === lang);

                if (!track) {
                    track = video.addTextTrack("captions", lang, lang);
                }

                track.mode = 'showing';

                updateTrackCues(track, captionUrl);

                currentLanguage = lang;
                console.log("Captions loaded for language: " + lang);
            }

            function updateTrackCues(track, url) {
                fetch(url)
                    .then(response => response.text())
                    .then(vttContent => {
                        const cues = parseVTT(vttContent);

                        while (track.cues.length > 0) {
                            track.removeCue(track.cues[0]);
                        }

                        cues.forEach(cue => {
                            const vttCue = new VTTCue(cue.start, cue.end, cue.text.trim());
                            track.addCue(vttCue);
                        });
                    })
                    .catch(error => console.error('Error updating captions:', error));
            }

            function refreshCaptions() {
                if (currentLanguage) {
                    var track = Array.from(video.textTracks).find(t => t.language === currentLanguage);
                    if (track) {
                        var baseUrl = "{{ url_for('serve_file', filename='') }}"; // Base URL of the 'serve_file' endpoint
                        var fileName = "captions_" + currentLanguage + ".vtt";
                        var captionUrl = baseUrl + fileName;
                        updateTrackCues(track, captionUrl);
                    }
                }
            }

            function setupCaptions() {
                var tracks = player.getTracksFor('text');
                console.log("Available text tracks:", tracks);

                if (tracks.length > 0) {
                    captionSelect.innerHTML = '';
                    tracks.forEach(function (track) {
                        var option = document.createElement('option');
                        option.value = track.lang;
                        option.text = track.lang;
                        captionSelect.appendChild(option);
                    });
                    loadCaptions(tracks[0].lang);
                } else {
                    loadCaptions(currentLanguage);
                }
            }

            captionSelect.addEventListener("change", function () {
                loadCaptions(this.value);
            });

            // FLV stream checker
            $('#flvForm').submit(function(e) {
                $('#waitingMessage').text("Checking the URL...");
                e.preventDefault();
                $.ajax({
                    url: '/terminate',
                    method: 'POST',
                    data: null,
                });
                if (player.isReady()) {
                    player.pause();
                }
                $.ajax({
                    url: '/check_flv',
                    method: 'POST',
                    data: { url: $('#flvInput').val(), model: $('#models').val() },
                    success: function(response) {
                        $('#waitingMessage').text(response.message);
                        if (response.status === 'success') {
                            waitingMessage.style.display = "block";
                            video.style.display = "none";
                            //initializePlayer();
                            //checkStreamAvailability();
                        }
                    },
                    error: function() {
                        $('#result').text('An error occurred');
                    }
                });
            });
            // Stream terminator
            $('#terminateStreamForm').submit(function(e) {
                e.preventDefault();
                $.ajax({
                    url: '/terminate',
                    method: 'POST',
                    data: null,
                });
                if (player.isReady()) {
                    player.pause();
                }
            });

            // Start checking for stream availability
            //initializePlayer();
            checkStreamAvailability();

            // Log current live delay every 5 seconds
            // setInterval(() => {
            //     var currentLiveDelay = player.duration() - player.time();
            //     console.log("Current live delay:", currentLiveDelay);
            //}, 5000);
        })();
    </script>
</body>
</html>
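Note: parseVTT and the refreshCaptions poll above assume exactly the file layout that write_captions in translator.py (below) produces: a WEBVTT header, then bare HH:MM:SS.mmm --> HH:MM:SS.mmm cue lines each followed by caption text and a blank line. A minimal, self-contained Python sketch of that contract; the cue times and text here are made-up sample values:

# Illustrative only: the captions_<lang>.vtt shape served by /stream/
# and polled by the player; values below are invented for the example.
sample_vtt = (
    "WEBVTT\n"
    "\n"
    "00:00:01.200 --> 00:00:06.200\n"
    "Hello, and welcome to the stream.\n"
    "\n"
    "00:00:06.200 --> 00:00:11.200\n"
    "Captions are re-fetched every ten seconds by the player.\n"
)

def cue_seconds(ts):
    # Mirrors parseVTT in tiktok_player.html: HH:MM:SS.mmm -> seconds
    hours, minutes, seconds = ts.strip().split(":")
    return int(hours) * 3600 + int(minutes) * 60 + float(seconds)

for line in sample_vtt.splitlines():
    if "-->" in line:
        start, end = (cue_seconds(part) for part in line.split("-->"))
        print(f"cue {start:.1f}s -> {end:.1f}s")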
translator.py
ADDED
@@ -0,0 +1,346 @@
import subprocess
import threading
import argparse
import fcntl
import select
import whisper
import signal
import numpy as np
import queue
import time
import webrtcvad
import collections
import os
from transformers import MarianMTModel, MarianTokenizer

# Note: the ffmpeg binary is invoked via subprocess; no ffmpeg Python package
# is needed (and none is listed in requirements.txt).

# Global variables
rtmp_url = ""
dash_output_path = ""
segment_duration = 2
last_activity_time = 0.0
cleanup_threshold = 10  # seconds of inactivity before cleanup
start_time = 0.0

# Languages for translation (ISO 639-1 codes)
target_languages = ["es", "zh", "ru"]  # Example: Spanish, Chinese, Russian

# Whisper model, loaded in __main__
whisper_model = None

# Define Frame class
class Frame:
    def __init__(self, data, timestamp, duration):
        self.data = data
        self.timestamp = timestamp
        self.duration = duration

# Audio buffer and caption queues
audio_buffer = queue.Queue()
caption_queues = {lang: queue.Queue() for lang in target_languages + ["original", "en"]}
language_model_names = {
    "es": "Helsinki-NLP/opus-mt-en-es",
    "zh": "Helsinki-NLP/opus-mt-en-zh",
    "ru": "Helsinki-NLP/opus-mt-en-ru",
}
translation_models = {}
tokenizers = {}

# Initialize VAD
vad = webrtcvad.Vad(3)  # Aggressiveness mode 3 (most aggressive)

# Event to signal threads to stop
stop_event = threading.Event()


def transcode_rtmp_to_dash():
    ffmpeg_command = [
        "ffmpeg",  # resolved from PATH; the Dockerfile installs it via apt
        "-i", rtmp_url,
        "-map", "0:v:0", "-map", "0:a:0",
        "-c:v", "libx264", "-preset", "slow",
        "-c:a", "aac", "-b:a", "128k",
        "-f", "dash",
        "-seg_duration", str(segment_duration),
        "-use_timeline", "1",
        "-use_template", "1",
        "-init_seg_name", "init_$RepresentationID$.m4s",
        "-media_seg_name", "chunk_$RepresentationID$_$Number%05d$.m4s",
        "-adaptation_sets", "id=0,streams=v id=1,streams=a",
        f"{dash_output_path}/manifest.mpd"
    ]
    process = subprocess.Popen(ffmpeg_command)
    while not stop_event.is_set():
        time.sleep(1)
    process.kill()


def capture_audio():
    global last_activity_time
    command = [
        'ffmpeg',  # resolved from PATH
        '-i', rtmp_url,
        '-acodec', 'pcm_s16le',
        '-ar', '16000',
        '-ac', '1',
        '-f', 's16le',
        '-'
    ]

    sample_rate = 16000
    frame_duration_ms = 30
    sample_width = 2  # Only 16-bit audio supported
    process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
    # Set stdout to non-blocking mode
    fd = process.stdout.fileno()
    fl = fcntl.fcntl(fd, fcntl.F_GETFL)
    fcntl.fcntl(fd, fcntl.F_SETFL, fl | os.O_NONBLOCK)

    frame_size = int(sample_rate * frame_duration_ms / 1000) * sample_width
    frame_count = 0
    while not stop_event.is_set():
        ready, _, _ = select.select([process.stdout], [], [], 0.1)
        if ready:
            try:
                in_bytes = os.read(fd, frame_size)
                if not in_bytes:
                    break
                if len(in_bytes) < frame_size:
                    in_bytes += b'\x00' * (frame_size - len(in_bytes))
                last_activity_time = time.time()
                timestamp = frame_count * frame_duration_ms * 0.85
                frame = Frame(np.frombuffer(in_bytes, np.int16), timestamp, frame_duration_ms)
                audio_buffer.put(frame)
                frame_count += 1
            except BlockingIOError:
                continue
        else:
            time.sleep(0.01)
    process.kill()


def frames_to_numpy(frames):
    all_frames = np.concatenate([f.data for f in frames])
    float_samples = all_frames.astype(np.float32) / np.iinfo(np.int16).max
    return float_samples


def vad_collector(sample_rate, frame_duration_ms, padding_duration_ms, vad, frames):
    num_padding_frames = int(padding_duration_ms / frame_duration_ms)
    ring_buffer = collections.deque(maxlen=num_padding_frames)
    triggered = False

    for frame in frames:
        if len(frame.data) != int(sample_rate * (frame_duration_ms / 1000.0)):
            print(f"Skipping frame with incorrect size: {len(frame.data)} samples", flush=True)
            continue

        is_speech = vad.is_speech(frame.data.tobytes(), sample_rate)

        if not triggered:
            ring_buffer.append((frame, is_speech))
            num_voiced = len([f for f, speech in ring_buffer if speech])
            if num_voiced > 0.8 * ring_buffer.maxlen:
                triggered = True
                for f, s in ring_buffer:
                    yield f
                ring_buffer.clear()
        else:
            yield frame
            ring_buffer.append((frame, is_speech))
            num_unvoiced = len([f for f, speech in ring_buffer if not speech])
            if num_unvoiced > 0.8 * ring_buffer.maxlen:
                triggered = False
                yield None
                ring_buffer.clear()
    for f, s in ring_buffer:
        yield f
    ring_buffer.clear()


def process_audio():
    global last_activity_time
    frames = []
    buffer_duration_ms = 1500  # About 1.5 seconds of audio

    while not stop_event.is_set():
        while not audio_buffer.empty():
            frame = audio_buffer.get(timeout=5.0)
            frames.append(frame)

        if frames and sum(f.duration for f in frames) >= buffer_duration_ms:
            vad_frames = list(vad_collector(16000, 30, 300, vad, frames))

            if vad_frames:
                audio_segment = [f for f in vad_frames if f is not None]
                if audio_segment:
                    # Transcribe the original audio
                    result = whisper_model.transcribe(frames_to_numpy(audio_segment))

                    if result["text"]:
                        timestamp = audio_segment[0].timestamp
                        caption_queues["original"].put((timestamp, result["text"]))

                        english_translation = whisper_model.transcribe(frames_to_numpy(audio_segment), task="translate")
                        caption_queues["en"].put((timestamp, english_translation["text"]))

                        # Translate to target languages
                        for lang in target_languages:
                            tokenizer = tokenizers[lang]
                            translation_model = translation_models[lang]

                            inputs = tokenizer.encode(english_translation["text"], return_tensors="pt", padding=True, truncation=True)
                            translated_tokens = translation_model.generate(inputs)
                            translated_text = tokenizer.decode(translated_tokens[0], skip_special_tokens=True)
                            caption_queues[lang].put((timestamp, translated_text))

            frames = []

        time.sleep(0.01)


def write_captions(lang):
    os.makedirs(dash_output_path, exist_ok=True)
    filename = f"{dash_output_path}/captions_{lang}.vtt"

    with open(filename, "w", encoding="utf-8") as f:
        f.write("WEBVTT\n\n")

    last_end_time = None

    while not stop_event.is_set():
        if not caption_queues[lang].empty():
            timestamp, text = caption_queues[lang].get()
            start_time = format_time(timestamp / 1000)  # Convert ms to seconds
            end_time = format_time((timestamp + 5000) / 1000)  # Assume 5-second duration for each caption

            # Adjust the previous caption's end time if necessary
            if last_end_time and start_time != last_end_time:
                adjust_previous_caption(filename, last_end_time, start_time)

            # Write the new caption
            with open(filename, "a", encoding="utf-8") as f:
                f.write(f"{start_time} --> {end_time}\n")
                f.write(f"{text}\n\n")
                f.flush()

            last_end_time = end_time

        time.sleep(0.1)


def adjust_previous_caption(filename, old_end_time, new_end_time):
    with open(filename, "r", encoding="utf-8") as f:
        lines = f.readlines()

    # Scan backwards for the most recent cue line and extend its end time
    for i in range(len(lines) - 1, -1, -1):
        if "-->" in lines[i]:
            parts = lines[i].split("-->")
            if parts[1].strip() == old_end_time:
                lines[i] = f"{parts[0].strip()} --> {new_end_time}\n"
            break

    with open(filename, "w", encoding="utf-8") as f:
        f.writelines(lines)


def format_time(seconds):
    hours, remainder = divmod(seconds, 3600)
    minutes, seconds = divmod(remainder, 60)
    return f"{int(hours):02d}:{int(minutes):02d}:{seconds:06.3f}"


def signal_handler(signum, frame):
    print(f"Received signal {signum}. Cleaning up and exiting...")
    # Signal all threads to stop
    stop_event.set()


def cleanup():
    global last_activity_time
    while not stop_event.is_set():
        current_time = time.time()
        if last_activity_time != 0.0 and current_time - last_activity_time > cleanup_threshold:
            print("No activity detected for 10 seconds. Cleaning up...", flush=True)

            # Signal all threads to stop
            stop_event.set()
            break

        time.sleep(1)  # Check for inactivity every second
    # Clear caption queues
    for lang in target_languages + ["original", "en"]:
        while not caption_queues[lang].empty():
            caption_queues[lang].get()

    # Delete DASH output files
    for root, dirs, files in os.walk(dash_output_path, topdown=False):
        for name in files:
            os.remove(os.path.join(root, name))
        for name in dirs:
            os.rmdir(os.path.join(root, name))

    print("Cleanup completed.", flush=True)


if __name__ == "__main__":
    signal.signal(signal.SIGTERM, signal_handler)
    # Get RTMP URL and DASH output path from user input
    parser = argparse.ArgumentParser(description="Process audio for translation.")
    parser.add_argument('--rtmp_url', help='rtmp url')
    parser.add_argument('--output_directory', help='Dash directory')
    parser.add_argument('--model', help='Whisper model size: base|small|medium|large|large-v2')
    start_time = time.time()

    args = parser.parse_args()
    rtmp_url = args.rtmp_url
    dash_output_path = args.output_directory
    model_size = args.model

    print(f"RTMP URL: {rtmp_url}")
    print(f"DASH output path: {dash_output_path}")
    print(f"Model: {model_size}")

    print("Downloading models\n")
    print("Whisper\n")
    whisper_model = whisper.load_model(model_size, download_root="/tmp/model/")  # Adjust model size as necessary

    for lang, model_name in language_model_names.items():
        print(f"Lang: {lang}, model: {model_name}\n")
        tokenizers[lang] = MarianTokenizer.from_pretrained(model_name)
        translation_models[lang] = MarianMTModel.from_pretrained(model_name)

    # Start RTMP to DASH transcoding in a separate thread
    transcode_thread = threading.Thread(target=transcode_rtmp_to_dash)
    transcode_thread.start()

    # Start audio capture in a separate thread
    audio_capture_thread = threading.Thread(target=capture_audio)
    audio_capture_thread.start()

    # Start audio processing in a separate thread
    audio_processing_thread = threading.Thread(target=process_audio)
    audio_processing_thread.start()

    # Start caption writing threads for original and all target languages
    caption_threads = []
    for lang in target_languages + ["original", "en"]:
        caption_thread = threading.Thread(target=write_captions, args=(lang,))
        caption_threads.append(caption_thread)
        caption_thread.start()

    # Start the cleanup thread
    cleanup_thread = threading.Thread(target=cleanup)
    cleanup_thread.start()

    # Wait for all threads to complete
    print("Join transcode", flush=True)
    if transcode_thread.is_alive():
        transcode_thread.join()
    print("Join audio capture", flush=True)
    if audio_capture_thread.is_alive():
        audio_capture_thread.join()
    print("Join audio processing", flush=True)
    if audio_processing_thread.is_alive():
        audio_processing_thread.join()
    for thread in caption_threads:
        if thread.is_alive():
            thread.join()
    print("Join cleanup", flush=True)
    if cleanup_thread.is_alive():
        cleanup_thread.join()

    print("All threads have been stopped and cleaned up.")
    exit(0)
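As a quick sanity check of the timestamp math that write_captions relies on, here is format_time copied out of the file above and exercised standalone (the 83000 ms timestamp is an arbitrary example value):

# Self-contained copy of format_time from translator.py.
def format_time(seconds):
    hours, remainder = divmod(seconds, 3600)
    minutes, seconds = divmod(remainder, 60)
    return f"{int(hours):02d}:{int(minutes):02d}:{seconds:06.3f}"

# A caption queued at timestamp 83000 ms starts at 00:01:23.000 and, with
# the assumed 5-second duration, ends at 00:01:28.000.
timestamp = 83000
print(format_time(timestamp / 1000))           # 00:01:23.000
print(format_time((timestamp + 5000) / 1000))  # 00:01:28.000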
validator.py
ADDED
@@ -0,0 +1,102 @@
from flask import Flask, render_template, request, jsonify, send_from_directory
import subprocess
import json
import psutil
import time
import signal
import os
from flask_cors import CORS

app = Flask(__name__)
CORS(app)  # This will allow all domains to make requests to your server

script_process = None
SCRIPT_NAME = 'translator.py'

def find_process_by_name(name):
    for process in psutil.process_iter(['pid', 'name', 'cmdline']):
        cmdline = process.info['cmdline']
        if cmdline and any(name in arg for arg in cmdline):
            return process
    return None

def terminate_script():
    global script_process
    process = find_process_by_name(SCRIPT_NAME)

    if process:
        print(f"Terminating existing script process (PID: {process.pid})")
        process.send_signal(signal.SIGTERM)
        try:
            process.wait(timeout=20)  # Wait up to 20 seconds for the process to terminate
        except psutil.TimeoutExpired:
            print(f"Process {process.pid} did not terminate in time, forcing...")
            process.kill()  # Force kill if it doesn't terminate

        # Double-check if the process is really terminated
        if not find_process_by_name(SCRIPT_NAME):
            print(f"Process {SCRIPT_NAME} successfully terminated")
        else:
            print(f"Warning: Process {SCRIPT_NAME} could not be terminated")
    else:
        print(f"No running process found with name: {SCRIPT_NAME}")

    script_process = None

@app.route('/', methods=['GET'])
def index():
    return render_template('tiktok_player.html')

@app.route('/terminate', methods=['POST'])
def terminate():
    terminate_script()
    return jsonify({'status': 'success', 'message': 'Stream stopped'})

@app.route('/stream/<path:filename>')
def serve_file(filename):
    return send_from_directory('/tmp/dash/test_stream', filename)

@app.route('/check_flv', methods=['POST'])
def check_flv():
    global script_process
    flv_url = request.form['url']
    model = request.form['model']
    try:
        # Use ffprobe to check the FLV stream
        result = subprocess.run([
            'ffprobe',
            '-v', 'quiet',
            '-print_format', 'json',
            '-show_streams',
            flv_url
        ], capture_output=True, text=True, timeout=10)

        if result.returncode == 0:
            # Parse the JSON output
            probe_data = json.loads(result.stdout)

            # Check if there are any streams in the output
            if 'streams' in probe_data and len(probe_data['streams']) > 0:
                # Stream is valid
                # Terminate existing script if running
                terminate_script()

                # Start new script; translator.py sits next to this file
                # (the Dockerfile copies both into /code)
                script_dir = os.path.dirname(os.path.abspath(__file__))
                new_process = subprocess.Popen(['python', os.path.join(script_dir, 'translator.py'),
                                                '--rtmp_url', flv_url,
                                                '--output_directory', '/tmp/dash/test_stream/', '--model', model])
                script_process = psutil.Process(new_process.pid)

                return jsonify({'status': 'success', 'message': 'Buffering...'})
            else:
                return jsonify({'status': 'error', 'message': 'No valid streams found in the FLV'})
        else:
            # Stream is invalid
            return jsonify({'status': 'error', 'message': 'Invalid FLV stream'})
    except subprocess.TimeoutExpired:
        return jsonify({'status': 'error', 'message': 'Timeout while checking FLV stream'})
    except Exception as e:
        return jsonify({'status': 'error', 'message': f'Error: {str(e)}'})

if __name__ == '__main__':
    app.run(host='0.0.0.0', port=7860)
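For completeness, a minimal client-side sketch of the two POST endpoints above, using the third-party requests package (not in requirements.txt, since it is only needed on the calling side; the host and stream URL here are placeholders):

import requests

BASE = "http://localhost:7860"  # validator.py listens on 0.0.0.0:7860

# Probe an FLV stream and, if ffprobe accepts it, spawn translator.py
resp = requests.post(BASE + "/check_flv",
                     data={"url": "http://example.com/stream.flv", "model": "base"})
print(resp.json())  # e.g. {'status': 'success', 'message': 'Buffering...'}

# Ask the server to SIGTERM the running translator.py, if any
resp = requests.post(BASE + "/terminate")
print(resp.json())  # {'status': 'success', 'message': 'Stream stopped'}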