Spaces:
Running
Running
File size: 8,306 Bytes
eee16bc a6f672d eee16bc a6f672d eee16bc f465388 eee16bc |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 |
const express = require('express');
const fs = require('fs');
const fsp = fs.promises;
const path = require('path');
const crypto = require('crypto');
const { spawn } = require('child_process');
const fetch = require('node-fetch');
const { v4: uuidv4 } = require('uuid');
const cors = require('cors');
const {generateImage} = require('./image.js')
// Express application that every route in this file is registered on.
const app = express();
app.use(express.json()); // Parse JSON request bodies into req.body
app.use(cors()); // Enable CORS for all routes
// Load variables from a .env file into process.env. This has to run before the
// process.env reads below.
// NOTE(review): './image.js' is required before this call, so any environment
// variables it reads at load time would not include .env values — confirm.
require('dotenv').config()
// Folder that generated audio files are written to; it lives under
// STATIC_SITE_ROOT.
const MEDIA_FOLDER = `${process.env.STATIC_SITE_ROOT}/media`
// Key sent as the Bearer token on OpenAI text-to-speech requests.
const OPENAI_API_KEY = process.env.OPENAI_API_KEY
// Ensure the MEDIA_FOLDER directory exists
/**
 * Creates `dir` together with any missing parent directories.
 *
 * An EEXIST failure is treated as success; every other failure is rethrown.
 *
 * @param {string} dir - Directory path to create.
 * @returns {Promise<void>} Settles once the mkdir attempt has finished.
 */
async function ensureDir(dir) {
  await fsp.mkdir(dir, { recursive: true }).catch((mkdirError) => {
    const alreadyThere = mkdirError.code === 'EEXIST';
    if (alreadyThere) {
      return;
    }
    throw mkdirError;
  });
}
// Create the media folder at startup. The promise is handled explicitly so a
// failure is reported with the offending path instead of surfacing as an
// anonymous unhandled rejection; the server cannot store audio without it.
ensureDir(MEDIA_FOLDER).catch((err) => {
  console.error(`Failed to create media folder "${MEDIA_FOLDER}":`, err);
  process.exit(1);
});
// In-memory cache of finished audio, keyed by the SHA-1 hex digest of the
// script text. It is a plain object, so its contents do not survive a restart.
const audioCache = {}; // { [scriptHash]: audioFilePath }
/**
 * Splits a script into speaker segments.
 *
 * Segments are separated by one or more blank lines and have the form
 * `SPEAKER: text`. Only the first ": " separates speaker from content, so the
 * content itself may contain ": ".
 *
 * Compared with a plain split on "\n\n", this version:
 *  - accepts CRLF / CR line endings,
 *  - tolerates runs of several blank lines between segments,
 *  - drops empty segments (blank input yields an empty array),
 *  - assigns segments that have no "SPEAKER: " prefix to the 'DEFAULT' speaker
 *    instead of producing a segment with empty content.
 *
 * @param {string} script - Raw script text.
 * @returns {Array<{speaker_name: string, content: string}>} Parsed segments in
 *   script order.
 */
function parseScript(script) {
  const SEPARATOR = ': ';
  const normalized = script.replace(/\r\n?/g, '\n');
  const parsedSegments = [];

  for (const rawSegment of normalized.split(/\n\s*\n/)) {
    const segment = rawSegment.trim();
    if (segment === '') {
      continue;
    }

    const separatorIndex = segment.indexOf(SEPARATOR);
    if (separatorIndex === -1) {
      // No speaker prefix: keep the text rather than sending empty content on.
      parsedSegments.push({ speaker_name: 'DEFAULT', content: segment });
      continue;
    }

    const speaker_name = segment.slice(0, separatorIndex).trim();
    const content = segment.slice(separatorIndex + SEPARATOR.length).trim();
    if (content === '') {
      continue;
    }
    parsedSegments.push({ speaker_name, content });
  }

  return parsedSegments;
}
/**
 * Requests speech audio for `text` and writes the response body to disk.
 *
 * Sends a JSON POST ({ model, voice, input }) to
 * https://api.openai.com/v1/audio/speech, authenticated with OPENAI_API_KEY,
 * and stores the returned bytes at `audioFilename`.
 *
 * @param {string} text - Text to synthesize.
 * @param {string} audioFilename - Destination path for the audio bytes.
 * @param {string} voiceId - Voice identifier forwarded as `voice`.
 * @param {string} [ttsModel='tts-1'] - Model identifier forwarded as `model`.
 * @returns {Promise<void>}
 * @throws {Error} If OPENAI_API_KEY is not set, or if the response is not OK
 *   (the message carries the response body text).
 */
async function runOpenAITTS(text, audioFilename, voiceId, ttsModel='tts-1') {
  if (!OPENAI_API_KEY) {
    throw new Error('OPENAI_API_KEY is not set.');
  }

  const requestPayload = {
    model: ttsModel,
    voice: voiceId,
    input: text,
  };
  const requestOptions = {
    method: 'POST',
    headers: {
      Authorization: `Bearer ${OPENAI_API_KEY}`,
      'Content-Type': 'application/json',
    },
    body: JSON.stringify(requestPayload),
  };

  const ttsResponse = await fetch('https://api.openai.com/v1/audio/speech', requestOptions);

  if (ttsResponse.ok) {
    const audioBytes = Buffer.from(await ttsResponse.arrayBuffer());
    await fsp.writeFile(audioFilename, audioBytes);
    return;
  }

  const failureBody = await ttsResponse.text();
  throw new Error(`OpenAI TTS request failed: ${failureBody}`);
}
/**
 * Synthesizes one script segment into a new MP3 file inside MEDIA_FOLDER.
 *
 * The speaker name selects a voice from a fixed table; names that are not in
 * the table use the DEFAULT voice. The lookup only considers the table's own
 * keys, so speaker names taken from user input such as "constructor" or
 * "toString" cannot resolve to inherited Object.prototype members.
 *
 * @param {string} speakerName - Speaker label from the parsed script.
 * @param {string} content - Text spoken by that speaker.
 * @returns {Promise<string>} Path of the generated audio file.
 * @throws {Error} Propagates any failure from runOpenAITTS.
 */
async function generateAudio(speakerName, content) {
  const voiceLookupTable = {
    DEFAULT: 'alloy',
    ALICE: 'shimmer',
    BOB: 'echo',
    JENNIFER: 'nova',
    PROFESSOR: 'fable',
    MALE_GUEST: 'onyx',
    FEMALE_GUEST: 'alloy',
  };

  const isKnownSpeaker = Object.prototype.hasOwnProperty.call(voiceLookupTable, speakerName);
  const actualVoiceId = isKnownSpeaker
    ? voiceLookupTable[speakerName]
    : voiceLookupTable.DEFAULT;

  // A random file name keeps concurrent requests from overwriting each other.
  const fileName = path.join(MEDIA_FOLDER, `${uuidv4()}.mp3`);
  await runOpenAITTS(content, fileName, actualVoiceId, 'tts-1-hd');
  return fileName;
}
/**
 * Combines audio files into a single output file.
 *
 * A single input is copied to the output path. Several inputs are joined with
 * FFmpeg's concat protocol without re-encoding:
 *   ffmpeg -i "concat:file1.mp3|file2.mp3" -acodec copy output.mp3
 *
 * @param {string[]} audioFiles - Input file paths, in playback order.
 * @param {string} outputFilePath - Path of the combined file to create.
 * @returns {Promise<void>} Resolves when the output file has been written.
 * @throws {Error} If no input files are given, the copy fails, FFmpeg cannot
 *   be started, or FFmpeg exits with a non-zero code.
 */
async function concatenateAudioFiles(audioFiles, outputFilePath) {
  if (!Array.isArray(audioFiles) || audioFiles.length === 0) {
    throw new Error('No audio files to concatenate');
  }

  if (audioFiles.length === 1) {
    // Only one audio file: copy it directly, without blocking the event loop.
    await fs.promises.copyFile(audioFiles[0], outputFilePath);
    return;
  }

  await new Promise((resolve, reject) => {
    const listContent = audioFiles.join('|');
    const ffmpeg = spawn('ffmpeg', [
      '-i',
      `concat:${listContent}`,
      '-acodec',
      'copy',
      outputFilePath,
    ]);

    ffmpeg.stdout.on('data', (data) => {
      console.log(`stdout: ${data}`);
    });
    ffmpeg.stderr.on('data', (data) => {
      console.error(`stderr: ${data}`);
    });

    // Without this listener a missing or non-executable ffmpeg binary raises
    // an unhandled 'error' event, which would crash the process.
    ffmpeg.on('error', (err) => {
      reject(new Error(`Failed to start FFmpeg: ${err.message}`));
    });

    ffmpeg.on('close', (code) => {
      if (code === 0) {
        resolve();
      } else {
        reject(new Error(`FFmpeg failed with exit code ${code}`));
      }
    });
  });
}
// GET /list-models
// Placeholder route: answers with a fixed JSON array of model names. Swap the
// hard-coded list for a real lookup when one is needed.
app.get('/list-models', (_req, res) => {
  const placeholderModels = ['Model 1', 'Model 2', 'Model 3'];
  res.json(placeholderModels);
});
// GET /generate/speech?payload=<script>[&api_key=<key>]
// Generates speech for every segment of the script, concatenates the segments
// into one MP3 and sends that file. Results are cached in audioCache by the
// SHA-1 of the script, so a repeated script is served without regenerating.
//
// Responses: 401 for a wrong api_key, 400 for a missing/invalid payload or
// when no audio was produced, 500 for any failure during generation.
app.get('/generate/speech', async (req, res) => {
  try {
    // Placeholder key check: replace "their_api_key" with your actual method
    // of managing API keys. A request without api_key is accepted as-is.
    const apiKey = req.query.api_key || 'their_api_key';
    if (apiKey !== 'their_api_key') {
      res.status(401).send('Unauthorized');
      return;
    }

    const script = req.query.payload;
    // Repeated or bracketed query parameters are parsed into arrays/objects;
    // those would make the hash update below throw and surface as a 500.
    // Blank text is rejected here as well, before any TTS request is made.
    if (typeof script !== 'string' || script.trim() === '') {
      res.status(400).send('Bad Request: Missing payload');
      return;
    }

    const scriptHash = crypto.createHash('sha1').update(script).digest('hex');

    if (audioCache[scriptHash]) {
      const filePath = audioCache[scriptHash];
      res.sendFile(path.resolve(filePath), { headers: { 'Content-Type': 'audio/mpeg' } });
      return;
    }

    // Segments are generated one at a time, in script order.
    const parsedSegments = parseScript(script);
    const audioSegments = [];
    for (const segment of parsedSegments) {
      const audioPath = await generateAudio(segment.speaker_name, segment.content);
      audioSegments.push(audioPath);
    }

    if (audioSegments.length === 0) {
      res.status(400).send('No audio generated');
      return;
    }

    // Concatenate audio files into one using FFmpeg
    const combinedAudioPath = path.join(MEDIA_FOLDER, `combined_${uuidv4()}.mp3`);
    await concatenateAudioFiles(audioSegments, combinedAudioPath);

    audioCache[scriptHash] = combinedAudioPath;
    res.sendFile(path.resolve(combinedAudioPath), { headers: { 'Content-Type': 'audio/mpeg' } });
  } catch (error) {
    console.error('Error generating speech:', error);
    res.status(500).send('Internal Server Error');
  }
});
// POST /generate/speech/stream  (JSON body: { "payload": "<script>" })
// Server-Sent Events variant of speech generation. Events written:
//   audio_segment  - URL of each segment as soon as it has been generated
//   audio_complete - URL of the combined audio (sent straight away on a cache hit)
//   error          - generation failed or produced no audio
// Validation failures (401 / 400) are plain HTTP responses sent before the
// stream starts.
app.post('/generate/speech/stream', async (req, res) => {
  // Maps a file stored under STATIC_SITE_ROOT to the URL express.static serves
  // it at. The URL path has to be relative to the static root: using the
  // filesystem path directly only works when the root is the working directory.
  const toPublicUrl = (filePath) => {
    const staticRoot = path.resolve(process.env.STATIC_SITE_ROOT);
    const urlPath = path
      .relative(staticRoot, path.resolve(filePath))
      .split(path.sep)
      .join('/');
    return `${req.protocol}://${req.get('host')}/${urlPath}`;
  };

  try {
    // Placeholder key check: replace "their_api_key" with your actual method
    // of managing API keys. A request without api_key is accepted as-is.
    const apiKey = req.query.api_key || 'their_api_key';
    if (apiKey !== 'their_api_key') {
      res.status(401).send('Unauthorized');
      return;
    }

    // req.body may be absent when the request carried no JSON body.
    const script = req.body && req.body.payload;
    // Only non-blank strings can be hashed and parsed; anything else is a
    // client error and is rejected before the stream is opened.
    if (typeof script !== 'string' || script.trim() === '') {
      res.status(400).send('Bad Request: Missing payload');
      return;
    }

    // Set headers for SSE
    res.setHeader('Content-Type', 'text/event-stream');
    res.setHeader('Cache-Control', 'no-cache');
    res.setHeader('Connection', 'keep-alive');

    const scriptHash = crypto.createHash('sha1').update(script).digest('hex');

    if (audioCache[scriptHash]) {
      // If audio is cached, send the final SSE with the combined audio URL
      const filePath = audioCache[scriptHash];
      console.log(filePath);
      res.write(`event: audio_complete\ndata: ${toPublicUrl(filePath)}\n\n`);
      res.end();
      return;
    }

    const parsedSegments = parseScript(script);
    const audioSegments = [];
    for (const segment of parsedSegments) {
      const audioPath = await generateAudio(segment.speaker_name, segment.content);
      audioSegments.push(audioPath);
      // Send SSE with the URL of the generated audio segment
      res.write(`event: audio_segment\ndata: ${toPublicUrl(audioPath)}\n\n`);
    }

    if (audioSegments.length === 0) {
      res.write(`event: error\ndata: No audio generated\n\n`);
      res.end();
      return;
    }

    // Concatenate audio files into one using FFmpeg
    const combinedAudioPath = path.join(MEDIA_FOLDER, `combined_${uuidv4()}.mp3`);
    await concatenateAudioFiles(audioSegments, combinedAudioPath);

    audioCache[scriptHash] = combinedAudioPath;
    console.log(combinedAudioPath);

    // Send SSE with the URL of the combined audio
    res.write(`event: audio_complete\ndata: ${toPublicUrl(combinedAudioPath)}\n\n`);
    res.end();
  } catch (error) {
    console.error('Error generating speech:', error);
    res.write(`event: error\ndata: Internal Server Error\n\n`);
    res.end();
  }
});
// GET /generate/image — prompt-in-URL variant, so the route can be used
// directly as an image source, e.g.
//   <img src="https://yourserver.com/generate/image?prompt=A%20large%20hamster&width=1024&height=1024">
// The query string is handed to generateImage; response_format defaults to 'image'.
app.get('/generate/image', async (req, res) => {
  const responseFormat = req.query.response_format || 'image';
  try {
    await generateImage(req.query, res, responseFormat);
  } catch (error) {
    // Without this catch a rejection would go unhandled and leave the request
    // without a response.
    console.error('Error generating image:', error);
    if (!res.headersSent) {
      res.status(500).send('Internal Server Error');
    } else {
      res.end();
    }
  }
});
// POST /generate/image — the JSON body is handed to generateImage;
// response_format (read from the query string) defaults to 'url'.
app.post('/generate/image', async (req, res) => {
  const responseFormat = req.query.response_format || 'url';
  try {
    await generateImage(req.body, res, responseFormat);
  } catch (error) {
    // Without this catch a rejection would go unhandled and leave the request
    // without a response.
    console.error('Error generating image:', error);
    if (!res.headersSent) {
      res.status(500).send('Internal Server Error');
    } else {
      res.end();
    }
  }
});
// Client webpages and storage for generated content, served from
// STATIC_SITE_ROOT at the site root. This is mounted after the API routes
// above, so those routes are matched first.
app.use('/', express.static(process.env.STATIC_SITE_ROOT));
// Port the HTTP server listens on (fixed value).
const port = 7860;
app.listen(port, () => {
console.log(`Listening on port ${port}`);
});
|