import { join } from "node:path"

import { ClapProject, ClapSegmentCategory } from "@aitube/clap"
import { deleteFilesWithName, getRandomDirectory, writeBase64ToFile } from "@aitube/io"
import {
  concatenateAudio,
  concatenateVideos,
  concatenateVideosWithAudio,
  defaultExportFormat,
  type SupportedExportFormat,
  type ConcatenateAudioOutput,
  getMediaInfo
// } from "@aitube/ffmpeg"
} from "./bug-in-bun/aitube_ffmpeg"
import { clapWithStoryboardsToVideoFile } from "./core/exporters/clapWithStoryboardsToVideoFile"
import { clapWithVideosToVideoFile } from "./core/exporters/clapWithVideosToVideoFile"
import { extractBase64 } from "@aitube/encoders"

/**
 * Generate a video file inside a directory (if none is provided, one is
 * obtained from `getRandomDirectory()`).
 *
 * The video is built from the clap's embedded video segments when there are
 * any, otherwise from its embedded storyboard images. Embedded music segments
 * (if any) are concatenated, looped until they cover the whole video, and
 * mixed in at a low volume.
 *
 * @param clap - the project to render; only segments whose `assetUrl` is an
 *   embedded `data:` URI of the matching media type are used
 * @param format - export container; anything other than "mp4" or "webm"
 *   falls back to "mp4"
 * @param outputDir - working directory; when empty a new one is requested
 * @param clearTmpFilesAtEnd - when true, files whose name contains the
 *   `tmp_asset_` marker are deleted from the working directory once the
 *   function finishes, whether it succeeded or failed. If you leave this to
 *   false, you will have to clear files yourself (eg. after sending the final
 *   video file over)
 * @returns the working directory and the path to the final video file
 * @throws Error when the clap contains neither video nor storyboard assets,
 *   or when the usable music tracks mix several audio formats
 */
export async function clapToTmpVideoFilePath({
  clap,
  format = defaultExportFormat,
  outputDir = "",
  clearTmpFilesAtEnd = false
}: {
  clap: ClapProject

  format?: SupportedExportFormat

  outputDir?: string

  // if you leave this to false, you will have to clear files yourself
  // (eg. after sending the final video file over)
  clearTmpFilesAtEnd?: boolean
}): Promise<{
  tmpWorkDir: string
  outputFilePath: string
}> {
  // in case we have an issue with the format
  if (format !== "mp4" && format !== "webm") {
    format = "mp4"
  }

  outputDir = outputDir || (await getRandomDirectory())

  let finalFilePathOfVideoWithMusic: string
  try {
    finalFilePathOfVideoWithMusic = await renderClapToVideoFile({ clap, format, outputDir })
  } catch (err: unknown) {
    // Don't leave temporary assets behind when the caller asked us to clean
    // up: on failure the caller may not even know which directory was used.
    // Cleanup is best-effort here so that the original error is the one
    // that surfaces.
    if (clearTmpFilesAtEnd) {
      try {
        await deleteFilesWithName(outputDir, `tmp_asset_`)
      } catch (cleanupErr: unknown) {
        console.error(`clapToTmpVideoFilePath: failed to delete temporary assets after an error`, cleanupErr)
      }
    }
    throw err
  }

  console.log(`clapToTmpVideoFilePath: finalFilePathOfVideoWithMusic = ${finalFilePathOfVideoWithMusic}`)

  if (clearTmpFilesAtEnd) {
    // we delete all the temporary assets
    console.log(`clapToTmpVideoFilePath: calling deleteFilesWithName(${outputDir}, 'tmp_asset_')`)
    await deleteFilesWithName(outputDir, `tmp_asset_`)
  }

  const result = {
    tmpWorkDir: outputDir,
    outputFilePath: finalFilePathOfVideoWithMusic
  }

  console.log(`clapToTmpVideoFilePath: returning ${JSON.stringify(result, null, 2)}`)

  return result
}

/**
 * Run the whole rendering pipeline inside `outputDir` and return the path of
 * the final video (with music mixed in when the clap provides some).
 *
 * Private helper of `clapToTmpVideoFilePath`: it never deletes anything, the
 * caller is in charge of the temporary files.
 */
async function renderClapToVideoFile({
  clap,
  format,
  outputDir
}: {
  clap: ClapProject
  format: SupportedExportFormat
  outputDir: string
}): Promise<string> {
  const videoSegments = clap.segments.filter(s =>
    s.category === ClapSegmentCategory.VIDEO &&
    s.assetUrl.startsWith("data:video/")
  )

  const storyboardSegments = clap.segments.filter(s =>
    s.category === ClapSegmentCategory.STORYBOARD &&
    s.assetUrl.startsWith("data:image/")
  )

  // videos take precedence, storyboards are only a fallback
  const canUseVideos = videoSegments.length > 0
  const canUseStoryboards = !canUseVideos && storyboardSegments.length > 0

  // we count the duration of the whole video
  // (the latest end time across every segment, whatever its category)
  let totalDurationInMs = 0
  for (const s of clap.segments) {
    if (s.endTimeInMs > totalDurationInMs) {
      totalDurationInMs = s.endTimeInMs
    }
  }

  let videoFilePaths: string[] = []

  // two possibilities:
  // we can either generate from the video files, or from the storyboards
  // the storyboard video will be a bit more boring, but at least it should process faster
  if (canUseVideos) {
    const concatenatedData = await clapWithVideosToVideoFile({
      clap,
      videoSegments,
      outputDir,
    })

    videoFilePaths = concatenatedData.videoFilePaths
  } else if (canUseStoryboards) {
    const concatenatedData = await clapWithStoryboardsToVideoFile({
      clap,
      storyboardSegments,
      outputDir,
    })

    videoFilePaths = concatenatedData.videoFilePaths
  } else {
    throw new Error(`the provided Clap doesn't contain any video or storyboard`)
  }

  const concatenateVideosOptions = {
    videoFilePaths,
    output: join(outputDir, `tmp_asset_concatenated_videos.mp4`)
  }

  console.log(`clapToTmpVideoFilePath: calling concatenateVideos over ${videoFilePaths.length} video chunks: ${JSON.stringify(videoFilePaths, null, 2)}\nconcatenateVideos(${JSON.stringify(concatenateVideosOptions, null, 2)})`)

  const concatenatedVideosNoMusic = await concatenateVideos(concatenateVideosOptions)

  console.log(`clapToTmpVideoFilePath: concatenatedVideosNoMusic`, concatenatedVideosNoMusic)

  const audioTracks: string[] = []

  const musicSegments = clap.segments.filter(s =>
    s.category === ClapSegmentCategory.MUSIC &&
    s.assetUrl.startsWith("data:audio/")
  )

  console.log(`clapToTmpVideoFilePath: got ${musicSegments.length} music segments in total`)

  // note: once we start with a certain type eg. mp3, there is no going back
  // to another format like wav, we can't concatenate them together (well, not yet)
  let detectedMusicTrackFormat = ''

  // we count how much music has been generated
  // if it is not enough to fill the full video, we will loop it (using cross-fading)
  let availableMusicDurationInMs = 0

  for (const segment of musicSegments) {
    const analysis = extractBase64(segment.assetUrl)

    const { durationInMs, hasAudio } = await getMediaInfo(segment.assetUrl)

    // we have to skip silent music tracks
    // (this happens BEFORE the format check: a track that is never
    // concatenated must neither pin the format nor trigger the
    // mixed-format error)
    if (!hasAudio) {
      console.log(`skipping a silent music track`)
      continue
    }

    if (!detectedMusicTrackFormat) {
      detectedMusicTrackFormat = analysis.extension
    } else if (detectedMusicTrackFormat !== analysis.extension) {
      throw new Error(`fatal error: concatenating a mixture of ${detectedMusicTrackFormat} and ${analysis.extension} tracks isn't supported yet`)
    }

    const newTrackFileName = await writeBase64ToFile(
      segment.assetUrl,
      join(outputDir, `tmp_asset_${segment.id}.${analysis.extension}`)
    )

    audioTracks.push(newTrackFileName)
    availableMusicDurationInMs += durationInMs
  }

  let concatenatedAudio: ConcatenateAudioOutput | undefined = undefined

  if (audioTracks.length > 0) {
    console.log(`clapToTmpVideoFilePath: calling concatenateAudio over ${audioTracks.length} audio tracks`)

    if (!detectedMusicTrackFormat) {
      throw new Error(`uh that's weird, we couldn't detect the audio type`)
    }

    // round-robin queue of the original tracks, used to pick what to repeat
    const availableMusicTracks = [...audioTracks]

    // if we don't have enough music audio content
    while (availableMusicDurationInMs < totalDurationInMs) {
      const trackToUse = availableMusicTracks.shift()

      // abort if there are no available tracks (for some reason)
      if (!trackToUse) { break }

      // put it back at the end of the queue so tracks are reused in order
      availableMusicTracks.push(trackToUse)

      // we artificially duplicate it (note: this will be cross-faded)
      const { durationInMs } = await getMediaInfo(trackToUse)

      // let's abort if we have bad data
      // (this also guarantees the loop makes progress on every iteration)
      if (!durationInMs || durationInMs < 1000) { break }

      audioTracks.push(trackToUse)
      availableMusicDurationInMs += durationInMs
    }

    concatenatedAudio = await concatenateAudio({
      output: join(outputDir, `tmp_asset_concatenated_audio.${detectedMusicTrackFormat}`),
      audioTracks,
      crossfadeDurationInSec: 2, // 2 seconds
      outputFormat: detectedMusicTrackFormat
    })

    // serialize explicitly: interpolating the object directly would only
    // print "[object Object]"
    console.log(`clapToTmpVideoFilePath: concatenatedAudio = ${JSON.stringify(concatenatedAudio, null, 2)}`)
  }

  // built once so that what we log is exactly what we call with
  const finalMixOptions = {
    output: join(outputDir, `final_video.${format}`),
    format,
    audioFilePath: concatenatedAudio?.filepath,
    videoFilePaths: [concatenatedVideosNoMusic.filepath],
    // without music the video track stays at full volume and the
    // (absent) audio track is muted
    videoTracksVolume: concatenatedAudio ? 0.85 : 1.0,
    audioTrackVolume: concatenatedAudio ? 0.15 : 0.0, // let's keep the music volume low
  }

  console.log(`calling concatenateVideosWithAudio: `, finalMixOptions)

  return await concatenateVideosWithAudio(finalMixOptions)
}