File size: 8,044 Bytes
2cae2a9
 
93a2c43
3165afb
 
 
 
 
 
 
d5d9687
 
3165afb
 
2cae2a9
3165afb
 
d5d9687
4cb7ad9
2cae2a9
2ed47da
2cae2a9
 
 
 
46fcec6
 
4cb7ad9
46fcec6
 
 
 
2cae2a9
4cb7ad9
46fcec6
 
 
 
 
 
 
 
 
 
1083ad0
 
 
 
 
46fcec6
2cae2a9
93a2c43
 
2ed47da
 
 
 
d5d9687
 
 
 
 
 
 
 
1f38dc1
 
2ed47da
 
 
 
1f38dc1
2ed47da
 
 
 
b80b9f7
26058e7
b80b9f7
1f38dc1
2ed47da
1f38dc1
2ed47da
 
 
 
b80b9f7
26058e7
b80b9f7
1f38dc1
2ed47da
 
2cae2a9
 
1083ad0
 
 
 
6349b58
46fcec6
2cae2a9
46fcec6
2cae2a9
 
6349b58
 
2cae2a9
 
46fcec6
93a2c43
46fcec6
 
6349b58
 
 
d5d9687
 
 
 
 
 
 
 
2cae2a9
d5d9687
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2cae2a9
d5d9687
 
 
 
2cae2a9
 
fc91769
 
 
 
 
d5d9687
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fc91769
d5d9687
fc91769
d5d9687
 
fc91769
 
 
 
1083ad0
 
 
 
 
 
 
 
 
 
46fcec6
4cb7ad9
 
fc91769
46fcec6
2cae2a9
fc91769
 
2cae2a9
1083ad0
 
6349b58
46fcec6
 
1628a6d
46fcec6
 
2cae2a9
6349b58
 
 
 
 
46fcec6
 
 
 
2cae2a9
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
import { join } from "node:path"

import { ClapProject, ClapSegmentCategory } from "@aitube/clap"
import { deleteFilesWithName, getRandomDirectory, writeBase64ToFile } from "@aitube/io"
import {
  concatenateAudio,
  concatenateVideos,
  concatenateVideosWithAudio,
  defaultExportFormat,
  type SupportedExportFormat,
  type ConcatenateAudioOutput,
  getMediaInfo
// } from "@aitube/ffmpeg"
} from "./bug-in-bun/aitube_ffmpeg"

import { clapWithStoryboardsToVideoFile } from "./core/exporters/clapWithStoryboardsToVideoFile"
import { clapWithVideosToVideoFile } from "./core/exporters/clapWithVideosToVideoFile"
import { extractBase64 } from "@aitube/encoders"

/**
 * Render a Clap project to a single video file on disk.
 *
 * The video track is built from the project's video segments when at least one
 * embeds a `data:video/` asset; otherwise it is built from the storyboard
 * segments embedding a `data:image/` asset. Music segments embedding a
 * `data:audio/` asset are concatenated (and repeated if they are shorter than
 * the project) and mixed in as a low-volume background track.
 *
 * @param clap the project to render
 * @param format container of the final file; anything other than "mp4" or "webm" falls back to "mp4"
 * @param outputDir working directory; when empty, one is obtained from `getRandomDirectory()`
 * @param clearTmpFilesAtEnd when true, files whose name contains `tmp_asset_` are removed from
 *   `outputDir` before returning (and, best-effort, when rendering fails). When false the
 *   caller is responsible for cleaning `tmpWorkDir`.
 * @returns the working directory and the file path of the final video
 * @throws if the project contains neither usable video nor usable storyboard segments,
 *   or if the audible music tracks use more than one audio format
 */
export async function clapToTmpVideoFilePath({
  clap,
  format = defaultExportFormat,
  outputDir = "",
  clearTmpFilesAtEnd = false
}: {
  clap: ClapProject

  format?: SupportedExportFormat
  outputDir?: string

  // if you leave this to false, you will have to clear files yourself
  // (eg. after sending the final video file over)
  clearTmpFilesAtEnd?: boolean
}): Promise<{
  tmpWorkDir: string
  outputFilePath: string
}> {

  // in case we have an issue with the format
  if (format !== "mp4" && format !== "webm") {
    format = "mp4"
  }

  outputDir = outputDir || (await getRandomDirectory())

  let outputFilePath: string
  try {
    outputFilePath = await renderClapToFinalVideoFile({ clap, format, outputDir })
  } catch (err) {
    // The caller never receives `tmpWorkDir` when we throw, so if it asked for
    // automatic cleanup we must do it here or the temporary assets are leaked.
    if (clearTmpFilesAtEnd) {
      try {
        await deleteFilesWithName(outputDir, `tmp_asset_`)
      } catch (cleanupErr) {
        // best-effort only: never let a cleanup failure hide the original error
        console.error(`clapToTmpVideoFilePath: failed to clean temporary assets after an error`, cleanupErr)
      }
    }
    throw err
  }

  if (clearTmpFilesAtEnd) {
    // we delete all the temporary assets
    console.log(`clapToTmpVideoFilePath: calling deleteFilesWithName(${outputDir}, 'tmp_asset_')`)
    await deleteFilesWithName(outputDir, `tmp_asset_`)
  }

  const result = {
    tmpWorkDir: outputDir,
    outputFilePath
  }

  console.log(`clapToTmpVideoFilePath: returning ${JSON.stringify(result, null, 2)}`)

  return result
}

/**
 * Build the final video (picture + optional background music) inside `outputDir`
 * and return the file path reported by `concatenateVideosWithAudio`.
 */
async function renderClapToFinalVideoFile({
  clap,
  format,
  outputDir
}: {
  clap: ClapProject
  format: SupportedExportFormat
  outputDir: string
}): Promise<string> {
  const videoSegments = clap.segments.filter(s => s.category === ClapSegmentCategory.VIDEO && s.assetUrl.startsWith("data:video/"))
  const storyboardSegments = clap.segments.filter(s => s.category === ClapSegmentCategory.STORYBOARD && s.assetUrl.startsWith("data:image/"))

  // the duration of the whole project is the latest end time of any segment
  const totalDurationInMs = clap.segments.reduce(
    (latest, s) => (s.endTimeInMs > latest ? s.endTimeInMs : latest),
    0
  )

  let videoFilePaths: string[] = []

  // two possibilities:
  // we can either generate from the video files, or from the storyboards
  // (videos take precedence; storyboards are only used when there is no video)
  if (videoSegments.length > 0) {
    const concatenatedData = await clapWithVideosToVideoFile({
      clap,
      videoSegments,
      outputDir,
    })

    videoFilePaths = concatenatedData.videoFilePaths
  } else if (storyboardSegments.length > 0) {
    const concatenatedData = await clapWithStoryboardsToVideoFile({
      clap,
      storyboardSegments,
      outputDir,
    })

    videoFilePaths = concatenatedData.videoFilePaths
  } else {
    throw new Error(`the provided Clap doesn't contain any video or storyboard`)
  }

  // built once so that what we log is exactly what we call
  const concatenateVideosParams = {
    videoFilePaths,
    output: join(outputDir, `tmp_asset_concatenated_videos.mp4`)
  }

  console.log(`clapToTmpVideoFilePath: calling concatenateVideos over ${videoFilePaths.length} video chunks: ${JSON.stringify(videoFilePaths, null, 2)}\nconcatenateVideos(${JSON.stringify(concatenateVideosParams, null, 2)})`)

  const concatenatedVideosNoMusic = await concatenateVideos(concatenateVideosParams)

  console.log(`clapToTmpVideoFilePath: concatenatedVideosNoMusic`, concatenatedVideosNoMusic)

  const concatenatedAudio = await buildBackgroundMusicTrack({
    clap,
    outputDir,
    totalDurationInMs
  })

  // built once so that what we log is exactly what we call
  const concatenateVideosWithAudioParams = {
    output: join(outputDir, `final_video.${format}`),
    format,
    audioFilePath: concatenatedAudio?.filepath,
    videoFilePaths: [concatenatedVideosNoMusic.filepath],
    // when there is music, the video track is slightly attenuated...
    videoTracksVolume: concatenatedAudio ? 0.85 : 1.0,
    // ...and the music is kept low in the mix
    audioTrackVolume: concatenatedAudio ? 0.15 : 0.0,
  }

  console.log(`calling concatenateVideosWithAudio: `, concatenateVideosWithAudioParams)

  const finalFilePathOfVideoWithMusic = await concatenateVideosWithAudio(concatenateVideosWithAudioParams)

  console.log(`clapToTmpVideoFilePath: finalFilePathOfVideoWithMusic = ${finalFilePathOfVideoWithMusic}`)

  return finalFilePathOfVideoWithMusic
}

/**
 * Write the project's audible music segments to `outputDir`, repeat them until
 * they cover `totalDurationInMs`, and concatenate them with a cross-fade.
 *
 * @returns the output of `concatenateAudio`, or `undefined` when the project
 *   has no audible music segment
 */
async function buildBackgroundMusicTrack({
  clap,
  outputDir,
  totalDurationInMs
}: {
  clap: ClapProject
  outputDir: string
  totalDurationInMs: number
}): Promise<ConcatenateAudioOutput | undefined> {
  const audioTracks: string[] = []

  const musicSegments = clap.segments.filter(s =>
      s.category === ClapSegmentCategory.MUSIC &&
      s.assetUrl.startsWith("data:audio/")
  )

  console.log(`clapToTmpVideoFilePath: got ${musicSegments.length} music segments in total`)

  // note: once we start with a certain format (eg. mp3) there is no switching
  // to another one (eg. wav): we can't concatenate them together (well, not yet)
  let detectedMusicTrackFormat = ''

  // we count how much music has been generated
  // if it is not enough to fill the full video, we will loop it (using cross-fading)
  let availableMusicDurationInMs = 0

  for (const segment of musicSegments) {
    const { durationInMs, hasAudio } = await getMediaInfo(segment.assetUrl)

    // we have to skip silent music tracks; this happens before the format check
    // so that a track we never concatenate cannot pin or conflict with the format
    if (!hasAudio) {
      console.log(`skipping a silent music track`)
      continue
    }

    const analysis = extractBase64(segment.assetUrl)
    if (!detectedMusicTrackFormat) {
      detectedMusicTrackFormat = analysis.extension
    } else if (detectedMusicTrackFormat !== analysis.extension) {
      throw new Error(`fatal error: concatenating a mixture of ${detectedMusicTrackFormat} and ${analysis.extension} tracks isn't supported yet`)
    }

    const newTrackFileName = await writeBase64ToFile(
      segment.assetUrl,
      join(outputDir, `tmp_asset_${segment.id}.${analysis.extension}`)
    )

    audioTracks.push(newTrackFileName)

    availableMusicDurationInMs += durationInMs
  }

  if (audioTracks.length === 0) {
    return undefined
  }

  console.log(`clapToTmpVideoFilePath: calling concatenateAudio over ${audioTracks.length} audio tracks`)

  if (!detectedMusicTrackFormat) {
    throw new Error(`uh that's weird, we couldn't detect the audio type`)
  }

  // round-robin queue of the original tracks, used to pad the music
  const availableMusicTracks = [...audioTracks]

  // a track can be reused many times: probe each file only once
  const durationInMsByTrack = new Map<string, number>()

  // if we don't have enough music audio content
  while (availableMusicDurationInMs < totalDurationInMs) {
    const trackToUse = availableMusicTracks.shift()

    // abort if there are no available tracks (for some reason)
    if (!trackToUse) { break }

    availableMusicTracks.push(trackToUse)

    let durationInMs = durationInMsByTrack.get(trackToUse)
    if (durationInMs === undefined) {
      durationInMs = (await getMediaInfo(trackToUse)).durationInMs
      durationInMsByTrack.set(trackToUse, durationInMs as number)
    }

    // let's abort if we have bad data (this also guarantees the loop makes progress)
    if (!durationInMs || durationInMs < 1000) { break }

    // we artificially duplicate it (note: this will be cross-faded)
    audioTracks.push(trackToUse)

    availableMusicDurationInMs += durationInMs
  }

  const concatenatedAudio: ConcatenateAudioOutput = await concatenateAudio({
    output: join(outputDir, `tmp_asset_concatenated_audio.${detectedMusicTrackFormat}`),
    audioTracks,
    crossfadeDurationInSec: 2, // 2 seconds
    outputFormat: detectedMusicTrackFormat
  })

  // serialize explicitly: interpolating the object directly prints "[object Object]"
  console.log(`clapToTmpVideoFilePath: concatenatedAudio = ${JSON.stringify(concatenatedAudio, null, 2)}`)

  return concatenatedAudio
}