jbilcke-hf HF staff committed on
Commit
d5d9687
·
1 Parent(s): a5053d8

trying to make a more reliable music mixer

Browse files
src/bug-in-bun/aitube_ffmpeg/concatenate/concatenateAudio.ts CHANGED
@@ -27,7 +27,7 @@ export async function concatenateAudio({
27
  audioTracks = [],
28
  audioFilePaths = [],
29
  crossfadeDurationInSec = 10,
30
- outputFormat = "wav"
31
  }: ConcatenateAudioOptions): Promise<ConcatenateAudioOutput> {
32
  if (!Array.isArray(audioTracks)) {
33
  throw new Error("Audios must be provided in an array");
@@ -41,7 +41,7 @@ export async function concatenateAudio({
41
  if (audioTracks.length === 1 && audioTracks[0]) {
42
  const audioTrack = audioTracks[0]
43
  const outputFilePath = path.join(tempDir, `audio_0.${outputFormat}`)
44
- await writeBase64ToFile(addBase64Header(audioTrack, "wav"), outputFilePath)
45
 
46
  // console.log(" |- there is only one track! so.. returning that")
47
  const { durationInSec } = await getMediaInfo(outputFilePath)
@@ -57,8 +57,8 @@ export async function concatenateAudio({
57
  let i = 0
58
  for (const track of audioTracks) {
59
  if (!track) { continue }
60
- const audioFilePath = path.join(tempDir, `audio_${++i}.wav`);
61
- await writeBase64ToFile(addBase64Header(track, "wav"), audioFilePath)
62
 
63
  audioFilePaths.push(audioFilePath);
64
  }
 
27
  audioTracks = [],
28
  audioFilePaths = [],
29
  crossfadeDurationInSec = 10,
30
+ outputFormat = "mp3"
31
  }: ConcatenateAudioOptions): Promise<ConcatenateAudioOutput> {
32
  if (!Array.isArray(audioTracks)) {
33
  throw new Error("Audios must be provided in an array");
 
41
  if (audioTracks.length === 1 && audioTracks[0]) {
42
  const audioTrack = audioTracks[0]
43
  const outputFilePath = path.join(tempDir, `audio_0.${outputFormat}`)
44
+ await writeBase64ToFile(addBase64Header(audioTrack, outputFormat), outputFilePath)
45
 
46
  // console.log(" |- there is only one track! so.. returning that")
47
  const { durationInSec } = await getMediaInfo(outputFilePath)
 
57
  let i = 0
58
  for (const track of audioTracks) {
59
  if (!track) { continue }
60
+ const audioFilePath = path.join(tempDir, `audio_${++i}.${outputFormat}`);
61
+ await writeBase64ToFile(addBase64Header(track, outputFormat), audioFilePath)
62
 
63
  audioFilePaths.push(audioFilePath);
64
  }
src/bug-in-bun/aitube_ffmpeg/concatenate/concatenateVideosWithAudio.ts CHANGED
@@ -136,6 +136,7 @@ export const concatenateVideosWithAudio = async ({
136
  }
137
 
138
 
 
139
  console.log("concatenateVideosWithAudio: DEBUG:", {
140
  videoTracksVolume,
141
  audioTrackVolume,
@@ -147,6 +148,7 @@ export const concatenateVideosWithAudio = async ({
147
  // additionalAudioVolume,
148
  finalOutputFilePath
149
  })
 
150
 
151
  // Set up event handlers for ffmpeg processing
152
  const promise = new Promise<string>((resolve, reject) => {
 
136
  }
137
 
138
 
139
+ /*
140
  console.log("concatenateVideosWithAudio: DEBUG:", {
141
  videoTracksVolume,
142
  audioTrackVolume,
 
148
  // additionalAudioVolume,
149
  finalOutputFilePath
150
  })
151
+ */
152
 
153
  // Set up event handlers for ffmpeg processing
154
  const promise = new Promise<string>((resolve, reject) => {
src/main.ts CHANGED
@@ -8,12 +8,14 @@ import {
8
  concatenateVideosWithAudio,
9
  defaultExportFormat,
10
  type SupportedExportFormat,
11
- type ConcatenateAudioOutput
 
12
  // } from "@aitube/ffmpeg"
13
  } from "./bug-in-bun/aitube_ffmpeg"
14
 
15
  import { clapWithStoryboardsToVideoFile } from "./core/exporters/clapWithStoryboardsToVideoFile"
16
  import { clapWithVideosToVideoFile } from "./core/exporters/clapWithVideosToVideoFile"
 
17
 
18
  /**
19
  * Generate a .mp4 video inside a directory (if none is provided, it will be created in /tmp)
@@ -53,6 +55,14 @@ export async function clapToTmpVideoFilePath({
53
  const canUseVideos = videoSegments.length > 0
54
  const canUseStoryboards = !canUseVideos && storyboardSegments.length > 0
55
 
 
 
 
 
 
 
 
 
56
  let videoFilePaths: string[] = []
57
 
58
  // two possibilities:
@@ -103,13 +113,38 @@ export async function clapToTmpVideoFilePath({
103
 
104
  console.log(`clapToTmpVideoFilePath: got ${musicSegments.length} music segments in total`)
105
 
 
 
 
 
 
 
 
 
106
  for (const segment of musicSegments) {
107
- audioTracks.push(
108
- await writeBase64ToFile(
109
- segment.assetUrl,
110
- join(outputDir, `tmp_asset_${segment.id}.wav`)
111
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
112
  )
 
 
 
 
113
  }
114
 
115
  let concatenatedAudio: ConcatenateAudioOutput | undefined = undefined
@@ -117,10 +152,37 @@ export async function clapToTmpVideoFilePath({
117
  if (audioTracks.length > 0) {
118
  console.log(`clapToTmpVideoFilePath: calling concatenateAudio over ${audioTracks.length} audio tracks`)
119
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
120
  concatenatedAudio = await concatenateAudio({
121
- output: join(outputDir, `tmp_asset_concatenated_audio.wav`),
122
  audioTracks,
123
- crossfadeDurationInSec: 2 // 2 seconds
 
124
  })
125
  console.log(`clapToTmpVideoFilePath: concatenatedAudio = ${concatenatedAudio}`)
126
  }
 
8
  concatenateVideosWithAudio,
9
  defaultExportFormat,
10
  type SupportedExportFormat,
11
+ type ConcatenateAudioOutput,
12
+ getMediaInfo
13
  // } from "@aitube/ffmpeg"
14
  } from "./bug-in-bun/aitube_ffmpeg"
15
 
16
  import { clapWithStoryboardsToVideoFile } from "./core/exporters/clapWithStoryboardsToVideoFile"
17
  import { clapWithVideosToVideoFile } from "./core/exporters/clapWithVideosToVideoFile"
18
+ import { extractBase64 } from "@aitube/encoders"
19
 
20
  /**
21
  * Generate a .mp4 video inside a directory (if none is provided, it will be created in /tmp)
 
55
  const canUseVideos = videoSegments.length > 0
56
  const canUseStoryboards = !canUseVideos && storyboardSegments.length > 0
57
 
58
+ // we count the duration of the whole video
59
+ let totalDurationInMs = 0
60
+ clap.segments.forEach(s => {
61
+ if (s.endTimeInMs > totalDurationInMs) {
62
+ totalDurationInMs = s.endTimeInMs
63
+ }
64
+ })
65
+
66
  let videoFilePaths: string[] = []
67
 
68
  // two possibilities:
 
113
 
114
  console.log(`clapToTmpVideoFilePath: got ${musicSegments.length} music segments in total`)
115
 
116
+ // note: once we start with a certain type, e.g. mp3, there is no going back to
117
+ // another format like wav, we can't concatenate them together (well, not yet)
118
+ let detectedMusicTrackFormat = ''
119
+
120
+ // we count how much music has been generated
121
+ // if it is not enough to fill the full video, we will loop it (using cross-fading)
122
+ let availableMusicDurationInMs = 0
123
+
124
  for (const segment of musicSegments) {
125
+ const analysis = extractBase64(segment.assetUrl)
126
+ if (!detectedMusicTrackFormat) {
127
+ detectedMusicTrackFormat = analysis.extension
128
+ } else if (detectedMusicTrackFormat !== analysis.extension) {
129
+ throw new Error(`fatal error: concatenating a mixture of ${detectedMusicTrackFormat} and ${analysis.extension} tracks isn't supported yet`)
130
+ }
131
+
132
+ const { durationInMs, hasAudio } = await getMediaInfo(segment.assetUrl)
133
+
134
+ // we have to skip silent music tracks
135
+ if (!hasAudio) {
136
+ console.log(`skipping a silent music track`)
137
+ continue
138
+ }
139
+
140
+ const newTrackFileName = await writeBase64ToFile(
141
+ segment.assetUrl,
142
+ join(outputDir, `tmp_asset_${segment.id}.${analysis.extension}`)
143
  )
144
+
145
+ audioTracks.push(newTrackFileName)
146
+
147
+ availableMusicDurationInMs += durationInMs
148
  }
149
 
150
  let concatenatedAudio: ConcatenateAudioOutput | undefined = undefined
 
152
  if (audioTracks.length > 0) {
153
  console.log(`clapToTmpVideoFilePath: calling concatenateAudio over ${audioTracks.length} audio tracks`)
154
 
155
+ if (!detectedMusicTrackFormat) {
156
+ throw new Error(`uh that's weird, we couldn't detect the audio type`)
157
+ }
158
+
159
+ const availableMusicTracks = [...audioTracks]
160
+
161
+ // if we don't have enough music audio content
162
+ while (availableMusicDurationInMs < totalDurationInMs) {
163
+ let trackToUse = availableMusicTracks.shift()
164
+
165
+ // abort if there are no available tracks (for some reason)
166
+ if (!trackToUse) { break }
167
+
168
+ availableMusicTracks.push(trackToUse)
169
+
170
+ // we artificially duplicate it (note: this will be cross-faded)
171
+ const { durationInMs } = await getMediaInfo(trackToUse)
172
+
173
+ // let's abort if we have bad data
174
+ if (!durationInMs || durationInMs < 1000) { break }
175
+
176
+ audioTracks.push(trackToUse)
177
+
178
+ availableMusicDurationInMs += durationInMs
179
+ }
180
+
181
  concatenatedAudio = await concatenateAudio({
182
+ output: join(outputDir, `tmp_asset_concatenated_audio.${detectedMusicTrackFormat}`),
183
  audioTracks,
184
+ crossfadeDurationInSec: 2, // 2 seconds
185
+ outputFormat: detectedMusicTrackFormat
186
  })
187
  console.log(`clapToTmpVideoFilePath: concatenatedAudio = ${concatenatedAudio}`)
188
  }