Tonic committed on
Commit
12da9ab
1 Parent(s): e7e8d00

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -5
app.py CHANGED
@@ -32,7 +32,6 @@ text_examples = [
32
  ["<en> The big difference between Europe <fr> et les Etats Unis <pl> jest to, że mamy tak wiele języków <uk> тут, в Європі"]
33
  ]
34
 
35
- # Function to parse the multilingual input text
36
  def parse_multilingual_text(input_text):
37
  pattern = r"<(\w+)>\s(.*?)\s(?=<\w+>|$)"
38
  segments = re.findall(pattern, input_text)
@@ -46,11 +45,11 @@ def generate_segment_audio(text, lang, speaker_url, pipe):
46
  resample_audio = resampler(newsr=24000)
47
  audio_data_resampled = next(resample_audio([{'sample_rate': 24000, 'samples': audio_data.cpu()}]))['samples_24k']
48
  audio_np = audio_data_resampled.cpu().numpy()
49
- print("Shape after resampling:", audio_np.shape) # Debug statement
50
  return audio_np
51
 
52
  def concatenate_audio_segments(segments):
53
- concatenated_audio = np.concatenate(segments , axis=0) #axis experimentation
54
  return concatenated_audio
55
 
56
 
@@ -67,11 +66,11 @@ def whisper_speech_demo(multilingual_text, speaker_audio):
67
  for lang, text in segments:
68
  text_str = text if isinstance(text, str) else str(text)
69
  audio_np = generate_segment_audio(text_str, lang, speaker_url, pipe)
70
- print("Audio segment shape:", audio_np.shape) # Debug statement
71
  audio_segments.append(audio_np)
72
 
73
  concatenated_audio = concatenate_audio_segments(audio_segments)
74
- print("Final concatenated audio shape:", concatenated_audio.shape) # Debug statement
75
  concatenated_audio = concatenated_audio / np.max(np.abs(concatenated_audio))
76
 
77
  with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as tmp_file:
 
32
  ["<en> The big difference between Europe <fr> et les Etats Unis <pl> jest to, że mamy tak wiele języków <uk> тут, в Європі"]
33
  ]
34
 
 
35
  def parse_multilingual_text(input_text):
36
  pattern = r"<(\w+)>\s(.*?)\s(?=<\w+>|$)"
37
  segments = re.findall(pattern, input_text)
 
45
  resample_audio = resampler(newsr=24000)
46
  audio_data_resampled = next(resample_audio([{'sample_rate': 24000, 'samples': audio_data.cpu()}]))['samples_24k']
47
  audio_np = audio_data_resampled.cpu().numpy()
48
+ # Debug statement print("Shape after resampling:", audio_np.shape)
49
  return audio_np
50
 
51
  def concatenate_audio_segments(segments):
52
+ concatenated_audio = np.concatenate(segments , axis=1)
53
  return concatenated_audio
54
 
55
 
 
66
  for lang, text in segments:
67
  text_str = text if isinstance(text, str) else str(text)
68
  audio_np = generate_segment_audio(text_str, lang, speaker_url, pipe)
69
+ # Debug statement print("Audio segment shape:", audio_np.shape)
70
  audio_segments.append(audio_np)
71
 
72
  concatenated_audio = concatenate_audio_segments(audio_segments)
73
+ # Debug statement print("Final concatenated audio shape:", concatenated_audio.shape)
74
  concatenated_audio = concatenated_audio / np.max(np.abs(concatenated_audio))
75
 
76
  with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as tmp_file: