import gradio as gr
import torch
import soundfile as sf

from model import load_model, invert_audio

# Load the model and processor
preloaded = {}
preloaded["model"], preloaded["processor"] = load_model()
model = preloaded["model"]
processor = preloaded["processor"]

def gr_invert_audio(input_audio):
    # Gradio passes the uploaded file as a path; read it into a numpy array
    audio, sr = sf.read(input_audio)

    # Convert audio to a float tensor for the model
    audio_tensor = torch.tensor(audio).float()

    # Invert the audio
    inverted_audio_tensor = invert_audio(model, processor, audio_tensor, sr)
    inverted_audio_np = inverted_audio_tensor.detach().cpu().numpy()

    # Return (sample_rate, samples) so the Audio output component can play it
    return sr, inverted_audio_np
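
# Optional local sanity check (illustrative only; assumes an "input.wav" file exists
# next to this script): the handler can be called directly, without the Gradio UI,
# and the result written out with soundfile.
#
# out_sr, out_samples = gr_invert_audio("input.wav")
# sf.write("inverted_check.wav", out_samples, out_sr)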

# Gradio interface
iface = gr.Interface(
    fn=gr_invert_audio,
    inputs=gr.Audio(type="filepath", label="Upload an Audio File"),
    outputs=gr.Audio(label="Inverted Audio"),
    live=True
)

iface.launch()


# import streamlit as st
# import torch
# import julius
# import soundfile as sf
# import io

# from model import load_model, invert_audio

# # Load the model and processor
# preloaded = {}
# preloaded["model"], preloaded["processor"] = load_model()
# model = preloaded["model"]
# processor = preloaded["processor"]

# st.title("Audio Inversion with HuggingFace & Streamlit")

# # If this is the first run, create a new session state attribute for uploaded file
# if 'uploaded_file' not in st.session_state:
#     st.session_state.uploaded_file = None

# # Get the uploaded file
# uploaded_file = st.file_uploader("Upload an audio file", type=["wav", "flac"])

# # Update the session state only if a new file is uploaded
# if uploaded_file is not None:
#     st.session_state.uploaded_file = uploaded_file.getvalue()  # store content, not the file object

# if st.session_state.uploaded_file:
#     # Play the uploaded audio
#     audio_byte_content = st.session_state.uploaded_file
#     st.audio(audio_byte_content, format="audio/wav")
    
#     # Read the audio file
#     audio, sr = sf.read(io.BytesIO(audio_byte_content))

#     # Convert audio to tensor
#     audio_tensor = torch.tensor(audio).float()

#     @st.cache(allow_output_mutation=True, suppress_st_warning=True)
#     def cache_inverted_audio(audio_tensor):
#         return invert_audio(model, processor, audio_tensor, sr)

#     # Use cached result
#     inverted_audio_tensor = cache_inverted_audio(audio_tensor)
#     inverted_audio_np = inverted_audio_tensor.numpy()

#     # Play inverted audio
#     with io.BytesIO() as out_io:
#         sf.write(out_io, inverted_audio_np, sr, format="wav")
#         st.audio(out_io.getvalue(), format="audio/wav")

#     # Offer a download button for the inverted audio
#     with io.BytesIO() as out_io:
#         sf.write(out_io, inverted_audio_np, sr, format="wav")
#         st.download_button("Download Inverted Audio", data=out_io.getvalue(), file_name="inverted_output.wav", mime="audio/wav")
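
# Note: st.cache with allow_output_mutation is deprecated in recent Streamlit
# releases. If the Streamlit variant above is revived, one option (a sketch, not
# verified against this model) is to cache the model/processor pair with
# st.cache_resource instead of caching the per-file inversion:
#
# @st.cache_resource
# def get_model():
#     return load_model()
#
# model, processor = get_model()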