import gradio as gr
import librosa
import plotly.express as px
import requests
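# --- Hedged setup sketch (not in the original entry) ---
# The functions below use `processor`, `modelw`, `tts`, and `FnAnswer`, none of
# which this entry defines. The Whisper and Coqui XTTS checkpoints below are
# assumptions chosen to match the calls made later (processor(...),
# modelw.generate(...), tts.tts_to_file(...)); FnAnswer is assumed to be
# provided elsewhere in this repo.
from transformers import WhisperProcessor, WhisperForConditionalGeneration
from TTS.api import TTS

processor = WhisperProcessor.from_pretrained("openai/whisper-base")
modelw = WhisperForConditionalGeneration.from_pretrained("openai/whisper-base")
tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2")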
# INTERFACE WITH AUDIO TO AUDIO
def calculate_route():
    # NOTE: placeholder API key; supply a real TomTom key here.
    api_key = "api_key"
    origin = "49.631997,6.171029"
    destination = "49.586745,6.140002"
    url = f"https://api.tomtom.com/routing/1/calculateRoute/{origin}:{destination}/json?key={api_key}"
    response = requests.get(url)
    data = response.json()

    # Collect the latitude/longitude points of the returned route.
    lats = []
    lons = []
    for point in data['routes'][0]['legs'][0]['points']:
        lats.append(point['latitude'])
        lons.append(point['longitude'])

    # fig = px.line_geo(lat=lats, lon=lons)
    # fig.update_geos(fitbounds="locations")
    fig = px.line_mapbox(lat=lats, lon=lons, zoom=12, height=600)
    fig.update_layout(
        mapbox_style="open-street-map",
        mapbox_zoom=12,
        mapbox_center_lat=lats[0],
        mapbox_center_lon=lons[0],
    )
    fig.update_layout(margin={"r": 0, "t": 0, "l": 0, "b": 0})
    return fig
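# Hedged sketch (not in the original entry): the Origin/Destination textboxes
# defined below hold place names, while calculate_route() hardcodes coordinates
# and ignores them. A helper like this could resolve a place name to a
# "lat,lon" string via TomTom's Search API; the endpoint is real, but the
# helper itself and its wiring into calculate_route() are assumptions.
def geocode(query, api_key):
    url = f"https://api.tomtom.com/search/2/geocode/{query}.json?key={api_key}"
    position = requests.get(url).json()["results"][0]["position"]
    return f"{position['lat']},{position['lon']}"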
def transcript(
    general_context, link_to_audio, voice, emotion, place, time, delete_history, state
):
    """Run speech-to-text on the input audio, pass the transcription through
    FnAnswer, and synthesize the answer with text-to-speech."""
    # Load the audio; sr=16000 resamples to the rate the speech model expects.
    audio_array, sampling_rate = librosa.load(link_to_audio, sr=16000)

    # Transcribe the audio with the speech-to-text model.
    input_features = processor(
        audio_array, sampling_rate=sampling_rate, return_tensors="pt"
    ).input_features
    predicted_ids = modelw.generate(input_features)
    transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)

    # Answer the transcribed question, threading the conversation state through.
    quest_processing = FnAnswer(
        general_context, transcription, place, time, delete_history, state
    )
    state = quest_processing[2]
    print("language: " + quest_processing[3])

    # Voice the answer with the selected speaker and emotion.
    tts.tts_to_file(
        text=str(quest_processing[0]),
        file_path="output.wav",
        speaker_wav=f"Audio_Files/{voice}.wav",
        language=quest_processing[3],
        emotion=emotion,
    )
    audio_path = "output.wav"
    return audio_path, state["context"], state
# To use the microphone in Chrome, go to chrome://flags/#unsafely-treat-insecure-origin-as-secure,
# enter http://10.186.115.21:7860/ under "Insecure origins treated as secure",
# enable the flag, and relaunch Chrome.
# Example questions:
# What's the weather like outside?
# What's the closest restaurant from here?
shortcut_js = """
<script>
function shortcuts(e) {
    var event = document.all ? window.event : e;
    switch (e.target.tagName.toLowerCase()) {
        case "input":
        case "textarea":
            break;
        default:
            if (e.key.toLowerCase() == "r" && e.ctrlKey) {
                console.log("recording");
                document.getElementById("recorder").start_recording();
            }
            if (e.key.toLowerCase() == "s" && e.ctrlKey) {
                console.log("stopping");
                document.getElementById("recorder").stop_recording();
            }
    }
}
document.addEventListener('keypress', shortcuts, false);
</script>
"""
# with gr.Blocks(head=shortcut_js) as demo:
#     action_button = gr.Button(value="Name", elem_id="recorder")
#     textbox = gr.Textbox()
#     action_button.click(lambda: "button pressed", None, textbox)
# demo.launch()
# Generate options for hours (00-23)
hour_options = [f"{i:02d}:00:00" for i in range(24)]
model_answer = ""
general_context = ""
# Define the initial state with some initial context.
print(general_context)
initial_state = {"context": general_context}
initial_context = initial_state["context"]
# Create the Gradio interface.
with gr.Blocks(theme=gr.themes.Default()) as demo:
    with gr.Row():
        with gr.Column(scale=1, min_width=300):
            time_picker = gr.Dropdown(
                choices=hour_options, label="What time is it?", value="08:00:00"
            )
            history = gr.Radio(
                ["Yes", "No"], label="Maintain the conversation history?", value="No"
            )
            voice_character = gr.Radio(
                choices=[
                    "Rick Sanches",
                    "Eddie Murphy",
                    "David Attenborough",
                    "Morgan Freeman",
                ],
                label="Choose a voice",
                value="Rick Sanches",
                show_label=True,
            )
            emotion = gr.Radio(
                choices=["Cheerful", "Grumpy"],
                label="Choose an emotion",
                value="Cheerful",
                show_label=True,
            )
            # place = gr.Radio(
            #     choices=[
            #         "Luxembourg Gare, Luxembourg",
            #         "Kirchberg Campus, Kirchberg",
            #         "Belval Campus, Belval",
            #         "Eiffel Tower, Paris",
            #         "Thionville, France",
            #     ],
            #     label="Choose a location for your car",
            #     value="Kirchberg Campus, Kirchberg",
            #     show_label=True,
            # )
            origin = gr.Textbox(
                value="Luxembourg Gare, Luxembourg", label="Origin", interactive=True
            )
            destination = gr.Textbox(
                value="Kirchberg Campus, Kirchberg",
                label="Destination",
                interactive=True,
            )
            recorder = gr.Audio(type="filepath", label="input audio", elem_id="recorder")
        with gr.Column(scale=2, min_width=600):
            map_plot = gr.Plot()
            origin.submit(fn=calculate_route, outputs=map_plot)
            destination.submit(fn=calculate_route, outputs=map_plot)
            output_audio = gr.Audio(label="output audio")
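            # Hedged wiring sketch (not in the original entry, which leaves the
            # recorder unconnected; the intended hookup survives only in the
            # commented-out gr.Interface below). Assumptions: a hidden textbox
            # carries the context, gr.State carries the conversation state, and
            # the origin textbox stands in for the commented-out place radio.
            context_box = gr.Textbox(value=initial_context, visible=False)
            conv_state = gr.State(value=initial_state)
            recorder.change(
                fn=transcript,
                inputs=[
                    context_box,
                    recorder,
                    voice_character,
                    emotion,
                    origin,
                    time_picker,
                    history,
                    conv_state,
                ],
                outputs=[output_audio, context_box, conv_state],
            )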
# map_if = gr.Interface(fn=plot_map, inputs=year_input, outputs=map_plot)

# iface = gr.Interface(
#     fn=transcript,
#     inputs=[
#         gr.Textbox(value=initial_context, visible=False),
#         gr.Audio(type="filepath", label="input audio", elem_id="recorder"),
#         voice_character,
#         emotion,
#         place,
#         time_picker,
#         history,
#         gr.State(),  # keeps track of the context state across interactions
#     ],
#     outputs=[gr.Audio(label="output audio"), gr.Textbox(visible=False), gr.State()],
#     head=shortcut_js,
# )
# Close any open interfaces to free the port.
gr.close_all()
# Launch the interface.
demo.queue().launch(
debug=True, server_name="0.0.0.0", server_port=7860, ssl_verify=False
)
# iface.launch(debug=True, share=False, server_name="0.0.0.0", server_port=7860, ssl_verify=False)