import gradio as gr
import plotly.express as px
import requests
import librosa

# INTERFACE WITH AUDIO TO AUDIO


def calculate_route():
    # NOTE: the origin/destination coordinates are hardcoded here; the Origin
    # and Destination textboxes in the UI below do not feed into this function.
    api_key = "api_key"
    origin = "49.631997,6.171029"
    destination = "49.586745,6.140002"

    url = f"https://api.tomtom.com/routing/1/calculateRoute/{origin}:{destination}/json?key={api_key}"
    response = requests.get(url)
    data = response.json()

    lats = []
    lons = []
    for point in data["routes"][0]["legs"][0]["points"]:
        lats.append(point["latitude"])
        lons.append(point["longitude"])

    # fig = px.line_geo(lat=lats, lon=lons)
    # fig.update_geos(fitbounds="locations")
    fig = px.line_mapbox(lat=lats, lon=lons, zoom=12, height=600)
    fig.update_layout(
        mapbox_style="open-street-map",
        mapbox_zoom=12,
        mapbox_center_lat=lats[0],
        mapbox_center_lon=lons[0],
    )
    fig.update_layout(margin={"r": 0, "t": 0, "l": 0, "b": 0})
    return fig


def transcript(
    general_context, link_to_audio, voice, emotion, place, time, delete_history, state
):
    """Run speech-to-text on the input audio, pass the transcription to
    FnAnswer, and synthesize FnAnswer's reply with text-to-speech."""
    # Load audio from the given path; sr=16000 resamples to the rate Whisper expects.
    audio_path = link_to_audio
    audio_array, sampling_rate = librosa.load(link_to_audio, sr=16000)

    # Process the audio array into model input features.
    input_features = processor(
        audio_array, sampling_rate=sampling_rate, return_tensors="pt"
    ).input_features
    predicted_ids = modelw.generate(input_features)
    # batch_decode returns a list of strings, one per audio segment.
    transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)

    quest_processing = FnAnswer(
        general_context, transcription, place, time, delete_history, state
    )
    state = quest_processing[2]
    print("language: " + quest_processing[3])

    tts.tts_to_file(
        text=str(quest_processing[0]),
        file_path="output.wav",
        speaker_wav=f"Audio_Files/{voice}.wav",
        language=quest_processing[3],
        emotion="angry",  # NOTE: hardcoded; the `emotion` argument from the UI is currently ignored
    )
    audio_path = "output.wav"
    return audio_path, state["context"], state


# To use the microphone on Chrome, go to
# chrome://flags/#unsafely-treat-insecure-origin-as-secure, add
# http://10.186.115.21:7860/ under "Insecure origins treated as secure",
# enable the flag, and relaunch Chrome.

# Example questions:
# What's the weather like outside?
# What's the closest restaurant from here?

shortcut_js = """
"""

# with gr.Blocks(head=shortcut_js) as demo:
#     action_button = gr.Button(value="Name", elem_id="recorder")
#     textbox = gr.Textbox()
#     action_button.click(lambda: "button pressed", None, textbox)
#     demo.launch()

# Generate options for hours (00-23).
hour_options = [f"{i:02d}:00:00" for i in range(24)]

model_answer = ""
general_context = ""

# Define the initial state with some initial context.
print(general_context)
initial_state = {"context": general_context}
initial_context = initial_state["context"]
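# --- Assumed model setup (a sketch; not part of the original file) ---
# transcript() references `processor`, `modelw`, `tts`, and `FnAnswer`, which
# are defined elsewhere in the project. A minimal initialization sketch,
# assuming Whisper (via transformers) for speech-to-text and Coqui TTS for
# text-to-speech; the checkpoint names are illustrative guesses, and FnAnswer
# (the question-answering logic) would still need to be imported from the
# project's own code:
#
# from transformers import WhisperProcessor, WhisperForConditionalGeneration
# from TTS.api import TTS
#
# processor = WhisperProcessor.from_pretrained("openai/whisper-small")
# modelw = WhisperForConditionalGeneration.from_pretrained("openai/whisper-small")
# tts = TTS("tts_models/multilingual/multi-dataset/your_tts")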
# Create the Gradio interface.
with gr.Blocks(theme=gr.themes.Default()) as demo:
    with gr.Row():
        with gr.Column(scale=1, min_width=300):
            time_picker = gr.Dropdown(
                choices=hour_options, label="What time is it?", value="08:00:00"
            )
            history = gr.Radio(
                ["Yes", "No"], label="Maintain the conversation history?", value="No"
            )
            voice_character = gr.Radio(
                choices=[
                    "Rick Sanches",
                    "Eddie Murphy",
                    "David Attenborough",
                    "Morgan Freeman",
                ],
                label="Choose a voice",
                value="Rick Sanches",  # the default must match one of the choices
                show_label=True,
            )
            emotion = gr.Radio(
                choices=["Cheerful", "Grumpy"],
                label="Choose an emotion",
                value="Cheerful",
                show_label=True,
            )
            # place = gr.Radio(
            #     choices=[
            #         "Luxembourg Gare, Luxembourg",
            #         "Kirchberg Campus, Kirchberg",
            #         "Belval Campus, Belval",
            #         "Eiffel Tower, Paris",
            #         "Thionville, France",
            #     ],
            #     label="Choose a location for your car",
            #     value="Kirchberg Campus, Kirchberg",
            #     show_label=True,
            # )
            origin = gr.Textbox(
                value="Luxembourg Gare, Luxembourg", label="Origin", interactive=True
            )
            destination = gr.Textbox(
                value="Kirchberg Campus, Kirchberg",
                label="Destination",
                interactive=True,
            )
            recorder = gr.Audio(type="filepath", label="input audio", elem_id="recorder")
        with gr.Column(scale=2, min_width=600):
            map_plot = gr.Plot()
            origin.submit(fn=calculate_route, outputs=map_plot)
            destination.submit(fn=calculate_route, outputs=map_plot)
            output_audio = gr.Audio(label="output audio")

# map_if = gr.Interface(fn=plot_map, inputs=year_input, outputs=map_plot)

# iface = gr.Interface(
#     fn=transcript,
#     inputs=[
#         gr.Textbox(value=initial_context, visible=False),
#         gr.Audio(type="filepath", label="input audio", elem_id="recorder"),
#         voice_character,
#         emotion,
#         place,
#         time_picker,
#         history,
#         gr.State(),  # keeps track of the context state across interactions
#     ],
#     outputs=[gr.Audio(label="output audio"), gr.Textbox(visible=False), gr.State()],
#     head=shortcut_js,
# )

# Close any open interfaces to free the port.
gr.close_all()

# Launch the interface.
demo.queue().launch(
    debug=True, server_name="0.0.0.0", server_port=7860, ssl_verify=False
)
# iface.launch(debug=True, share=False, server_name="0.0.0.0", server_port=7860, ssl_verify=False)
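# --- Possible extension: route the textbox values into the map (a sketch) ---
# As wired above, origin.submit()/destination.submit() call calculate_route
# with no inputs, so the hardcoded coordinates are always used. One way the
# free-text addresses could drive the route instead, assuming TomTom's Fuzzy
# Search endpoint for geocoding; geocode_address and calculate_route_from_text
# are hypothetical helpers, and this wiring would replace the .submit() calls
# inside the Blocks context above:
#
# def geocode_address(address, api_key):
#     """Resolve a free-text address to a 'lat,lon' string via TomTom Fuzzy Search."""
#     url = f"https://api.tomtom.com/search/2/search/{address}.json?key={api_key}&limit=1"
#     position = requests.get(url).json()["results"][0]["position"]
#     return f"{position['lat']},{position['lon']}"
#
# def calculate_route_from_text(origin_text, destination_text):
#     origin = geocode_address(origin_text, api_key)
#     destination = geocode_address(destination_text, api_key)
#     ...  # then the same routing/plotting code as in calculate_route above
#
# origin.submit(fn=calculate_route_from_text,
#               inputs=[origin, destination], outputs=map_plot)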