Spaces:

HareemFatima
/

HareemFatima-distilhubert-finetuned-stutterdetection

Runtime error

App Files Files Community

HareemFatima committed on May 1, 2024

Commit

eda6be5

verified ·

1 Parent(s): 2da2570

Update app.py

Browse files

Files changed (1) hide show

app.py +47 -45

app.py CHANGED Viewed

@@ -1,46 +1,48 @@
 import streamlit as st
-from transformers import pipeline
-# Load audio classification model
-audio_classifier = pipeline(
-    "audio-classification", model="HareemFatima/distilhubert-finetuned-stutterdetection"
-)
-# Load text-to-speech model (replace with your TTS model details)
-# Placeholder text-to-speech function (replace with your actual implementation)
-def tts(text):
-    # Replace this with your text-to-speech processing logic
-    # This is a placeholder to demonstrate the concept
-    return f"Synthesized speech for therapy: {text}"
-# Define therapy text for different stutter types (replace with your specific content)
-therapy_text = {
-    "Repetition": "Your speech sounds great! Keep practicing!",
-    "Blocks": "Take a deep breath and try speaking slowly. You can do it!",
-    "Prolongation": "Focus on relaxing your mouth muscles and speaking smoothly.",
-    # Add more stutter types and therapy text here
-}
-st.title("Stuttering Therapy Assistant")
-st.write("This app helps you identify stuttering types and provides personalized therapy suggestions.")
-uploaded_audio = st.file_uploader("Upload Audio Clip")
-if uploaded_audio is not None:
-    # Read audio data
-    audio_bytes = uploaded_audio.read()
-    # Classify stuttering type
-    prediction = audio_classifier(audio_bytes)
-    stutter_type = prediction[0]["label"]
-    # Retrieve therapy text
-    therapy = therapy_text.get(stutter_type, "General therapy tip: Practice slow, relaxed speech.")
-    # Generate synthesized speech (placeholder for now)
-    synthesized_speech = tts(therapy)
-    st.write(f"Predicted Stutter Type: {stutter_type}")
-    st.write(f"Therapy Tip: {therapy}")
-    st.audio(synthesized_speech)  # Placeholder audio output (replace with actual synthesized speech)

 import streamlit as st
+from transformers import pipeline, AutoTokenizer, AutoModelForTextToWaveform
+# Load the models once per server process. Streamlit re-executes this script on
+# every widget interaction, so un-cached loads would rebuild all three objects each time.
+@st.cache_resource
+def _load_models():
+    """Return the stutter classifier pipeline, the TTS tokenizer and the TTS model."""
+    # Audio classification model
+    classifier = pipeline("audio-classification", model="HareemFatima/distilhubert-finetuned-stutterdetection")
+    # TTS tokenizer and model
+    tokenizer = AutoTokenizer.from_pretrained("facebook/mms-tts-eng")
+    model = AutoModelForTextToWaveform.from_pretrained("facebook/mms-tts-eng")
+    return classifier, tokenizer, model
+# Keep the original module-level names so the functions below can use them unchanged
+audio_classification_model, tts_tokenizer, tts_model = _load_models()
+# Define a function to classify audio and generate speech
+def classify_and_speak(audio_input):
+    """Classify a speech clip, show the predicted stutter type and play spoken feedback.
+    audio_input is handed to the audio-classification pipeline as-is, except that
+    file-like objects are first read into bytes. Nothing is classified when it is None.
+    """
+    import torch  # local import: only needed for the no-grad TTS forward pass
+    if audio_input is None:
+        st.warning("No audio was provided.")
+        return
+    # Uploaded files are file-like; the pipeline takes raw bytes, a path or an array
+    if hasattr(audio_input, "read"):
+        audio_input = audio_input.read()
+    # Classify the audio
+    classification_result = audio_classification_model(audio_input)
+    if not classification_result:
+        st.warning("The classifier returned no prediction.")
+        return
+    predicted_class = classification_result[0]["label"]
+    # Map predicted class to corresponding speech text
+    speech_text = map_class_to_speech(predicted_class)
+    # Generate speech. The MMS-TTS (VITS) model produces the waveform in a single
+    # forward pass, so it is called directly instead of through generate().
+    tts_inputs = tts_tokenizer(speech_text, return_tensors="pt")
+    with torch.no_grad():
+        waveform = tts_model(**tts_inputs).waveform
+    # Display classification result and play speech
+    st.write("Predicted Stutter Type:", predicted_class)
+    # st.audio needs a NumPy array together with its sample rate, not a torch tensor
+    st.audio(waveform.squeeze().cpu().numpy(), sample_rate=tts_model.config.sampling_rate)
+# Translate a classifier label into the sentence that is spoken back to the user
+def map_class_to_speech(predicted_class):
+    """Return the feedback sentence for predicted_class, or "Unknown stutter type" if the label is not recognised."""
+    feedback_by_label = {
+        "nonstutter": "You are speaking fluently without any stutter.",
+        "prolongation": "You are experiencing prolongation stutter. Try to relax and speak slowly.",
+        "repetition": "You are experiencing repetition stutter. Focus on your breathing and try to speak smoothly.",
+        "blocks": "You are experiencing block stutter. Take a deep breath and try to speak slowly and calmly.",
+    }
+    try:
+        return feedback_by_label[predicted_class]
+    except KeyError:
+        # Label is not one of the four known classes
+        return "Unknown stutter type"
+# Streamlit app
+def main():
+    """Render the page: collect an audio clip, then classify it and play spoken feedback."""
+    st.title("Stutter Classification and Therapy App")
+    # st.audio only plays audio back: it has no start_recording/channels parameters and
+    # returns no recording, so the clip is collected with a file uploader instead.
+    uploaded_audio = st.file_uploader("Upload Audio Clip", type=["wav", "mp3", "flac", "ogg"])
+    if uploaded_audio is None:
+        st.info("Upload an audio clip to get started.")
+        return
+    # getvalue() returns the full contents regardless of the current read position
+    audio_bytes = uploaded_audio.getvalue()
+    st.audio(audio_bytes)
+    if st.button("Classify"):
+        with st.spinner("Classifying and speaking..."):
+            classify_and_speak(audio_bytes)
+if __name__ == "__main__":
+    main()