"""Helpers for a voice chat app.

Streams replies from a hosted Mistral instruct model, splits the growing
reply into sentences with NLTK, and synthesizes speech with edge-tts.
"""

import asyncio
import tempfile

import edge_tts
import gradio as gr
import nltk
from huggingface_hub import InferenceClient

# Download the 'punkt' tokenizer for the NLTK library.
# NOTE(review): recent NLTK releases load the "punkt_tab" resource in
# sent_tokenize instead of "punkt" - confirm against the pinned NLTK version.
nltk.download("punkt")

client = InferenceClient(
    "mistralai/Mistral-7B-Instruct-v0.3"
)

# System prompt placed in the first [INST] block of every request.
_SYSTEM_MESSAGE = (
    " You are an empathetic, insightful, and supportive training coach who "
    "helps people deal with challenges and celebrate achievements. "
    "You help people feel better by asking questions to reflect on and evoke "
    "feelings of positivity, gratitude, joy, and love. "
    "You show radical candor and tough love. "
    "Respond in a casual and friendly tone. "
    "Sprinkle in filler words, contractions, idioms, and other casual speech "
    "that we use in conversation. "
    "Emulate the user’s speaking style and be concise in your response. "
)

# Marker fragments removed from the model output before sentence splitting.
# NOTE(review): the last entry replaces a `.replace("", "")` call, which was a
# no-op; presumably the end-of-sequence marker "</s>" was lost from the
# literal - confirm.
_STRIP_MARKERS = ("<|ass>", "[/ASST]", "[/ASSI]", "[/ASS]", "</s>")

# A sentence at most this long that ends in terminal punctuation is held back
# and prepended to the following sentence.
_SHORT_SENTENCE_MAX_LEN = 15


def format_prompt(message, history):
    """Build the [INST]-delimited prompt string sent to the model.

    Args:
        message: The new user message. An empty string is replaced by "Hello".
        history: Iterable of ``(user_prompt, bot_response)`` pairs from earlier
            turns. A pair whose ``user_prompt`` is ``None`` contributes only
            its bot response.

    Returns:
        The system message, every past turn, and the new message concatenated
        into a single prompt string.
    """
    parts = ["[INST]" + _SYSTEM_MESSAGE + "[/INST]"]
    for user_prompt, bot_response in history:
        if user_prompt is not None:
            parts.append(f"[INST] {user_prompt} [/INST]")
        parts.append(f" {bot_response} ")

    if message == "":
        message = "Hello"
    parts.append(f"[INST] {message} [/INST]")
    return "".join(parts)


def generate_llm_output(
    prompt,
    history,
    llm,
    temperature=0.8,
    max_tokens=256,
    top_p=0.95,
    stop_words=("~~", "[/INST]", "~~"),
):
    """Stream the model's reply, yielding the accumulated text per token.

    Args:
        prompt: The new user message.
        history: Past ``(user_prompt, bot_response)`` pairs; see
            ``format_prompt``.
        llm: Unused; kept so existing callers keep working.
        temperature: Sampling temperature, clamped to at least 0.01.
        max_tokens: Passed to the client as ``max_new_tokens``.
        top_p: Nucleus sampling parameter.
        stop_words: Unused; kept so existing callers keep working. The default
            is a tuple so no mutable object is shared between calls.

    Yields:
        The reply text accumulated so far, once per streamed token.

    Returns:
        The complete reply text (available as ``StopIteration.value``).
    """
    generate_kwargs = dict(
        temperature=max(float(temperature), 1e-2),
        max_new_tokens=max_tokens,
        top_p=float(top_p),
        repetition_penalty=1.0,
        do_sample=True,
        seed=42,
    )

    formatted_prompt = format_prompt(prompt, history)
    stream = client.text_generation(
        formatted_prompt,
        **generate_kwargs,
        stream=True,
        details=True,
        return_full_text=False,
    )

    output = ""
    for response in stream:
        output += response.token.text
        yield output
    return output


# tts interface function
def tts_interface(text, voice):
    """Synchronously synthesize *text* and return the audio file path.

    Runs ``text_to_speech`` to completion with ``asyncio.run``.
    """
    return asyncio.run(text_to_speech(text, voice))


# Text-to-speech function
async def text_to_speech(text, voice, rate=10, pitch=10):
    """Synthesize *text* with edge-tts into a temporary ``.mp3`` file.

    Args:
        text: The text to speak.
        voice: Voice label; only the part before the first ``" - "`` is passed
            to edge-tts as the voice name.
        rate: Integer rate adjustment, formatted as a signed percentage
            (``10`` -> ``"+10%"``).
        pitch: Integer pitch adjustment, formatted as a signed Hz offset
            (``10`` -> ``"+10Hz"``).

    Returns:
        Path of the generated file. The file is created with ``delete=False``,
        so removing it is left to the caller.
    """
    rate_str = f"{rate:+d}%"
    pitch_str = f"{pitch:+d}Hz"
    voice_short_name = voice.split(" - ")[0]
    communicate = edge_tts.Communicate(
        text, voice_short_name, rate=rate_str, pitch=pitch_str
    )

    # Only reserve a unique path here and close the handle before edge-tts
    # writes to it: a second open of a still-open temp file fails on Windows.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
        tmp_path = tmp_file.name
    await communicate.save(tmp_path)
    return tmp_path


def _clean_llm_text(text):
    """Flatten newlines and strip marker fragments before sentence splitting."""
    text = text.replace("\n", " ")
    for marker in _STRIP_MARKERS:
        text = text.replace(marker, "")
    return text.strip()


def get_sentence(history, llm):
    """Stream the reply to the last user turn, yielding complete sentences.

    The reply is re-tokenized into sentences every time the model emits a
    token. A sentence is yielded once a later sentence has started; the final
    sentence is yielded after the stream ends. A very short sentence (at most
    15 characters, ending in ``.``, ``!`` or ``?``) is held back and prepended
    to the next one (the original author's stated reason: proper language
    auto-detection downstream).

    Args:
        history: List of ``[user_message, bot_reply]`` pairs; the last pair is
            the turn being answered and its reply slot is updated in place as
            text streams in. ``None`` or an empty list is treated as a single
            empty turn.
        llm: Forwarded to ``generate_llm_output``.

    Yields:
        ``(sentence, history)`` tuples.
    """
    if not history:
        history = [["", None]]
    history[-1][1] = ""

    sentence_list = []
    seen_hashes = set()
    stored_sentence = None
    stored_sentence_hash = None

    for partial_reply in generate_llm_output(history[-1][0], history[:-1], llm):
        history[-1][1] = partial_reply.replace("<|assistant|>", "")
        # The reply arrives token by token, so re-split it on every update.
        text_to_generate = nltk.sent_tokenize(_clean_llm_text(history[-1][1]))
        if len(text_to_generate) <= 1:
            # The first sentence may still be growing.
            continue

        dif = len(text_to_generate) - len(sentence_list)

        if dif == 1 and len(sentence_list) != 0:
            # Only the still-growing last sentence is new.
            continue

        if dif == 2 and len(sentence_list) != 0 and stored_sentence is not None:
            continue

        # All this complexity is due to appending a short first sentence to
        # the next one for proper language auto-detect.
        if stored_sentence is not None and stored_sentence_hash is None and dif > 1:
            # The stored sentence was already consumed, so the tokenized list
            # is one entry ahead of sentence_list; look at the next sentence.
            sentence = text_to_generate[len(sentence_list) + 1]
        elif (
            stored_sentence is not None
            and len(text_to_generate) > 2
            and stored_sentence_hash is not None
        ):
            print("Appending stored")
            sentence = stored_sentence + text_to_generate[len(sentence_list) + 1]
            stored_sentence_hash = None
        else:
            sentence = text_to_generate[len(sentence_list)]

        # A too-short sentence is held and appended to the next one, if any.
        # This is for proper language detection.
        if (
            len(sentence) <= _SHORT_SENTENCE_MAX_LEN
            and stored_sentence_hash is None
            and stored_sentence is None
            and sentence[-1] in (".", "!", "?")
        ):
            stored_sentence = sentence
            stored_sentence_hash = hash(sentence)
            print("Storing:", stored_sentence)
            continue

        sentence_hash = hash(sentence)
        if stored_sentence_hash is not None and sentence_hash == stored_sentence_hash:
            continue

        if sentence_hash not in seen_hashes:
            seen_hashes.add(sentence_hash)
            sentence_list.append(sentence)
            print("New Sentence: ", sentence)
            yield (sentence, history)

    # Emit the final sentence. Only the tokenization is guarded, so closing
    # the generator at the yield below is not reported as an error.
    try:
        last_sentence = nltk.sent_tokenize(_clean_llm_text(history[-1][1]))[-1]
    except Exception as exc:  # best effort: e.g. an empty reply has no sentences
        print("ERROR on last sentence history is :", history, "-", repr(exc))
        return

    sentence_hash = hash(last_sentence)
    if sentence_hash in seen_hashes:
        return

    if stored_sentence is not None and stored_sentence_hash is not None:
        last_sentence = stored_sentence + last_sentence
        stored_sentence = stored_sentence_hash = None
        print("Last Sentence with stored:", last_sentence)

    seen_hashes.add(sentence_hash)
    sentence_list.append(last_sentence)
    print("Last Sentence: ", last_sentence)

    yield (last_sentence, history)