Spaces:
Sleeping
Sleeping
from huggingface_hub import InferenceClient | |
import gradio as gr | |
import nltk | |
import edge_tts | |
import tempfile | |
import asyncio | |
# Fetch the NLTK 'punkt' tokenizer data when the module is loaded
nltk.download("punkt")

# Module-level inference client, shared by the generation code in this file
client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.3")
def format_prompt(message, history):
    """Build the ``[INST]``-tagged prompt sent to the language model.

    The prompt opens with the coach persona wrapped in its own
    ``<s>[INST] ... [/INST]`` pair, continues with one segment per past
    exchange, and ends with the new user message.

    Args:
        message: Latest user message. ``None`` or an empty string is replaced
            with ``"Hello"`` so the model always receives a user turn.
        history: Iterable of ``(user_prompt, bot_response)`` pairs. Either
            element may be ``None``; a ``None`` element is left out of the
            prompt instead of being rendered as the text ``"None"``.

    Returns:
        The complete prompt string.
    """
    # Persona text; it begins and ends with a newline, as in the original
    # triple-quoted literal.
    system_message = (
        "\n"
        "You are an empathetic, insightful, and supportive training coach who helps people deal with challenges and celebrate achievements.\n"
        "You help people feel better by asking questions to reflect on and evoke feelings of positivity, gratitude, joy, and love.\n"
        "You show radical candor and tough love.\n"
        "Respond in a casual and friendly tone.\n"
        "Sprinkle in filler words, contractions, idioms, and other casual speech that we use in conversation.\n"
        "Emulate the user’s speaking style and be concise in your response.\n"
    )

    parts = ["<s>[INST]" + system_message + "[/INST]"]
    for user_prompt, bot_response in history:
        if user_prompt is not None:
            parts.append(f"[INST] {user_prompt} [/INST]")
        # A turn that has not been answered yet carries None; skip it rather
        # than feeding the literal word "None" back to the model.
        if bot_response is not None:
            parts.append(f" {bot_response}</s> ")

    # Covers both "" and None.
    if not message:
        message = "Hello"
    parts.append(f"[INST] {message} [/INST]")
    return "".join(parts)
def generate_llm_output(
    prompt,
    history,
    llm,
    temperature=0.8,
    max_tokens=256,
    top_p=0.95,
    stop_words=("<s>", "[/INST]", "</s>"),
):
    """Stream a completion for ``prompt`` and yield the text accumulated so far.

    Args:
        prompt: Latest user message; combined with ``history`` by
            ``format_prompt``.
        history: Past ``(user_prompt, bot_response)`` pairs.
        llm: Not used by this function; kept so existing callers keep working.
        temperature: Sampling temperature, clamped to a minimum of ``1e-2``.
        max_tokens: Passed to the client as ``max_new_tokens``.
        top_p: Nucleus-sampling value, converted to ``float``.
        stop_words: Token texts that end the stream. A token whose text
            matches one of these is not added to the output.

    Yields:
        The accumulated output string after each streamed token.

    Returns:
        The final accumulated output (available as ``StopIteration.value``).
    """
    temperature = max(float(temperature), 1e-2)
    top_p = float(top_p)
    # Tolerate callers that pass None or a list.
    stop_words = tuple(stop_words or ())

    generate_kwargs = dict(
        temperature=temperature,
        max_new_tokens=max_tokens,
        top_p=top_p,
        repetition_penalty=1.0,
        do_sample=True,
        seed=42,  # fixed seed is sent with every request
    )

    formatted_prompt = format_prompt(prompt, history)
    stream = client.text_generation(
        formatted_prompt,
        **generate_kwargs,
        stream=True,
        details=True,
        return_full_text=False,
    )

    output = ""
    for response in stream:
        token_text = response.token.text
        # Previously stop_words was never consulted, so marker tokens such as
        # "</s>" were appended to the text handed to the caller.
        if token_text in stop_words:
            break
        output += token_text
        yield output
    return output
# Synchronous entry point for the text-to-speech coroutine
def tts_interface(text, voice):
    """Run ``text_to_speech(text, voice)`` to completion and return its result."""
    return asyncio.run(text_to_speech(text, voice))
# Text-to-speech function
async def text_to_speech(text, voice, rate=10, pitch=10):
    """Synthesize ``text`` with edge_tts and return the path of the saved MP3.

    Args:
        text: Text to speak.
        voice: Voice label; only the part before the first ``" - "`` is passed
            to edge_tts as the voice name.
        rate: Speaking-rate offset, formatted as a signed percentage
            (the default ``10`` gives ``"+10%"``).
        pitch: Pitch offset, formatted as a signed Hz value
            (the default ``10`` gives ``"+10Hz"``).

    Returns:
        Path of a temporary ``.mp3`` file. The file is created with
        ``delete=False``, so this function does not remove it.
    """
    # int() lets callers pass whole-number floats; ":+d" rejects float input.
    rate_str = f"{int(rate):+d}%"
    pitch_str = f"{int(pitch):+d}Hz"
    voice_short_name = voice.split(" - ")[0]
    communicate = edge_tts.Communicate(text, voice_short_name, rate=rate_str, pitch=pitch_str)

    # The context manager is used only to reserve a unique path. The handle is
    # closed before the audio is written so the file is never open twice.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
        tmp_path = tmp_file.name
    await communicate.save(tmp_path)
    return tmp_path
def get_sentence(history, llm):
    """Stream the model's reply and yield it one sentence at a time.

    The reply for the last entry of ``history`` is generated with
    ``generate_llm_output`` and split with ``nltk.sent_tokenize`` as it grows.
    A sentence is yielded once a later sentence has started. A first sentence
    of 15 characters or fewer that ends in ``.``, ``!`` or ``?`` is held back
    and prepended to the sentence that follows it.

    Args:
        history: List of ``[user_message, bot_response]`` pairs, or ``None``.
            The last pair's response slot is overwritten in place with the
            text generated so far.
        llm: Passed through unchanged to ``generate_llm_output``.

    Yields:
        ``(sentence, history)`` tuples, where ``history`` is the same list
        object that is being updated.
    """
    history = [["", None]] if history is None else history
    history[-1][1] = ""

    sentence_list = []
    # Hashes of sentences already yielded, used to avoid yielding one twice.
    sentence_hash_list = []
    text_to_generate = ""
    stored_sentence = None
    stored_sentence_hash = None

    for character in generate_llm_output(history[-1][0], history[:-1], llm):
        # Despite its name, `character` is the whole reply accumulated so far.
        history[-1][1] = character.replace("<|assistant|>","")
        # It is coming word by word
        # NOTE(review): .replace("","") is a no-op; a marker string may have
        # been lost from this chain - confirm against the original source.
        text_to_generate = nltk.sent_tokenize(history[-1][1].replace("\n", " ").replace("<|assistant|>"," ").replace("<|ass>","").replace("[/ASST]","").replace("[/ASSI]","").replace("[/ASS]","").replace("","").strip())
        if len(text_to_generate) > 1:
            # Number of tokenized sentences not yet yielded, including the one
            # still being generated.
            dif = len(text_to_generate) - len(sentence_list)

            if dif == 1 and len(sentence_list) != 0:
                continue

            if dif == 2 and len(sentence_list) != 0 and stored_sentence is not None:
                continue

            # All this complexity due to trying append first short sentence to next one for proper language auto-detect
            if stored_sentence is not None and stored_sentence_hash is None and dif>1:
                # The stored sentence was already consumed, so look at the next sentence to generate
                sentence = text_to_generate[len(sentence_list)+1]
            elif stored_sentence is not None and len(text_to_generate)>2 and stored_sentence_hash is not None:
                print("Appending stored")
                sentence = stored_sentence + text_to_generate[len(sentence_list)+1]
                # A cleared hash marks the stored sentence as consumed.
                stored_sentence_hash = None
            else:
                sentence = text_to_generate[len(sentence_list)]

            # too short sentence just append to next one if there is any
            # this is for proper language detection
            if len(sentence)<=15 and stored_sentence_hash is None and stored_sentence is None:
                if sentence[-1] in [".","!","?"]:
                    if stored_sentence_hash != hash(sentence):
                        stored_sentence = sentence
                        stored_sentence_hash = hash(sentence)
                        print("Storing:",stored_sentence)
                        continue

            sentence_hash = hash(sentence)
            if stored_sentence_hash is not None and sentence_hash == stored_sentence_hash:
                continue

            if sentence_hash not in sentence_hash_list:
                sentence_hash_list.append(sentence_hash)
                sentence_list.append(sentence)
                print("New Sentence: ", sentence)
                yield (sentence, history)

    # return that final sentence token
    try:
        # [-1] raises IndexError when the reply is empty.
        last_sentence = nltk.sent_tokenize(history[-1][1].replace("\n", " ").replace("<|ass>","").replace("[/ASST]","").replace("[/ASSI]","").replace("[/ASS]","").replace("","").strip())[-1]
        sentence_hash = hash(last_sentence)
        if sentence_hash not in sentence_hash_list:
            if stored_sentence is not None and stored_sentence_hash is not None:
                last_sentence = stored_sentence + last_sentence
                stored_sentence = stored_sentence_hash = None
                print("Last Sentence with stored:",last_sentence)
            sentence_hash_list.append(sentence_hash)
            sentence_list.append(last_sentence)
            print("Last Sentence: ", last_sentence)
            yield (last_sentence, history)
    except Exception:
        # Still best-effort, but narrowed from a bare `except:`. The try body
        # contains a yield, so a bare except also caught GeneratorExit (when
        # the consumer closes the generator) and KeyboardInterrupt, and
        # reported them as a last-sentence error.
        print("ERROR on last sentence history is :", history)