import gradio as gr
from huggingface_hub import InferenceClient

# Models that should run efficiently on Hugging Face inference hardware.
# Note: only chat/instruct-tuned models (e.g. Zephyr-7B Beta) reliably support
# the chat_completion endpoint; base models such as GPT-2, BART, and T5 may
# reject chat-formatted requests.
MODEL_CHOICES = {
    "GPT-Neo 1.3B": "EleutherAI/gpt-neo-1.3B",         # Small, efficient, and creative
    "GPT-Neo 2.7B": "EleutherAI/gpt-neo-2.7B",         # A bit larger but manageable
    "GPT-J 6B": "EleutherAI/gpt-j-6B",                 # Larger but still reasonable
    "DialoGPT Medium": "microsoft/DialoGPT-medium",    # Fine-tuned for conversations
    "DialoGPT Large": "microsoft/DialoGPT-large",      # Fine-tuned for conversations
    "GPT-2": "gpt2",                                   # Lightweight and fast
    "GPT-2 Medium": "gpt2-medium",                     # Lightweight, slightly larger
    "Zephyr-7B Beta": "HuggingFaceH4/zephyr-7b-beta",  # Large, high-quality chat model
    "BART Base": "facebook/bart-base",                 # Text generation and summarization
    "T5 Small": "t5-small",                            # Fast and efficient
}


def get_model(model_name: str) -> InferenceClient:
    """Return an InferenceClient for the chosen model, falling back to the default."""
    return InferenceClient(MODEL_CHOICES.get(model_name, "EleutherAI/gpt-neo-1.3B"))


def filter_response(response: str) -> str:
    """Replace responses containing inappropriate keywords with a neutral message."""
    inappropriate_keywords = [
        "love you", "cute", "i love u", "sweetheart", "baby", "porn",
        "flirt", "kiss", "hug", "freaky", "relationship", "personal",
    ]
    lowered = response.lower()
    for word in inappropriate_keywords:
        if word in lowered:
            return "I'm here to help with your questions in a professional manner."
    return response


def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
    model_name,
):
    # Re-create the client per request so the dropdown selection takes effect
    # without mutating shared global state across concurrent users.
    client = get_model(model_name)

    messages = [{"role": "system", "content": system_message}]
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})

    response = ""
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        token = chunk.choices[0].delta.content
        if token:  # The final streamed chunk may carry no content
            response += token
        # Filter the streamed text without overwriting the accumulator, so a
        # triggered filter message isn't appended to by later tokens.
        yield filter_response(response)


# Dropdown for choosing the backing model
model_dropdown = gr.Dropdown(
    choices=list(MODEL_CHOICES.keys()),
    value="GPT-Neo 1.3B",  # Default model
    label="Choose Model",
)

demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(
            value="You are a helpful assistant and should remain professional and polite at all times.",
            label="System message",
        ),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
        model_dropdown,  # Include model selection
    ],
)

if __name__ == "__main__":
    demo.launch()