import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Initialize the model and tokenizer
device = "cuda:0" if torch.cuda.is_available() else "cpu"
model = AutoModelForCausalLM.from_pretrained("goendalf666/salesGPT_v2", trust_remote_code=True).to(device)
tokenizer = AutoTokenizer.from_pretrained("microsoft/phi-1_5")

def interact_with_model(user_input):
    # Construct the conversation prompt for the model
    conversation_text = (
        "You are in the role of a Salesman. "
        "Here is a conversation: "
        f"Customer: {user_input} Salesman: "
    )

    # Tokenize the prompt and move it to the model's device
    inputs = tokenizer(conversation_text, return_tensors="pt").to(device)

    # Generate a response (max_length caps prompt + generated tokens at 512)
    outputs = model.generate(**inputs, max_length=512)
    response_text = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]

    # Keep only the newly generated text after the prompt
    new_text_start = len(conversation_text)
    new_generated_text = response_text[new_text_start:].strip()

    # If the model starts a new "Customer:" turn, truncate the text there
    end_index = new_generated_text.find("Customer:")
    if end_index != -1:
        new_generated_text = new_generated_text[:end_index].strip()

    # Drop a leading "Salesman:" label if the model repeats it
    if new_generated_text.startswith("Salesman:"):
        new_generated_text = new_generated_text[len("Salesman:"):].strip()

    # Return the model's response
    return new_generated_text

# Create the Gradio interface and launch it
iface = gr.Interface(fn=interact_with_model, inputs="text", outputs="text")
iface.launch()
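
# Optional smoke test: a minimal sketch of calling the handler directly,
# without the Gradio UI. The customer message below is a made-up example.
# Note that iface.launch() above blocks, so to try this you would run it
# in place of (or before) the launch() call:
#
#     print(interact_with_model("Hi, I'm looking for a new laptop."))
#
# When the script runs on a remote machine, launch(share=True) can be used
# instead to expose a temporary public URL for the interface.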