"""Gradio demo that serves text generation from a Hugging Face causal LM."""

import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Hub identifier of the checkpoint used for both the tokenizer and the model.
MODEL_ID = "TuringsSolutions/Gemma2LegalEdition"

# Load tokenizer and model.
# NOTE(security): trust_remote_code=True lets the Hub repository execute its
# own Python code at load time. Kept as in the original; confirm the repo is
# trusted (or drop the flag if the architecture is supported natively).
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, trust_remote_code=True)

# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)


def predict(prompt: str, temperature: float, max_tokens: int) -> str:
    """Generate a completion for *prompt* and return the decoded text.

    Args:
        prompt: Text typed by the user.
        temperature: Sampling temperature taken from the UI slider.
        max_tokens: Upper bound on the number of newly generated tokens.

    Returns:
        The first generated sequence decoded with special tokens removed.
        The whole output sequence is decoded, not only the new tokens.
    """
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    outputs = model.generate(
        **inputs,
        # The UI value may arrive as a float; generate() needs an integer.
        max_new_tokens=int(max_tokens),
        # Without do_sample=True, generate() decodes greedily by default and
        # the temperature setting has no effect on the output.
        do_sample=True,
        temperature=temperature,
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)


# Create Gradio interface.
# NOTE(review): the title says "Phi3" while MODEL_ID is a Gemma2 checkpoint;
# confirm which name is intended before changing the user-facing string.
iface = gr.Interface(
    fn=predict,
    inputs=[
        gr.Textbox(lines=2, placeholder="Enter your prompt here..."),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.7, label="Temperature"),
        gr.Slider(
            minimum=10,
            maximum=200,
            value=50,
            step=10,
            label="Number of Output Tokens",
        ),
    ],
    outputs="text",
    title="Phi3 Law Case Management Model",
    description=(
        "A model to assist with law case management. "
        "Adjust the temperature and number of output tokens as needed."
    ),
)

# Launch the Gradio app.
iface.launch()