import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# Replace with your model name
# MODEL_NAME = "ssirikon/Gemma7b-bnb-Unsloth"
# MODEL_NAME = "unsloth/gemma-7b-bnb-4bit"
MODEL_NAME = "unsloth/mistral-7b-bnb-4bit"

# Load the model and tokenizer. This checkpoint ships pre-quantized to 4-bit
# (bitsandbytes), so no extra quantization arguments are needed at load time.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map="auto",
    torch_dtype=torch.float16,
)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# Create a text-generation pipeline. A causal LM needs the "text-generation"
# task; the "summarization" task expects an encoder-decoder model and returns
# "summary_text" rather than "generated_text".
generator = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=50,  # Adjust as needed
    do_sample=True,
    top_k=10,
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id,
    return_full_text=False,  # Return only the generated subject, not the prompt
)


def generate_text(email):
    prompt = "Generate a subject line for the following email.\n" + email
    result = generator(prompt)
    return result[0]["generated_text"]


# Create a Gradio interface
demo = gr.Interface(
    fn=generate_text,
    inputs=gr.Textbox(lines=5, label="Enter your Email here:"),
    outputs=gr.Textbox(label="Generated Subject"),
    title="Email Subject Generation demo",
    description="Enter an email and let the model generate the subject for you!",
)

demo.launch(debug=True)
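

# --- Alternative loading path (illustrative sketch) ---
# The unsloth checkpoint above is already 4-bit quantized. If you instead start
# from a full-precision checkpoint, transformers can quantize at load time via
# BitsAndBytesConfig. This is a minimal sketch, assuming bitsandbytes is
# installed; the base model name below is an example placeholder, not something
# this demo requires. The helper is defined for reference and never called.
def load_with_explicit_4bit_config(base_model_name="mistralai/Mistral-7B-v0.1"):
    from transformers import BitsAndBytesConfig

    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,                     # quantize weights to 4-bit at load time
        bnb_4bit_compute_dtype=torch.float16,  # run compute in fp16
        bnb_4bit_quant_type="nf4",             # NormalFloat4 quantization
    )
    return AutoModelForCausalLM.from_pretrained(
        base_model_name,
        device_map="auto",
        quantization_config=bnb_config,
    )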