intervention-demo-finetuned

Sleeping

File size: 1,986 Bytes

e3dce0b
 
 
 
6869534
e3dce0b
 
 
 
aaae591
6869534
e3dce0b
6869534
e3dce0b
 
 
 
 
 
 
 
599d40b
 
 
 
0f4c433
e3dce0b
7eb73dd
e3dce0b

import os
import gradio as gr
import torch
import spaces
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

# The demo needs CUDA: fail at import time when no GPU is visible
if not torch.cuda.is_available():
    raise EnvironmentError("CUDA device not available. Please run on a GPU-enabled environment.")

# Base checkpoint and the PEFT adapter that is applied on top of it
model_id = "allenai/OLMo-7B-hf"
adapters_name = "yilunzhao/olmo-finetuned"

# Load the base causal LM first, then wrap it with the adapter weights
model = PeftModel.from_pretrained(
    AutoModelForCausalLM.from_pretrained(
        model_id,
        device_map="auto",
        trust_remote_code=True,
    ),
    adapters_name,
)
# The tokenizer comes from the base checkpoint, not from the adapter repo
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)

# Generate the model's answer to a question about a passage
@spaces.GPU
def generate_response(passage: str, question: str) -> str:
    """Answer ``question`` about ``passage`` using the module-level model.

    The passage and question are combined into a single user chat turn,
    rendered through the tokenizer's chat template with a generation prompt,
    and at most 100 new tokens are generated.

    Args:
        passage: Text the question refers to.
        question: Question to ask about the passage.

    Returns:
        The newly generated text only (the prompt is not echoed back),
        decoded without special tokens and stripped of surrounding whitespace.
    """
    chat = [{"role": "user", "content": f"Passage: {passage}\nQuestion: {question}"}]
    prompt = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)

    # Encode the rendered template as-is (no extra special tokens) and ask for
    # the attention mask explicitly so generate() does not have to guess it.
    encoded = tokenizer(
        prompt,
        add_special_tokens=False,
        return_attention_mask=True,
        return_tensors="pt",
    )
    input_ids = encoded["input_ids"].to(model.device)
    attention_mask = encoded["attention_mask"].to(model.device)

    output_ids = model.generate(
        input_ids=input_ids,
        attention_mask=attention_mask,
        max_new_tokens=100,
    )

    # For a causal LM, generate() returns the prompt followed by the
    # continuation. Slicing by prompt length isolates the answer without
    # depending on a literal "<|assistant|>" marker surviving decoding.
    new_tokens = output_ids[0, input_ids.shape[-1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()


# Gradio UI: two text inputs, one output box, and a button that runs the model
with gr.Blocks() as demo:
    gr.Markdown("# Passage and Question Response Generator")

    # Inputs
    passage_input = gr.Textbox(
        label="Passage",
        placeholder="Enter the passage here",
        lines=5,
    )
    question_input = gr.Textbox(
        label="Question",
        placeholder="Enter the question here",
        lines=2,
    )

    # Output
    output_box = gr.Textbox(
        label="Response",
        placeholder="Model's response will appear here",
    )

    # Clicking the button sends both inputs to generate_response and shows
    # its return value in the output box
    submit_button = gr.Button("Generate Response")
    submit_button.click(
        fn=generate_response,
        inputs=[passage_input, question_input],
        outputs=output_box,
    )

# Start the app only when this file is executed as a script
if __name__ == "__main__":
    demo.launch()