intervention-demo-finetuned

Sleeping

File size: 1,885 Bytes

e3dce0b
 
 
 
6869534
e3dce0b
 
 
 
aaae591
6869534
e3dce0b
6869534
e3dce0b
 
 
 
 
 
 
 
7f523d7
bf2f5f2
599d40b
bf2f5f2
e3dce0b
bf2f5f2
 
1f5730a
e3dce0b

import os
import gradio as gr
import torch
import spaces
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

# The demo only works on a GPU host, so bail out immediately when CUDA is missing
# instead of attempting to load the checkpoint.
if not torch.cuda.is_available():
    raise EnvironmentError("CUDA device not available. Please run on a GPU-enabled environment.")

# Hub identifiers: the base checkpoint and the PEFT adapter applied on top of it.
model_id = "allenai/OLMo-7B-hf"
adapters_name = "yilunzhao/olmo-finetuned"

# Load the base causal LM (device placement is delegated to device_map="auto"),
# then wrap it with the fine-tuned adapter weights.
model = PeftModel.from_pretrained(
    AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", trust_remote_code=True),
    adapters_name,
)

# The tokenizer comes from the base checkpoint, not from the adapter repo.
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)

# Basic function to generate response based on passage and question
@spaces.GPU
def generate_response(passage: str, question: str) -> str:
    """Generate a one-line answer to ``question`` given ``passage``.

    The passage and question are formatted into a single
    ``Passage: ... / Question: ... / Answer:`` prompt, the model generates up to
    100 new tokens, and only the first line of the continuation is returned.

    Args:
        passage: Context text the answer should be based on.
        question: Question to ask about the passage.

    Returns:
        The first line of the model's continuation, stripped of surrounding
        whitespace. May be an empty string if the model produces nothing
        but whitespace or special tokens.
    """
    # Prepare the input text by combining the passage and question
    prompt = f"Passage: {passage}\nQuestion: {question}\nAnswer:"
    inputs = tokenizer([prompt], return_tensors='pt', return_token_type_ids=False).to('cuda')

    output_ids = model.generate(**inputs, max_new_tokens=100)

    # The generated sequence starts with the prompt tokens. Drop them by token
    # count rather than by slicing the decoded string with len(prompt): decoding
    # (special-token skipping, tokenization-space clean-up) is not guaranteed to
    # reproduce the prompt character-for-character, which would shift the cut.
    prompt_length = inputs["input_ids"].shape[1]
    completion = tokenizer.batch_decode(
        output_ids[:, prompt_length:], skip_special_tokens=True
    )[0]

    # Keep only the first line; the model tends to keep generating past the answer.
    return completion.strip().split('\n')[0]


# Gradio Interface: two input boxes, one output box, and a button that wires
# them to generate_response.
with gr.Blocks() as demo:
    gr.Markdown("# Passage and Question Response Generator")

    # Inputs (components render in the order they are created).
    passage_input = gr.Textbox(
        label="Passage",
        placeholder="Enter the passage here",
        lines=5,
    )
    question_input = gr.Textbox(
        label="Question",
        placeholder="Enter the question here",
        lines=2,
    )

    # Output
    output_box = gr.Textbox(
        label="Response",
        placeholder="Model's response will appear here",
    )

    # Trigger: clicking sends (passage, question) to the model and shows the answer.
    submit_button = gr.Button("Generate Response")
    submit_button.click(
        fn=generate_response,
        inputs=[passage_input, question_input],
        outputs=output_box,
    )

# Start the web server only when executed as a script, not when imported.
if __name__ == "__main__":
    demo.launch()