# -*- coding: utf-8 -*-
"""BioGPT-QA-PubMedQA-BioGPT-Large.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1grmKyUETABgEsC3hO7CKgnSG1JGBbmHV
"""


from transformers import AutoTokenizer, AutoModelForCausalLM
import gradio as gr
import torch
import re

tokenizer = AutoTokenizer.from_pretrained("microsoft/BioGPT-Large-PubMedQA")
model = AutoModelForCausalLM.from_pretrained("microsoft/BioGPT-Large-PubMedQA")

# Check if GPU is available and move model to GPU
if torch.cuda.is_available():
    device = torch.device("cuda")
    model.to(device)
    print("Using GPU:", torch.cuda.get_device_name(0))
else:
    device = torch.device("cpu")
    print("Using CPU")

def answer_bio_question(question, context):
  """Answers a biological question using BioGPT-QA-PubMedQA-BioGPT-Large.

  Args:
      question: The question to be answered.
      context: The context or passage containing the answer (concatenated with the question).

  Returns:
      The answer extracted from the context based on the question.
  """
  try:
    # Concatenate question and context with a separator, tokenize, and move
    # the tensors to the chosen device (GPU or CPU).
    inputs = tokenizer(question + " [SEP] " + context, return_tensors="pt").to(device)

    # Greedy decoding: single beam, no sampling.
    outputs = model.generate(
        **inputs,
        max_new_tokens=1024,
        num_beams=1,
        early_stopping=False,
        do_sample=False,
    )

    # Decode the generated sequence and pull out the text that follows
    # BioGPT's PubMedQA answer template.
    answer = tokenizer.decode(outputs[0], skip_special_tokens=True)
    print(answer)  # log the raw generation for debugging
    match = re.search(r"the answer to the question given the context is(.*)", answer)
    ans = match.group(1).strip() if match else "unknown"

    return "The answer to this question is " + ans
  except Exception as e:
    print(f"Error during question answering: {e}")
    return "An error occurred during question answering. Please try again."



# Build a simple two-field Gradio UI (question + context) around the QA function.
iface = gr.Interface(
    fn=answer_bio_question,
    inputs=[
        gr.Textbox(label="Question", lines=2),
        gr.Textbox(label="Context (Passage)", lines=5),
    ],
    outputs="textbox",
    title="BioGPT-QA: Answer Biological Questions",
)

# Launch the Gradio interface
iface.launch(debug=True, share=True)