import streamlit as st from transformers import pipeline import requests from bs4 import BeautifulSoup SCITE_API_KEY = st.secrets["SCITE_API_KEY"] def remove_html(x): soup = BeautifulSoup(x, 'html.parser') text = soup.get_text() return text def search(term, limit=10): search = f"https://api.scite.ai/search?mode=citations&term={term}&limit={limit}&offset=0&user_slug=domenic-rosati-keW5&compute_aggregations=false" req = requests.get( search, headers={ 'Authorization': f'Bearer {SCITE_API_KEY}' } ) return ( remove_html('\n'.join(['\n'.join([cite['snippet'] for cite in doc['citations']]) for doc in req.json()['hits']])), [(doc['doi'], doc['citations'], doc['title']) for doc in req.json()['hits']] ) def find_source(text, docs): for doc in docs: if text in remove_html(doc[1][0]['snippet']): new_text = text for snip in remove_html(doc[1][0]['snippet']).split('.'): if text in snip: new_text = snip return { 'citation_statement': doc[1][0]['snippet'].replace('', '').replace('', ''), 'text': new_text, 'from': doc[1][0]['source'], 'supporting': doc[1][0]['target'], 'source_title': doc[2], 'source_link': f"https://scite.ai/reports/{doc[0]}" } return { 'citation_statement': '', 'text': text, 'from': '', 'supporting': '' } @st.experimental_singleton def init_models(): question_answerer = pipeline("question-answering", model='sultan/BioM-ELECTRA-Large-SQuAD2-BioASQ8B') return question_answerer qa_model = init_models() def card(title, context, score, link): return st.markdown(f"""

{context} [Score: {score}]
From {title}

""", unsafe_allow_html=True) st.title("Scientific Question Answering with Citations") st.write(""" Ask a scientific question and get an answer drawn from [scite.ai](https://scite.ai) corpus of over 1.1bn citation statements. Answers are linked to source documents containing citations where users can explore further evidence from scientific literature for the answer. """) st.markdown(""" """, unsafe_allow_html=True) def run_query(query): context, orig_docs = search(query) if not context.strip(): return st.markdown("""

Sorry... no results for that question! Try another.

""") results = [] model_results = qa_model(question=query, context=context, top_k=10) for result in model_results: support = find_source(result['answer'], orig_docs) results.append({ "answer": support['text'], "title": support['source_title'], "link": support['source_link'], "context": support['citation_statement'], "score": result['score'] }) sorted_result = sorted(results, key=lambda x: x['score'], reverse=True) sorted_result = list({ result['context']: result for result in sorted_result }.values()) for r in sorted_result: answer = r["answer"] ctx = r["context"].replace(answer, f"{answer}").replace('