Spaces:
Sleeping
Sleeping
import streamlit as st | |
import pandas as pd | |
import re | |
import os | |
import base64 | |
from transformers import AutoTokenizer, AutoModelForSequenceClassification | |
import torch | |
import math | |
# Realistic placeholder corpus (includes an Abstract field).
# Each entry below describes one paper; the records are pivoted into
# column lists and repeated so the resulting table has 100 rows.
_SAMPLE_PAPERS = [
    {
        "Title": "The impact of climate change on biodiversity",
        "Authors": "Smith, J.; Doe, J.; Brown, M.",
        "Year": 2020,
        "Keywords": "climate change, biodiversity, ecosystems",
        "Subject_Area": "Environmental Science",
        "Journal": "Nature",
        "Is_Open_Access": True,
        "Abstract": "This study analyzes the impact of climate change on biodiversity and ecosystem health...",
    },
    {
        "Title": "Deep learning algorithms for image classification",
        "Authors": "Garcia, L.; Johnson, N.; Patel, K.",
        "Year": 2019,
        "Keywords": "deep learning, image classification, convolutional neural networks",
        "Subject_Area": "Computer Science",
        "Journal": "IEEE Transactions on Pattern Analysis and Machine Intelligence",
        "Is_Open_Access": False,
        "Abstract": "We present novel deep learning algorithms for image classification using convolutional neural networks...",
    },
    {
        "Title": "Quantum computing and its applications in cryptography",
        "Authors": "Kim, D.; Taylor, R.; Yamamoto, Y.",
        "Year": 2018,
        "Keywords": "quantum computing, cryptography, Shor's algorithm",
        "Subject_Area": "Physics",
        "Journal": "Physical Review Letters",
        "Is_Open_Access": True,
        "Abstract": "Quantum computing has the potential to revolutionize cryptography, and in this paper, we discuss...",
    },
    {
        "Title": "Machine learning approaches for natural language processing",
        "Authors": "Roberts, A.; Jackson, T.; Davis, M.",
        "Year": 2021,
        "Keywords": "machine learning, natural language processing, text analysis",
        "Subject_Area": "Computer Science",
        "Journal": "Journal of Machine Learning Research",
        "Is_Open_Access": False,
        "Abstract": "Natural language processing is a growing field in machine learning, and in this review, we explore...",
    },
    {
        "Title": "Modeling the effects of climate change on agricultural production",
        "Authors": "Turner, B.; Adams, C.; Evans, D.",
        "Year": 2019,
        "Keywords": "climate change, agriculture, crop modeling",
        "Subject_Area": "Environmental Science",
        "Journal": "Agricultural Systems",
        "Is_Open_Access": True,
        "Abstract": "Climate change poses significant challenges to agriculture, and this paper investigates...",
    },
    {
        "Title": "Graph neural networks for social network analysis",
        "Authors": "Baker, E.; Stewart, F.; Roberts, G.",
        "Year": 2020,
        "Keywords": "graph neural networks, social network analysis, machine learning",
        "Subject_Area": "Computer Science",
        "Journal": "IEEE Transactions on Neural Networks and Learning Systems",
        "Is_Open_Access": False,
        "Abstract": "Graph neural networks have gained popularity in recent years for their ability to model complex...",
    },
    {
        "Title": "Biodiversity conservation strategies in the face of climate change",
        "Authors": "Nelson, H.; Mitchell, I.; Cooper, J.",
        "Year": 2018,
        "Keywords": "biodiversity conservation, climate change, environmental management",
        "Subject_Area": "Environmental Science",
        "Journal": "Conservation Biology",
        "Is_Open_Access": True,
        "Abstract": "Biodiversity conservation is crucial in the face of climate change, and this study outlines...",
    },
    {
        "Title": "Exploring the potential of quantum computing in drug discovery",
        "Authors": "Parker, K.; Lewis, L.; Jenkins, M.",
        "Year": 2021,
        "Keywords": "quantum computing, drug discovery, computational chemistry",
        "Subject_Area": "Physics",
        "Journal": "Journal of Chemical Information and Modeling",
        "Is_Open_Access": False,
        "Abstract": "Quantum computing offers new opportunities for drug discovery, and in this paper, we analyze...",
    },
    {
        "Title": "A survey of reinforcement learning algorithms and applications",
        "Authors": "Edwards, N.; Harrison, O.; Simmons, P.",
        "Year": 2019,
        "Keywords": "reinforcement learning, algorithms, applications",
        "Subject_Area": "Computer Science",
        "Journal": "Neural Computing and Applications",
        "Is_Open_Access": True,
        "Abstract": "Reinforcement learning is a powerful machine learning paradigm, and in this survey, we...",
    },
    {
        "Title": "The role of artificial intelligence in combating climate change",
        "Authors": "Fisher, Q.; Grant, R.; Turner, S.",
        "Year": 2020,
        "Keywords": "artificial intelligence, climate change, mitigation strategies",
        "Subject_Area": "Environmental Science",
        "Journal": "Science",
        "Is_Open_Access": False,
        "Abstract": "Artificial intelligence has the potential to help combat climate change by providing new...",
    },
]

# Number of times the ten sample records are repeated in every column.
_SAMPLE_REPEATS = 10

# Column-oriented view: one list per field, in the field order of the records.
data = {
    column: [paper[column] for paper in _SAMPLE_PAPERS] * _SAMPLE_REPEATS
    for column in _SAMPLE_PAPERS[0]
}
def rank_results(query, filtered_papers):
    """Score every paper's abstract against ``query`` and sort best-first.

    Each (query, abstract) pair is passed through the module-level
    ``tokenizer`` and ``model``; the resulting logit is stored in a new
    'Similarity Score' column.

    Args:
        query: The user's search text.
        filtered_papers: DataFrame with an 'Abstract' column.

    Returns:
        A new DataFrame (the input is left untouched) with a
        'Similarity Score' column, sorted by that column in descending
        order. An empty input yields an empty frame that still carries
        the score column.
    """
    # Work on a copy: the caller may pass the master table or a slice of it,
    # and adding a column in place would leak into later searches.
    ranked_papers = filtered_papers.copy()

    # The tokenizer cannot batch zero pairs, so short-circuit empty input.
    if ranked_papers.empty:
        ranked_papers['Similarity Score'] = pd.Series(
            index=ranked_papers.index, dtype="float64"
        )
        return ranked_papers

    # Pair the same query with every abstract (cross-encoder style input).
    abstracts = ranked_papers['Abstract'].astype(str).tolist()
    features = tokenizer(
        [query] * len(abstracts),
        abstracts,
        padding=True,
        truncation=True,
        return_tensors="pt",
    )
    with torch.no_grad():
        logits = model(**features).logits

    # The logits come back as one column per label; drop the trailing
    # singleton axis so a flat vector is stored as the score column.
    ranked_papers['Similarity Score'] = logits.squeeze(-1).cpu().numpy()
    return ranked_papers.sort_values(by='Similarity Score', ascending=False)
# Build an HTML anchor that lets the browser download a local PDF file
def generate_pdf_link(pdf_file_path, link_text):
    """Return an ``<a>`` tag embedding the file at ``pdf_file_path`` as base64.

    The file is read in binary mode and inlined into a ``data:`` URL; the
    ``download`` attribute is set to the file's base name and ``link_text``
    becomes the visible anchor text.
    """
    with open(pdf_file_path, "rb") as pdf_file:
        encoded_pdf = base64.b64encode(pdf_file.read()).decode()
    download_name = os.path.basename(pdf_file_path)
    return (
        f'<a href="data:application/octet-stream;base64,{encoded_pdf}" '
        f'download="{download_name}">{link_text}</a>'
    )
# Narrow the paper table according to the sidebar filters
def filter_papers(papers, year_range, is_open_access, abstract_query):
    """Return the rows of ``papers`` that satisfy the given filters.

    Args:
        papers: DataFrame with 'Year' and 'Is_Open_Access' columns.
        year_range: ``(first, last)`` inclusive year bounds; a falsy value
            disables the year filter.
        is_open_access: ``True``/``False`` to keep only matching rows, or
            ``None`` to keep both.
        abstract_query: Accepted for the caller's convenience; it is not
            used by this function.

    Returns:
        The filtered DataFrame (``papers`` itself when no filter applies).
    """
    selection = papers

    if year_range:
        first_year, last_year = year_range[0], year_range[1]
        within_range = (selection['Year'] >= first_year) & (selection['Year'] <= last_year)
        selection = selection[within_range]

    if is_open_access is None:
        return selection

    return selection[selection['Is_Open_Access'] == is_open_access]
# Function to perform complex boolean search
def complex_boolean_search(text, query):
    """Evaluate a boolean search ``query`` against ``text``.

    Query syntax:
        * bare words and double-quoted phrases are matched as
          case-insensitive substrings of ``text``;
        * the upper-case words ``AND``, ``OR`` and ``NOT`` are operators
          (lower-case "and"/"or"/"not" are ordinary search terms);
        * adjacent terms without an operator are combined with AND;
        * parentheses group sub-expressions;
        * precedence is NOT, then AND, then OR.

    Args:
        text: The text to search in (``None`` is treated as empty).
        query: The boolean query string.

    Returns:
        ``True`` when ``text`` satisfies the query. An empty query matches
        everything; a malformed query (dangling operator, unbalanced
        parenthesis) returns ``False`` instead of raising.
    """
    # Split into parentheses, quoted phrases and bare words.
    tokens = re.findall(r'\(|\)|"[^"]*"|[^\s()"]+', query)
    if not tokens:
        return True

    operators = ("AND", "OR", "NOT")
    haystack = (text or "").casefold()
    position = 0

    def peek():
        return tokens[position] if position < len(tokens) else None

    def parse_or():
        nonlocal position
        result = parse_and()
        while peek() == "OR":
            position += 1
            right = parse_and()
            result = result or right
        return result

    def parse_and():
        nonlocal position
        result = parse_not()
        while True:
            upcoming = peek()
            if upcoming == "AND":
                position += 1
            elif upcoming is None or upcoming in ("OR", ")"):
                break
            # Either an explicit AND was consumed or two operands are
            # adjacent, which is an implicit AND.
            right = parse_not()
            result = result and right
        return result

    def parse_not():
        nonlocal position
        if peek() == "NOT":
            position += 1
            return not parse_not()
        return parse_operand()

    def parse_operand():
        nonlocal position
        token = peek()
        if token is None or token == ")" or token in operators:
            raise ValueError("operand expected")
        position += 1
        if token == "(":
            inner = parse_or()
            if peek() != ")":
                raise ValueError("unbalanced parenthesis")
            position += 1
            return inner
        return token.strip('"').casefold() in haystack

    try:
        matched = parse_or()
    except (ValueError, RecursionError):
        return False
    # Leftover tokens (e.g. a stray closing parenthesis) mean a bad query.
    if position != len(tokens):
        return False
    return matched
papers_df = pd.DataFrame(data)

# Checkpoint used for both the cross-encoder model and its tokenizer.
_CROSS_ENCODER_NAME = 'cross-encoder/ms-marco-MiniLM-L-6-v2'

# Streamlit re-executes this script from scratch on every interaction, so a
# `"model" not in locals()` guard never prevents a reload. Use Streamlit's
# resource cache instead; releases that predate `st.cache_resource` expose
# the same behaviour as `st.experimental_singleton`.
_cache_resource = getattr(st, "cache_resource", None) or st.experimental_singleton


# show_spinner=False keeps the loader from emitting UI elements before the
# page configuration call further down the script.
@_cache_resource(show_spinner=False)
def _load_cross_encoder():
    """Load the ranking model (in eval mode) and its tokenizer once per process."""
    cross_encoder = AutoModelForSequenceClassification.from_pretrained(_CROSS_ENCODER_NAME)
    cross_encoder.eval()
    cross_encoder_tokenizer = AutoTokenizer.from_pretrained(_CROSS_ENCODER_NAME)
    return cross_encoder, cross_encoder_tokenizer


# Module-level names read by rank_results().
model, tokenizer = _load_cross_encoder()
# Streamlit interface
st.set_page_config(page_title="Scientific Article Search", layout="wide")

# Hide Streamlit's built-in main menu.
hide_menu_style = """
<style>
#MainMenu {visibility: hidden;}
</style>
"""
st.markdown(hide_menu_style, unsafe_allow_html=True)

# Add custom CSS to scale the sidebar.
# This must be an f-string: as a plain string the `{scale}` placeholder and
# the doubled braces were sent to the browser literally, producing invalid CSS.
scale = 0.4
custom_css = f"""
<style>
.filterbar .sidebar-content {{
transform: scale({scale});
transform-origin: top left;
}}
</style>"""
st.markdown(custom_css, unsafe_allow_html=True)

# Defaults used by the search and results sections below.
page = 1
per_page = 10
title = ""
filtered_papers = papers_df
# Sidebar for filters
with st.sidebar:
    st.header("Filters")
    search_query = st.text_input("Query")
    so = st.multiselect(
        label='Search Over',
        options=['Abstract', 'Everything', 'Authors'],
        default=['Everything'],
        help='Search and select multiple options from the dropdown menu')
    # This widget previously reused the 'Search Over' label of the one above.
    sites = st.multiselect(
        label='Search Sites',
        options=['OpenAlex', 'Google Scholar', 'Base Search', 'All Sites'],
        default=['All Sites'],
        help='Search and select multiple options from the dropdown menu')
    year_range = st.slider("Year Range", min_value=1900, max_value=2022, value=(1990, 2022), step=1)
    open_access_choice = st.multiselect(
        label='Open Access',
        options=["All", "Yes", "No"],
        default=["All"],
        help='Search and select multiple options from the dropdown menu')
    # Filter button
    search_clicked = st.button("Search")

# Convert the Open Access selection to boolean or None.
# st.multiselect returns a list, so comparing it to "Yes"/"No" directly never
# matched. Only an unambiguous single choice restricts the results; "All",
# both values, or nothing selected means no restriction.
chosen_access = set(open_access_choice)
if chosen_access == {"Yes"}:
    is_open_access = True
elif chosen_access == {"No"}:
    is_open_access = False
else:
    is_open_access = None

# st.button is True only during the rerun triggered by the click, so remember
# that a search was submitted; otherwise any later interaction would silently
# drop the filters again.
if search_clicked:
    st.session_state["search_submitted"] = True

if st.session_state.get("search_submitted", False):
    filtered_papers = filter_papers(papers_df, year_range, is_open_access, search_query)
else:
    filtered_papers = papers_df  # No search yet: show the full table

# Rank only when there is something to rank and something to rank against.
# A copy is passed so that scoring can never add columns to papers_df.
if search_query.strip() and not filtered_papers.empty:
    filtered_papers = rank_results(search_query, filtered_papers.copy())
def _go_to_page(target_page):
    """Button callback: store the results page to show on the next rerun."""
    st.session_state["page"] = target_page


if not filtered_papers.empty:
    # Pagination
    # "per_page" is the key of the selectbox rendered below the results.
    # Streamlit stores a keyed widget's value in session_state before the
    # script reruns, so the current choice can be read here even though the
    # widget itself is drawn later.
    per_page = st.session_state.get("per_page", 10)
    no_pages = math.ceil(len(filtered_papers) / per_page)
    # Clamp the remembered page: the result set or page size may have shrunk.
    page = min(max(st.session_state.get("page", 1), 1), no_pages)

    # Column headers
    column_widths = [5, 5, 2, 3]
    title_col, authors_col, year_col, journal_col = st.columns(column_widths)
    with title_col:
        st.subheader("Title")
    with authors_col:
        st.subheader("Authors")
    with year_col:
        st.subheader("Year")
    with journal_col:
        st.subheader("Journal")

    # Display paginated results
    start_idx = (page - 1) * per_page
    end_idx = start_idx + per_page
    paginated_papers = filtered_papers.iloc[start_idx:end_idx]
    for _, paper in paginated_papers.iterrows():
        st.write("---")
        title_col, authors_col, year_col, journal_col = st.columns(column_widths)
        with title_col:
            st.write(f"{paper['Title']}")
        with authors_col:
            st.write(f"{paper['Authors']}")
        with year_col:
            st.write(f"{paper['Year']}")
        with journal_col:
            st.write(f"{paper['Journal']}")
        with st.expander("Abstract"):
            st.write(f"{paper['Abstract']}")
        # TODO: offer a per-paper PDF link via generate_pdf_link() once each
        # paper has an associated PDF file path.
    st.write("---")

    # Page-size selector and pagination buttons
    st.selectbox("Results per page", [10, 20, 30], key="per_page")
    if no_pages > 1:
        previous_col, current_col, next_col = st.columns(3)
        with previous_col:
            if page > 1:
                st.button(str(page - 1), key="previous_page",
                          on_click=_go_to_page, args=(page - 1,))
        with current_col:
            st.write(f"Page {page} of {no_pages}")
        with next_col:
            if page < no_pages:
                st.button(str(page + 1), key="next_page",
                          on_click=_go_to_page, args=(page + 1,))
else:
    st.header("No papers found.")