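"""Streamlit app that searches the web via the Google Custom Search API,
summarizes the top results with a Hugging Face BART summarization pipeline,
and suggests related and trending topics."""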

import os

import streamlit as st
import requests
from transformers import pipeline
#import spacy

# Initialize the summarizer pipeline using Hugging Face Transformers.
# Cached so the BART model is loaded once rather than on every Streamlit rerun
# (st.cache_resource requires a recent Streamlit version).
@st.cache_resource
def load_summarizer():
    return pipeline("summarization", model="facebook/bart-large-cnn")

summarizer = load_summarizer()

# Load spaCy model
#nlp = spacy.load("en_core_web_sm")

# Function to perform a search using the Google Custom Search API
def perform_search(query):
    # Credentials are read from the environment instead of being hard-coded;
    # set GOOGLE_API_KEY and GOOGLE_CSE_ID (variable names chosen here) before running.
    api_key = os.environ.get("GOOGLE_API_KEY", "")
    cx = os.environ.get("GOOGLE_CSE_ID", "")
    search_url = "https://www.googleapis.com/customsearch/v1"
    # Passing params lets requests URL-encode the query safely
    response = requests.get(search_url, params={"q": query, "key": api_key, "cx": cx})
    response.raise_for_status()
    return response.json()
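
# Note: display_results() below relies on the "items" list of this JSON response
# and reads the "title", "link", and "snippet" fields of each item.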

# Function to summarize the overall combined content (longer summary)
def summarize_overall_content(content):
    if len(content) > 3000:  # Truncate so the input stays within the model's limit
        content = content[:3000]
    summary = summarizer(content, max_length=300, min_length=100, do_sample=False)  # Larger overall summary
    return summary[0]['summary_text']

# Function to summarize individual search results (kept shorter)
def summarize_individual_content(content):
    if len(content) > 1000:  # Use only the first 1,000 characters for brevity
        content = content[:1000]
    summary = summarizer(content, max_length=50, min_length=30, do_sample=False)  # Shorter per-result summary
    return summary[0]['summary_text']

# Function to rank search results based on custom criteria
def rank_sources(results):
    # For now, assume sources are ranked by default order from API
    return results

# Function to extract related topics (the spaCy-based extraction below is currently disabled)
def extract_related_topics(query_list):
    #combined_query = " ".join(query_list)
    #doc = nlp(combined_query)
    
    # Extract keywords or named entities
    #keywords = [token.text for token in doc if token.is_alpha and not token.is_stop]
    #entities = [ent.text for ent in doc.ents]
    
    # Combine and deduplicate keywords and entities
    #related_topics = list(set(keywords + entities))
    #related_topics.insert(0,"Deep Learning")
    return ["Machine", "AI", "GenAI"]  # Static placeholder limited to 3 related topics

# Function to display search results and summaries
def display_results(query):
    st.write(f"Searching for: {query}")
    
    # Perform search and get results
    search_results = perform_search(query)
    
    # Extract relevant items from search results
    if 'items' in search_results:
        ranked_results = rank_sources(search_results['items'])
        ranked_results = ranked_results[:3]  # Keep only the top 3 results
        
        # Overall summary (bigger)
        st.write("### Overall Summary:")
        combined_content = " ".join([item['snippet'] for item in ranked_results])
        overall_summary = summarize_overall_content(combined_content)  # Use larger summary function
        st.write(overall_summary)
        
        # Individual results (shorter)
        st.write("### Individual Results:")
        for item in ranked_results:
            st.write(f"**[{item['title']}]({item['link']})**")
            st.write(summarize_individual_content(item['snippet']))  # Use shorter summary function
            st.write("---")
    else:
        st.write("No results found.")

# Main Streamlit App UI
st.title("AI-Powered Information Retrieval and Summarization")

# Initialize query list to store search queries
if 'querylist' not in st.session_state:
    st.session_state.querylist = []

# Search input by user
query = st.text_input("Enter your search query:")

# If query is provided, display results and update query list
if query:
    st.session_state.querylist.append(query)
    display_results(query)
    
    # Generate related topics based on query list
    related_topics = extract_related_topics(st.session_state.querylist)
    
    st.write("### Related Topics:")
    for topic in related_topics:
        st.write(f"- **[{topic}]({requests.utils.requote_uri(f'https://www.google.com/search?q={topic}')})**")

# Trending Topics Section with clickable buttons
st.sidebar.title("Trending Topics")
trending_topics = ["AI", "Machine Learning", "Sustainability", "Technology Trends"]
for idx, topic in enumerate(trending_topics):
    if st.sidebar.button(topic, key=f'topic_button_{idx}'):
        query = topic  # Search for this topic when clicked
        display_results(query)

# Feedback Section (visible after results)
if query:
    st.write("### Feedback")
    feedback = st.radio("Was this summary helpful?", ["Yes", "No"])
    if feedback == "Yes":
        st.write("Thank you for your feedback!")
    else:
        st.write("We will try to improve!")
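
# To run locally (assuming this file is saved as app.py and the environment
# variables above are set):
#   streamlit run app.py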