File size: 2,316 Bytes
2cf870b
5bcbfd6
2cf870b
 
 
5bcbfd6
 
2cf870b
 
 
 
 
 
 
 
 
 
 
 
 
 
5c10f04
2cf870b
 
 
 
 
 
dc42e31
7e91b9c
dc42e31
7e91b9c
2cf870b
 
5bcbfd6
2cf870b
 
 
 
 
5bcbfd6
2cf870b
5bcbfd6
 
2cf870b
 
 
 
 
 
 
 
 
 
 
 
37603bc
2cf870b
5bcbfd6
2cf870b
 
 
 
 
 
 
5bcbfd6
2cf870b
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
from sentence_transformers import SentenceTransformer
from utils_st import load_models,load_data,clean_whitespace
import streamlit as st


# Module-level setup: runs every time this script is executed.
# load_models() yields the embedding model and the reranker as a pair.
# NOTE(review): presumably load_models/load_data cache their results
# (e.g. via a Streamlit cache decorator) -- confirm in utils_st, otherwise
# this work is repeated on each execution of the script.
embed_model, reranker = load_models()
# The retriever is built from the embedding model; perform_search() queries it.
hybrid_retriever = load_data(embed_model)


def stream(reranked_nodes, text_size=700, max_extra_texts=5):
    """Render reranked search hits in the Streamlit page, grouped by file.

    Nodes are grouped by ``metadata["file_name"]``. Groups keep the order in
    which their first node appears in ``reranked_nodes``, and that order is
    displayed as the rank. For every file the function writes the rank line,
    the title linked to ``doc_url``, a collapsible summary (taken from the
    first node's ``metadata["text"]``) and a collapsible list of excerpts.

    Args:
        reranked_nodes: Iterable of node objects exposing ``.text`` and a
            ``.metadata`` mapping with the keys ``file_name``, ``title``,
            ``text`` and ``doc_url`` (``page_label`` is optional).
        text_size: Maximum number of characters shown per excerpt.
        max_extra_texts: Maximum number of excerpts shown per file.

    Raises:
        KeyError: If a required metadata key is missing on a node.
    """
    # Group nodes per source file; a plain dict preserves first-seen order,
    # which is what the displayed rank is based on.
    nodes_by_file = {}
    for node in reranked_nodes:
        nodes_by_file.setdefault(node.metadata["file_name"], []).append(node)

    for rank, (file_name, file_nodes) in enumerate(nodes_by_file.items(), start=1):
        # File-level fields are read from the first (best placed) node.
        top_metadata = file_nodes[0].metadata
        title = top_metadata["title"]
        summary = top_metadata["text"]
        url = top_metadata["doc_url"]

        st.write(f"**Rank {rank}:** {file_name} ")
        # Interpolate the URL inside the f-string: applying "%" to a string
        # that already contains the title fails when the title has a "%".
        st.write(f"- Title: [{title}]({url})")
        with st.expander("Summary"):
            st.write(f"{summary}")
        with st.expander("Extra Text(s) "):
            for n_extra, node in enumerate(file_nodes[:max_extra_texts], start=1):
                page_n = node.metadata.get("page_label", "Unknown")
                st.write(f"- **Found in page n°{page_n}** - Extra text **n°{n_extra}:**")
                st.write(f"\t     {clean_whitespace(node.text[:text_size])}...")
                st.markdown("""---""")
        st.markdown("""---""")



# Function to perform search and return sorted documents
def perform_search(query):
    """Retrieve and rerank documents for ``query``.

    Args:
        query: The user's question as typed in the search box.

    Returns:
        The output of ``reranker.predict`` for the nodes returned by
        ``hybrid_retriever.retrieve``, or an empty list when ``query`` is
        empty, ``None`` or contains only whitespace.
    """
    # A blank question carries no search intent; skip the retrieval and
    # reranking work instead of running them on whitespace.
    if not query or not query.strip():
        return []

    retrieved_nodes = hybrid_retriever.retrieve(query)
    return reranker.predict(retrieved_nodes, query_bundle=query)

# Main Streamlit app
def main():
    """Build the search page: title, question box, button and result list.

    Clicking "Search" runs a new search and stores the result in
    ``st.session_state.sorted_docs``; on any other run the previously stored
    result (if any) is displayed again.
    """
    st.title("Information Retrieval System")
    query = st.text_input("Enter your question:")

    search_clicked = st.button("Search")
    if search_clicked:
        results = perform_search(query)
        # Remember the hits so they can be shown again on later runs.
        st.session_state.sorted_docs = results
    else:
        results = st.session_state.get("sorted_docs", [])

    # Nothing to display yet (or the search returned no hits).
    if not results:
        return

    stream(results, 700)
        



# Entry point: build the page only when this file is executed as the main
# module, not when it is imported.
if __name__ == "__main__":
    main()