ragv1 #2
opened by psv901
- app-memora.py +0 -171
- app-news-content.py +0 -105
- app.py +136 -21
- rag_sec/news_content_generator.py +0 -81
- rag_sec/requirements.txt +0 -1
- requirements.txt +1 -5
app-memora.py
DELETED
@@ -1,171 +0,0 @@
-from pathlib import Path
-
-import streamlit as st
-from googlesearch import search
-import pandas as pd
-import os
-from rag_sec.document_search_system import DocumentSearchSystem
-from chainguard.blockchain_logger import BlockchainLogger
-from PIL import Image
-from itertools import cycle
-
-# Blockchain Logger
-blockchain_logger = BlockchainLogger()
-
-# Directory for storing uploaded files
-UPLOAD_DIR = "uploaded_files"
-os.makedirs(UPLOAD_DIR, exist_ok=True)
-
-# Initialize DocumentSearchSystem
-@st.cache_resource
-def initialize_system():
-    """Initialize the DocumentSearchSystem and load documents."""
-    system = DocumentSearchSystem(
-        neo4j_uri="neo4j+s://0ca71b10.databases.neo4j.io",
-        neo4j_user="neo4j",
-        neo4j_password="HwGDOxyGS1-79nLeTiX5bx5ohoFSpvHCmTv8IRgt-lY"
-    )
-    system.retriever.load_documents()
-    return system
-
-# Initialize the system
-system = initialize_system()
-
-st.title("Memora: Secure File Upload and Search with Blockchain & Neo4j")
-st.subheader("Personalized news and global updates at your fingertips")
-# File Upload Section
-uploaded_files = st.file_uploader("Upload your files", accept_multiple_files=True, type=['jpg', 'jpeg', 'png', 'mp4', 'avi'])
-
-if uploaded_files:
-    for uploaded_file in uploaded_files:
-        # Save file locally
-        file_path = os.path.join(UPLOAD_DIR, uploaded_file.name)
-        with open(file_path, "wb") as f:
-            f.write(uploaded_file.getbuffer())
-        st.success(f"File saved locally: {file_path}")
-
-        # Display uploaded file details
-        if uploaded_file.type.startswith('image'):
-            image = Image.open(uploaded_file)
-            st.image(image, caption=uploaded_file.name, use_column_width=True)
-
-        # Metadata Input
-        album = st.text_input(f"Album for {uploaded_file.name}", "Default Album")
-        tags = st.text_input(f"Tags for {uploaded_file.name} (comma-separated)", "")
-
-        # Log Metadata and Transaction
-        if st.button(f"Log Metadata for {uploaded_file.name}"):
-            metadata = {"file_name": uploaded_file.name, "tags": tags.split(','), "album": album}
-            blockchain_details = blockchain_logger.log_data(metadata)
-            blockchain_hash = blockchain_details.get("block_hash", "N/A")
-
-            # Use Neo4jHandler from DocumentSearchSystem to log the transaction
-            system.neo4j_handler.log_relationships(uploaded_file.name, tags, blockchain_hash, [album])
-            st.write(f"Metadata logged successfully! Blockchain Details: {blockchain_details}")
-
-# Blockchain Integrity Validation
-if st.button("Validate Blockchain Integrity"):
-    is_valid = blockchain_logger.is_blockchain_valid()
-    st.write("Blockchain Integrity:", "Valid ✅" if is_valid else "Invalid ❌")
-
-# Document Search Section
-st.subheader("Search Documents")
-
-# Google Search: User-Specific News
-st.subheader("1. Latest News About You")
-user_name = st.text_input("Enter your name or handle to search for recent news", value="Talex Maxim")
-
-if st.button("Search News About Me"):
-    if user_name:
-        st.write(f"Searching Google for news about **{user_name}**...")
-        try:
-            results = list(search(user_name, num_results=5))
-            if results:
-                st.success(f"Top {len(results)} results for '{user_name}':")
-                user_news_data = {"URL": results}
-                df_user_news = pd.DataFrame(user_news_data)
-                st.dataframe(df_user_news)
-            else:
-                st.warning("No recent news found about you.")
-        except Exception as e:
-            st.error(f"An error occurred during the search: {str(e)}")
-    else:
-        st.warning("Please enter your name or handle to search.")
-
-# Google Search: Global News Categories
-categories = ["Technology", "Sports", "Politics", "Entertainment", "Science"]
-
-st.title("Global News Insights")
-
-# News Results Dictionary
-news_results = {}
-
-try:
-    # Fetch News for Each Category
-    for category in categories:
-        try:
-            news_results[category] = list(search(f"latest {category} news", num_results=3))
-        except Exception as e:
-            news_results[category] = [f"Error fetching news: {str(e)}"]
-
-    # Display Results with Styled Buttons
-    for category, articles in news_results.items():
-        st.subheader(f"{category} News")
-        cols = st.columns(3)  # Create 3 columns for the layout
-
-        if articles and "Error fetching news" not in articles[0]:
-            for idx, article in enumerate(articles):
-                with cols[idx % 3]:  # Cycle through columns
-                    st.markdown(
-                        f"""
-                        <div style="padding: 10px; border: 1px solid #ccc; border-radius: 5px; margin: 10px; text-align: center;">
-                            <a href="{article}" target="_blank" style="text-decoration: none;">
-                                <button style="background-color: #c4ccc8; color: white; border: none; padding: 10px 20px; text-align: center; display: inline-block; font-size: 16px; border-radius: 5px;">
-                                    {category}-{idx + 1}
-                                </button>
-                            </a>
-                        </div>
-                        """,
-                        unsafe_allow_html=True,
-                    )
-        else:
-            st.warning(f"Could not fetch news for **{category}**.")
-except Exception as e:
-    st.error(f"An unexpected error occurred: {str(e)}")
-
-
-# # Display results
-# for category, articles in news_results.items():
-#     st.write(f"### Top News in {category}:")
-#     for idx, article in enumerate(articles, start=1):
-#         st.write(f"{idx}. [Read here]({article})")
-# except Exception as e:
-#     st.error(f"An error occurred while fetching global news: {str(e)}")
-
-# Document Search
-st.subheader("3. Search Documents")
-query = st.text_input("Enter your query (e.g., 'sports news', 'machine learning')")
-
-if st.button("Search Documents"):
-    if query:
-        result = system.process_query(query)
-        if result["status"] == "success":
-            st.success(f"Query processed successfully!")
-            st.write("### Query Response:")
-            st.write(result["response"])
-            st.write("### Retrieved Documents:")
-            for idx, doc in enumerate(result["retrieved_documents"], start=1):
-                st.write(f"**Document {idx}:**")
-                st.write(doc[:500])  # Display the first 500 characters
-            st.write("### Blockchain Details:")
-            st.json(result["blockchain_details"])
-        elif result["status"] == "no_results":
-            st.warning("No relevant documents found for your query.")
-        elif result["status"] == "rejected":
-            st.error(result["message"])
-    else:
-        st.warning("Please enter a query to search.")
-
-# Debugging Section
-if st.checkbox("Show Debug Information"):
-    st.write(f"Total documents loaded: {len(system.retriever.documents)}")
app-news-content.py
DELETED
@@ -1,105 +0,0 @@
-import streamlit as st
-import requests
-from transformers import T5Tokenizer, T5ForConditionalGeneration
-from collections import OrderedDict
-
-# Load Models
-@st.cache_resource
-def load_models():
-    tokenizer = T5Tokenizer.from_pretrained("t5-small")
-    model = T5ForConditionalGeneration.from_pretrained("t5-small")
-    return tokenizer, model
-
-t5_tokenizer, t5_model = load_models()
-
-# API Key for NewsAPI
-NEWS_API_KEY = "66db8e116ae24c49beef53e8b879600a"
-
-# Fetch news articles based on the user query
-@st.cache_data
-def fetch_news(user_query):
-    NEWS_API_URL = "https://newsapi.org/v2/everything"
-    params = {
-        'q': user_query,
-        'apiKey': NEWS_API_KEY,
-        'language': 'en',
-        'pageSize': 10,  # Fetch 10 articles
-        'sortBy': 'relevance',
-    }
-    response = requests.get(NEWS_API_URL, params=params)
-    if response.status_code == 200:
-        articles = response.json().get('articles', [])
-        return [
-            {
-                'title': article.get('title', 'No Title'),
-                'description': article.get('description', 'No Description')
-            }
-            for article in articles if article.get('description')
-        ]
-    return []
-
-# Summarize articles
-def summarize_articles(articles):
-    summaries = []
-    for article in articles:
-        input_text = f"summarize: Title: {article['title']}. Description: {article['description']}"
-        inputs = t5_tokenizer.encode(input_text, return_tensors="pt", max_length=512, truncation=True)
-        outputs = t5_model.generate(inputs, max_length=100, min_length=50, length_penalty=2.0, num_beams=4, early_stopping=True)
-        summary = t5_tokenizer.decode(outputs[0], skip_special_tokens=True)
-        summaries.append(remove_redundancy(summary))
-    return summaries
-
-# Remove redundancy in summaries
-def remove_redundancy(summary):
-    sentences = summary.split('. ')
-    seen = OrderedDict()
-    return '. '.join([seen.setdefault(s, s) for s in sentences if s not in seen])
-
-# Generate catchy content based on all 10 summaries
-def generate_catchy_content(summarized_content):
-    combined_prompt = f"""
-    Write a blog post based on these insights:
-    {', '.join(summarized_content)}
-    """
-    inputs = t5_tokenizer.encode(combined_prompt, return_tensors="pt", max_length=512, truncation=True)
-    outputs = t5_model.generate(inputs, max_length=300, length_penalty=2.0, num_beams=4, early_stopping=True)
-    return t5_tokenizer.decode(outputs[0], skip_special_tokens=True)
-
-# Main App
-st.title("News Summarizer & Blog Generator")
-st.subheader("Enter a topic to fetch news, summarize and generate engaging content!")
-
-# Query Input
-user_query = st.text_input("Enter a query (e.g., 'AI trends', 'Climate change impact'):")
-
-if st.button("Fetch, Summarize and Generate"):
-    if user_query:
-        st.info(f"Fetching articles related to: {user_query}")
-        with st.spinner("Fetching news articles..."):
-            articles = fetch_news(user_query)
-        if articles:
-            st.success(f"Fetched {len(articles)} articles!")
-
-            # Display only the first 4 articles
-            st.subheader("Fetched Articles")
-            for i, article in enumerate(articles[:4], 1):
-                st.write(f"**Article {i}:** {article['title']}")
-                st.write(f"*Description:* {article['description']}")
-
-            # Summarize All Articles
-            st.info("Summarizing articles...")
-            summaries = summarize_articles(articles)  # Summarize all 10 articles
-            st.subheader("Summarized Articles")
-            for i, summary in enumerate(summaries[:4], 1):  # Display summaries for first 4 articles
-                st.write(f"**Summary {i}:** {summary}")
-
-            # Generate Blog Post
-            st.info("Generating blog post...")
-            generated_content = generate_catchy_content(summaries)  # Use all 10 summaries
-            st.subheader("Generated Blog Post")
-            st.write(generated_content)
-
-        else:
-            st.warning("No articles found. Try a different query.")
-    else:
-        st.error("Please enter a query to proceed!")
app.py
CHANGED
@@ -1,28 +1,143 @@
+from pathlib import Path
+
 import streamlit as st
-import
+from googlesearch import search
+import pandas as pd
+import os
+from rag_sec.document_search_system import DocumentSearchSystem
+from chainguard.blockchain_logger import BlockchainLogger
+from PIL import Image
+
+# Blockchain Logger
+blockchain_logger = BlockchainLogger()
+
+# Directory for storing uploaded files
+UPLOAD_DIR = "uploaded_files"
+os.makedirs(UPLOAD_DIR, exist_ok=True)
+
+# Initialize DocumentSearchSystem
+@st.cache_resource
+def initialize_system():
+    """Initialize the DocumentSearchSystem and load documents."""
+    system = DocumentSearchSystem(
+        neo4j_uri="neo4j+s://0ca71b10.databases.neo4j.io",
+        neo4j_user="neo4j",
+        neo4j_password="HwGDOxyGS1-79nLeTiX5bx5ohoFSpvHCmTv8IRgt-lY"
+    )
+    system.retriever.load_documents()
+    return system
+
+# Initialize the system
+system = initialize_system()
+
+st.title("Memora: Secure File Upload and Search with Blockchain & Neo4j")
+st.subheader("Personalized news and global updates at your fingertips")
+# File Upload Section
+uploaded_files = st.file_uploader("Upload your files", accept_multiple_files=True, type=['jpg', 'jpeg', 'png', 'mp4', 'avi'])
+
+if uploaded_files:
+    for uploaded_file in uploaded_files:
+        # Save file locally
+        file_path = os.path.join(UPLOAD_DIR, uploaded_file.name)
+        with open(file_path, "wb") as f:
+            f.write(uploaded_file.getbuffer())
+        st.success(f"File saved locally: {file_path}")
+
+        # Display uploaded file details
+        if uploaded_file.type.startswith('image'):
+            image = Image.open(uploaded_file)
+            st.image(image, caption=uploaded_file.name, use_column_width=True)
+
+        # Metadata Input
+        album = st.text_input(f"Album for {uploaded_file.name}", "Default Album")
+        tags = st.text_input(f"Tags for {uploaded_file.name} (comma-separated)", "")
+
+        # Log Metadata and Transaction
+        if st.button(f"Log Metadata for {uploaded_file.name}"):
+            metadata = {"file_name": uploaded_file.name, "tags": tags.split(','), "album": album}
+            blockchain_details = blockchain_logger.log_data(metadata)
+            blockchain_hash = blockchain_details.get("block_hash", "N/A")
+
+            # Use Neo4jHandler from DocumentSearchSystem to log the transaction
+            system.neo4j_handler.log_relationships(uploaded_file.name, tags, blockchain_hash, [album])
+            st.write(f"Metadata logged successfully! Blockchain Details: {blockchain_details}")
+
+# Blockchain Integrity Validation
+if st.button("Validate Blockchain Integrity"):
+    is_valid = blockchain_logger.is_blockchain_valid()
+    st.write("Blockchain Integrity:", "Valid ✅" if is_valid else "Invalid ❌")
+
+# Document Search Section
+st.subheader("Search Documents")
+
+# Google Search: User-Specific News
+st.subheader("1. Latest News About You")
+user_name = st.text_input("Enter your name or handle to search for recent news", value="Talex Maxim")
+
+if st.button("Search News About Me"):
+    if user_name:
+        st.write(f"Searching Google for news about **{user_name}**...")
+        try:
+            results = list(search(user_name, num_results=5))
+            if results:
+                st.success(f"Top {len(results)} results for '{user_name}':")
+                user_news_data = {"URL": results}
+                df_user_news = pd.DataFrame(user_news_data)
+                st.dataframe(df_user_news)
+            else:
+                st.warning("No recent news found about you.")
+        except Exception as e:
+            st.error(f"An error occurred during the search: {str(e)}")
+    else:
+        st.warning("Please enter your name or handle to search.")
 
-#
-
-
+# Google Search: Global News Categories
+st.subheader("2. Global News Insights")
+categories = ["Technology", "Sports", "Politics", "Entertainment", "Science"]
+news_results = {}
 
-
-
-
-
-
-
-
-
+if st.button("Fetch Global News"):
+    try:
+        for category in categories:
+            st.write(f"Fetching news for **{category}**...")
+            try:
+                category_results = list(search(f"latest {category} news", num_results=3))
+                news_results[category] = category_results
+            except Exception as e:
+                news_results[category] = [f"Error fetching news: {str(e)}"]
 
-
-
-
+        # Display results
+        for category, articles in news_results.items():
+            st.write(f"### Top News in {category}:")
+            for idx, article in enumerate(articles, start=1):
+                st.write(f"{idx}. [Read here]({article})")
+    except Exception as e:
+        st.error(f"An error occurred while fetching global news: {str(e)}")
 
-
-
-
-run_script(SECOND_APP_PATH)
+# Document Search
+st.subheader("3. Search Documents")
+query = st.text_input("Enter your query (e.g., 'sports news', 'machine learning')")
 
+if st.button("Search Documents"):
+    if query:
+        result = system.process_query(query)
+        if result["status"] == "success":
+            st.success(f"Query processed successfully!")
+            st.write("### Query Response:")
+            st.write(result["response"])
+            st.write("### Retrieved Documents:")
+            for idx, doc in enumerate(result["retrieved_documents"], start=1):
+                st.write(f"**Document {idx}:**")
+                st.write(doc[:500])  # Display the first 500 characters
+            st.write("### Blockchain Details:")
+            st.json(result["blockchain_details"])
+        elif result["status"] == "no_results":
+            st.warning("No relevant documents found for your query.")
+        elif result["status"] == "rejected":
+            st.error(result["message"])
+    else:
+        st.warning("Please enter a query to search.")
 
-
-
+# Debugging Section
+if st.checkbox("Show Debug Information"):
+    st.write(f"Total documents loaded: {len(system.retriever.documents)}")
rag_sec/news_content_generator.py
DELETED
@@ -1,81 +0,0 @@
-import torch
-from transformers import pipeline, T5Tokenizer, T5ForConditionalGeneration
-
-# Dummy Data: Detailed news articles
-news_articles = [
-    """Artificial Intelligence (AI) is revolutionizing industries by enhancing automation and boosting operational efficiency.
-    Companies are leveraging AI to analyze data at scale, optimize logistics, and improve customer experiences.
-    One notable development is the integration of AI in healthcare, where it aids in diagnosing diseases and personalizing treatment plans.
-    Experts believe that these advancements will continue to transform how businesses operate in the coming years.""",
-
-    """The field of AI has seen remarkable breakthroughs in natural language understanding, making it possible for machines to comprehend and generate human-like text.
-    Researchers are pushing boundaries with transformer-based architectures, enabling applications like conversational agents, language translation, and content creation.
-    These advancements are not only enhancing user interactions but also opening doors for innovative applications across various domains.""",
-
-    """AI trends are shaping the future of technology and business by enabling smarter decision-making and predictive analytics.
-    Industries such as finance, manufacturing, and retail are adopting AI-driven solutions to optimize processes and gain a competitive edge.
-    As AI tools become more accessible, even small businesses are leveraging these technologies to scale operations and deliver better services to customers.""",
-]
-
-# Load T5 Model and Tokenizer
-t5_tokenizer = T5Tokenizer.from_pretrained("t5-small")
-t5_model = T5ForConditionalGeneration.from_pretrained("t5-small")
-
-
-# Step 1: Input
-def get_user_prompt():
-    return input("Enter your prompt (e.g., 'Create a LinkedIn post about AI trends'): ")
-
-
-# Step 2: Summarization (Document Retrieval + Summarization)
-def summarize_articles(articles):
-    summaries = []
-    for article in articles:
-        input_text = f"summarize: {article}"
-        inputs = t5_tokenizer.encode(input_text, return_tensors="pt", max_length=512, truncation=True)
-        outputs = t5_model.generate(inputs, max_length=100, min_length=50, length_penalty=2.0, num_beams=4,
-                                    early_stopping=True)
-        summary = t5_tokenizer.decode(outputs[0], skip_special_tokens=True)
-        summaries.append(summary)
-    return summaries
-
-
-# Step 3: Content Generation
-def generate_content(prompt, summarized_content):
-    combined_prompt = f"{prompt}\n\nSummarized Insights:\n" + "\n".join(summarized_content)
-    input_text = f"generate: {combined_prompt}"
-    inputs = t5_tokenizer.encode(input_text, return_tensors="pt", max_length=512, truncation=True)
-    outputs = t5_model.generate(inputs, max_length=300, length_penalty=2.0, num_beams=4, early_stopping=True)
-    generated_text = t5_tokenizer.decode(outputs[0], skip_special_tokens=True)
-    return generated_text
-
-
-# Step 4: Logging with Chagu (Dummy Implementation)
-def log_with_chagu(stage, content):
-    print(f"\n[CHAGU LOG - {stage}]:\n{content}\n")
-
-
-# Step 5: Output
-def display_output(content):
-    print("\nGenerated Content:")
-    print(content)
-    print("\nTransparency Report:")
-    print("All transformations logged in Chagu for auditability.")
-
-
-# Main Workflow
-def main():
-    user_prompt = get_user_prompt()  # Properly take user input
-    log_with_chagu("Input Prompt", user_prompt)
-
-    summarized_content = summarize_articles(news_articles)
-    log_with_chagu("Summarized Articles", "\n".join(summarized_content))
-
-    final_output = generate_content(user_prompt, summarized_content)
-    log_with_chagu("Generated Content", final_output)
-
-    display_output(final_output)
-
-
-if __name__ == "__main__":
-    main()
rag_sec/requirements.txt
CHANGED
@@ -5,4 +5,3 @@ scikit-learn
 faiss-cpu
 pandas
 transformers
-torch
requirements.txt
CHANGED
@@ -6,8 +6,4 @@ scikit-learn
 faiss-cpu
 pandas
 transformers
-googlesearch-python
-torch
-requests
-pyspellchecker
-sentencepiece
+googlesearch-python
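
Since this change collapses the Space to a single entry point and trims both requirements files, a quick import check is an easy way to confirm the surviving app.py still has everything it needs before launching with "streamlit run app.py". Below is a minimal sketch, not part of the diff: smoke_check.py is a hypothetical helper, and the module list is read off the import block added to app.py above.

# smoke_check.py -- hypothetical pre-flight check for this PR (not part of the diff).
# Verifies that every module the merged app.py imports resolves against the
# trimmed requirements before running: streamlit run app.py
import importlib

# Module names taken from the imports added to app.py in this change.
REQUIRED = [
    "streamlit",
    "googlesearch",                    # provided by googlesearch-python
    "pandas",
    "PIL",                             # used for the image preview path
    "rag_sec.document_search_system",  # in-repo package
    "chainguard.blockchain_logger",    # in-repo package
]

failures = []
for name in REQUIRED:
    try:
        importlib.import_module(name)
        print(f"ok: {name}")
    except Exception as exc:  # report all failures instead of stopping at the first
        failures.append(name)
        print(f"FAIL: {name}: {exc}")

raise SystemExit(1 if failures else 0)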