Spaces:
Runtime error
Runtime error
eljanmahammadli
committed on
Commit
·
fa3e7dd
1
Parent(s):
8c8c07f
#feat added simplest scholar mode
Browse files
- app.py +59 -29
- google_search.py +3 -5
app.py
CHANGED
@@ -21,9 +21,9 @@ from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
|
21 |
from google.cloud import storage
|
22 |
|
23 |
if gr.NO_RELOAD:
|
24 |
-
from humanize import humanize_text, device
|
25 |
-
|
26 |
-
|
27 |
from utils import remove_special_characters, split_text_allow_complete_sentences_nltk
|
28 |
from google_search import google_search, months, domain_list, build_date
|
29 |
from ai_generate import generate, citations_to_html, remove_citations, display_cited_text, llm_wrapper
|
@@ -796,6 +796,24 @@ def save_humanizer_feedback_to_cloud_storage(data, humanizer_feedback):
|
|
796 |
gr.Warning("Nothing humanized to save yet!")
|
797 |
|
798 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
799 |
def generate_and_format(
|
800 |
input_role,
|
801 |
topic,
|
@@ -812,6 +830,7 @@ def generate_and_format(
|
|
812 |
num_examples,
|
813 |
conclusion_type,
|
814 |
google_search_check,
|
|
|
815 |
year_from,
|
816 |
month_from,
|
817 |
day_from,
|
@@ -838,14 +857,18 @@ def generate_and_format(
|
|
838 |
final_query = llm_wrapper(
|
839 |
input_role, topic, context, model="OpenAI GPT 4o", task_type="internet", temperature=0.7
|
840 |
)
|
841 |
-
if
|
842 |
-
|
843 |
-
final_query += " " + " OR ".join(
|
844 |
-
|
845 |
-
|
846 |
-
|
|
|
|
|
|
|
|
|
847 |
print(f"Google Search Query: {final_query}")
|
848 |
-
url_content = google_search(final_query, sorted_date, domains_to_include)
|
849 |
|
850 |
yt_content = {}
|
851 |
if yt_url:
|
@@ -1077,24 +1100,27 @@ with gr.Blocks(
|
|
1077 |
)
|
1078 |
with gr.Group(visible=google_default) as search_options:
|
1079 |
with gr.Row():
|
1080 |
-
|
1081 |
-
|
1082 |
-
|
1083 |
-
|
1084 |
-
|
1085 |
-
|
1086 |
-
|
1087 |
-
|
1088 |
-
|
1089 |
-
|
1090 |
-
|
1091 |
-
|
1092 |
-
|
1093 |
-
|
1094 |
-
|
1095 |
-
|
1096 |
-
|
1097 |
-
|
|
|
|
|
|
|
1098 |
with gr.Row():
|
1099 |
month_from = gr.Dropdown(
|
1100 |
choices=months,
|
@@ -1224,7 +1250,9 @@ with gr.Blocks(
|
|
1224 |
else:
|
1225 |
return gr.update(visible=False)
|
1226 |
|
1227 |
-
google_search_check.change(
|
|
|
|
|
1228 |
# ai_detector_dropdown.change(highlight_visible, inputs=ai_detector_dropdown, outputs=highlighted_text)
|
1229 |
# output_article.change(regenerate_visible, inputs=output_article, outputs=ai_comments)
|
1230 |
# ai_comments.change(regenerate_visible, inputs=output_article, outputs=regenerate_btn)
|
@@ -1257,6 +1285,7 @@ with gr.Blocks(
|
|
1257 |
# ai_generator,
|
1258 |
# input_api,
|
1259 |
google_search_check,
|
|
|
1260 |
year_from,
|
1261 |
month_from,
|
1262 |
day_from,
|
@@ -1293,6 +1322,7 @@ with gr.Blocks(
|
|
1293 |
# ai_generator,
|
1294 |
# input_api,
|
1295 |
google_search_check,
|
|
|
1296 |
year_from,
|
1297 |
month_from,
|
1298 |
day_from,
|
|
|
21 |
from google.cloud import storage
|
22 |
|
23 |
if gr.NO_RELOAD:
|
24 |
+
# from humanize import humanize_text, device
|
25 |
+
humanize_text = None
|
26 |
+
device = None
|
27 |
from utils import remove_special_characters, split_text_allow_complete_sentences_nltk
|
28 |
from google_search import google_search, months, domain_list, build_date
|
29 |
from ai_generate import generate, citations_to_html, remove_citations, display_cited_text, llm_wrapper
|
|
|
796 |
gr.Warning("Nothing humanized to save yet!")
|
797 |
|
798 |
|
799 |
+
scholar_urls = [
|
800 |
+
"arxiv.org",
|
801 |
+
"aclanthology.org",
|
802 |
+
"ieeexplore.ieee.org",
|
803 |
+
"researchgate.net",
|
804 |
+
# "scholar.google.com",
|
805 |
+
"springer.com",
|
806 |
+
# "sciencedirect.com", # 400
|
807 |
+
# "onlinelibrary.wiley.com", # 400
|
808 |
+
"jstor.org", # 400
|
809 |
+
"semanticscholar.org",
|
810 |
+
"biorxiv.org",
|
811 |
+
"medrxiv.org",
|
812 |
+
"ssrn.com",
|
813 |
+
"pubmed.ncbi.nlm.nih.gov",
|
814 |
+
"cochranelibrary.com",
|
815 |
+
]
|
816 |
+
|
817 |
def generate_and_format(
|
818 |
input_role,
|
819 |
topic,
|
|
|
830 |
num_examples,
|
831 |
conclusion_type,
|
832 |
google_search_check,
|
833 |
+
scholar_mode_check,
|
834 |
year_from,
|
835 |
month_from,
|
836 |
day_from,
|
|
|
857 |
final_query = llm_wrapper(
|
858 |
input_role, topic, context, model="OpenAI GPT 4o", task_type="internet", temperature=0.7
|
859 |
)
|
860 |
+
if scholar_mode_check:
|
861 |
+
scholar_site_queries = [f"site:{site.strip()}" for site in scholar_urls]
|
862 |
+
final_query += " " + " OR ".join(scholar_site_queries)
|
863 |
+
else:
|
864 |
+
if include_sites:
|
865 |
+
site_queries = [f"site:{site.strip()}" for site in include_sites.split(",")]
|
866 |
+
final_query += " " + " OR ".join(site_queries)
|
867 |
+
if exclude_sites:
|
868 |
+
exclude_queries = [f"-site:{site.strip()}" for site in exclude_sites.split(",")]
|
869 |
+
final_query += " " + " ".join(exclude_queries)
|
870 |
print(f"Google Search Query: {final_query}")
|
871 |
+
url_content = google_search(final_query, sorted_date, domains_to_include, scholar_mode_check)
|
872 |
|
873 |
yt_content = {}
|
874 |
if yt_url:
|
|
|
1100 |
)
|
1101 |
with gr.Group(visible=google_default) as search_options:
|
1102 |
with gr.Row():
|
1103 |
+
scholar_mode_check = gr.Checkbox(label="Enable Scholar Mode", value=False)
|
1104 |
+
with gr.Group(visible=True) as site_options:
|
1105 |
+
with gr.Row():
|
1106 |
+
include_sites = gr.Textbox(
|
1107 |
+
label="Include Specific Websites",
|
1108 |
+
placeholder="Enter comma-separated keywords",
|
1109 |
+
elem_classes="input-highlight-yellow",
|
1110 |
+
)
|
1111 |
+
with gr.Row():
|
1112 |
+
exclude_sites = gr.Textbox(
|
1113 |
+
label="Exclude Specific Websites",
|
1114 |
+
placeholder="Enter comma-separated keywords",
|
1115 |
+
elem_classes="input-highlight-yellow",
|
1116 |
+
)
|
1117 |
+
with gr.Row():
|
1118 |
+
domains_to_include = gr.Dropdown(
|
1119 |
+
domain_list,
|
1120 |
+
value=domain_list,
|
1121 |
+
multiselect=True,
|
1122 |
+
label="Domains To Include",
|
1123 |
+
)
|
1124 |
with gr.Row():
|
1125 |
month_from = gr.Dropdown(
|
1126 |
choices=months,
|
|
|
1250 |
else:
|
1251 |
return gr.update(visible=False)
|
1252 |
|
1253 |
+
google_search_check.change(
|
1254 |
+
lambda toggle: gr.update(visible=toggle), inputs=google_search_check, outputs=search_options
|
1255 |
+
)
|
1256 |
# ai_detector_dropdown.change(highlight_visible, inputs=ai_detector_dropdown, outputs=highlighted_text)
|
1257 |
# output_article.change(regenerate_visible, inputs=output_article, outputs=ai_comments)
|
1258 |
# ai_comments.change(regenerate_visible, inputs=output_article, outputs=regenerate_btn)
|
|
|
1285 |
# ai_generator,
|
1286 |
# input_api,
|
1287 |
google_search_check,
|
1288 |
+
scholar_mode_check,
|
1289 |
year_from,
|
1290 |
month_from,
|
1291 |
day_from,
|
|
|
1322 |
# ai_generator,
|
1323 |
# input_api,
|
1324 |
google_search_check,
|
1325 |
+
scholar_mode_check,
|
1326 |
year_from,
|
1327 |
month_from,
|
1328 |
day_from,
|
google_search.py
CHANGED
@@ -193,14 +193,12 @@ def google_search_urls(
|
|
193 |
return url_list
|
194 |
|
195 |
|
196 |
-
def google_search(
|
197 |
-
topic,
|
198 |
-
sorted_date,
|
199 |
-
domains_to_include,
|
200 |
-
):
|
201 |
api_key = os.environ.get("GOOGLE_SEARCH_API_KEY")
|
202 |
cse_id = os.environ.get("GOOGLE_SEARCH_CSE_ID")
|
203 |
start_time = time.perf_counter()
|
|
|
|
|
204 |
url_list = google_search_urls(
|
205 |
topic,
|
206 |
sorted_date,
|
|
|
193 |
return url_list
|
194 |
|
195 |
|
196 |
+
def google_search(topic, sorted_date, domains_to_include, scholar_mode_check):
|
|
|
|
|
|
|
|
|
197 |
api_key = os.environ.get("GOOGLE_SEARCH_API_KEY")
|
198 |
cse_id = os.environ.get("GOOGLE_SEARCH_CSE_ID")
|
199 |
start_time = time.perf_counter()
|
200 |
+
if scholar_mode_check:
|
201 |
+
topic += " -filetype:pdf"
|
202 |
url_list = google_search_urls(
|
203 |
topic,
|
204 |
sorted_date,
|