Spaces:
Runtime error
Runtime error
linktimecloud
committed on
Upload folder using huggingface_hub
Browse files
.env.tpl
CHANGED
@@ -3,4 +3,8 @@ SEARCH_API_KEY=your-google-search-api-key
|
|
3 |
SEARCH_PROJECT_KEY=your-google-cx-key
|
4 |
|
5 |
# right now we use OpenAI API
|
6 |
-
LLM_API_KEY=your-openai-api-key
|
|
|
|
|
|
|
|
|
|
3 |
SEARCH_PROJECT_KEY=your-google-cx-key
|
4 |
|
5 |
# right now we use OpenAI API
|
6 |
+
LLM_API_KEY=your-openai-api-key
|
7 |
+
|
8 |
+
# Run and share Gradio UI
|
9 |
+
RUN_GRADIO_UI=False
|
10 |
+
SHARE_GRADIO_UI=False
|
README.md
CHANGED
@@ -11,6 +11,14 @@ sdk_version: 5.3.0
|
|
11 |
A single Python program to implement the search-extract-summarize flow, similar to AI search
|
12 |
engines such as Perplexity.
|
13 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
14 |
> [!NOTE]
|
15 |
> Our main goal is to illustrate the basic concepts of AI search engines with the raw constructs.
|
16 |
> Performance or scalability is not in the scope of this program.
|
@@ -64,17 +72,17 @@ Usage: ask.py [OPTIONS]
|
|
64 |
Search web for the query and summarize the results
|
65 |
|
66 |
Options:
|
67 |
-
-
|
68 |
-
|
69 |
-
target URL list and answer the query based
|
70 |
-
on the content [default:
|
71 |
-
instructions/links.txt]
|
72 |
-d, --date-restrict INTEGER Restrict search results to a specific date
|
73 |
range, default is no restriction
|
74 |
-s, --target-site TEXT Restrict search results to a specific site,
|
75 |
default is no restriction
|
76 |
--output-language TEXT Output language for the answer
|
77 |
--output-length INTEGER Output length for the answer
|
|
|
|
|
|
|
78 |
-m, --model-name TEXT Model name to use for inference
|
79 |
-l, --log-level [DEBUG|INFO|WARNING|ERROR]
|
80 |
Set the logging level [default: INFO]
|
@@ -87,7 +95,12 @@ Options:
|
|
87 |
- [OpenAI API](https://beta.openai.com/docs/api-reference/completions/create)
|
88 |
- [Jinja2](https://jinja.palletsprojects.com/en/3.0.x/)
|
89 |
- [bs4](https://www.crummy.com/software/BeautifulSoup/bs4/doc/)
|
90 |
-
- [
|
|
|
|
|
|
|
|
|
|
|
91 |
|
92 |
## Sample output
|
93 |
|
|
|
11 |
A single Python program to implement the search-extract-summarize flow, similar to AI search
|
12 |
engines such as Perplexity.
|
13 |
|
14 |
+
> [UPDATE]
|
15 |
+
>
|
16 |
+
> - 2024-10-22: add GradIO integration
|
17 |
+
> - 2024-10-21: use DuckDB for the vector search and use API for embedding
|
18 |
+
> - 2024-10-20: allow to specify a list of input urls
|
19 |
+
> - 2024-10-18: output-language and output-length parameters for LLM
|
20 |
+
> - 2024-10-18: date-restrict and target-site parameters for search
|
21 |
+
|
22 |
> [!NOTE]
|
23 |
> Our main goal is to illustrate the basic concepts of AI search engines with the raw constructs.
|
24 |
> Performance or scalability is not in the scope of this program.
|
|
|
72 |
Search web for the query and summarize the results
|
73 |
|
74 |
Options:
|
75 |
+
--web-ui Launch the web interface
|
76 |
+
-q, --query TEXT Query to search
|
|
|
|
|
|
|
77 |
-d, --date-restrict INTEGER Restrict search results to a specific date
|
78 |
range, default is no restriction
|
79 |
-s, --target-site TEXT Restrict search results to a specific site,
|
80 |
default is no restriction
|
81 |
--output-language TEXT Output language for the answer
|
82 |
--output-length INTEGER Output length for the answer
|
83 |
+
--url-list-file TEXT Instead of doing web search, scrape the
|
84 |
+
target URL list and answer the query based
|
85 |
+
on the content
|
86 |
-m, --model-name TEXT Model name to use for inference
|
87 |
-l, --log-level [DEBUG|INFO|WARNING|ERROR]
|
88 |
Set the logging level [default: INFO]
|
|
|
95 |
- [OpenAI API](https://beta.openai.com/docs/api-reference/completions/create)
|
96 |
- [Jinja2](https://jinja.palletsprojects.com/en/3.0.x/)
|
97 |
- [bs4](https://www.crummy.com/software/BeautifulSoup/bs4/doc/)
|
98 |
+
- [DuckDB](https://github.com/duckdb/duckdb)
|
99 |
+
- [GradIO](https://www.gradio.app)
|
100 |
+
|
101 |
+
## Screenshot for the GradIO integration
|
102 |
+
|
103 |
+
![image](https://github.com/user-attachments/assets/0483e6a2-75d7-4fbd-813f-bfa13839c836)
|
104 |
|
105 |
## Sample output
|
106 |
|
ask.py
CHANGED
@@ -410,8 +410,6 @@ def _run_query(
|
|
410 |
) -> str:
|
411 |
logger = get_logger(log_level)
|
412 |
|
413 |
-
load_dotenv(dotenv_path=default_env_file, override=False)
|
414 |
-
|
415 |
ask = Ask(logger=logger)
|
416 |
|
417 |
if url_list_str is None or url_list_str.strip() == "":
|
@@ -474,6 +472,7 @@ def launch_gradio(
|
|
474 |
url_list_str: str,
|
475 |
model_name: str,
|
476 |
log_level: str,
|
|
|
477 |
) -> None:
|
478 |
iface = gr.Interface(
|
479 |
fn=_run_query,
|
@@ -513,7 +512,7 @@ def launch_gradio(
|
|
513 |
description="Search the web with the query and summarize the results. Source code: https://github.com/pengfeng/ask.py",
|
514 |
)
|
515 |
|
516 |
-
iface.launch()
|
517 |
|
518 |
|
519 |
@click.command(help="Search web for the query and summarize the results")
|
@@ -586,7 +585,13 @@ def search_extract_summarize(
|
|
586 |
model_name: str,
|
587 |
log_level: str,
|
588 |
):
|
589 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
590 |
launch_gradio(
|
591 |
query=query,
|
592 |
date_restrict=date_restrict,
|
@@ -596,6 +601,7 @@ def search_extract_summarize(
|
|
596 |
url_list_str=_read_url_list(url_list_file),
|
597 |
model_name=model_name,
|
598 |
log_level=log_level,
|
|
|
599 |
)
|
600 |
else:
|
601 |
if query is None:
|
|
|
410 |
) -> str:
|
411 |
logger = get_logger(log_level)
|
412 |
|
|
|
|
|
413 |
ask = Ask(logger=logger)
|
414 |
|
415 |
if url_list_str is None or url_list_str.strip() == "":
|
|
|
472 |
url_list_str: str,
|
473 |
model_name: str,
|
474 |
log_level: str,
|
475 |
+
share_ui: bool,
|
476 |
) -> None:
|
477 |
iface = gr.Interface(
|
478 |
fn=_run_query,
|
|
|
512 |
description="Search the web with the query and summarize the results. Source code: https://github.com/pengfeng/ask.py",
|
513 |
)
|
514 |
|
515 |
+
iface.launch(share=share_ui)
|
516 |
|
517 |
|
518 |
@click.command(help="Search web for the query and summarize the results")
|
|
|
585 |
model_name: str,
|
586 |
log_level: str,
|
587 |
):
|
588 |
+
load_dotenv(dotenv_path=default_env_file, override=False)
|
589 |
+
|
590 |
+
if web_ui or os.environ.get("RUN_GRADIO_UI", "false").lower() != "false":
|
591 |
+
if os.environ.get("SHARE_GRADIO_UI", "false").lower() == "true":
|
592 |
+
share_ui = True
|
593 |
+
else:
|
594 |
+
share_ui = False
|
595 |
launch_gradio(
|
596 |
query=query,
|
597 |
date_restrict=date_restrict,
|
|
|
601 |
url_list_str=_read_url_list(url_list_file),
|
602 |
model_name=model_name,
|
603 |
log_level=log_level,
|
604 |
+
share_ui=share_ui,
|
605 |
)
|
606 |
else:
|
607 |
if query is None:
|