linktimecloud committed on
Commit
4d73da2
·
verified ·
1 Parent(s): a228dd5

Upload folder using huggingface_hub

Browse files
Files changed (3) hide show
  1. .env.tpl +5 -1
  2. README.md +19 -6
  3. ask.py +10 -4
.env.tpl CHANGED
@@ -3,4 +3,8 @@ SEARCH_API_KEY=your-google-search-api-key
3
  SEARCH_PROJECT_KEY=your-google-cx-key
4
 
5
  # right now we use OpenAI API
6
- LLM_API_KEY=your-openai-api-key
 
 
 
 
 
3
  SEARCH_PROJECT_KEY=your-google-cx-key
4
 
5
  # right now we use OpenAI API
6
+ LLM_API_KEY=your-openai-api-key
7
+
8
+ # Run and share Gradio UI
9
+ RUN_GRADIO_UI=False
10
+ SHARE_GRADIO_UI=False
README.md CHANGED
@@ -11,6 +11,14 @@ sdk_version: 5.3.0
11
  A single Python program to implement the search-extract-summarize flow, similar to AI search
12
  engines such as Perplexity.
13
 
 
 
 
 
 
 
 
 
14
  > [!NOTE]
15
  > Our main goal is to illustrate the basic concepts of AI search engines with the raw constructs.
16
  > Performance or scalability is not in the scope of this program.
@@ -64,17 +72,17 @@ Usage: ask.py [OPTIONS]
64
  Search web for the query and summarize the results
65
 
66
  Options:
67
- -q, --query TEXT Query to search [required]
68
- --url-list TEXT Instead of doing web search, scrape the
69
- target URL list and answer the query based
70
- on the content [default:
71
- instructions/links.txt]
72
  -d, --date-restrict INTEGER Restrict search results to a specific date
73
  range, default is no restriction
74
  -s, --target-site TEXT Restrict search results to a specific site,
75
  default is no restriction
76
  --output-language TEXT Output language for the answer
77
  --output-length INTEGER Output length for the answer
 
 
 
78
  -m, --model-name TEXT Model name to use for inference
79
  -l, --log-level [DEBUG|INFO|WARNING|ERROR]
80
  Set the logging level [default: INFO]
@@ -87,7 +95,12 @@ Options:
87
  - [OpenAI API](https://beta.openai.com/docs/api-reference/completions/create)
88
  - [Jinja2](https://jinja.palletsprojects.com/en/3.0.x/)
89
  - [bs4](https://www.crummy.com/software/BeautifulSoup/bs4/doc/)
90
- - [duckdb](https://github.com/duckdb/duckdb)
 
 
 
 
 
91
 
92
  ## Sample output
93
 
 
11
  A single Python program to implement the search-extract-summarize flow, similar to AI search
12
  engines such as Perplexity.
13
 
14
+ > [UPDATE]
15
+ >
16
+ > - 2024-10-22: add GradIO integration
17
+ > - 2024-10-21: use DuckDB for the vector search and use API for embedding
18
+ > - 2024-10-20: allow to specify a list of input urls
19
+ > - 2024-10-18: output-language and output-length parameters for LLM
20
+ > - 2024-10-18: date-restrict and target-site parameters for search
21
+
22
  > [!NOTE]
23
  > Our main goal is to illustrate the basic concepts of AI search engines with the raw constructs.
24
  > Performance or scalability is not in the scope of this program.
 
72
  Search web for the query and summarize the results
73
 
74
  Options:
75
+ --web-ui Launch the web interface
76
+ -q, --query TEXT Query to search
 
 
 
77
  -d, --date-restrict INTEGER Restrict search results to a specific date
78
  range, default is no restriction
79
  -s, --target-site TEXT Restrict search results to a specific site,
80
  default is no restriction
81
  --output-language TEXT Output language for the answer
82
  --output-length INTEGER Output length for the answer
83
+ --url-list-file TEXT Instead of doing web search, scrape the
84
+ target URL list and answer the query based
85
+ on the content
86
  -m, --model-name TEXT Model name to use for inference
87
  -l, --log-level [DEBUG|INFO|WARNING|ERROR]
88
  Set the logging level [default: INFO]
 
95
  - [OpenAI API](https://beta.openai.com/docs/api-reference/completions/create)
96
  - [Jinja2](https://jinja.palletsprojects.com/en/3.0.x/)
97
  - [bs4](https://www.crummy.com/software/BeautifulSoup/bs4/doc/)
98
+ - [DuckDB](https://github.com/duckdb/duckdb)
99
+ - [GradIO](https://www.gradio.app)
100
+
101
+ ## Screenshot for the GradIO integration
102
+
103
+ ![image](https://github.com/user-attachments/assets/0483e6a2-75d7-4fbd-813f-bfa13839c836)
104
 
105
  ## Sample output
106
 
ask.py CHANGED
@@ -410,8 +410,6 @@ def _run_query(
410
  ) -> str:
411
  logger = get_logger(log_level)
412
 
413
- load_dotenv(dotenv_path=default_env_file, override=False)
414
-
415
  ask = Ask(logger=logger)
416
 
417
  if url_list_str is None or url_list_str.strip() == "":
@@ -474,6 +472,7 @@ def launch_gradio(
474
  url_list_str: str,
475
  model_name: str,
476
  log_level: str,
 
477
  ) -> None:
478
  iface = gr.Interface(
479
  fn=_run_query,
@@ -513,7 +512,7 @@ def launch_gradio(
513
  description="Search the web with the query and summarize the results. Source code: https://github.com/pengfeng/ask.py",
514
  )
515
 
516
- iface.launch()
517
 
518
 
519
  @click.command(help="Search web for the query and summarize the results")
@@ -586,7 +585,13 @@ def search_extract_summarize(
586
  model_name: str,
587
  log_level: str,
588
  ):
589
- if web_ui:
 
 
 
 
 
 
590
  launch_gradio(
591
  query=query,
592
  date_restrict=date_restrict,
@@ -596,6 +601,7 @@ def search_extract_summarize(
596
  url_list_str=_read_url_list(url_list_file),
597
  model_name=model_name,
598
  log_level=log_level,
 
599
  )
600
  else:
601
  if query is None:
 
410
  ) -> str:
411
  logger = get_logger(log_level)
412
 
 
 
413
  ask = Ask(logger=logger)
414
 
415
  if url_list_str is None or url_list_str.strip() == "":
 
472
  url_list_str: str,
473
  model_name: str,
474
  log_level: str,
475
+ share_ui: bool,
476
  ) -> None:
477
  iface = gr.Interface(
478
  fn=_run_query,
 
512
  description="Search the web with the query and summarize the results. Source code: https://github.com/pengfeng/ask.py",
513
  )
514
 
515
+ iface.launch(share=share_ui)
516
 
517
 
518
  @click.command(help="Search web for the query and summarize the results")
 
585
  model_name: str,
586
  log_level: str,
587
  ):
588
+ load_dotenv(dotenv_path=default_env_file, override=False)
589
+
590
+ if web_ui or os.environ.get("RUN_GRADIO_UI", "false").lower() != "false":
591
+ if os.environ.get("SHARE_GRADIO_UI", "false").lower() == "true":
592
+ share_ui = True
593
+ else:
594
+ share_ui = False
595
  launch_gradio(
596
  query=query,
597
  date_restrict=date_restrict,
 
601
  url_list_str=_read_url_list(url_list_file),
602
  model_name=model_name,
603
  log_level=log_level,
604
+ share_ui=share_ui,
605
  )
606
  else:
607
  if query is None: