arabellastrange committed on
Commit
60693bb
·
1 Parent(s): 3f060b2

still trying to fix the chrome

Browse files
Files changed (2) hide show
  1. requirements.txt +3 -2
  2. web_search.py +4 -1
requirements.txt CHANGED
@@ -11,5 +11,6 @@ llama-index-readers-file
11
  selenium
12
  unstructured
13
  requests
14
- chromedriver-autoinstaller
15
- chromium
 
 
11
  selenium
12
  unstructured
13
  requests
14
+ chromium
15
+ chromedriver
16
+ chromedriver-py
web_search.py CHANGED
@@ -12,6 +12,7 @@ import selenium.common.exceptions
12
  from selenium import webdriver
13
  from selenium.webdriver.chrome.options import Options
14
  from unstructured.partition.html import partition_html
 
15
 
16
  from llmsearch import site_stats
17
  # this import style works in pycharm
@@ -68,8 +69,10 @@ def process_url(url, timeout):
68
  options = Options()
69
  options.page_load_strategy = "eager"
70
  options.add_argument("--headless")
 
71
  result = ""
72
- with webdriver.Firefox(options=options) as dr:
 
73
  logger.info(f"*****setting page load timeout {timeout}")
74
  dr.set_page_load_timeout(timeout)
75
  try:
 
12
  from selenium import webdriver
13
  from selenium.webdriver.chrome.options import Options
14
  from unstructured.partition.html import partition_html
15
+ from chromedriver_py import binary_path
16
 
17
  from llmsearch import site_stats
18
  # this import style works in pycharm
 
69
  options = Options()
70
  options.page_load_strategy = "eager"
71
  options.add_argument("--headless")
72
+ options.add_argument("--no-sandbox")
73
  result = ""
74
+ svc = webdriver.ChromeService(executable_path=binary_path)
75
+ with webdriver.Chrome(options=options, service=svc) as dr:
76
  logger.info(f"*****setting page load timeout {timeout}")
77
  dr.set_page_load_timeout(timeout)
78
  try: