"""Flask service that proxies page loads and fetches through a real Chrome.

A single shared Chrome instance (driven by ``undetected_chromedriver``) is
started lazily and shut down again after a period of inactivity.

Endpoints:
    /start_browser
    /close_browser  (also reachable as /stop_browser)
    /text?url=https://api.investing.com/api/financialdata/8849/historical/chart/?interval=PT1M&pointscount=60
        encoded: https%3A%2F%2Fapi.investing.com%2Fapi%2Ffinancialdata%2F8849%2Fhistorical%2Fchart%2F%3Finterval%3DPT1M%26pointscount%3D60
    /fetch?url=https://api.investing.com/api/financialdata/historical/1?start-date=2024-02-15&end-date=2029-02-15&time-frame=Daily&add-missing-rows=false
        encoded: https%3A%2F%2Fapi.investing.com%2Fapi%2Ffinancialdata%2Fhistorical%2F1%3Fstart-date%3D2024-02-15%26end-date%3D2029-02-15%26time-frame%3DDaily%26add-missing-rows%3Dfalse
"""
import os
import re
from threading import Timer

from flask import Flask
from flask import request
import undetected_chromedriver as uc

# Shared, lazily created state. All of it is mutated through ``global``.
driver = None            # the running uc.Chrome instance, or None
XVFB_DISPLAY = None      # virtual display, started once per process
USER_AGENT = None        # user agent detected on the first browser launch
CHROME_EXE_PATH = None   # cached result of get_chrome_exe_path()
timer = None             # pending idle-shutdown Timer, or None

# Path of the chromedriver binary handed to undetected_chromedriver.
CHROMEDRIVER_PATH = "/app/chromedriver"
# Seconds without a request after which the browser is shut down.
IDLE_TIMEOUT_SECONDS = 5 * 60
# Page loaded before running fetch() in the browser, so that the request is
# issued from a document on an investing.com host.
FETCH_ORIGIN_URL = 'https://i-invdn-com.investing.com/redesign/images/seo/investing_300X300.png'

# Fixed Chrome switches, taken from FlareSolverr:
# https://github.com/FlareSolverr/FlareSolverr/blob/043f18b231b4f409080b2b5c4421ce0f4cac7dec/src/utils.py
_CHROME_ARGUMENTS = (
    '--no-sandbox',
    '--window-size=1920,1080',
    # todo: this param shows a warning in chrome head-full
    '--disable-setuid-sandbox',
    '--disable-dev-shm-usage',
    # this option removes the zygote sandbox (it seems that the resolution is a bit faster)
    '--no-zygote',
    # attempt to fix Docker ARM32 build
    '--disable-gpu-sandbox',
    '--disable-software-rasterizer',
    '--ignore-certificate-errors',
    '--ignore-ssl-errors',
    # fix GL errors in ASUSTOR NAS
    # https://github.com/FlareSolverr/FlareSolverr/issues/782
    # https://github.com/microsoft/vscode/issues/127800#issuecomment-873342069
    # https://peter.sh/experiments/chromium-command-line-switches/#use-gl
    '--use-gl=swiftshader',
)

app = Flask(__name__)


def _start_xvfb_display():
    """Start the Xvfb virtual display once; later calls are no-ops."""
    global XVFB_DISPLAY
    if XVFB_DISPLAY is None:
        # Imported lazily so the module can be loaded without xvfbwrapper.
        from xvfbwrapper import Xvfb
        XVFB_DISPLAY = Xvfb()
        XVFB_DISPLAY.start()


def get_chrome_exe_path() -> str:
    """Return the Chrome executable path, caching the result.

    A ``chrome/chrome`` binary next to this file (linux pyinstaller bundle)
    is preferred; otherwise the system Chrome located by
    ``uc.find_chrome_executable()`` is used.
    """
    global CHROME_EXE_PATH
    if CHROME_EXE_PATH is not None:
        return CHROME_EXE_PATH
    # linux pyinstaller bundle
    chrome_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'chrome', "chrome")
    if os.path.exists(chrome_path):
        CHROME_EXE_PATH = chrome_path
        return CHROME_EXE_PATH
    # system
    CHROME_EXE_PATH = uc.find_chrome_executable()
    return CHROME_EXE_PATH


def _build_chrome_options():
    """Build the ChromeOptions used for every browser launch.

    Adds the fixed switches, then the detected user agent (if known), the
    ``LANG`` environment variable (if set) and ``--disable-web-security``.
    """
    options = uc.ChromeOptions()
    for argument in _CHROME_ARGUMENTS:
        options.add_argument(argument)
    # Headless mode is intentionally not enabled ("--headless=new").
    if USER_AGENT is not None:
        options.add_argument('--user-agent=%s' % USER_AGENT)
    language = os.environ.get('LANG', None)
    if language is not None:
        options.add_argument('--lang=%s' % language)
    # Allow cross origin requests. The switch used to be written with a
    # leading space (' --disable-web-security'), which presumably kept it
    # from being recognised as a switch at all.
    options.add_argument('--disable-web-security')
    return options


def _launch_chrome():
    """Launch and return a new Chrome instance with the standard options."""
    return uc.Chrome(
        options=_build_chrome_options(),
        headless=False,
        version_main=None,
        driver_executable_path=CHROMEDRIVER_PATH,
        browser_executable_path=get_chrome_exe_path(),
    )


def _start_browser():
    """(Re)start the shared browser and arm the idle-shutdown timer.

    Any running browser is quit first. On the very first launch the user
    agent is read from the browser, "HEADLESS" is stripped from it, and the
    browser is restarted so the cleaned user agent is applied.
    """
    global driver, USER_AGENT
    if driver is not None:
        driver.quit()
        driver = None
    _start_xvfb_display()

    driver = _launch_chrome()
    if USER_AGENT is None:
        USER_AGENT = driver.execute_script("return navigator.userAgent")
        USER_AGENT = re.sub('HEADLESS', '', USER_AGENT, flags=re.IGNORECASE)
        app.logger.info(USER_AGENT)
        driver.quit()
        # Do not keep a reference to the dead browser if the relaunch fails.
        driver = None
        # restart with user agent
        driver = _launch_chrome()

    _reset_stop_timer()
    app.logger.info("browser started")


def _stop_browser():
    """Quit the shared browser, if any, and cancel the idle timer.

    Safe to call when no browser is running; in that case nothing is quit.
    """
    global driver, timer
    if timer is not None:
        # Also harmless when called from the timer's own thread.
        timer.cancel()
        timer = None
    # Detach the global first so other code sees "no browser" immediately.
    browser, driver = driver, None
    if browser is None:
        app.logger.info("browser already stopped")
        return
    browser.quit()
    app.logger.info("browser stopped")


def _reset_stop_timer():
    """Restart the idle countdown after which the browser is stopped."""
    global timer
    if timer is not None:
        timer.cancel()
    timer = Timer(IDLE_TIMEOUT_SECONDS, _stop_browser)
    # _stop_browser executed in another thread...
    timer.start()


def _ensure_browser():
    """Start the browser if needed, otherwise just postpone its shutdown."""
    if driver is None:
        _start_browser()
    else:
        _reset_stop_timer()


@app.route("/")
def hello_world():
    """Return a static greeting."""
    # NOTE(review): this literal was split across several lines in the
    # source (not valid Python); reconstructed assuming stripped <p> markup
    # -- confirm against the original file.
    return "<p>Hello, World!</p>"


@app.route("/start_browser")
def start_browser():
    """Start (or restart) the shared browser."""
    _start_browser()
    return "ok"


@app.route("/stop_browser")
@app.route("/close_browser")
def stop_browser():
    """Stop the shared browser; a no-op when it is not running."""
    _stop_browser()
    return "ok"


@app.route("/text")
def text():
    """Load the ``url`` query parameter in the browser and return its page source."""
    url = request.args.get('url', '')
    if not url:
        return "missing 'url' query parameter", 400
    _ensure_browser()
    driver.get(url)
    return driver.page_source


@app.route("/screenshot")
def screenshot():
    """Placeholder endpoint; not implemented yet."""
    return "todo"


@app.route("/evaluate")
def evaluate():
    """Placeholder endpoint; not implemented yet."""
    return "todo"


@app.route("/fetch")
def fetch():
    """Run ``fetch(url)`` inside the browser and return the response text.

    The browser first navigates to ``FETCH_ORIGIN_URL``. On a JavaScript
    error the returned text is ``'error: ' + e``.
    """
    url = request.args.get('url', '')
    if not url:
        return "missing 'url' query parameter", 400
    _ensure_browser()
    driver.get(FETCH_ORIGIN_URL)
    # The URL is passed as a script argument instead of being formatted into
    # the JavaScript source, so quotes in it cannot break or alter the script.
    script = """
        var targetUrl = arguments[0];
        var callback = arguments[arguments.length - 1]; // this is the callback to call when you are done
        (async function(){
            try {
                let res = await fetch(targetUrl, {headers:{'domain-id':'www'}});
                let text = await res.text();
                callback(text);
            } catch (e) {
                callback('error: ' + e);
            }
        })()"""
    result = driver.execute_async_script(script, url)
    return result


if __name__ == '__main__':
    # NOTE(review): debug=True together with host="0.0.0.0" exposes the
    # Werkzeug debugger on every interface -- confirm this is only used in a
    # trusted environment.
    app.run(host="0.0.0.0", port=7860, debug=True)