clefourrier (HF staff) and derek-thomas (HF staff) committed
Commit 8b88d2c
1 Parent(s): 1b74edb

adding_log_visualizer (#1)


- Adding logging visualizer (a130fc382412b1c3f664e294da1aeb43ed30fa00)
- Keeping default of checking every 10 min (ccb39b1f08fe0f04412d9e0401a1dc556b149fb0)
- Increasing log lines to visualize (ef89b19123267a7694493f28cd25a8ac8ed4942b)
- Adding better console handling (d800d714b0f2ae12ffaa23c053413f1951123238)


Co-authored-by: Derek Thomas <derek-thomas@users.noreply.huggingface.co>

.gitignore CHANGED
@@ -5,6 +5,7 @@ __pycache__/
 .ipynb_checkpoints
 *ipynb
 .vscode/
+.idea/
 
 eval-queue/
 eval-results/
app.py CHANGED
@@ -1,27 +1,26 @@
-import sys
 import logging
-import subprocess
-import gradio as gr
-from apscheduler.schedulers.background import BackgroundScheduler
+import sys
 
-logging.basicConfig(level=logging.ERROR)
+import gradio as gr
+from main_backend_lighteval import run_auto_eval
+from src.display.log_visualizer import log_file_to_html_string
+from src.display.css_html_js import dark_mode_gradio_js
+from src.envs import REFRESH_RATE
 
-from src.logging import LOGGER, read_logs
+logging.basicConfig(level=logging.INFO)
 
-sys.stdout = LOGGER
-sys.stderr = LOGGER
 
-#subprocess.run(["python", "scripts/fix_harness_import.py"])
+intro_md = f"""
+# Intro
+This is just a visual for the auto evaluator. Note that the lines of the log visual are reversed.
+# Logs
+"""
 
-def launch_backend():
-    _ = subprocess.run(["python", "main_backend_lighteval.py"])
+with gr.Blocks(js=dark_mode_gradio_js) as demo:
+    with gr.Tab("Application"):
+        gr.Markdown(intro_md)
+        dummy = gr.Markdown(run_auto_eval, every=REFRESH_RATE, visible=False)
+        output = gr.HTML(log_file_to_html_string, every=10)
 
-demo = gr.Blocks()
-with demo:
-    logs = gr.Code(interactive=False)
-    demo.load(read_logs, None, logs, every=1)
-
-scheduler = BackgroundScheduler()
-scheduler.add_job(launch_backend, "interval", seconds=60) # will only allow one job to be run at the same time
-scheduler.start()
-demo.queue(default_concurrency_limit=40).launch()
+if __name__ == '__main__':
+    demo.queue(default_concurrency_limit=40).launch(server_name="0.0.0.0", show_error=True, server_port=7860)
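For context, the rewritten app.py leans entirely on Gradio's `every=` polling: a component whose value is a callable re-runs that callable on the given interval while the page is open. Below is a minimal standalone sketch of the same pattern, using the interval values from this commit but placeholder functions (`fake_auto_eval` and `render_logs` are illustrative only, not part of the repo):

```python
import datetime

import gradio as gr


def fake_auto_eval() -> str:
    # Stand-in for run_auto_eval(): the hidden component below re-invokes
    # this on every interval, which is what keeps the backend job running.
    return ""


def render_logs() -> str:
    # Stand-in for log_file_to_html_string(): return the HTML to display.
    return f"<pre>last refresh: {datetime.datetime.now()}</pre>"


with gr.Blocks() as demo:
    # Hidden "timer" component: its only purpose is the periodic callback.
    gr.Markdown(fake_auto_eval, every=10 * 60, visible=False)
    # Visible log view, re-rendered every 10 seconds.
    gr.HTML(render_logs, every=10)

if __name__ == "__main__":
    demo.queue().launch()
```

Polling with `every=` relies on the queue being enabled, which is presumably why the launch line in the diff keeps `demo.queue(...)`.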
main_backend_harness.py CHANGED
@@ -11,9 +11,11 @@ from src.backend.sort_queue import sort_models_by_priority
 
 from src.envs import QUEUE_REPO, EVAL_REQUESTS_PATH_BACKEND, RESULTS_REPO, EVAL_RESULTS_PATH_BACKEND, DEVICE, API, LIMIT, TOKEN
 from src.about import Tasks, NUM_FEWSHOT
+from src.logging import setup_logger
 TASKS_HARNESS = [task.value.benchmark for task in Tasks]
 
-logging.basicConfig(level=logging.ERROR)
+# logging.basicConfig(level=logging.ERROR)
+logger = setup_logger(__name__)
 pp = pprint.PrettyPrinter(width=80)
 
 PENDING_STATUS = "PENDING"
@@ -51,7 +53,7 @@ def run_auto_eval():
         return
 
     eval_request = eval_requests[0]
-    pp.pprint(eval_request)
+    logger.info(pp.pformat(eval_request))
 
     set_eval_request(
         api=API,
main_backend_lighteval.py CHANGED
@@ -11,8 +11,11 @@ from src.backend.sort_queue import sort_models_by_priority
 
 from src.envs import QUEUE_REPO, EVAL_REQUESTS_PATH_BACKEND, RESULTS_REPO, EVAL_RESULTS_PATH_BACKEND, API, LIMIT, TOKEN, ACCELERATOR, VENDOR, REGION
 from src.about import TASKS_LIGHTEVAL
+from src.logging import setup_logger
 
-logging.basicConfig(level=logging.ERROR)
+logger = setup_logger(__name__)
+
+# logging.basicConfig(level=logging.ERROR)
 pp = pprint.PrettyPrinter(width=80)
 
 PENDING_STATUS = "PENDING"
@@ -44,13 +47,14 @@ def run_auto_eval():
     # Sort the evals by priority (first submitted first run)
     eval_requests = sort_models_by_priority(api=API, models=eval_requests)
 
-    print(f"Found {len(eval_requests)} {','.join(current_pending_status)} eval requests")
+    logger.info(f"Found {len(eval_requests)} {','.join(current_pending_status)} eval requests")
 
     if len(eval_requests) == 0:
         return
 
     eval_request = eval_requests[0]
-    pp.pprint(eval_request)
+    logger.info(pp.pformat(eval_request))
+
 
     set_eval_request(
         api=API,
requirements.txt CHANGED
@@ -16,4 +16,9 @@ tokenizers>=0.15.0
 git+https://github.com/EleutherAI/lm-evaluation-harness.git@b281b0921b636bc36ad05c0b0b0763bd6dd43463#egg=lm-eval
 git+https://github.com/huggingface/lighteval.git#egg=lighteval
 accelerate==0.24.1
-sentencepiece
+sentencepiece
+
+# Log Visualizer
+beautifulsoup4==4.12.2
+lxml==4.9.3
+rich==13.3.4
src/backend/manage_requests.py CHANGED
@@ -5,6 +5,9 @@ from typing import Optional
 
 from huggingface_hub import HfApi, snapshot_download
 from src.envs import TOKEN
+from src.logging import setup_logger
+
+logger = setup_logger(__name__)
 
 @dataclass
 class EvalRequest:
@@ -103,20 +106,20 @@ def check_completed_evals(
 
     for eval_request in running_evals:
         model = eval_request.model
-        print("====================================")
-        print(f"Checking {model}")
+        logger.info("====================================")
+        logger.info(f"Checking {model}")
 
         output_path = model
         output_file = f"{local_dir_results}/{output_path}/results*.json"
         output_file_exists = len(glob.glob(output_file)) > 0
 
         if output_file_exists:
-            print(
+            logger.info(
                 f"EXISTS output file exists for {model} setting it to {completed_status}"
             )
             set_eval_request(api, eval_request, completed_status, hf_repo, local_dir)
         else:
-            print(
+            logger.info(
                 f"No result file found for {model} setting it to {failed_status}"
             )
             set_eval_request(api, eval_request, failed_status, hf_repo, local_dir)
src/backend/run_eval_suite_harness.py CHANGED
@@ -7,18 +7,20 @@ from lm_eval import tasks, evaluator, utils
 
 from src.envs import RESULTS_REPO, API
 from src.backend.manage_requests import EvalRequest
+from src.logging import setup_logger
 
 logging.getLogger("openai").setLevel(logging.WARNING)
+logger = setup_logger(__name__)
 
 def run_evaluation(eval_request: EvalRequest, task_names, num_fewshot, batch_size, device, local_dir: str, results_repo: str, no_cache=True, limit=None):
     if limit:
-        print(
+        logger.info(
             "WARNING: --limit SHOULD ONLY BE USED FOR TESTING. REAL METRICS SHOULD NOT BE COMPUTED USING LIMIT."
         )
 
     task_names = utils.pattern_match(task_names, tasks.ALL_TASKS)
 
-    print(f"Selected Tasks: {task_names}")
+    logger.info(f"Selected Tasks: {task_names}")
 
     results = evaluator.simple_evaluate(
         model="hf-causal-experimental", # "hf-causal"
@@ -38,14 +40,14 @@ def run_evaluation(eval_request: EvalRequest, task_names, num_fewshot, batch_siz
     results["config"]["model_sha"] = eval_request.revision
 
     dumped = json.dumps(results, indent=2)
-    print(dumped)
+    logger.info(dumped)
 
     output_path = os.path.join(local_dir, *eval_request.model.split("/"), f"results_{datetime.now()}.json")
     os.makedirs(os.path.dirname(output_path), exist_ok=True)
     with open(output_path, "w") as f:
         f.write(dumped)
 
-    print(evaluator.make_table(results))
+    logger.info(evaluator.make_table(results))
 
     API.upload_file(
         path_or_fileobj=output_path,
src/backend/run_eval_suite_lighteval.py CHANGED
@@ -7,12 +7,14 @@ from lighteval.main_accelerate import main, EnvConfig, create_model_config, load
 
 from src.envs import RESULTS_REPO, CACHE_PATH, TOKEN
 from src.backend.manage_requests import EvalRequest
+from src.logging import setup_logger
 
 logging.getLogger("openai").setLevel(logging.WARNING)
+logger = setup_logger(__name__)
 
 def run_evaluation(eval_request: EvalRequest, task_names: str, batch_size: int, local_dir: str, accelerator: str, region: str, vendor: str, instance_size: str, instance_type: str, limit=None):
     if limit:
-        print("WARNING: --limit SHOULD ONLY BE USED FOR TESTING. REAL METRICS SHOULD NOT BE COMPUTED USING LIMIT.")
+        logger.info("WARNING: --limit SHOULD ONLY BE USED FOR TESTING. REAL METRICS SHOULD NOT BE COMPUTED USING LIMIT.")
 
     args = {
         "endpoint_model_name":f"{eval_request.model}_{eval_request.precision}".lower(),
@@ -43,7 +45,7 @@ def run_evaluation(eval_request: EvalRequest, task_names: str, batch_size: int,
         results["config"]["model_sha"] = eval_request.revision
 
         dumped = json.dumps(results, indent=2)
-        print(dumped)
+        logger.info(dumped)
     except Exception: # if eval failed, we force a cleanup
         env_config = EnvConfig(token=TOKEN, cache_dir=args['cache_dir'])
 
src/display/css_html_js.py ADDED
@@ -0,0 +1,20 @@
+style_content = """
+pre, code {
+    background-color: #272822;
+}
+.scrollable {
+    font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace;
+    height: 500px;
+    overflow: auto;
+}
+"""
+dark_mode_gradio_js = """
+function refresh() {
+    const url = new URL(window.location);
+
+    if (url.searchParams.get('__theme') !== 'dark') {
+        url.searchParams.set('__theme', 'dark');
+        window.location.href = url.href;
+    }
+}
+"""
src/display/log_visualizer.py ADDED
@@ -0,0 +1,42 @@
+from io import StringIO
+from pathlib import Path
+
+from bs4 import BeautifulSoup
+from rich.console import Console
+from rich.syntax import Syntax
+
+from src.display.css_html_js import style_content
+from src.envs import NUM_LINES_VISUALIZE
+from src.logging import log_file
+
+proj_dir = Path(__name__).parent
+
+
+def log_file_to_html_string():
+    with open(log_file, "rt") as f:
+        # Seek to the end of the file minus 300 lines
+        # Read the last 300 lines of the file
+        lines = f.readlines()
+        lines = lines[-NUM_LINES_VISUALIZE:]
+
+        # Syntax-highlight the last 300 lines of the file using the Python lexer and Monokai style
+        output = "".join(reversed(lines))
+        syntax = Syntax(output, "python", theme="monokai", word_wrap=True)
+
+        console = Console(record=True, width=150, style="#272822", file=StringIO())
+        console.print(syntax)
+        html_content = console.export_html(inline_styles=True)
+
+    # Parse the HTML content using BeautifulSoup
+    soup = BeautifulSoup(html_content, 'lxml')
+
+    # Modify the <pre> tag
+    pre_tag = soup.pre
+    pre_tag['class'] = 'scrollable'
+    del pre_tag['style']
+
+    # Add your custom styles and the .scrollable CSS to the <style> tag
+    style_tag = soup.style
+    style_tag.append(style_content)
+
+    return soup.prettify()
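If you want to sanity-check the rendered output outside the Space, something like the following works (assuming the repo root is on `PYTHONPATH` and `output.log` already exists; the preview file name is arbitrary):

```python
# Renders the current log tail to a standalone HTML file for inspection.
from pathlib import Path

from src.display.log_visualizer import log_file_to_html_string

html = log_file_to_html_string()
Path("log_preview.html").write_text(html, encoding="utf-8")
print(f"wrote {len(html)} characters to log_preview.html")
```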
src/envs.py CHANGED
@@ -31,5 +31,8 @@ EVAL_RESULTS_PATH = os.path.join(CACHE_PATH, "eval-results")
 EVAL_REQUESTS_PATH_BACKEND = os.path.join(CACHE_PATH, "eval-queue-bk")
 EVAL_RESULTS_PATH_BACKEND = os.path.join(CACHE_PATH, "eval-results-bk")
 
+REFRESH_RATE = 10 * 60  # 10 min
+NUM_LINES_VISUALIZE = 300
+
 API = HfApi(token=TOKEN)
 
src/logging.py CHANGED
@@ -1,32 +1,54 @@
 import sys
-from src.envs import API
-
-class Logger:
-    def __init__(self, filename):
-        self.terminal = sys.stdout
-        self.log = open(filename, "a+")
-
-    def write(self, message):
-        self.terminal.write(message)
-        self.log.write(message)
-
-    def flush(self):
-        self.terminal.flush()
-        self.log.flush()
-
-    def isatty(self):
-        return False
-
-def read_logs():
-    sys.stdout.flush()
-    #API.upload_file(
-    #    path_or_fileobj="output.log",
-    #    path_in_repo="demo-backend.log",
-    #    repo_id="demo-leaderboard-backend/logs",
-    #    repo_type="dataset",
-    #)
-
-    with open("output.log", "r") as f:
-        return f.read()
-
-LOGGER = Logger("output.log")
+from pathlib import Path
+
+proj_dir = Path(__file__).parents[1]
+
+log_file = proj_dir/"output.log"
+
+
+import logging
+
+
+def setup_logger(name: str):
+    logger = logging.getLogger(name)
+    logger.setLevel(logging.INFO)
+
+    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+
+    # Create a file handler to write logs to a file
+    file_handler = logging.FileHandler(log_file)
+    file_handler.setLevel(logging.INFO)
+    file_handler.setFormatter(formatter)
+    logger.addHandler(file_handler)
+
+    return logger
+
+# class Logger:
+#     def __init__(self):
+#         self.terminal = sys.stdout
+#         self.log = open(log_file, "a+")
+#
+#     def write(self, message):
+#         self.terminal.write(message)
+#         self.log.write(message)
+#
+#     def flush(self):
+#         self.terminal.flush()
+#         self.log.flush()
+#
+#     def isatty(self):
+#         return False
+#
+# def read_logs():
+#     sys.stdout.flush()
+#     #API.upload_file(
+#     #    path_or_fileobj="output.log",
+#     #    path_in_repo="demo-backend.log",
+#     #    repo_id="demo-leaderboard-backend/logs",
+#     #    repo_type="dataset",
+#     #)
+#
+#     with open(log_file, "r") as f:
+#         return f.read()
+#
+# LOGGER = Logger()
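A minimal sketch of how the new helper is used across the backend modules in this commit (the module name below is just an example): each module creates its own named logger, and every logger appends to the shared `log_file` that `log_file_to_html_string` later renders.

```python
# Illustrative usage only; mirrors the pattern in main_backend_*.py and
# src/backend/*.py, where each module calls setup_logger(__name__).
from src.logging import setup_logger

logger = setup_logger("example_module")  # hypothetical module name

logger.info("eval run starting")  # appended to output.log
logger.warning("limit is set; results are for testing only")
```

Because `setup_logger` only attaches a `FileHandler`, everything logged this way lands in `output.log`; console output, if any, comes from whatever root handlers are configured (for example the `logging.basicConfig(level=logging.INFO)` call in app.py).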