meg-huggingface committed on
Commit
79fff16
1 Parent(s): c5729e2

Simplifying logic of handling failures & successes...it seemed like it was getting a bit overly complicated.

Browse files
Files changed (2) hide show
  1. create_results.py +11 -13
  2. entrypoint.sh +8 -4
create_results.py CHANGED
@@ -9,8 +9,10 @@ TOKEN = os.environ.get("DEBUG")
9
  api = HfApi(token=TOKEN)
10
 
11
  out_dir = sys.argv[1]
 
 
12
 
13
- # Uploading results
14
  api.upload_folder(
15
  folder_path=out_dir,
16
  repo_id="AIEnergyScore/results_debug",
@@ -22,19 +24,15 @@ requests = load_dataset("AIEnergyScore/requests_debug", split="test",
22
  token=TOKEN)
23
  requests_dset = requests.to_pandas()
24
 
25
- models_ran = []
26
- for f in os.scandir(out_dir):
27
- if f.is_dir():
28
- for s in os.scandir(f):
29
- if s.is_dir() and s.name not in ['hooks', 'info', 'objects', 'refs',
30
- 'logs']:
31
- for m in os.scandir(s):
32
- models_ran.append(s.name + '/' + m.name)
33
 
34
- print("Models ran are: " + str(models_ran))
35
-
36
- requests_dset.loc[
37
- requests_dset["model"].isin(models_ran), ['status']] = "COMPLETED"
38
  updated_dset = Dataset.from_pandas(requests_dset)
39
  updated_dset.push_to_hub("AIEnergyScore/requests_debug", split="test",
40
  token=TOKEN)
 
9
  api = HfApi(token=TOKEN)
10
 
11
  out_dir = sys.argv[1]
12
+ all_attempts_read = open("attempts.txt", "r+").readlines()
13
+ failed_attempts_read = open("failed_attempts.txt", "r+").readlines()
14
 
15
+ # Uploading output to the results dataset.
16
  api.upload_folder(
17
  folder_path=out_dir,
18
  repo_id="AIEnergyScore/results_debug",
 
24
  token=TOKEN)
25
  requests_dset = requests.to_pandas()
26
 
27
+ for line in all_attempts_read:
28
+ experiment_name, model = line.strip().split(',')
29
+ if line not in failed_attempts_read:
30
+ requests_dset.loc[
31
+ requests_dset["model"] == model, ['status']] = "COMPLETED"
32
+ else:
33
+ requests_dset.loc[
34
+ requests_dset["model"] == model, ['status']] = "FAILED"
35
 
 
 
 
 
36
  updated_dset = Dataset.from_pandas(requests_dset)
37
  updated_dset.push_to_hub("AIEnergyScore/requests_debug", split="test",
38
  token=TOKEN)
entrypoint.sh CHANGED
@@ -7,6 +7,8 @@ echo "Not checking h100 -- already know it's not there."
7
  echo "Attempting to run."
8
  #if [[ $? = 0 ]]; then
9
 
 
 
10
  # For each line in the requests dataset....
11
  python /parse_requests.py | while read -r line; do
12
  # Read the name of the model and the experiment.
@@ -17,16 +19,18 @@ python /parse_requests.py | while read -r line; do
17
  now=$(date +%Y-%m-%d-%H-%M-%S)
18
  run_dir="./runs/${experiment_name}/${backend_model}/${now}"
19
  mkdir -p "$run_dir"
 
20
 
21
  # Let the benchmarking begin!
22
- optimum-benchmark --config-name "${experiment_name}" --config-dir /optimum-benchmark/examples/energy_star/ backend.model="${backend_model}" backend.processor="${backend_model}" hydra.run.dir="${run_dir}" 2> "${run_dir}/error.log" || (python /failed_run.py --run_dir "${run_dir}" --model_name "${backend_model}" && rm -rf $run_dir)
 
23
  done
24
 
25
- echo "Finished; uploading dataset results"
26
  python /create_results.py ./runs
27
 
28
- echo "Uploading all output from the /runs folder."
29
- python /upload_run_folder.py --run_dir "/runs"
30
 
31
  # Pausing space
32
  echo "Pausing space."
 
7
  echo "Attempting to run."
8
  #if [[ $? = 0 ]]; then
9
 
10
+ touch attempts.txt
11
+ touch failed_attempts.txt
12
  # For each line in the requests dataset....
13
  python /parse_requests.py | while read -r line; do
14
  # Read the name of the model and the experiment.
 
19
  now=$(date +%Y-%m-%d-%H-%M-%S)
20
  run_dir="./runs/${experiment_name}/${backend_model}/${now}"
21
  mkdir -p "$run_dir"
22
+ echo "${experiment_name},${backend_model}" >> attempts.txt
23
 
24
  # Let the benchmarking begin!
25
+ optimum-benchmark --config-name "${experiment_name}" --config-dir /optimum-benchmark/examples/energy_star/ backend.model="${backend_model}" backend.processor="${backend_model}" hydra.run.dir="${run_dir}" 2> "${run_dir}/error.log" ||
26
+ echo "${experiment_name},${backend_model}" >> failed_attempts.txt #(python /failed_run.py --run_dir "${run_dir}" --model_name "${backend_model}" && rm -rf $run_dir)
27
  done
28
 
29
+ echo "Finished; updating requests dataset and results dataset."
30
  python /create_results.py ./runs
31
 
32
+ #echo "Uploading all output from the /runs folder."
33
+ #python /upload_run_folder.py --run_dir "/runs"
34
 
35
  # Pausing space
36
  echo "Pausing space."