Experimenting.
Browse files — entrypoint.sh +14 -6
entrypoint.sh
CHANGED
@@ -4,6 +4,7 @@
|
|
4 |
set -e
|
5 |
|
6 |
export SPACE="EnergyStarAI/launch-computation-example"
|
|
|
7 |
|
8 |
echo "Not checking h100 -- already know it's not there."
|
9 |
#python /check_h100.py
|
@@ -14,11 +15,15 @@ python /parse_requests.py | while read line; do
|
|
14 |
IFS="," read backend_model experiment_name <<< $(echo ${line})
|
15 |
echo "Benchmarking Model: ${backend_model}, Task: ${experiment_name}"
|
16 |
now=$(date +%Y-%m-%d-%H-%M-%S)
|
17 |
-
export run_dir="./runs
|
|
|
|
|
|
|
|
|
18 |
|
19 |
-
optimum-benchmark --config-name ${experiment_name} --config-dir /optimum-benchmark/examples/energy_star/ backend.model=${backend_model} backend.processor=${backend_model} hydra.run.dir=${run_dir} 2> $run_dir/error-${now}.log
|
20 |
done || {
|
21 |
echo "Error."
|
|
|
22 |
# TODO: Although this works, `curl` appears to run indefinitely because it is recording itself (the logs are recording the curl operation.)
|
23 |
#echo "Using curl to retrieve the space run log."
|
24 |
#logs_name=./runs/logs-${now}.txt
|
@@ -26,14 +31,17 @@ done || {
|
|
26 |
#python /failed_run.py --run_dir $run_dir --model_name $backend_model --logs_name $logs_name
|
27 |
}
|
28 |
|
29 |
-
if [ -s $run_dir/error-${now}.log ]; then
|
30 |
# error.log is not-empty, an error was raised
|
31 |
echo "An error was raised while benchmarking the model..."
|
32 |
python /failed_run.py --run_dir $run_dir --model_name $backend_model
|
33 |
-
|
34 |
-
#
|
|
|
|
|
|
|
35 |
else
|
36 |
-
# The file is empty,
|
37 |
echo "Finished; uploading dataset results"
|
38 |
python /create_results.py ./runs
|
39 |
fi
|
|
|
4 |
set -e
|
5 |
|
6 |
export SPACE="EnergyStarAI/launch-computation-example"
|
7 |
+
failed=0
|
8 |
|
9 |
echo "Not checking h100 -- already know it's not there."
|
10 |
#python /check_h100.py
|
|
|
15 |
IFS="," read backend_model experiment_name <<< $(echo ${line})
|
16 |
echo "Benchmarking Model: ${backend_model}, Task: ${experiment_name}"
|
17 |
now=$(date +%Y-%m-%d-%H-%M-%S)
|
18 |
+
export run_dir="./runs/${experiment_name}/${backend_model}/${now}"
|
19 |
+
mkdir -p $run_dir
|
20 |
+
|
21 |
+
# Let the benchmarking begin!
|
22 |
+
optimum-benchmark --config-name ${experiment_name} --config-dir /optimum-benchmark/examples/energy_star/ backend.model=${backend_model} backend.processor=${backend_model} hydra.run.dir=${run_dir} 2> $run_dir/error.log
|
23 |
|
|
|
24 |
done || {
|
25 |
echo "Error."
|
26 |
+
failed=1
|
27 |
# TODO: Although this works, `curl` appears to run indefinitely because it is recording itself (the logs are recording the curl operation.)
|
28 |
#echo "Using curl to retrieve the space run log."
|
29 |
#logs_name=./runs/logs-${now}.txt
|
|
|
31 |
#python /failed_run.py --run_dir $run_dir --model_name $backend_model --logs_name $logs_name
|
32 |
}
|
33 |
|
34 |
+
if [ -s $run_dir/error.log ]; then
|
35 |
# error.log is not-empty, an error was raised
|
36 |
echo "An error was raised while benchmarking the model..."
|
37 |
python /failed_run.py --run_dir $run_dir --model_name $backend_model
|
38 |
+
# TODO: Is this necessary?
|
39 |
+
# Delete the current run directory so that it is not pushed by create_results.py later
|
40 |
+
rm -rf $run_dir
|
41 |
+
elif [ "$failed" -eq 1 ]; then
|
42 |
+
echo "Failed, but was not able to retrieve error log."
|
43 |
else
|
44 |
+
# The error log file is empty, and we didn't catch an error.
|
45 |
echo "Finished; uploading dataset results"
|
46 |
python /create_results.py ./runs
|
47 |
fi
|