Using the cluster_main Decorator
This is an example of how to use the cluster_main() decorator.
import os
import time
import numpy as np
from cluster_utils import cluster_main, exit_for_resume
def fn_to_optimize(*, u, v, w, x, y, sharp_penalty, tuple_input=None):
    """
    Toy objective used to exercise hyperparameter optimization.

    :param u: real variable
    :param v: integer variable living on log-scale
    :param w: integer variable
    :param x: real variable
    :param y: real variable living on log-scale
    :param sharp_penalty: discrete variable; when truthy, 1 is added to the
        result for ``x > 3.20``
    :param tuple_input: optional tuple, only its length enters the result
    :return: value of the objective
    :raises ValueError: at random, in roughly 10 percent of the calls
    """
    # A missing or empty tuple contributes a length of zero.
    n_tuple_items = len(tuple_input) if tuple_input else 0

    # With the 1e-7 offset an input of exactly 0 still has a finite logarithm.
    log_y = np.log(np.abs(y + 1e-7))
    log_v = np.log(np.abs(v + 1e-7))

    assert isinstance(w, int), "w has to be integer"
    assert isinstance(v, int), "v has to be integer"

    # The four squared terms of the objective, summed in a fixed order.
    x_term = (x - 3.14) ** 2
    y_term = (log_y - 2.78) ** 2
    product_term = (u * log_v * w + 1) ** 2
    sum_term = (u + log_v + w - 5 + n_tuple_items) ** 2
    objective = x_term + y_term + product_term + sum_term

    if sharp_penalty and x > 3.20:
        objective = objective + 1

    # Deliberate random failure so that callers see some jobs crash.
    if np.random.rand() < 0.1:
        raise ValueError("10 percent of all jobs die here on purpose")

    return objective
@cluster_main
def main(working_dir, id, **kwargs):  # noqa A002
    """
    Evaluate ``fn_to_optimize`` once, with a simulated checkpoint/resume step.

    All parameters in grid_search.json (fixed ones as well as the ones that
    are searched over) arrive as arguments; here they are collected in
    ``**kwargs`` instead of being spelled out as
    ``main(working_dir, id, fn_args, test_resume, ...)``.

    :param working_dir: directory in which ``result.npy`` is stored
    :param id: job id (not used in the body)
    :param kwargs: must contain ``fn_args`` (keyword arguments for
        ``fn_to_optimize``) and ``test_resume`` (if truthy,
        ``exit_for_resume()`` is called after the checkpoint is written)
    :return: dict with the keys ``result`` and ``noiseless_result``
    """
    objective_kwargs = kwargs["fn_args"]
    exit_after_checkpoint = kwargs["test_resume"]

    # Random delay so that the jobs appear to take some time.
    time.sleep(np.random.randint(0, 10))

    checkpoint_path = os.path.join(working_dir, "result.npy")

    # Small simulation of checkpointing and resuming.
    if not os.path.isfile(checkpoint_path):
        # Nothing to resume from: compute the result and checkpoint it.
        clean_value = fn_to_optimize(**objective_kwargs)
        print(f"save result to {checkpoint_path}")
        np.save(checkpoint_path, clean_value)
        if exit_after_checkpoint:
            exit_for_resume()
    else:
        # A checkpoint exists, so reuse the stored result.
        clean_value = np.load(checkpoint_path)

    noise = 0.5 * np.random.normal()
    return {"result": clean_value + noise, "noiseless_result": clean_value}
# Run the job only when this file is executed as a script. main() is called
# without arguments here; presumably the cluster_main decorator supplies
# working_dir, id and the remaining parameters -- confirm in cluster_utils.
if __name__ == "__main__":
    main()
The corresponding cluster_utils config file:
{
"__import__": "examples/basic/grid_search.json",
"optimization_procedure_name": "decorator_test",
"results_dir": "/is/cluster/work/{$USER}/tmp",
"script_relative_path": "examples/basic/main_with_decorator.py"
}
Note: This example is included in cluster_utils/examples/basic and can be run directly from there.