Conversation
|
|
||
| # pylint: disable=super-init-not-called | ||
| def __init__(self, experiment, executor=None, heartbeat=None): | ||
| # Do not call super here; we do not want to instantiate the producer |
There was a problem hiding this comment.
Maybe the producer creation in base ExperimentClient should be moved to ExperimentClient.create_experiment?
| self.storage_client = None | ||
| self.plot = PlotAccessor(self) | ||
|
|
||
| def to_pandas(self, with_evc_tree=False): |
There was a problem hiding this comment.
Why would it not be supported? It's just a read operation.
| # Storage REST API | ||
| # | ||
|
|
||
| def insert(self, params, results=None, reserve=False): |
There was a problem hiding this comment.
It's not great that insert behaves differently. I did not look at the workon module but perhaps that is where insert should be, as making insert(params, results=None, reserve=True) is a use-case for working on a trial.
There was a problem hiding this comment.
Isn't the algo running on the server side generating the parameters ?
Workon is only fetching+reserving the trials.
The only use case for inserting from the API is if a user wants to manually add a trial to be worked on by which ever worker is available ?
|
|
||
| def release(self, trial, status="interrupted"): | ||
| """See `~ExperimentClient.release`""" | ||
| # we probably should not expose this |
There was a problem hiding this comment.
Perhaps, but there should be a way to allow interupting a trial and so far release is the only way using the python API.
|
|
||
|
|
||
| @dataclass | ||
| class RemoteTrial: |
There was a problem hiding this comment.
Maybe this should be turned into a standard dataclass for trials exposed to users. The current version misses some attributes such as the objectives, constraints and stats.
| self.experiment = RemoteExperiment(**payload) | ||
| return self.experiment | ||
|
|
||
| def suggest(self, pool_size: int = 1, experiment_name=None) -> TrialCM: |
There was a problem hiding this comment.
Why should it support passing the experiment name as an argument? Shouldn't the WorkonClientREST be used only together with an existing experiment? After all it's used to replace some_existing_experiment.workon.
| ) | ||
|
|
||
| trials = [] | ||
| for trial in result["trials"]: |
There was a problem hiding this comment.
Why take the time to convert them all if only the first one is returned?
There was a problem hiding this comment.
Only one is ever returned, because that what ExperimentClient does.
But I think that for it might be useful to return more in the case of running a group of worker per machine.
| if not isinstance(results, list): | ||
| raise TypeError("Results need to be a float or a list of values") | ||
|
|
||
| self._post( |
There was a problem hiding this comment.
What happens if an exception occurs remotely during observe. Will this _post raise a RemoteException?
There was a problem hiding this comment.
If on the server the ExperimentClient raises an exception, the server convert the exception as a json message.
The message will be read by the client
- if the exception is part of the allowed exception set, then the exact the same exception that was raised by the ExperimentClient remotely is raised locally
- if the exception is not allowed (i.e internal server error) a RemoteException is rased
The setup allow us to reuse the same tests for ExperimentClient and ExperimentClientREST
and provide usage consistency throughout the clients
| failed = True | ||
| raise e | ||
| finally: | ||
| pacemaker = self._pacemakers.pop(trial.id, None) |
There was a problem hiding this comment.
Looks like this class could have something like ExperimentClient._release_reservation. Similar code is used in observe below.
| payload = self._post( | ||
| "is_done", experiment_name=self.experiment_name, euid=self.experiment_id | ||
| ) | ||
| return payload.get("is_done", True) |
There was a problem hiding this comment.
What could cause the payload to not contain is_done? Would these situations mean that the experiment is indeed done?
There was a problem hiding this comment.
that is just defensive programing
| database_instance=db, | ||
| # Skip setup, this is a shared database | ||
| # we might not have the permissions to do the setup | ||
| # and the setup should already be done anyway |
There was a problem hiding this comment.
When would the setup be done? We would need a script or command to do this when we set in place a shared database.
There was a problem hiding this comment.
Setup is done by IT when the database is created, setup.sh has a template of the code that could be executed to setup the database
|
|
||
| service: ServiceContext # Service config | ||
| username: str # Populated on authentication | ||
| password: str |
There was a problem hiding this comment.
We will need to find a way to authenticate without passing passwords in clear through the REST API.
There was a problem hiding this comment.
It is not, the password is looked up internally by the authentication layer using the API token
| ) | ||
| ) | ||
|
|
||
| def suggest(self, request: RequestContext) -> Dict: |
There was a problem hiding this comment.
This should handle the kind of exceptions than may be raised by suggest: https://github.com/Epistimio/orion/blob/develop/src/orion/client/experiment.py#L550-L565
There was a problem hiding this comment.
They are handled at the top level by the server
| # pylint: disable=protected-access | ||
| trial = storage.get_trial(uid=trial_hash, experiment_uid=client._experiment.id) | ||
|
|
||
| assert trial is not None, "Trial not found" |
There was a problem hiding this comment.
if: raise seems more appropriate here.
There was a problem hiding this comment.
IMO, that error should not occur in normal execution, that is why I put it as an assert.
Standard orion exception gets handled at the top level by the service itself, to provide a uniform RemoteException across the service
| trial = storage.get_trial(uid=trial_hash, experiment_uid=client._experiment.id) | ||
|
|
||
| assert trial is not None, "Trial not found" | ||
| client.observe(trial, results) |
There was a problem hiding this comment.
There are a few possible errors can could occur here: https://github.com/Epistimio/orion/blob/develop/src/orion/client/experiment.py#L628-L636.
There was a problem hiding this comment.
They are handled at the top level by the server
| trial_id = request.data.get("trial_id") | ||
|
|
||
| # pylint: disable=protected-access | ||
| results = storage._db._db["trials"].update_one( |
There was a problem hiding this comment.
Why not use storage.heartbeat? Is it because of the username? Shouldn't it be handled by this PR's modifications in MongoDB?
| if trial is None: | ||
| raise ValueError(f"Trial {trial_hash} does not exist in database.") | ||
|
|
||
| client.release(trial, status) |
There was a problem hiding this comment.
Sorry, is there a mechanism that handles automatically the error and return them in a json format? I guess I missed it. There are also many potential errors that could occur here: https://github.com/Epistimio/orion/blob/develop/src/orion/client/experiment.py#L492-L499.
There was a problem hiding this comment.
Yes, it is in the server, all the exceptions are handled. In fact they have to be because the RemoteClient is tested by the regular ExperimentClient unit tests to make sure the coverage is the same and avoid duplicating the tests
| return dict(status=0, result=dict(trials=[trial])) | ||
|
|
||
| def resume_from_experiment(self, experiment_name): | ||
| """Resume an experiment, create a new ExperimentClient and assign it to a worker""" |
Release 0.2.7rc
…to_dev Merge back master in develop after release
630c99d to
f7f766e
Compare
| pass | ||
|
|
||
|
|
||
| def main( |
There was a problem hiding this comment.
Why not add this in orion.core.cli?
| @@ -0,0 +1,140 @@ | |||
| """Integration testing between the REST service and the client""" | |||
There was a problem hiding this comment.
Could be good to add this under orion.testing. Perhaps in its own submodule there.
No description provided.