fix bug that mistakenly checks for positive ndts on missing data#782
fix bug that mistakenly checks for positive ndts on missing data#782AlexanderFengler merged 20 commits intomainfrom
Conversation
cpaniaguam
left a comment
There was a problem hiding this comment.
Just a few observations to consider for readability and maintainability:
- Possibly refactor some conditionals and logic for clarity
- Could be worth adjusting logging to defer string formatting
src/hssm/distribution_utils/dist.py
Outdated
| if params_is_reg is not None: | ||
| params_only = True if (not any(params_is_reg)) else False | ||
| else: | ||
| params_only = False |
There was a problem hiding this comment.
| if params_is_reg is not None: | |
| params_only = True if (not any(params_is_reg)) else False | |
| else: | |
| params_only = False | |
| params_only = params_is_reg is not None and not any(params_is_reg) |
| if missing_data and not deadline: | ||
| network = MissingDataNetwork.CPN | ||
| elif not missing_data and deadline: | ||
| elif missing_data and deadline: |
There was a problem hiding this comment.
There is a change in logic here. Is it intentional?
src/hssm/hssm.py
Outdated
| if self.model_config.backend != "pytensor": | ||
| missing_data_callable = make_missing_data_callable( | ||
| self.loglik_missing_data, "jax", params_is_reg, params_only | ||
| self.loglik_missing_data, "jax", params_is_reg, None | ||
| ) | ||
| else: | ||
| missing_data_callable = make_missing_data_callable( | ||
| self.loglik_missing_data, | ||
| self.model_config.backend, | ||
| params_is_reg, | ||
| None, |
There was a problem hiding this comment.
Why not determine the backend first and keep a single call to make_missing_data_callable? It could look like this:
backend = "jax" if self.model_config.backend != "pytensor" else self.model_config.backend
missing_data_callable = make_missing_data_callable(
self.loglik_missing_data,
backend,
params_is_reg,
None,
)| _logger.info( | ||
| f"Re-arranging data to put split missing and observed datapoints. " | ||
| f"Missing data (rt == {self.missing_data_value}) will be on top, " | ||
| f"observed datapoints follow." | ||
| ) |
There was a problem hiding this comment.
| _logger.info( | |
| f"Re-arranging data to put split missing and observed datapoints. " | |
| f"Missing data (rt == {self.missing_data_value}) will be on top, " | |
| f"observed datapoints follow." | |
| ) | |
| _logger.info( | |
| "Re-arranging data to put split missing and observed datapoints. " | |
| "Missing data (rt == %s) will be on top, observed datapoints follow.", | |
| self.missing_data_value | |
| ) |
digicosmos86
left a comment
There was a problem hiding this comment.
LGTM! I have forgotten what conversations we had about go-nogo and how to branch different use cases, but as far as this PR goes, this looks good
|
|
||
| @pytest.mark.slow | ||
| @pytest.mark.parametrize(parameter_names, parameter_grid) | ||
| @pytest.mark.parametrize(PARAMETER_NAMES, PARAMETER_GRID) |
There was a problem hiding this comment.
This is more a stylistic choice, but why we are using all caps for variables that are not constants?
There was a problem hiding this comment.
hm from my perspective these are global constants, defined up top.
weird case.
tests/test_data_sanity.py
Outdated
| match="You have no missing data in your dataset, " | ||
| + "which is not allowed when `missing_data` or `deadline` is set to " | ||
| + "True.", | ||
| match=r"Missing data is provided as True, " |
There was a problem hiding this comment.
Maybe use missing_data to signify that it is an argument?
| ) | ||
| with pytest.raises( | ||
| ValueError, | ||
| match="Missing data provided as False. \n" |
This fixes an issue where we mistakenly check for
ndt - rt > 0for missing data, which wrongly set the log likelihood to the lower bound in those cases.