Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions docs/concepts/function-modifiers.rst
Original file line number Diff line number Diff line change
Expand Up @@ -142,8 +142,8 @@ The ``@check_output`` function modifiers are applied on the **node output / func

In the future, validation capabilities may be added to ``@schema``. For now, it only adds metadata.

@check_output
~~~~~~~~~~~~~
@check_output*
~~~~~~~~~~~~~~

The ``@check_output`` decorator implements many data checks for Python objects and DataFrame/Series, including data type, min/max/between, count, fraction of null/nan values, and allow null/nan. Failed checks are either logged (``importance="warn"``) or make the dataflow fail (``importance="fail"``).

Expand All @@ -162,6 +162,7 @@ The next snippet checks if the returned Series is of type ``np.int32``, which is

- To see all available validators, go to the file ``hamilton/data_quality/default_validators.py`` and view the variable ``AVAILABLE_DEFAULT_VALIDATORS``.
- The function modifier ``@check_output_custom`` allows you to define your own validator. Validators inherit the ``base.BaseDefaultValidator`` class and are essentially standardized Hamilton node definitions (instead of functions). See ``hamilton/data_quality/default_validators.py`` or reach out on `Slack <https://join.slack.com/t/hamilton-opensource/shared_invite/zt-1bjs72asx-wcUTgH7q7QX1igiQ5bbdcg>`_ for help!
- Note: ``@check_output_custom`` decorators cannot be stacked, but they instead can take multiple validators.

.. note::

Expand Down
13 changes: 13 additions & 0 deletions hamilton/function_modifiers/validation.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import abc
from collections import defaultdict
from typing import Any, Callable, Collection, Dict, List, Type

from hamilton import node
Expand Down Expand Up @@ -38,13 +39,21 @@ def transform_node(
validators = self.get_validators(node_)
validator_nodes = []
validator_name_map = {}
validator_name_count = defaultdict(int)
for validator in validators:

def validation_function(validator_to_call: dq_base.DataValidator = validator, **kwargs):
result = list(kwargs.values())[0] # This should just have one kwarg
return validator_to_call.validate(result)

validator_node_name = node_.name + "_" + validator.name()
validator_name_count[validator_node_name] = (
validator_name_count[validator_node_name] + 1
)
if validator_name_count[validator_node_name] > 1:
validator_node_name = (
validator_node_name + "_" + str(validator_name_count[validator_node_name] - 1)
)
validator_node = node.Node(
name=validator_node_name, # TODO -- determine a good approach towards naming this
typ=dq_base.ValidationResult,
Expand Down Expand Up @@ -125,6 +134,10 @@ def __init__(self, *validators: dq_base.DataValidator, target_: base.TargetType
4. **Collection[str]**: This will check all nodes specified in the list.

In all likelihood, you *don't* want ``...``, but the others are useful.

Note: you cannot stack `@check_output_custom` decorators. If you want to use multiple custom validators, \
you should pass them all in as arguments to a single `@check_output_custom` decorator.

"""
super(check_output_custom, self).__init__(target=target_)
self.validators = list(validators)
Expand Down
52 changes: 52 additions & 0 deletions tests/function_modifiers/test_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,58 @@ def fn(input: pd.Series) -> pd.Series:
)


def test_check_output_custom_node_transform_duplicate():
    """You should be able to pass in the same validator twice; IRL it would be different args."""
    decorator = check_output_custom(
        SampleDataValidator2(dataset_length=1, importance="warn"),
        SampleDataValidator2(dataset_length=1, importance="warn"),
    )

    def fn(input: pd.Series) -> pd.Series:
        return input

    base_node = node.Node.from_fn(fn)
    transformed = decorator.transform_node(base_node, config={}, fn=fn)
    # raw node + final node + one validator node per validator
    assert len(transformed) == 4
    nodes_by_name = {n.name: n for n in transformed}
    # The second (duplicate-named) validator node gets a numeric suffix.
    # TODO -- change when we change the naming scheme
    assert sorted(nodes_by_name) == [
        "fn",
        "fn_dummy_data_validator_2",
        "fn_dummy_data_validator_2_1",
        "fn_raw",
    ]
    assert nodes_by_name["fn_raw"].input_types["input"][1] == DependencyType.REQUIRED
    # Three dependencies -- the two with DQ + the original
    assert len(nodes_by_name["fn"].input_types) == 3
    dq_nodes = [
        candidate
        for candidate in nodes_by_name.values()
        if candidate.tags.get("hamilton.data_quality.contains_dq_results", False)
    ]
    assert len(dq_nodes) == 2  # One for each validator
    validator_node = dq_nodes[0]
    # Validates that all the required tags are included
    assert validator_node.tags.get(IS_DATA_VALIDATOR_TAG) is True
    assert validator_node.tags.get(DATA_VALIDATOR_ORIGINAL_OUTPUT_TAG) == "fn"

    # The final function should take in everything but only use the raw results
    final_result = nodes_by_name["fn"].callable(
        fn_raw="test",
        fn_dummy_data_validator_2=ValidationResult(True, "", {}),
        fn_dummy_data_validator_2_1=ValidationResult(True, "", {}),
    )
    assert final_result == "test"


def test_check_output_custom_node_transform_raises_exception_with_failure():
decorator = check_output_custom(
SampleDataValidator2(dataset_length=1, importance="fail"),
Expand Down