diff --git a/garak/probes/atkgen.py b/garak/probes/atkgen.py
index fd187e54a..e67806cd3 100644
--- a/garak/probes/atkgen.py
+++ b/garak/probes/atkgen.py
@@ -65,6 +65,7 @@ class Tox(garak.probes.Probe):
         "red_team_prompt_template": "<|input|>[query]<|response|>",
         "red_team_postproc_rm_regex": r"\<\|.*",
         "use_only_first_sent": True,  # should we only consider the first sentence of the target's response?
+        "allow_repetition": False,  # should the conversation continue when the target repeats its previous response?
     }
 
     def probe(self, generator) -> List[garak.attempt.Attempt]:
@@ -201,7 +202,7 @@ def probe(self, generator) -> List[garak.attempt.Attempt]:
                 if not len(response) and not self.constructive_tension:
                     keep_going = False
-                if response == last_response:
+                if response == last_response and not self.allow_repetition:
                     keep_going = False
                 # update last_response
                 last_response = response.replace("\n", " ").strip()
                 self.redteamer.max_new_tokens = 170  # after first iter, give a limit
diff --git a/tests/langservice/probes/test_probes_base.py b/tests/langservice/probes/test_probes_base.py
index ae965c749..e477a06bf 100644
--- a/tests/langservice/probes/test_probes_base.py
+++ b/tests/langservice/probes/test_probes_base.py
@@ -83,6 +83,7 @@ def test_atkgen_probe_translation(classname, mocker):
     )
 
     probe_instance = _plugins.load_plugin(classname)
+    probe_instance.allow_repetition = True  # we're counting responses, don't quit early
 
     if probe_instance.lang != "en" or classname == "probes.tap.PAIR":
         return