-
Notifications
You must be signed in to change notification settings - Fork 805
Closed
Labels
bug — Something isn't working
Description
_________________________________________________ test_buff_load_and_transform[buffs.paraphrase.Fast] __________________________________________________
klassname = 'buffs.paraphrase.Fast'
@pytest.mark.parametrize("klassname", BUFFS)
def test_buff_load_and_transform(klassname):
try:
b = _plugins.load_plugin(klassname)
except GarakException:
pytest.skip()
assert isinstance(b, garak.buffs.base.Buff)
a = attempt.Attempt(lang=b.lang)
a.prompt = "I'm just a plain and simple tailor"
> buffed_a = list(b.transform(a)) # unroll the generator
tests/buffs/test_buffs.py:40:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
garak/buffs/paraphrase.py:158: in transform
paraphrases = self._get_response(last_message.text)
garak/buffs/paraphrase.py:134: in _get_response
outputs = self.para_model.generate(
../../anaconda3/envs/garak/lib/python3.12/site-packages/torch/utils/_contextlib.py:116: in decorate_context
return func(*args, **kwargs)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = T5ForConditionalGeneration(
(shared): Embedding(32128, 768)
(encoder): T5Stack(
(embed_tokens): Embedding(3212... (dropout): Dropout(p=0.1, inplace=False)
)
(lm_head): Linear(in_features=768, out_features=32128, bias=False)
)
inputs = tensor([[ 3856, 27111, 10, 27, 31, 51, 131, 3, 9, 6080,
11, 650, 13766, 1]])
generation_config = GenerationConfig {
"decoder_start_token_id": 0,
"diversity_penalty": 3.0,
"eos_token_id": 1,
"max_length": 128...um_beam_groups": 5,
"num_beams": 5,
"num_return_sequences": 5,
"pad_token_id": 0,
"repetition_penalty": 10.0
}
logits_processor = None, stopping_criteria = None, prefix_allowed_tokens_fn = None, synced_gpus = None, assistant_model = None, streamer = None
negative_prompt_ids = None, negative_prompt_attention_mask = None, use_model_defaults = None, custom_generate = None
kwargs = {'diversity_penalty': 3.0, 'max_length': 128, 'no_repeat_ngram_size': 2, 'num_beam_groups': 5, ...}, trust_remote_code = None
generation_mode_kwargs = {'synced_gpus': False}
@torch.no_grad()
def generate(
self,
inputs: Optional[torch.Tensor] = None,
generation_config: Optional[GenerationConfig] = None,
logits_processor: Optional[LogitsProcessorList] = None,
stopping_criteria: Optional[StoppingCriteriaList] = None,
prefix_allowed_tokens_fn: Optional[Callable[[int, torch.Tensor], list[int]]] = None,
synced_gpus: Optional[bool] = None,
assistant_model: Optional["PreTrainedModel"] = None,
streamer: Optional["BaseStreamer"] = None,
negative_prompt_ids: Optional[torch.Tensor] = None,
negative_prompt_attention_mask: Optional[torch.Tensor] = None,
use_model_defaults: Optional[bool] = None,
custom_generate: Optional[Union[str, Callable]] = None,
**kwargs,
) -> Union[GenerateOutput, torch.LongTensor]:
r"""
Generates sequences of token ids for models with a language modeling head.
<Tip warning={true}>
Most generation-controlling parameters are set in `generation_config` which, if not passed, will be set to the
model's default generation configuration. You can override any `generation_config` by passing the corresponding
parameters to generate(), e.g. `.generate(inputs, num_beams=4, do_sample=True)`.
For an overview of generation strategies and code examples, check out the [following
guide](../generation_strategies).
</Tip>
Parameters:
inputs (`torch.Tensor` of varying shape depending on the modality, *optional*):
The sequence used as a prompt for the generation or as model inputs to the encoder. If `None` the
method initializes it with `bos_token_id` and a batch size of 1. For decoder-only models `inputs`
should be in the format of `input_ids`. For encoder-decoder models *inputs* can represent any of
`input_ids`, `input_values`, `input_features`, or `pixel_values`.
generation_config ([`~generation.GenerationConfig`], *optional*):
The generation configuration to be used as base parametrization for the generation call. `**kwargs`
passed to generate matching the attributes of `generation_config` will override them. If
`generation_config` is not provided, the default will be used, which has the following loading
priority: 1) from the `generation_config.json` model file, if it exists; 2) from the model
configuration. Please note that unspecified parameters will inherit [`~generation.GenerationConfig`]'s
default values, whose documentation should be checked to parameterize generation.
logits_processor (`LogitsProcessorList`, *optional*):
Custom logits processors that complement the default logits processors built from arguments and
generation config. If a logit processor is passed that is already created with the arguments or a
generation config an error is thrown. This feature is intended for advanced users.
stopping_criteria (`StoppingCriteriaList`, *optional*):
Custom stopping criteria that complements the default stopping criteria built from arguments and a
generation config. If a stopping criteria is passed that is already created with the arguments or a
generation config an error is thrown. If your stopping criteria depends on the `scores` input, make
sure you pass `return_dict_in_generate=True, output_scores=True` to `generate`. This feature is
intended for advanced users.
prefix_allowed_tokens_fn (`Callable[[int, torch.Tensor], list[int]]`, *optional*):
If provided, this function constraints the beam search to allowed tokens only at each step. If not
provided no constraint is applied. This function takes 2 arguments: the batch ID `batch_id` and
`input_ids`. It has to return a list with the allowed tokens for the next generation step conditioned
on the batch ID `batch_id` and the previously generated tokens `inputs_ids`. This argument is useful
for constrained generation conditioned on the prefix, as described in [Autoregressive Entity
Retrieval](https://huggingface.co/papers/2010.00904).
synced_gpus (`bool`, *optional*):
Whether to continue running the while loop until max_length. Unless overridden, this flag will be set
to `True` if using `FullyShardedDataParallel` or DeepSpeed ZeRO Stage 3 with multiple GPUs to avoid
deadlocking if one GPU finishes generating before other GPUs. Otherwise, defaults to `False`.
assistant_model (`PreTrainedModel`, *optional*):
An assistant model that can be used to accelerate generation. The assistant model must have the exact
same tokenizer. The acceleration is achieved when forecasting candidate tokens with the assistant model
is much faster than running generation with the model you're calling generate from. As such, the
assistant model should be much smaller.
streamer (`BaseStreamer`, *optional*):
Streamer object that will be used to stream the generated sequences. Generated tokens are passed
through `streamer.put(token_ids)` and the streamer is responsible for any further processing.
negative_prompt_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
The negative prompt needed for some processors such as CFG. The batch size must match the input batch
size. This is an experimental feature, subject to breaking API changes in future versions.
negative_prompt_attention_mask (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
Attention_mask for `negative_prompt_ids`.
use_model_defaults (`bool`, *optional*):
When it is `True`, unset parameters in `generation_config` will be set to the model-specific default
generation configuration (`model.generation_config`), as opposed to the global defaults
(`GenerationConfig()`). If unset, models saved starting from `v4.50` will consider this flag to be
`True`.
custom_generate (`str` or `Callable`, *optional*):
One of the following:
- `str` (Hugging Face Hub repository name): runs the custom `generate` function defined at
`custom_generate/generate.py` in that repository instead of the standard `generate` method. The
repository fully replaces the generation logic, and the return type may differ.
- `str` (local repository path): same as above but from a local path, `trust_remote_code` not required.
- `Callable`: `generate` will perform the usual input preparation steps, then call the provided callable to
run the decoding loop.
For more information, see [the docs](../../generation_strategies#custom-generation-methods).
kwargs (`dict[str, Any]`, *optional*):
Ad hoc parametrization of `generation_config` and/or additional model-specific kwargs that will be
forwarded to the `forward` function of the model. If the model is an encoder-decoder model, encoder
specific kwargs should not be prefixed and decoder specific kwargs should be prefixed with *decoder_*.
Return:
[`~utils.ModelOutput`] or `torch.LongTensor`: A [`~utils.ModelOutput`] (if `return_dict_in_generate=True`
or when `config.return_dict_in_generate=True`) or a `torch.LongTensor`.
If the model is *not* an encoder-decoder model (`model.config.is_encoder_decoder=False`), the possible
[`~utils.ModelOutput`] types are:
- [`~generation.GenerateDecoderOnlyOutput`],
- [`~generation.GenerateBeamDecoderOnlyOutput`]
If the model is an encoder-decoder model (`model.config.is_encoder_decoder=True`), the possible
[`~utils.ModelOutput`] types are:
- [`~generation.GenerateEncoderDecoderOutput`],
- [`~generation.GenerateBeamEncoderDecoderOutput`]
"""
# 0. If requested, load an arbitrary generation recipe from the Hub and run it instead
trust_remote_code = kwargs.pop("trust_remote_code", None)
if custom_generate is not None and isinstance(custom_generate, str):
# Get all `generate` arguments in a single variable. Custom functions are responsible for handling them:
# they receive the same inputs as `generate`, with `model` instead of `self` and excluding the arguments to
# trigger the custom generation. They can access to methods from `GenerationMixin` through `model`.
global_keys_to_exclude = {
"self",
"kwargs",
"global_keys_to_exclude",
"trust_remote_code",
"custom_generate",
}
generate_arguments = {key: value for key, value in locals().items() if key not in global_keys_to_exclude}
generate_arguments.update(kwargs)
custom_generate_function = self.load_custom_generate(
custom_generate, trust_remote_code=trust_remote_code, **kwargs
)
return custom_generate_function(model=self, **generate_arguments)
# 1. Handle kwargs, `generation_config`, validate them and obtain generation mode
generation_mode_kwargs = self._extract_generation_mode_kwargs(
custom_generate,
kwargs,
synced_gpus,
assistant_model,
streamer,
)
generation_config, model_kwargs = self._prepare_generation_config(
generation_config, use_model_defaults, **kwargs
)
generation_mode = generation_config.get_generation_mode(assistant_model)
if isinstance(custom_generate, Callable):
decoding_method = custom_generate
else:
# type() required to access the unbound class-level method
> decoding_method = getattr(type(self), GENERATION_MODES_MAPPING[generation_mode])
E AttributeError: type object 'T5ForConditionalGeneration' has no attribute 'transformers-community/group-beam-search'
../../anaconda3/envs/garak/lib/python3.12/site-packages/transformers/generation/utils.py:2386: AttributeError
----------------------------------------------------------------- Captured stdout call -----------------------------------------------------------------
🦾 loading buff: paraphrase.Fast
------------------------------------------------------------------ Captured log call -------------------------------------------------------------------
INFO root:base.py:41 buff init: <garak.buffs.paraphrase.Fast object at 0x7d3aa8af2270>
DEBUG root:huggingface.py:117 Using cpu, based on torch environment evaluation
DEBUG urllib3.connectionpool:connectionpool.py:1049 Starting new HTTPS connection (1): huggingface.co:443
DEBUG urllib3.connectionpool:connectionpool.py:544 https://huggingface.co:443 "HEAD /garak-llm/chatgpt_paraphraser_on_T5_base/resolve/main/config.json HTTP/1.1" 307 0
DEBUG urllib3.connectionpool:connectionpool.py:544 https://huggingface.co:443 "HEAD /api/resolve-cache/models/garak-llm/chatgpt_paraphraser_on_T5_base/96ed40146291e134a1e13829592a63cb7e7c5602/config.json HTTP/1.1" 200 0
DEBUG urllib3.connectionpool:connectionpool.py:544 https://huggingface.co:443 "HEAD /garak-llm/chatgpt_paraphraser_on_T5_base/resolve/main/generation_config.json HTTP/1.1" 307 0
DEBUG urllib3.connectionpool:connectionpool.py:544 https://huggingface.co:443 "HEAD /api/resolve-cache/models/garak-llm/chatgpt_paraphraser_on_T5_base/96ed40146291e134a1e13829592a63cb7e7c5602/generation_config.json HTTP/1.1" 200 0
DEBUG urllib3.connectionpool:connectionpool.py:544 https://huggingface.co:443 "HEAD /garak-llm/chatgpt_paraphraser_on_T5_base/resolve/main/custom_generate/generate.py HTTP/1.1" 404 0
DEBUG urllib3.connectionpool:connectionpool.py:544 https://huggingface.co:443 "HEAD /garak-llm/chatgpt_paraphraser_on_T5_base/resolve/main/tokenizer_config.json HTTP/1.1" 307 0
DEBUG urllib3.connectionpool:connectionpool.py:544 https://huggingface.co:443 "HEAD /api/resolve-cache/models/garak-llm/chatgpt_paraphraser_on_T5_base/96ed40146291e134a1e13829592a63cb7e7c5602/tokenizer_config.json HTTP/1.1" 200 0
DEBUG urllib3.connectionpool:connectionpool.py:544 https://huggingface.co:443 "GET /api/models/garak-llm/chatgpt_paraphraser_on_T5_base/tree/main/additional_chat_templates?recursive=False&expand=False HTTP/1.1" 404 64
=============================================================== short test summary info ================================================================
FAILED tests/buffs/test_buffs.py::test_buff_load_and_transform[buffs.paraphrase.Fast] - AttributeError: type object 'T5ForConditionalGeneration' has no attribute 'transformers-community/group-beam-search'
Reactions are currently unavailable
Metadata
Assignees
Labels
bug — Something isn't working