-
Notifications
You must be signed in to change notification settings - Fork 805
Closed
Labels
bug — Something isn't working
Description
_________________________________________________ test_buff_load_and_transform[buffs.paraphrase.Fast] __________________________________________________
klassname = 'buffs.paraphrase.Fast'
@pytest.mark.parametrize("klassname", BUFFS)
def test_buff_load_and_transform(klassname):
try:
b = _plugins.load_plugin(klassname)
except GarakException:
pytest.skip()
assert isinstance(b, garak.buffs.base.Buff)
a = attempt.Attempt(lang=b.lang)
a.prompt = "I'm just a plain and simple tailor"
> buffed_a = list(b.transform(a)) # unroll the generator
tests/buffs/test_buffs.py:40:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
garak/buffs/paraphrase.py:158: in transform
paraphrases = self._get_response(last_message.text)
garak/buffs/paraphrase.py:134: in _get_response
outputs = self.para_model.generate(
../../anaconda3/envs/garak/lib/python3.12/site-packages/torch/utils/_contextlib.py:116: in decorate_context
return func(*args, **kwargs)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = T5ForConditionalGeneration(
(shared): Embedding(32128, 768)
(encoder): T5Stack(
(embed_tokens): Embedding(3212... (dropout): Dropout(p=0.1, inplace=False)
)
(lm_head): Linear(in_features=768, out_features=32128, bias=False)
)
inputs = tensor([[ 3856, 27111, 10, 27, 31, 51, 131, 3, 9, 6080,
11, 650, 13766, 1]])
generation_config = GenerationConfig {
"decoder_start_token_id": 0,
"diversity_penalty": 3.0,
"eos_token_id": 1,
"max_length": 128...um_beam_groups": 5,
"num_beams": 5,
"num_return_sequences": 5,
"pad_token_id": 0,
"repetition_penalty": 10.0
}
logits_processor = None, stopping_criteria = None, prefix_allowed_tokens_fn = None, synced_gpus = None, assistant_model = None, streamer = None
negative_prompt_ids = None, negative_prompt_attention_mask = None, use_model_defaults = None, custom_generate = None
kwargs = {'diversity_penalty': 3.0, 'max_length': 128, 'no_repeat_ngram_size': 2, 'num_beam_groups': 5, ...}, trust_remote_code = None
generation_mode_kwargs = {'synced_gpus': False}
@torch.no_grad()
def generate(
self,
inputs: Optional[torch.Tensor] = None,
generation_config: Optional[GenerationConfig] = None,
logits_processor: Optional[LogitsProcessorList] = None,
stopping_criteria: Optional[StoppingCriteriaList] = None,
prefix_allowed_tokens_fn: Optional[Callable[[int, torch.Tensor], list[int]]] = None,
synced_gpus: Optional[bool] = None,
assistant_model: Optional["PreTrainedModel"] = None,
streamer: Optional["BaseStreamer"] = None,
negative_prompt_ids: Optional[torch.Tensor] = None,
negative_prompt_attention_mask: Optional[torch.Tensor] = None,
use_model_defaults: Optional[bool] = None,
custom_generate: Optional[Union[str, Callable]] = None,
**kwargs,
) -> Union[GenerateOutput, torch.LongTensor]:
r"""
Generates sequences of token ids for models with a language modeling head.
<Tip warning={true}>
Most generation-controlling parameters are set in `generation_config` which, if not passed, will be set to the
model's default generation configuration. You can override any `generation_config` by passing the corresponding
parameters to generate(), e.g. `.generate(inputs, num_beams=4, do_sample=True)`.
For an overview of generation strategies and code examples, check out the [following
guide](../generation_strategies).
</Tip>
Parameters:
inputs (`torch.Tensor` of varying shape depending on the modality, *optional*):
The sequence used as a prompt for the generation or as model inputs to the encoder. If `None` the
method initializes it with `bos_token_id` and a batch size of 1. For decoder-only models `inputs`
should be in the format of `input_ids`. For encoder-decoder models *inputs* can represent any of
`input_ids`, `input_values`, `input_features`, or `pixel_values`.
generation_config ([`~generation.GenerationConfig`], *optional*):
The generation configuration to be used as base parametrization for the generation call. `**kwargs`
passed to generate matching the attributes of `generation_config` will override them. If
`generation_config` is not provided, the default will be used, which has the following loading
priority: 1) from the `generation_config.json` model file, if it exists; 2) from the model
configuration. Please note that unspecified parameters will inherit [`~generation.GenerationConfig`]'s
default values, whose documentation should be checked to parameterize generation.
logits_processor (`LogitsProcessorList`, *optional*):
Custom logits processors that complement the default logits processors built from arguments and
generation config. If a logit processor is passed that is already created with the arguments or a
generation config an error is thrown. This feature is intended for advanced users.
stopping_criteria (`StoppingCriteriaList`, *optional*):
Custom stopping criteria that complements the default stopping criteria built from arguments and a
generation config. If a stopping criteria is passed that is already created with the arguments or a
generation config an error is thrown. If your stopping criteria depends on the `scores` input, make
sure you pass `return_dict_in_generate=True, output_scores=True` to `generate`. This feature is
intended for advanced users.
prefix_allowed_tokens_fn (`Callable[[int, torch.Tensor], list[int]]`, *optional*):
If provided, this function constraints the beam search to allowed tokens only at each step. If not
provided no constraint is applied. This function takes 2 arguments: the batch ID `batch_id` and
`input_ids`. It has to return a list with the allowed tokens for the next generation step conditioned
on the batch ID `batch_id` and the previously generated tokens `inputs_ids`. This argument is useful
for constrained generation conditioned on the prefix, as described in [Autoregressive Entity
Retrieval](https://huggingface.co/papers/2010.00904).
synced_gpus (`bool`, *optional*):
Whether to continue running the while loop until max_length. Unless overridden, this flag will be set
to `True` if using `FullyShardedDataParallel` or DeepSpeed ZeRO Stage 3 with multiple GPUs to avoid
deadlocking if one GPU finishes generating before other GPUs. Otherwise, defaults to `False`.
assistant_model (`PreTrainedModel`, *optional*):
An assistant model that can be used to accelerate generation. The assistant model must have the exact
same tokenizer. The acceleration is achieved when forecasting candidate tokens with the assistant model
is much faster than running generation with the model you're calling generate from. As such, the
assistant model should be much smaller.
streamer (`BaseStreamer`, *optional*):
Streamer object that will be used to stream the generated sequences. Generated tokens are passed
through `streamer.put(token_ids)` and the streamer is responsible for any further processing.
negative_prompt_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
The negative prompt needed for some processors such as CFG. The batch size must match the input batch
size. This is an experimental feature, subject to breaking API changes in future versions.
negative_prompt_attention_mask (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
Attention_mask for `negative_prompt_ids`.
use_model_defaults (`bool`, *optional*):
When it is `True`, unset parameters in `generation_config` will be set to the model-specific default
generation configuration (`model.generation_config`), as opposed to the global defaults
(`GenerationConfig()`). If unset, models saved starting from `v4.50` will consider this flag to be
`True`.
custom_generate (`str` or `Callable`, *optional*):
One of the following:
- `str` (Hugging Face Hub repository name): runs the custom `generate` function defined at
`custom_generate/generate.py` in that repository instead of the standard `generate` method. The
repository fully replaces the generation logic, and the return type may differ.
- `str` (local repository path): same as above but from a local path, `trust_remote_code` not required.
- `Callable`: `generate` will perform the usual input preparation steps, then call the provided callable to
run the decoding loop.
For more information, see [the docs](../../generation_strategies#custom-generation-methods).
kwargs (`dict[str, Any]`, *optional*):
Ad hoc parametrization of `generation_config` and/or additional model-specific kwargs that will be
forwarded to the `forward` function of the model. If the model is an encoder-decoder model, encoder
specific kwargs should not be prefixed and decoder specific kwargs should be prefixed with *decoder_*.
Return:
[`~utils.ModelOutput`] or `torch.LongTensor`: A [`~utils.ModelOutput`] (if `return_dict_in_generate=True`
or when `config.return_dict_in_generate=True`) or a `torch.LongTensor`.
If the model is *not* an encoder-decoder model (`model.config.is_encoder_decoder=False`), the possible
[`~utils.ModelOutput`] types are:
- [`~generation.GenerateDecoderOnlyOutput`],
- [`~generation.GenerateBeamDecoderOnlyOutput`]
If the model is an encoder-decoder model (`model.config.is_encoder_decoder=True`), the possible
[`~utils.ModelOutput`] types are:
- [`~generation.GenerateEncoderDecoderOutput`],
- [`~generation.GenerateBeamEncoderDecoderOutput`]
"""
# 0. If requested, load an arbitrary generation recipe from the Hub and run it instead
trust_remote_code = kwargs.pop("trust_remote_code", None)
if custom_generate is not None and isinstance(custom_generate, str):
# Get all `generate` arguments in a single variable. Custom functions are responsible for handling them:
# they receive the same inputs as `generate`, with `model` instead of `self` and excluding the arguments to
# trigger the custom generation. They can access to methods from `GenerationMixin` through `model`.
global_keys_to_exclude = {
"self",
"kwargs",
"global_keys_to_exclude",
"trust_remote_code",
"custom_generate",
}
generate_arguments = {key: value for key, value in locals().items() if key not in global_keys_to_exclude}
generate_arguments.update(kwargs)
custom_generate_function = self.load_custom_generate(
custom_generate, trust_remote_code=trust_remote_code, **kwargs
)
return custom_generate_function(model=self, **generate_arguments)
# 1. Handle kwargs, `generation_config`, validate them and obtain generation mode
generation_mode_kwargs = self._extract_generation_mode_kwargs(
custom_generate,
kwargs,
synced_gpus,
assistant_model,
streamer,
)
generation_config, model_kwargs = self._prepare_generation_config(
generation_config, use_model_defaults, **kwargs
)
generation_mode = generation_config.get_generation_mode(assistant_model)
if isinstance(custom_generate, Callable):
decoding_method = custom_generate
else:
# type() required to access the unbound class-level method
> decoding_method = getattr(type(self), GENERATION_MODES_MAPPING[generation_mode])
E AttributeError: type object 'T5ForConditionalGeneration' has no attribute 'transformers-community/group-beam-search'
../../anaconda3/envs/garak/lib/python3.12/site-packages/transformers/generation/utils.py:2386: AttributeError
----------------------------------------------------------------- Captured stdout call -----------------------------------------------------------------
🦾 loading buff: paraphrase.Fast
------------------------------------------------------------------ Captured log call -------------------------------------------------------------------
INFO root:base.py:41 buff init: <garak.buffs.paraphrase.Fast object at 0x7d3aa8af2270>
DEBUG root:huggingface.py:117 Using cpu, based on torch environment evaluation
DEBUG urllib3.connectionpool:connectionpool.py:1049 Starting new HTTPS connection (1): huggingface.co:443
DEBUG urllib3.connectionpool:connectionpool.py:544 https://huggingface.co:443 "HEAD /garak-llm/chatgpt_paraphraser_on_T5_base/resolve/main/config.json HTTP/1.1" 307 0
DEBUG urllib3.connectionpool:connectionpool.py:544 https://huggingface.co:443 "HEAD /api/resolve-cache/models/garak-llm/chatgpt_paraphraser_on_T5_base/96ed40146291e134a1e13829592a63cb7e7c5602/config.json HTTP/1.1" 200 0
DEBUG urllib3.connectionpool:connectionpool.py:544 https://huggingface.co:443 "HEAD /garak-llm/chatgpt_paraphraser_on_T5_base/resolve/main/generation_config.json HTTP/1.1" 307 0
DEBUG urllib3.connectionpool:connectionpool.py:544 https://huggingface.co:443 "HEAD /api/resolve-cache/models/garak-llm/chatgpt_paraphraser_on_T5_base/96ed40146291e134a1e13829592a63cb7e7c5602/generation_config.json HTTP/1.1" 200 0
DEBUG urllib3.connectionpool:connectionpool.py:544 https://huggingface.co:443 "HEAD /garak-llm/chatgpt_paraphraser_on_T5_base/resolve/main/custom_generate/generate.py HTTP/1.1" 404 0
DEBUG urllib3.connectionpool:connectionpool.py:544 https://huggingface.co:443 "HEAD /garak-llm/chatgpt_paraphraser_on_T5_base/resolve/main/tokenizer_config.json HTTP/1.1" 307 0
DEBUG urllib3.connectionpool:connectionpool.py:544 https://huggingface.co:443 "HEAD /api/resolve-cache/models/garak-llm/chatgpt_paraphraser_on_T5_base/96ed40146291e134a1e13829592a63cb7e7c5602/tokenizer_config.json HTTP/1.1" 200 0
DEBUG urllib3.connectionpool:connectionpool.py:544 https://huggingface.co:443 "GET /api/models/garak-llm/chatgpt_paraphraser_on_T5_base/tree/main/additional_chat_templates?recursive=False&expand=False HTTP/1.1" 404 64
=============================================================== short test summary info ================================================================
FAILED tests/buffs/test_buffs.py::test_buff_load_and_transform[buffs.paraphrase.Fast] - AttributeError: type object 'T5ForConditionalGeneration' has no attribute 'transformers-community/group-beam-search'
Reactions are currently unavailable
Metadata
Assignees
Labels
bug — Something isn't working