
Train - optimizers and callback error #1

@savi8sant8s

Description

Hello everyone. Amazing article, congratulations. I would like to evaluate MrT5's performance on a spelling-correction task, which I have already done with ByT5.

I am running into a problem during training: the optimizers argument reaches the base transformers Trainer as None. After commenting out the relevant lines directly in the transformers library, the next error I hit concerns the callbacks argument. Could you point me to where I am going wrong? I appreciate your attention. The code and errors are below:

import torch
 
from transformers import (
    ByT5Tokenizer,
    Adafactor
)
from datasets import load_dataset
from models.modeling_mrt5 import MrT5ForConditionalGeneration, MrT5Config
from training.trainer import MrT5Trainer, MrT5TrainingArguments

dataset = load_dataset('csv', data_files='combined_prompts.csv')
dataset_full = dataset['train'].train_test_split(shuffle=True, test_size=0.1)
dataset_train, dataset_valid = dataset_full['train'], dataset_full['test']

model_name = 'google/byt5-small'
tokenizer = ByT5Tokenizer.from_pretrained(model_name)

def preprocess_function(examples):
    input = examples['input']
    output = examples['output']
    model_inputs = tokenizer(
        input, 
        max_length=200,
        truncation=True,
        padding='max_length'
    )
    with tokenizer.as_target_tokenizer():
        labels = tokenizer(
            output, 
            max_length=200,
            truncation=True,
            padding='max_length'
        )
    model_inputs["labels"] = labels["input_ids"]
    return model_inputs

tokenized_train = dataset_train.map(
    preprocess_function, 
    batched=True,
    num_proc=8
)
tokenized_valid = dataset_valid.map(
    preprocess_function, 
    batched=True,
    num_proc=8
)

config = MrT5Config(
    train_language='pt',
    eval_language='pt',
    vocab_size=tokenizer.vocab_size,
    feed_forward_proj="gated-gelu",
    deletion_type="scaled_sigmoid"
)
config.has_absolute_position_embeddings = False
model = MrT5ForConditionalGeneration._from_config(config)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

total_params = sum(p.numel() for p in model.parameters())
print(f"{total_params:,} total parameters.")
total_trainable_params = sum(
    p.numel() for p in model.parameters() if p.requires_grad)
print(f"{total_trainable_params:,} training parameters.")

out_dir = 'mrt5_sc'
batch_size = 8
epochs = 50

training_args = MrT5TrainingArguments(
    output_dir=out_dir,               
    num_train_epochs=epochs,              
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    gradient_accumulation_steps=2,
    warmup_steps=500,                
    weight_decay=0.01,               
    logging_dir=out_dir,            
    logging_steps=10,
    evaluation_strategy='epoch',                
    save_strategy='epoch',
    report_to='tensorboard',
    learning_rate=2e-5,
    dataloader_num_workers=2,
)

optimizer = Adafactor(model.parameters(), relative_step=True, warmup_init=True)
scheduler = None

trainer = MrT5Trainer(
    model=model,                         
    args=training_args,                  
    train_dataset=tokenized_train,       
    eval_dataset=tokenized_valid,
    optimizers=(optimizer, scheduler),
    callbacks=[],
)
history = trainer.train()

model.save_pretrained(out_dir)
tokenizer.save_pretrained(out_dir)
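
One note on the optimizer setup above, in case it is relevant: as I understand the transformers Adafactor (general transformers behavior, nothing MrT5-specific), relative_step=True makes Adafactor compute its own learning rate internally, so scheduler = None is intentional here. If a scheduler object were needed (e.g., for logging), transformers ships AdafactorSchedule as a proxy:

from transformers.optimization import Adafactor, AdafactorSchedule

# With relative_step=True, lr must be left as None; Adafactor derives the
# step size itself, and AdafactorSchedule just mirrors it for logging.
optimizer = Adafactor(model.parameters(), relative_step=True, warmup_init=True, lr=None)
scheduler = AdafactorSchedule(optimizer)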

Error:
Traceback (most recent call last):
  File "/home/savio/Documents/mestrado/mrt5/ft.py", line 91, in <module>
    trainer = MrT5Trainer(
  File "/home/savio/Documents/mestrado/mrt5/training/trainer.py", line 163, in __init__
    super().__init__(
  File "/home/savio/Documents/mestrado/mrt5/training/trainer.py", line 48, in __init__
    super().__init__(
  File "/home/savio/.local/lib/python3.10/site-packages/transformers/utils/deprecation.py", line 165, in wrapped_func
    return func(*args, **kwargs)
  File "/home/savio/.local/lib/python3.10/site-packages/transformers/trainer.py", line 632, in __init__
    self.optimizer, self.lr_scheduler = optimizers
TypeError: cannot unpack non-iterable NoneType object
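
For reference, the unpacking at transformers/trainer.py line 632 only works if optimizers is a 2-tuple, which is why the stock Trainer defaults it to (None, None) rather than None. A minimal sketch of how I understand a subclass has to forward these kwargs (MyTrainer is hypothetical, for illustration only; I do not know the actual MrT5Trainer signature):

from transformers import Trainer

class MyTrainer(Trainer):  # hypothetical subclass, not the real MrT5Trainer
    def __init__(self, *args, optimizers=(None, None), callbacks=None, **kwargs):
        # The base Trainer unpacks optimizers as a 2-tuple, so the default
        # must be (None, None) and the caller's kwarg must be passed through.
        super().__init__(*args, optimizers=optimizers, callbacks=callbacks, **kwargs)

Since the base class receives None even though I pass optimizers=(optimizer, scheduler), my guess is that MrT5Trainer's __init__ is not forwarding the kwarg.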

Error (after commenting out the optimizers logic inside the transformers library):

Traceback (most recent call last):
  File "/home/savio/Documents/mestrado/mrt5/ft.py", line 91, in <module>
    trainer = MrT5Trainer(
  File "/home/savio/Documents/mestrado/mrt5/training/trainer.py", line 163, in __init__
    super().__init__(
  File "/home/savio/Documents/mestrado/mrt5/training/trainer.py", line 48, in __init__
    super().__init__(
  File "/home/savio/.local/lib/python3.10/site-packages/transformers/utils/deprecation.py", line 165, in wrapped_func
    return func(*args, **kwargs)
  File "/home/savio/.local/lib/python3.10/site-packages/transformers/trainer.py", line 664, in __init__
    callbacks = default_callbacks if callbacks is None else default_callbacks + callbacks
TypeError: can only concatenate list (not "tuple") to list
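
Here the base Trainer computes default_callbacks + callbacks, which requires callbacks to be a list (or None); the traceback shows a tuple arriving even though I pass callbacks=[], so the conversion seems to happen somewhere inside MrT5Trainer. As a possible workaround (a sketch under the assumption that MrT5Trainer otherwise behaves like the stock Trainer, whose create_optimizer/create_scheduler only build defaults when the attributes are None), I could drop both kwargs and attach the optimizer after construction:

# Hypothetical workaround, assuming stock Trainer attribute behavior:
trainer = MrT5Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_valid,
)
# Used as-is because Trainer.create_optimizer only builds an optimizer
# when self.optimizer is None (and likewise for the scheduler).
trainer.optimizer = optimizer
trainer.lr_scheduler = scheduler

If construction still fails without the kwargs, the defaults inside MrT5Trainer itself (optimizers defaulting to None, callbacks ending up a tuple) would presumably need patching to (None, None) and None.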
