36 changes: 36 additions & 0 deletions .coveragerc
@@ -0,0 +1,36 @@
# Configuration file to control (pytest) coverage
[run]
# Run branch coverage, too
branch = True

[paths]
source =
src/setfit

[report]
# Regexes for lines to exclude from consideration
exclude_lines =
# Have to re-enable the standard pragma
pragma: no cover

# Don't complain about missing debug-only code:
def __repr__
if self\.debug

# Don't complain if tests don't hit defensive assertion code:
raise AssertionError
raise NotImplementedError

# Don't complain if non-runnable code isn't run:
if 0:
if __name__ == .__main__.:

# Don't complain about abstract methods, they aren't run:
@(abc\.)?abstractmethod

# Ignore TYPE_CHECKING code
if TYPE_CHECKING:

[html]
directory = coverage_report_html
title = SetFit coverage report
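
For context, this is how the exclude_lines patterns behave: any line matching one of these regexes (and, for def/if lines, the block it introduces) is left out of coverage measurement. A minimal, hypothetical illustration, not part of this PR:

from typing import TYPE_CHECKING

if TYPE_CHECKING:  # excluded: matches "if TYPE_CHECKING:"
    from collections import OrderedDict


class Example:
    def __repr__(self):  # excluded: matches "def __repr__"
        return "Example()"

    def validate(self, value: int) -> int:
        if value < 0:
            raise AssertionError("negative value")  # excluded line: matches "raise AssertionError"
        return value


def legacy_helper() -> int:  # pragma: no cover -- excluded: matches "pragma: no cover"
    return 42


if __name__ == "__main__":  # excluded: matches the __main__ regex
    print(Example().validate(1))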
6 changes: 5 additions & 1 deletion setup.cfg
@@ -16,4 +16,8 @@ per-file-ignores =
exclude =
results
scripts/adapet
scripts/tfew

[tool:pytest]
testpaths = tests
addopts = --cov=setfit --durations=10
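
With testpaths = tests and these addopts, a bare pytest run from the repository root is roughly equivalent to pytest --cov=setfit --durations=10 tests: the suite under tests/ runs with coverage of the setfit package (using the .coveragerc above), and the ten slowest tests are reported.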
125 changes: 99 additions & 26 deletions tests/exporters/test_onnx.py
@@ -2,10 +2,14 @@

import numpy as np
import onnxruntime
import pytest
from datasets import Dataset
from transformers import AutoTokenizer

from setfit import SetFitModel
from setfit.data import get_augmented_samples
from setfit.exporters.onnx import export_onnx
from setfit.trainer import SetFitTrainer


def test_export_onnx_sklearn_head():
@@ -15,34 +19,103 @@ def test_export_onnx_sklearn_head():

# Export the sklearn based model
output_path = "model.onnx"
try:
export_onnx(model.model_body, model.model_head, opset=12, output_path=output_path)

# Check that the model was saved.
assert output_path in os.listdir(), "Model not saved to output_path"

# Run inference using the original model.
input_text = ["i loved the spiderman movie!", "pineapple on pizza is the worst 🤮"]
pytorch_preds = model(input_text)

# Run inference using the exported onnx model.
tokenizer = AutoTokenizer.from_pretrained(model_path)
inputs = tokenizer(
input_text,
padding=True,
truncation=True,
return_attention_mask=True,
return_token_type_ids=True,
return_tensors="np",
)
# Map inputs to int64 from int32
inputs = {key: value.astype("int64") for key, value in inputs.items()}

session = onnxruntime.InferenceSession(output_path)

onnx_preds = session.run(None, dict(inputs))[0]

# Compare the results and ensure that we get the same predictions.
assert np.array_equal(onnx_preds, pytorch_preds)

finally:
# Cleanup the model.
os.remove(output_path)


@pytest.mark.skip("ONNX exporting of SetFit model with Torch head not yet supported.")
@pytest.mark.parametrize("out_features", [1, 2, 3])
def test_export_onnx_torch_head(out_features):
"""Test that the exported `ONNX` model returns the same predictions as the original model."""
dataset = Dataset.from_dict(get_augmented_samples("SentEval-CR"))
model_path = "sentence-transformers/paraphrase-albert-small-v2"
model = SetFitModel.from_pretrained(
model_path, use_differentiable_head=True, head_params={"out_features": out_features}
)

trainer = SetFitTrainer(
model=model,
train_dataset=dataset,
eval_dataset=dataset,
num_iterations=15,
column_mapping={"text": "text", "label": "label"},
)
# Train the model in two phases
trainer.freeze() # Freeze the head
trainer.train() # Train only the body
# Unfreeze the head and unfreeze the body -> end-to-end training
trainer.unfreeze(keep_body_frozen=False)
trainer.train(
num_epochs=15,
batch_size=16,
body_learning_rate=1e-5,
learning_rate=1e-2,
l2_weight=0.0,
)

# Export the torch-based model
output_path = "model.onnx"
try:
export_onnx(model.model_body, model.model_head, opset=12, output_path=output_path)

# Check that the model was saved.
assert output_path in os.listdir(), "Model not saved to output_path"

# Run inference using the original model.
input_text = ["i loved the spiderman movie!", "pineapple on pizza is the worst 🤮"]
pytorch_preds = model(input_text)

# Run inference using the exported onnx model.
tokenizer = AutoTokenizer.from_pretrained(model_path)
inputs = tokenizer(
input_text,
padding=True,
truncation=True,
return_attention_mask=True,
return_token_type_ids=True,
return_tensors="np",
)
# Map inputs to int64 from int32
inputs = {key: value.astype("int64") for key, value in inputs.items()}

session = onnxruntime.InferenceSession(output_path)

onnx_preds = session.run(None, dict(inputs))[0]

# Compare the results and ensure that we get the same predictions.
assert np.array_equal(onnx_preds, pytorch_preds)

finally:
# Cleanup the model.
os.remove(output_path)
69 changes: 68 additions & 1 deletion tests/test_data.py
@@ -1,15 +1,18 @@
import string

import numpy as np
import pandas as pd
import pytest
from datasets import Dataset, load_dataset

from setfit.data import (
SAMPLE_SIZES,
SEEDS,
add_templated_examples,
create_fewshot_splits,
create_fewshot_splits_multilabel,
create_samples,
get_augmented_samples,
sample_dataset,
)

@@ -102,9 +105,48 @@ def test_subset_is_smaller_than_sample_size(sample_size):

def test_expected_number_of_splits():
dataset = Dataset.from_pandas(pd.DataFrame({"label": [0] * 50 + [1] * 50}))
num_labels = 2
splits_ds = create_fewshot_splits(dataset, SAMPLE_SIZES)
assert len(splits_ds) == len(SAMPLE_SIZES) * len(SEEDS)

split: Dataset
for idx, split in enumerate(splits_ds.values()):
sample_size = SAMPLE_SIZES[idx // len(SEEDS)]
# The number of rows is limited by 100 due to the size of the original dataset
assert len(split) == min(sample_size * num_labels, len(dataset))


def test_create_fewshot_splits_with_augmentation():
dataset_name = "sst5"
dataset = load_dataset(f"SetFit/{dataset_name}", split="train")
num_labels = len(set(dataset["label"]))
splits_ds = create_fewshot_splits(dataset, SAMPLE_SIZES, add_data_augmentation=True, dataset_name=dataset_name)
assert len(splits_ds) == len(SAMPLE_SIZES) * len(SEEDS)

split: Dataset
for idx, split in enumerate(splits_ds.values()):
sample_size = SAMPLE_SIZES[idx // len(SEEDS)]
# Each split should have sample_size * num_labels * 2 rows:
# for each label we sample `sample_size`, and then we generate
# another `sample_size` samples through augmentation.
assert len(split) == sample_size * num_labels * 2
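# For illustration (hypothetical numbers, not asserted by this test): with sample_size=8
# and the 5 labels of sst5, each augmented split would contain 8 * 5 * 2 = 80 rows.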


def test_create_fewshot_splits_multilabel():
num_samples = 50
dataset = Dataset.from_dict(
{
"text": string.ascii_letters[:50],
"label_one": np.random.randint(2, size=(num_samples,)),
"label_two": np.random.randint(2, size=(num_samples,)),
"label_three": np.random.randint(2, size=(num_samples,)),
}
)
splits_ds = create_fewshot_splits_multilabel(dataset, SAMPLE_SIZES)
assert len(splits_ds) == len(SAMPLE_SIZES) * len(SEEDS)
# We can't safely test the number of rows of each of the splits
# as duplicate samples are removed.


def test_sample_dataset_returns_expected_samples():
num_samples = 2
@@ -130,3 +172,28 @@ def test_sample_dataset_with_unbalanced_ds(unbalanced_dataset):
# has one label with more than `num_samples` entries and another label with just 1 row.
# We sample `num_samples` from the former, and 1 from the latter.
assert ds.num_rows == num_samples + 1


@pytest.mark.parametrize(
"dataset",
[
"emotion",
"ag_news",
"amazon_counterfactual_en",
"SentEval-CR",
"sst5",
"enron_spam",
"tweet_eval_stance_abortion",
"ade_corpus_v2_classification",
],
)
def test_get_augmented_samples(dataset: str):
dataset_dict = get_augmented_samples(dataset)
assert set(dataset_dict.keys()) == {"text", "label"}
assert len(dataset_dict["text"])
assert len(dataset_dict["label"])


def test_get_augmented_samples_negative():
with pytest.raises(ValueError):
get_augmented_samples(None)