diff --git a/.all-contributorsrc b/.all-contributorsrc
index 7118f6ebbc..4aabbcb4af 100644
--- a/.all-contributorsrc
+++ b/.all-contributorsrc
@@ -2583,6 +2583,15 @@
         "design",
         "doc"
       ]
+    },
+    {
+      "login": "fspinna",
+      "name": "Francesco Spinnato",
+      "avatar_url": "https://avatars.githubusercontent.com/u/35352023?v=4",
+      "profile": "https://github.com/fspinna",
+      "contributions": [
+        "code"
+      ]
     }
   ],
   "commitType": "docs"
diff --git a/aeon/transformations/collection/dictionary_based/__init__.py b/aeon/transformations/collection/dictionary_based/__init__.py
index 598f90733a..5f75cc9658 100644
--- a/aeon/transformations/collection/dictionary_based/__init__.py
+++ b/aeon/transformations/collection/dictionary_based/__init__.py
@@ -1,7 +1,8 @@
 """Transformers."""

-__all__ = ["PAA", "SFA", "SFAFast", "SAX"]
+__all__ = ["PAA", "SFA", "SFAFast", "SAX", "BORF"]

+from aeon.transformations.collection.dictionary_based._borf import BORF
 from aeon.transformations.collection.dictionary_based._paa import PAA
 from aeon.transformations.collection.dictionary_based._sax import SAX
 from aeon.transformations.collection.dictionary_based._sfa import SFA
diff --git a/aeon/transformations/collection/dictionary_based/_borf.py b/aeon/transformations/collection/dictionary_based/_borf.py
new file mode 100644
index 0000000000..f1af5bf944
--- /dev/null
+++ b/aeon/transformations/collection/dictionary_based/_borf.py
@@ -0,0 +1,1199 @@
+"""BORF transformer.
+
+Multivariate dictionary-based transformer using the Bag-of-Receptive-Fields transform.
+"""
+
+__maintainer__ = ["fspinna"]
+__all__ = ["BORF"]
+
+import itertools
+import math
+from collections.abc import Sequence
+from typing import Literal, Optional
+
+import numba as nb
+import numpy as np
+from sklearn.base import BaseEstimator, TransformerMixin
+from sklearn.pipeline import FeatureUnion
+from sklearn.pipeline import make_pipeline as make_pipeline_sklearn
+
+from aeon.transformations.collection import BaseCollectionTransformer
+
+
+class BORF(BaseCollectionTransformer):
+    """
+    Bag-of-Receptive-Fields (BORF) Transformer.
+
+    Transforms time series data into a bag-of-receptive-fields representation [1]
+    using SAX. The transformer extracts SAX words from the time series by applying
+    dilated sliding windows, discretizing each window into a word, and counting
+    the occurrences of the resulting words. The output is a sparse feature
+    representation suitable for downstream tasks such as classification and
+    regression.
+
+    Parameters
+    ----------
+    window_size_min_window_size : int, default=4
+        The minimum window size for the sliding window.
+    window_size_max_window_size : int or None, default=None
+        The maximum window size for the sliding window. If None, it is set
+        via a heuristic function based on the time series length.
+    word_lengths_n_word_lengths : int, default=4
+        The number of word lengths used in the SAX representation.
+    alphabets_min_symbols : int, default=3
+        The minimum number of symbols in the alphabet.
+    alphabets_max_symbols : int, default=4
+        The maximum number of symbols in the alphabet.
+    alphabets_step : int, default=1
+        The step size when iterating over the number of symbols in the alphabet.
+    dilations_min_dilation : int, default=1
+        The minimum dilation factor.
+    dilations_max_dilation : int or None, default=None
+        The maximum dilation factor. If None, it is set via a heuristic function
+        based on the time series length.
+    min_window_to_signal_std_ratio : float, default=0.0
+        Minimum ratio between the window and signal standard deviations.
+ If the window standard deviation is lower than this ratio, + the window is considered "flat" and set to all zeros to avoid amplifying noise. + n_jobs : int, default=1 + The number of `IndividualBORF` instances to run in parallel. + n_jobs_numba : int, default=1 + The number of threads used for parallelizing each + configuration inside each `IndividualBORF`. + transformer_weights : array-like or None, default=None + Weights applied to each transformer in the pipeline. + complexity : {'quadratic', 'linear'}, default='quadratic' + The computational complexity mode: + - `'quadratic'`: Higher accuracy with more computational cost. + - `'linear'`: Faster computations with potentially lower accuracy. + densify : bool, default=False + If True, converts the output to a dense array. + By default, the output is returned as a scipy sparse matrix. + + + Attributes + ---------- + pipe_ : sklearn.pipeline.Pipeline + The internal pipeline used for transforming the data. + configs_ : dict + Configuration parameters used in the pipeline. + + References + ---------- + .. [1] F. Spinnato, R. Guidotti, A. Monreale and M. Nanni, + "Fast, Interpretable and Deterministic Time Series Classification + with a Bag-Of-Receptive-Fields," in IEEE Access, + doi: 10.1109/ACCESS.2024.3464743 + + Examples + -------- + >>> from aeon.transformations.collection.dictionary_based import BORF + >>> from aeon.datasets import load_unit_test + >>> X, _ = load_unit_test() + >>> borf = BORF() # doctest: +SKIP + >>> borf.fit(X) # doctest: +SKIP + BORF() + >>> X_transformed = borf.transform(X) # doctest: +SKIP + + """ + + _tags = { + "X_inner_type": "numpy3D", + "capability:inverse_transform": False, + "capability:missing_values": True, + "capability:multivariate": True, + "capability:multithreading": True, + "input_data_type": "Collection", + "algorithm_type": "dictionary", + "output_data_type": "Tabular", + "requires_y": False, + "python_dependencies": "sparse", + "cant_pickle": True, + "non_deterministic": True, + } + + def __init__( + self, + window_size_min_window_size=4, + window_size_max_window_size=None, + word_lengths_n_word_lengths=4, + alphabets_min_symbols=3, + alphabets_max_symbols=4, + alphabets_step=1, + dilations_min_dilation=1, + dilations_max_dilation=None, + min_window_to_signal_std_ratio: float = 0.0, + n_jobs=1, + n_jobs_numba=1, + transformer_weights=None, + complexity: Literal["quadratic", "linear"] = "quadratic", + densify=False, + ): + self.window_size_min_window_size = window_size_min_window_size + self.window_size_max_window_size = window_size_max_window_size + self.word_lengths_n_word_lengths = word_lengths_n_word_lengths + self.alphabets_min_symbols = alphabets_min_symbols + self.alphabets_max_symbols = alphabets_max_symbols + self.alphabets_step = alphabets_step + self.dilations_min_dilation = dilations_min_dilation + self.dilations_max_dilation = dilations_max_dilation + self.min_window_to_signal_std_ratio = min_window_to_signal_std_ratio + self.n_jobs = n_jobs + self.n_jobs_numba = n_jobs_numba + self.transformer_weights = transformer_weights + self.complexity = complexity + self.densify = densify + super().__init__() + + def _fit(self, X, y=None): + time_series_length = X.shape[2] + # for better computation time, this should be moved to the init, + # setting time_series_length as a user parameter + + pipeline_objects = [ + (_ReshapeTo2D, dict()), + (_ZeroColumnsRemover, dict()), + (_ToScipySparse, dict()), + ] + if self.densify: + pipeline_objects.append((_ToDense, dict())) + + self.pipe_, 
self.configs_ = _build_pipeline_auto( + time_series_min_length=time_series_length, + time_series_max_length=time_series_length, + window_size_min_window_size=self.window_size_min_window_size, + window_size_max_window_size=self.window_size_max_window_size, + word_lengths_n_word_lengths=self.word_lengths_n_word_lengths, + alphabets_min_symbols=self.alphabets_min_symbols, + alphabets_max_symbols=self.alphabets_max_symbols, + alphabets_step=self.alphabets_step, + dilations_min_dilation=self.dilations_min_dilation, + dilations_max_dilation=self.dilations_max_dilation, + min_window_to_signal_std_ratio=self.min_window_to_signal_std_ratio, + n_jobs=self.n_jobs, + n_jobs_numba=self.n_jobs_numba, + transformer_weights=self.transformer_weights, + pipeline_objects=pipeline_objects, + complexity=self.complexity, + ) + self.pipe_.fit(X) + return self + + def _transform(self, X, y=None): + return self.pipe_.transform(X) + + @classmethod + def get_test_params(cls, parameter_set="default"): + """Return testing parameter settings for the estimator. + + Parameters + ---------- + parameter_set : str, default="default" + Name of the set of test parameters to return, for use in tests. If no + special parameters are defined for a value, will return the "default" set. + + Returns + ------- + dict or list of dict + Parameters to create testing instances of the class. + Each dict contains parameters to construct an "interesting" test instance, + i.e., `MyClass(**params)` or `MyClass(**params[i])` creates a valid test + instance. `create_test_instance` uses the first (or only) dictionary in + `params`. + """ + params = [ + { + "window_size_min_window_size": 4, + "window_size_max_window_size": None, + "word_lengths_n_word_lengths": 4, + "alphabets_min_symbols": 3, + "alphabets_max_symbols": 4, + "alphabets_step": 1, + "dilations_min_dilation": 1, + "dilations_max_dilation": None, + "min_window_to_signal_std_ratio": 0.0, + "n_jobs": 1, + "n_jobs_numba": 1, + "transformer_weights": None, + "complexity": "quadratic", + "densify": False, + }, + { + "window_size_min_window_size": 4, + "window_size_max_window_size": None, + "word_lengths_n_word_lengths": 4, + "alphabets_min_symbols": 3, + "alphabets_max_symbols": 4, + "alphabets_step": 1, + "dilations_min_dilation": 1, + "dilations_max_dilation": None, + "min_window_to_signal_std_ratio": 0.0, + "n_jobs": 1, + "n_jobs_numba": 1, + "transformer_weights": None, + "complexity": "quadratic", + "densify": True, + }, + ] + return params + + +def _build_pipeline_auto( + time_series_min_length: int, + time_series_max_length: int, + window_size_min_window_size=4, + window_size_max_window_size=None, + word_lengths_n_word_lengths=4, + alphabets_min_symbols=3, + alphabets_max_symbols=4, + alphabets_step=1, + dilations_min_dilation=1, + dilations_max_dilation=None, + min_window_to_signal_std_ratio: float = 0.0, + n_jobs=1, + n_jobs_numba=1, + transformer_weights=None, + pipeline_objects: Optional[Sequence[tuple]] = None, + complexity: Literal["quadratic", "linear"] = "quadratic", +): + configs = _heuristic_function_sax( + time_series_min_length=time_series_min_length, + time_series_max_length=time_series_max_length, + window_size_min_window_size=window_size_min_window_size, + window_size_max_window_size=window_size_max_window_size, + word_lengths_n_word_lengths=word_lengths_n_word_lengths, + alphabets_min_symbols=alphabets_min_symbols, + alphabets_max_symbols=alphabets_max_symbols, + alphabets_step=alphabets_step, + dilations_min_dilation=dilations_min_dilation, + 
dilations_max_dilation=dilations_max_dilation, + complexity=complexity, + ) + + return ( + _build_pipeline( + configs=configs, + min_window_to_signal_std_ratio=min_window_to_signal_std_ratio, + n_jobs=n_jobs, + n_jobs_numba=n_jobs_numba, + transformer_weights=transformer_weights, + pipeline_objects=pipeline_objects, + ), + configs, + ) + + +def _build_pipeline( + configs, + min_window_to_signal_std_ratio: float = 0.0, + n_jobs_numba=1, + n_jobs=1, + transformer_weights=None, + pipeline_objects: Optional[Sequence[tuple]] = None, +): + transformers = list() + if pipeline_objects is None: + pipeline_objects = list() + for config in configs: + # alphabet_size, window_size, word_length, dilation, stride + borf = IndividualBORF( + **config, + min_window_to_signal_std_ratio=min_window_to_signal_std_ratio, + n_jobs=n_jobs_numba, + ) + transformer = make_pipeline_sklearn( + borf, *[obj(**kwargs) for obj, kwargs in pipeline_objects] + ) + transformers.append(transformer) + union = FeatureUnion( + transformer_list=[(str(i), transformers[i]) for i in range(len(transformers))], + n_jobs=n_jobs, + transformer_weights=transformer_weights, + ) + return union + + +class IndividualBORF(BaseEstimator, TransformerMixin): + def __init__( + self, + window_size=4, + dilation=1, + alphabet_size: int = 3, + word_length: int = 2, + stride: int = 1, + min_window_to_signal_std_ratio: float = 0.0, + n_jobs: int = 1, + prefix="", + ): + self.window_size = window_size + self.dilation = dilation + self.word_length = word_length + self.stride = stride + self.alphabet_size = alphabet_size + self.min_window_to_signal_std_ratio = min_window_to_signal_std_ratio + self.prefix = prefix + self.n_jobs = n_jobs + self.n_words = _convert_to_base_10( + _array_to_int(np.full(self.word_length, self.alphabet_size - 1)) + 1, + base=self.alphabet_size, + ) + _set_n_jobs_numba(n_jobs=self.n_jobs) + + self.feature_names_in_ = None + self.n_features_in_ = None + + def fit(self, X, y=None): + return self + + def transform(self, X, y=None): + import sparse # FIXME: can we move this outside for better performance? 
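+        # The local import keeps ``sparse`` a soft dependency (declared via the
+        # "python_dependencies" tag on BORF): a module-level import would make
+        # this module fail to import whenever ``sparse`` is not installed.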
+ + shape_ = (len(X), len(X[0]), self.n_words) + out = _transform_sax_patterns( + panel=X, + window_size=self.window_size, + dilation=self.dilation, + alphabet_size=self.alphabet_size, + word_length=self.word_length, + stride=self.stride, + min_window_to_signal_std_ratio=self.min_window_to_signal_std_ratio, + ) + # ts_idx, signal_idx, words, count + return sparse.COO(coords=out[:, :3].T, data=out[:, -1].T, shape=shape_) + + +class _ZeroColumnsRemover(BaseEstimator, TransformerMixin): + def __init__(self, axis=0): + self.axis = axis + + self.n_original_columns_ = None + self.columns_to_keep_ = None + + self.feature_names_in_ = None + self.n_features_in_ = None + + def fit(self, X, y=None): + self.n_original_columns_ = X.shape[1] + self.columns_to_keep_ = np.argwhere(X.any(axis=self.axis)).ravel() + return self + + def transform(self, X): + return X[..., self.columns_to_keep_] + + +class _ReshapeTo2D(BaseEstimator, TransformerMixin): + def __init__(self, keep_unraveled_index=False): + self.keep_unraveled_index = keep_unraveled_index + # shape: (n_flattened_features, 2) -> flattened index -> (dimension, word) + self.unraveled_index_ = None + self.original_shape_ = None + + self.feature_names_in_ = None + self.n_features_in_ = None + + def fit(self, X, y=None): + self.original_shape_ = X.shape + if self.keep_unraveled_index: + self.unraveled_index_ = np.hstack( + [np.unravel_index(np.arange(np.prod(X.shape[1:])), X.shape[1:])] + ).T + return self + + def transform(self, X): + return X.reshape((X.shape[0], -1)) + + +class _ToScipySparse(BaseEstimator, TransformerMixin): + def __init__(self): + self.feature_names_in_ = None + self.n_features_in_ = None + + def fit(self, X, y=None): + return self + + def transform(self, X): + return X.to_scipy_sparse() + + +class _ToDense(BaseEstimator, TransformerMixin): + def __init__(self): + self.feature_names_in_ = None + self.n_features_in_ = None + + def fit(self, X, y=None): + return self + + def transform(self, X): + return X.toarray() + + +@nb.njit(fastmath=True, cache=True) +def _convert_to_base_10(number, base): + result = 0 + multiplier = 1 + + while number > 0: + digit = number % 10 + result += digit * multiplier + multiplier *= base + number //= 10 + + return result + + +@nb.njit(cache=True) +def _array_to_int(arr): + result = 0 + for i in range(len(arr)): + result = result * 10 + arr[i] + return result + + +def _set_n_jobs_numba(n_jobs): + if n_jobs == -1: + nb.set_num_threads(nb.config.NUMBA_DEFAULT_NUM_THREADS) + else: + nb.set_num_threads(n_jobs) + + +@nb.njit(parallel=True, nogil=True, cache=True) +def _transform_sax_patterns( + panel, + window_size, + word_length, + alphabet_size, + stride, + dilation, + min_window_to_signal_std_ratio=0.0, +): + bins = _get_norm_bins(alphabet_size=alphabet_size) + n_signals = len(panel[0]) + n_ts = len(panel) + iterations = n_ts * n_signals + counts = np.zeros(iterations + 1, dtype=np.int64) + for i in nb.prange(iterations): + ts_idx, signal_idx = _ndindex_2d_array(i, n_signals) + signal = np.asarray(panel[ts_idx][signal_idx]) + signal = signal[~np.isnan(signal)] + if not _are_window_size_and_dilation_compatible_with_signal_length( + window_size, dilation, signal.size + ): + continue + counts[i + 1] = len( + _new_transform_single_conf( + a=signal, + ts_idx=ts_idx, + signal_idx=signal_idx, + window_size=window_size, + word_length=word_length, + alphabet_size=alphabet_size, + bins=bins, + dilation=dilation, + stride=stride, + min_window_to_signal_std_ratio=min_window_to_signal_std_ratio, + ) + ) + cum_counts = 
np.cumsum(counts) + n_rows = np.sum(counts) + shape = (n_rows, 4) + out = np.empty(shape, dtype=np.int64) + for i in nb.prange(iterations): + ts_idx, signal_idx = _ndindex_2d_array(i, n_signals) + signal = np.asarray(panel[ts_idx][signal_idx]) + signal = signal[~np.isnan(signal)] + if not _are_window_size_and_dilation_compatible_with_signal_length( + window_size, dilation, signal.size + ): + continue + out_ = _new_transform_single_conf( + a=signal, + ts_idx=ts_idx, + signal_idx=signal_idx, + window_size=window_size, + word_length=word_length, + alphabet_size=alphabet_size, + bins=bins, + dilation=dilation, + stride=stride, + min_window_to_signal_std_ratio=min_window_to_signal_std_ratio, + ) + out[cum_counts[i] : cum_counts[i + 1], :] = out_ + return out + + +@nb.njit(cache=True) +def _new_transform_single_conf( + a, + ts_idx, + signal_idx, + window_size, + word_length, + alphabet_size, + bins, + dilation, + stride=1, + min_window_to_signal_std_ratio=0.0, +): + words, counts = _new_transform_single( + a=a, + window_size=window_size, + word_length=word_length, + alphabet_size=alphabet_size, + bins=bins, + dilation=dilation, + stride=stride, + min_window_to_signal_std_ratio=min_window_to_signal_std_ratio, + ) + ts_idxs = np.full(len(words), ts_idx) + signal_idxs = np.full(len(words), signal_idx) + return np.column_stack((ts_idxs, signal_idxs, words, counts)) + + +@nb.njit(cache=True) +def _new_transform_single( + a, + window_size, + word_length, + alphabet_size, + bins, + dilation, + stride=1, + min_window_to_signal_std_ratio=0.0, +): + sax_words = _sax( + a=a, + window_size=window_size, + word_length=word_length, + bins=bins, + min_window_to_signal_std_ratio=min_window_to_signal_std_ratio, + dilation=dilation, + stride=stride, + ) + sax_words = _sax_words_to_int(sax_words, alphabet_size) + return _unique(sax_words) + + +@nb.njit(cache=True) +def _ndindex_2d_array(idx, dim2_shape): + row_idx = idx // dim2_shape + col_idx = idx % dim2_shape + return row_idx, col_idx + + +@nb.njit(cache=True) +def _get_norm_bins(alphabet_size: int, mu=0, std=1): + return _ppf(np.linspace(0, 1, alphabet_size + 1)[1:-1], mu, std) + + +@nb.njit(fastmath=True, cache=True) +def _erfinv(x: float) -> float: + w = -math.log((1 - x) * (1 + x)) + if w < 5: + w = w - 2.5 + p = 2.81022636e-08 + p = 3.43273939e-07 + p * w + p = -3.5233877e-06 + p * w + p = -4.39150654e-06 + p * w + p = 0.00021858087 + p * w + p = -0.00125372503 + p * w + p = -0.00417768164 + p * w + p = 0.246640727 + p * w + p = 1.50140941 + p * w + else: + w = math.sqrt(w) - 3 + p = -0.000200214257 + p = 0.000100950558 + p * w + p = 0.00134934322 + p * w + p = -0.00367342844 + p * w + p = 0.00573950773 + p * w + p = -0.0076224613 + p * w + p = 0.00943887047 + p * w + p = 1.00167406 + p * w + p = 2.83297682 + p * w + return p * x + + +@nb.vectorize(cache=True) +def _ppf(x, mu=0, std=1): + return mu + math.sqrt(2) * _erfinv(2 * x - 1) * std + + +@nb.njit(fastmath=True, cache=True) +def _are_window_size_and_dilation_compatible_with_signal_length( + window_size, dilation, signal_length +): + if window_size + (window_size - 1) * (dilation - 1) <= signal_length: + return True + else: + return False + + +@nb.njit(cache=True) +def _sax( + a, + window_size, + word_length, + bins, + stride=1, + dilation=1, + min_window_to_signal_std_ratio=0.0, +): + n_windows = _get_n_windows( + sequence_size=a.size, window_size=window_size, dilation=dilation, stride=stride + ) + n_windows_moving = _get_n_windows( + sequence_size=a.size, window_size=window_size, dilation=dilation + 
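+        # note: no stride here; the moving means/stds are computed at every
+        # window position and subsampled via ``i * stride`` when words are built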
) + global_std = np.std(a) + if global_std == 0: + return np.zeros((n_windows, word_length), dtype=np.uint8) + seg_size = window_size // word_length + n_windows = _get_n_windows( + sequence_size=a.size, window_size=window_size, dilation=dilation, stride=stride + ) + n_segments = _get_n_windows( + sequence_size=a.size, window_size=seg_size, dilation=dilation + ) + segment_means = np.full(n_segments, np.nan) + window_means = np.full(n_windows_moving, np.nan) + window_stds = np.full(n_windows_moving, np.nan) + for d in range(dilation): + window_means[d::dilation] = _move_mean(a[d::dilation], window_size)[ + window_size - 1 : + ] + window_stds[d::dilation] = _move_std(a[d::dilation], window_size)[ + window_size - 1 : + ] + segment_means[d::dilation] = _move_mean(a[d::dilation], seg_size)[ + seg_size - 1 : + ] + out = np.zeros((n_windows, word_length)) + for i in range(n_windows): + for j in range(word_length): + out[i, j] = _zscore_threshold( + a=segment_means[(i * stride) + (j * seg_size * dilation)], + mu=window_means[i * stride], + sigma=window_stds[i * stride], + sigma_global=global_std, + sigma_threshold=min_window_to_signal_std_ratio, + ) + return np.digitize(out, bins).astype(np.uint8) + + +@nb.njit(fastmath=True, cache=True) +def _get_n_windows(sequence_size, window_size, dilation=1, stride=1, padding=0): + return 1 + math.floor( + (sequence_size + 2 * padding - window_size - (dilation - 1) * (window_size - 1)) + / stride + ) + + +@nb.njit(fastmath=True, cache=True) +def _zscore_threshold( + a: float, mu: float, sigma: float, sigma_global: float, sigma_threshold: float +) -> float: + if sigma_global == 0: + return 0 + if sigma / sigma_global < sigma_threshold: + return 0 + return _zscore(a=a, mu=mu, sigma=sigma) + + +@nb.njit(fastmath=True, cache=True) +def _zscore(a: float, mu: float, sigma: float) -> float: + if sigma == 0: + return 0 + return (a - mu) / sigma + + +@nb.njit(fastmath=True, cache=True) +def _move_mean(a, window_width): + out = np.empty_like(a) + asum = 0.0 + count = 0 + + # Calculating the initial moving window sum + for i in range(window_width): + asum += a[i] + count += 1 + out[i] = asum / count + + # Moving window + for i in range(window_width, len(a)): + asum += a[i] - a[i - window_width] + out[i] = asum / window_width + return out + + +@nb.njit(fastmath=True, cache=True) +def _move_std(a, window_width, ddof=0): + out = np.empty_like(a, dtype=np.float64) + + # Initial mean and variance calculation + mean = 0.0 + M2 = 0.0 + for i in range(window_width): + delta = a[i] - mean + mean += delta / (i + 1) + delta2 = a[i] - mean + M2 += delta * delta2 + + # Adjusting for degrees of freedom + if window_width - ddof > 0: + variance = M2 / (window_width - ddof) + else: + variance = 0 # Avoid division by zero + + out[window_width - 1] = np.sqrt(max(variance, 0)) + + # Moving window + for i in range(window_width, len(a)): + x0 = a[i - window_width] + xn = a[i] + + new_avg = mean + (xn - x0) / window_width + + # Update the variance using the new degrees of freedom + if window_width - ddof > 0: + new_var = variance + (xn - new_avg + x0 - mean) * (xn - x0) / ( + window_width - ddof + ) + else: + new_var = 0 # Avoid division by zero + + out[i] = np.sqrt(max(new_var, 0)) # TODO: investigate negative variance + + mean = new_avg + variance = new_var + + out[: window_width - 1] = np.nan + + return out + + +@nb.njit(fastmath=True, cache=True) +def _sax_words_to_int(arrays, base): + out = np.empty(arrays.shape[0], dtype=np.int64) + for i in range(arrays.shape[0]): + out[i] = 
_array_to_int_new_base(arrays[i], base) + return out + + +@nb.njit(fastmath=True, cache=True) +def _array_to_int_new_base(array, base): + word_length = array.shape[0] + result = 0 + for i in range(0, word_length, 1): + result += array[i] * base ** (word_length - i - 1) + return result + + +@nb.njit(cache=True) +def _length(a): + a = int(np.ceil(np.log2(a))) + a = 2 << a + return a + + +@nb.njit(cache=True) +def _hash_function(v): + + byte_mask = np.uint64(255) + bs = np.uint64(v) + x1 = (bs) & byte_mask + x2 = (bs >> 8) & byte_mask + x3 = (bs >> 16) & byte_mask + x4 = (bs >> 24) & byte_mask + x5 = (bs >> 32) & byte_mask + x6 = (bs >> 40) & byte_mask + x7 = (bs >> 48) & byte_mask + x8 = (bs >> 56) & byte_mask + + FNV_primer = np.uint64(1099511628211) + FNV_bias = np.uint64(14695981039346656037) + h = FNV_bias + h = h * FNV_primer + h = h ^ x1 + h = h * FNV_primer + h = h ^ x2 + h = h * FNV_primer + h = h ^ x3 + h = h * FNV_primer + h = h ^ x4 + h = h * FNV_primer + h = h ^ x5 + h = h * FNV_primer + h = h ^ x6 + h = h * FNV_primer + h = h ^ x7 + h = h * FNV_primer + h = h ^ x8 + return h + + +@nb.njit(cache=True) +def _make_hash_table(ar): + a = _length(len(ar)) + mask = a - 1 + + uniques = np.empty(a, dtype=ar.dtype) + uniques_cnt = np.zeros(a, dtype=np.int_) + return uniques, uniques_cnt, a, mask + + +@nb.njit(cache=True) +def _set_item(uniques, uniques_cnt, mask, h, v, total, miss_hits, weight): + index = h & mask + while True: + if uniques_cnt[index] == 0: + # insert new + uniques_cnt[index] += weight + uniques[index] = v + total += 1 + break + elif uniques[index] == v: + uniques_cnt[index] += weight + break + else: + miss_hits += 1 + index += 1 + index = index & mask + return total, miss_hits + + +@nb.njit(cache=True) +def _concrete(ar, uniques, uniques_cnt, a, total): + # flush the results in a concrete array + uniques_ = np.empty(total, dtype=ar.dtype) + uniques_cnt_ = np.empty(total, dtype=np.int_) + t = 0 + for i in range(a): + if uniques_cnt[i] > 0: + uniques_[t] = uniques[i] + uniques_cnt_[t] = uniques_cnt[i] + t += 1 + return uniques_, uniques_cnt_ + + +@nb.njit(cache=True) +def _unique(ar): + # https://github.com/lhprojects/blog/blob/master/_posts/JupyterNotebooks/HashUnique.ipynb + uniques, uniques_cnt, l, mask = _make_hash_table(ar) + total = 0 + miss_hits = 0 + for v in ar: + h = _hash_function(v) + total, miss_hits = _set_item( + uniques, uniques_cnt, mask, h, v, total, miss_hits, 1 + ) + uniques_, uniques_cnt_ = _concrete(ar, uniques, uniques_cnt, l, total) + return uniques_, uniques_cnt_ + + +def _get_borf_params( + time_series_min_length, + time_series_max_length, + window_size_min_window_size=4, + window_size_max_window_size=None, + window_size_power=2, + word_lengths_n_word_lengths=4, + strides_n_strides=1, + alphabets_mean_min_symbols=2, + alphabets_mean_max_symbols=3, + alphabets_mean_step=1, + alphabets_slope_min_symbols=3, + alphabets_slope_max_symbols=4, + alphabets_slope_step=1, + dilations_min_dilation=1, + dilations_max_dilation=None, +): + params = {} + params["window_sizes"] = _get_window_sizes( + m_min=time_series_min_length, + m_max=time_series_max_length, + min_window_size=window_size_min_window_size, + max_window_size=window_size_max_window_size, + power=window_size_power, + ).tolist() + + params["word_lengths"] = _get_word_lengths( + n_word_lengths=word_lengths_n_word_lengths, + start=0, + ).tolist() + + params["dilations"] = _get_dilations( + max_length=time_series_max_length, + min_dilation=dilations_min_dilation, + 
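+        # a max_dilation of None falls back to log2(time series length)
+        # inside _get_dilations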
max_dilation=dilations_max_dilation, + ).tolist() + params["strides"] = _get_strides(n_strides=strides_n_strides).tolist() + params["alphabet_sizes_slope"] = _get_alphabet_sizes( + min_symbols=alphabets_slope_min_symbols, + max_symbols=alphabets_slope_max_symbols, + step=alphabets_slope_step, + ).tolist() + params["alphabet_sizes_mean"] = _get_alphabet_sizes( + min_symbols=alphabets_mean_min_symbols, + max_symbols=alphabets_mean_max_symbols, + step=alphabets_mean_step, + ).tolist() + return params + + +@nb.njit(cache=True) +def _is_empty(a) -> bool: + return a.size == 0 + + +def _get_window_sizes(m_min, m_max, min_window_size=4, max_window_size=None, power=2): + if max_window_size is None: + max_window_size = m_max + m = 2 + windows = list() + windows_min = list() + while m <= max_window_size: + if m < min_window_size: + windows_min.append(m) + else: + windows.append(m) + m = int(m * power) + windows = np.array(windows) + windows_min = np.array(windows_min[1:]) + if not _is_empty(windows_min): + if m_min <= windows_min.max() * power: + windows = np.concatenate([windows_min, windows]) + return windows.astype(int) + + +def _get_word_lengths(n_word_lengths=4, start=0): + return np.array([2**i for i in range(start, n_word_lengths + start)]) + + +def _get_dilations(max_length, min_dilation=1, max_dilation=None): + dilations = list() + max_length_log2 = np.log2(max_length) + if max_dilation is None: + max_dilation = max_length_log2 + start = min_dilation + while start <= max_dilation: + dilations.append(start) + start *= 2 + return np.array(dilations) + + +def _get_strides(n_strides=1): + return np.arange(1, n_strides + 1) + + +def _get_alphabet_sizes(min_symbols=3, max_symbols=4, step=1): + return np.arange(min_symbols, max_symbols, step) + + +def _generate_sax_parameters_configurations( + window_sizes, + strides, + dilations, + word_lengths, + alphabet_sizes_mean, + alphabet_sizes_slope, +): + parameters = list( + itertools.product( + *[ + window_sizes, + strides, + dilations, + word_lengths, + alphabet_sizes_mean, + alphabet_sizes_slope, + ] + ) + ) + cleaned_parameters = list() + for parameter in parameters: + ( + window_size, + stride, + dilation, + word_length, + alphabet_mean, + alphabet_slope, + ) = _extract_parameters_from_args(parameter) + if word_length > window_size: # word_length cannot be greater than window_size + continue + if ( + alphabet_slope <= 1 and word_length == 1 + ): # if alphabet_slope <= 1, word_length=1 is useless + continue + cleaned_parameters.append( + dict( + window_size=window_size, + stride=stride, + dilation=dilation, + word_length=word_length, + alphabet_size_mean=alphabet_mean, + alphabet_size_slope=alphabet_slope, + ) + ) + return cleaned_parameters + + +@nb.njit(cache=True) +def _is_valid_windowing(sequence_size: int, window_size: int, dilation: int) -> bool: + if ( + sequence_size < window_size * dilation + ): # if window_size * dilation exceeds the length of the sequence + return False + else: + return True + + +def _clean_sax_parameters_configurations(parameters, max_length): + cleaned_parameters = list() + for parameter in parameters: + window_size = parameter["window_size"] + dilation = parameter["dilation"] + word_length = parameter["word_length"] + alphabet_mean = parameter["alphabet_size_mean"] + alphabet_slope = parameter["alphabet_size_slope"] + stride = parameter["stride"] + if not _is_valid_windowing( + window_size=window_size, sequence_size=max_length, dilation=dilation + ): + continue + cleaned_parameters.append( + dict( + 
window_size=window_size, + stride=stride, + dilation=dilation, + word_length=word_length, + alphabet_size_mean=alphabet_mean, + alphabet_size_slope=alphabet_slope, + ) + ) + return cleaned_parameters + + +def _sax_parameters_configurations_linear_strides(parameters): + new_parameters = list() + for parameter in parameters: + window_size = parameter["window_size"] + dilation = parameter["dilation"] + word_length = parameter["word_length"] + alphabet_mean = parameter["alphabet_size_mean"] + alphabet_slope = parameter["alphabet_size_slope"] + parameter = dict( + window_size=window_size, + stride=word_length, + dilation=dilation, + word_length=word_length, + alphabet_size_mean=alphabet_mean, + alphabet_size_slope=alphabet_slope, + ) + new_parameters.append(parameter) + return new_parameters + + +def _extract_parameters_from_args(parameter): + window_size = parameter[0] + stride = parameter[1] + dilation = parameter[2] + word_length = parameter[3] + alphabet_mean = parameter[4] + alphabet_slope = parameter[5] + return window_size, stride, dilation, word_length, alphabet_mean, alphabet_slope + + +def _heuristic_function_1dsax( + time_series_min_length, + time_series_max_length, + window_size_min_window_size=4, + window_size_max_window_size=None, + window_size_power=2, + word_lengths_n_word_lengths=3, + strides_n_strides=1, + alphabets_mean_min_symbols=2, + alphabets_mean_max_symbols=3, + alphabets_mean_step=1, + alphabets_slope_min_symbols=2, + alphabets_slope_max_symbols=3, + alphabets_slope_step=1, + dilations_min_dilation=1, + dilations_max_dilation=None, + complexity: Literal["quadratic", "linear"] = "quadratic", +): + params = _get_borf_params( + time_series_min_length=time_series_min_length, + time_series_max_length=time_series_max_length, + window_size_min_window_size=window_size_min_window_size, + window_size_max_window_size=window_size_max_window_size, + window_size_power=window_size_power, + word_lengths_n_word_lengths=word_lengths_n_word_lengths, + strides_n_strides=strides_n_strides, + alphabets_slope_min_symbols=alphabets_slope_min_symbols, + alphabets_slope_max_symbols=alphabets_slope_max_symbols, + alphabets_slope_step=alphabets_slope_step, + alphabets_mean_min_symbols=alphabets_mean_min_symbols, + alphabets_mean_max_symbols=alphabets_mean_max_symbols, + alphabets_mean_step=alphabets_mean_step, + dilations_min_dilation=dilations_min_dilation, + dilations_max_dilation=dilations_max_dilation, + ) + + params_list = _generate_sax_parameters_configurations( + window_sizes=params["window_sizes"], + strides=params["strides"], + dilations=params["dilations"], + word_lengths=params["word_lengths"], + alphabet_sizes_mean=params["alphabet_sizes_mean"], + alphabet_sizes_slope=params["alphabet_sizes_slope"], + ) + + cleaned_params_list = _clean_sax_parameters_configurations( + parameters=params_list, max_length=time_series_max_length + ) + + if complexity == "linear": + cleaned_params_list = _sax_parameters_configurations_linear_strides( + parameters=cleaned_params_list + ) + + return cleaned_params_list + + +def _heuristic_function_sax( + time_series_min_length, + time_series_max_length, + window_size_min_window_size=4, + window_size_max_window_size=None, + window_size_power=2, + word_lengths_n_word_lengths=4, + strides_n_strides=1, + alphabets_min_symbols=2, + alphabets_max_symbols=3, + alphabets_step=1, + dilations_min_dilation=1, + dilations_max_dilation=None, + complexity: Literal["quadratic", "linear"] = "quadratic", +): + configs = _heuristic_function_1dsax( + 
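+        # plain SAX is recovered as a degenerate 1d-SAX: the slope alphabet is
+        # pinned to the single trivial size 0 (min=0, max=1) and the slope
+        # entry is stripped from each configuration below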
time_series_min_length=time_series_min_length, + time_series_max_length=time_series_max_length, + window_size_min_window_size=window_size_min_window_size, + window_size_max_window_size=window_size_max_window_size, + window_size_power=window_size_power, + word_lengths_n_word_lengths=word_lengths_n_word_lengths, + strides_n_strides=strides_n_strides, + alphabets_slope_min_symbols=0, + alphabets_slope_max_symbols=1, + alphabets_slope_step=1, + alphabets_mean_min_symbols=alphabets_min_symbols, + alphabets_mean_max_symbols=alphabets_max_symbols, + alphabets_mean_step=alphabets_step, + dilations_min_dilation=dilations_min_dilation, + dilations_max_dilation=dilations_max_dilation, + complexity=complexity, + ) + for config in configs: + config.pop("alphabet_size_slope") + config["alphabet_size"] = config.pop("alphabet_size_mean") + return configs diff --git a/docs/api_reference/transformations.rst b/docs/api_reference/transformations.rst index d471923f5c..06f641a3a9 100644 --- a/docs/api_reference/transformations.rst +++ b/docs/api_reference/transformations.rst @@ -101,6 +101,7 @@ Dictionary-based features PAA SFA SFAFast + BORF Feature based diff --git a/pyproject.toml b/pyproject.toml index aeb02e3cde..9dfa087c32 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -73,6 +73,7 @@ all_extras = [ "torch>=1.13.1", "tsfresh>=0.20.0", "tslearn>=0.5.2", + "sparse" ] dl = [ "keras-self-attention",
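
A minimal end-to-end sketch of the new transformer (not part of the patch; it
assumes the optional "sparse" dependency is installed and picks
RidgeClassifierCV as one reasonable sparse-aware downstream classifier):

    from sklearn.linear_model import RidgeClassifierCV
    from sklearn.pipeline import make_pipeline

    from aeon.datasets import load_unit_test
    from aeon.transformations.collection.dictionary_based import BORF

    X_train, y_train = load_unit_test(split="train")
    X_test, y_test = load_unit_test(split="test")

    # BORF outputs a scipy sparse feature matrix, which RidgeClassifierCV
    # consumes directly without densifying.
    clf = make_pipeline(BORF(), RidgeClassifierCV())
    clf.fit(X_train, y_train)
    print(clf.score(X_test, y_test))

Passing densify=True instead converts the features to a dense array for
estimators that cannot handle sparse input, at the cost of memory.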