Merged
2 changes: 1 addition & 1 deletion .github/actions/numba_cache/action.yml
Original file line number Diff line number Diff line change
@@ -62,6 +62,6 @@ runs:
path: ${{ github.workspace }}/.numba_cache
# Try restore using today's date
key: numba-${{ inputs.cache_name }}-${{ inputs.runner_os }}-${{ inputs.python_version }}-${{ env.CURRENT_DATE }}
# If cant restore with today's date try another cache (without date)
# If can't restore with today's date try another cache (without date)
restore-keys: |
numba-${{ inputs.cache_name }}-${{ inputs.runner_os }}-${{ inputs.python_version }}-
4 changes: 4 additions & 0 deletions .github/utilities/codespell_ignore_words.txt
@@ -0,0 +1,4 @@
fpr
mape
recuse
strat
13 changes: 13 additions & 0 deletions .github/workflows/pr_precommit.yml
@@ -60,3 +60,16 @@ jobs:
with:
commit_message: Automatic `pre-commit` fixes
commit_user_name: aeon-actions-bot[bot]

codespell-annotations:
runs-on: ubuntu-24.04

steps:
- name: Checkout
uses: actions/checkout@v4

- name: Annotate locations with typos
uses: codespell-project/codespell-problem-matcher@v1

- name: Codespell
uses: codespell-project/actions-codespell@v2
8 changes: 8 additions & 0 deletions .pre-commit-config.yaml
@@ -78,3 +78,11 @@ repos:
hooks:
- id: check-manifest
stages: [ manual ]

- repo: https://github.com/codespell-project/codespell
# Configuration for codespell is in pyproject.toml
rev: v2.4.1
hooks:
- id: codespell
additional_dependencies:
- tomli # for python_version < '3.11'
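The `tomli` dependency above exists because codespell reads its configuration from `pyproject.toml`, and Python's standard-library TOML parser (`tomllib`) only appeared in 3.11. A minimal sketch of the usual fallback pattern — the `[tool.codespell]` table shown here is hypothetical, since the actual pyproject.toml keys are not part of this diff:

```python
import sys

# `tomllib` entered the standard library in Python 3.11; on older
# interpreters the `tomli` backport provides the same API.
if sys.version_info >= (3, 11):
    import tomllib
else:  # only taken on Python < 3.11, where `tomli` must be installed
    import tomli as tomllib

# Hypothetical minimal [tool.codespell] table, as it might appear in
# pyproject.toml (illustrative only; not copied from the aeon repo).
config = tomllib.loads(
    '[tool.codespell]\n'
    'ignore-words = ".github/utilities/codespell_ignore_words.txt"\n'
)
print(config["tool"]["codespell"]["ignore-words"])
# → .github/utilities/codespell_ignore_words.txt
```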
2 changes: 1 addition & 1 deletion aeon/anomaly_detection/series/distance_based/_kmeans.py
@@ -42,7 +42,7 @@ class KMeansAD(BaseSeriesAnomalyDetector):

stride : int, default=1
The stride of the sliding window. The stride determines how many time points
the windows are spaced appart. A stride of 1 means that the window is moved one
the windows are spaced apart. A stride of 1 means that the window is moved one
time point forward compared to the previous window. The larger the stride, the
fewer windows are created, which leads to noisier anomaly scores.

10 changes: 5 additions & 5 deletions aeon/anomaly_detection/series/distance_based/_merlin.py
@@ -102,26 +102,26 @@ def _predict(self, X):

r = 2 * np.sqrt(self.min_length)
distances = np.full(len(lengths), -1.0)
indicies = np.full(len(lengths), -1)
indices = np.full(len(lengths), -1)

indicies[0], distances[0] = self._find_index(X, lengths[0], r, np.multiply, 0.5)
indices[0], distances[0] = self._find_index(X, lengths[0], r, np.multiply, 0.5)

for i in range(1, min(5, len(lengths))):
r = distances[i - 1] * 0.99
indicies[i], distances[i] = self._find_index(
indices[i], distances[i] = self._find_index(
X, lengths[i], r, np.multiply, 0.99
)

for i in range(min(5, len(lengths)), len(lengths)):
m = mean(distances[i - 5 : i])
s = std(distances[i - 5 : i])
r = m - 2 * s
indicies[i], distances[i] = self._find_index(
indices[i], distances[i] = self._find_index(
X, lengths[i], r, np.subtract, s
)

anomalies = np.zeros(X.shape[0], dtype=bool)
for i in indicies:
for i in indices:
if i > -1:
anomalies[i] = True
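Beyond the `indicies` → `indices` rename, the hunk above shows MERLIN's adaptive threshold: once five discord distances are available, the next search range `r` is derived from their rolling mean and standard deviation. A sketch of that single update step, with illustrative numbers (not taken from aeon's tests):

```python
import numpy as np

# Five most recent discord distances (illustrative values only).
distances = np.array([3.1, 2.9, 3.0, 2.8, 3.2])

m = np.mean(distances)
s = np.std(distances)
r = m - 2 * s  # next search threshold, as in `r = m - 2 * s` above
print(round(float(r), 4))  # → 2.7172
```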

4 changes: 2 additions & 2 deletions aeon/anomaly_detection/series/outlier_detection/_stray.py
@@ -20,7 +20,7 @@ class STRAY(BaseSeriesAnomalyDetector):
ability to detect clusters of outliers in multidimensional data without
requiring a model of the typical behavior of the system. However, it suffers
from some limitations that affect its accuracy. STRAY is an extension of
HDoutliers that uses extreme value theory for the anomolous threshold
HDoutliers that uses extreme value theory for the anomalous threshold
calculation, to deal with data streams that exhibit non-stationary behavior.

Parameters
@@ -39,7 +39,7 @@ class STRAY(BaseSeriesAnomalyDetector):
Proportion of possible candidates for outliers. This defines the starting point
for the bottom up searching algorithm.
size_threshold : int, default=50
Sample size to calculate an emperical threshold.
Sample size to calculate an empirical threshold.
outlier_tail : str {"min", "max"}, default="max"
Direction of the outlier tail.

2 changes: 1 addition & 1 deletion aeon/base/_estimators/compose/collection_ensemble.py
@@ -42,7 +42,7 @@ class BaseCollectionEnsemble(ComposableEstimatorMixin, BaseCollectionEstimator):
Only used if weights is a float. The method used to generate a performance
estimation from the training data set i.e. cross-validation.
If None, predictions are made using that estimators fit_predict or
fit_predict_proba methods. These are somtimes overridden for efficient
fit_predict_proba methods. These are sometimes overridden for efficient
performance evaluations, i.e. out-of-bag predictions.
If int or sklearn object input, the parameter is passed directly to the cv
parameter of the cross_val_predict function from sklearn.
4 changes: 2 additions & 2 deletions aeon/benchmarking/results_loaders.py
@@ -201,7 +201,7 @@ def estimator_alias(name: str) -> str:
def get_available_estimators(
task: str = "classification", as_list: bool = False
) -> Union[pd.DataFrame, list]:
"""Get a DataFrame of estimators avialable for a specific learning task.
"""Get a DataFrame of estimators available for a specific learning task.

Parameters
----------
@@ -251,7 +251,7 @@ def get_estimator_results(

Parameters
----------
estimators : str ot list of str
estimators : str or list of str
Estimator name or list of estimator names to search for. See
get_available_estimators, aeon.benchmarking.results_loading.NAME_ALIASES or
the directory at path for valid options.
2 changes: 1 addition & 1 deletion aeon/benchmarking/stats.py
@@ -57,7 +57,7 @@ def nemenyi_test(ordered_avg_ranks, n_datasets, alpha):
ordered_avg_ranks : np.array
Average ranks of estimators.
n_datasets : int
Mumber of datasets.
Number of datasets.
alpha : float
alpha level for Nemenyi test.

4 changes: 2 additions & 2 deletions aeon/benchmarking/tests/test_resampling.py
@@ -107,7 +107,7 @@ def test_resample_data_indices():
concat = np.concatenate((train_indices, test_indices), axis=0)
assert len(np.unique(concat)) == len(concat)

# expected indicies after resampling
# expected indices after resampling
np.testing.assert_array_equal(
concat,
[
@@ -277,7 +277,7 @@ def test_stratified_resample_data_indices():
concat = np.concatenate((train_indices, test_indices), axis=0)
assert len(np.unique(concat)) == len(concat)

# expected indicies after resampling
# expected indices after resampling
np.testing.assert_array_equal(
concat,
[
2 changes: 1 addition & 1 deletion aeon/classification/compose/_ensemble.py
@@ -33,7 +33,7 @@ class ClassifierEnsemble(BaseCollectionEnsemble, BaseClassifier):
Only used if weights is a float. The method used to generate a performance
estimation from the training data set i.e. cross-validation.
If None, predictions are made using that estimators fit_predict or
fit_predict_proba methods. These are somtimes overridden for efficient
fit_predict_proba methods. These are sometimes overridden for efficient
performance evaluations, i.e. out-of-bag predictions.
If int or sklearn object input, the parameter is passed directly to the cv
parameter of the cross_val_predict function from sklearn.
18 changes: 9 additions & 9 deletions aeon/classification/deep_learning/_disjoint_cnn.py
@@ -25,42 +25,42 @@ class DisjointCNNClassifier(BaseDeepClassifier):
Number of 1+1D Convolution layers.
n_filters : int or list of int, default = 64
Number of filters used in convolution layers. If
input is set to a list, the lenght should be the same
input is set to a list, the length should be the same
as `n_layers`, if input is int the a list of the same
element is created of length `n_layers`.
kernel_size : int or list of int, default = [8, 5, 5, 3]
Size of convolution kernel. If
input is set to a list, the lenght should be the same
input is set to a list, the length should be the same
as `n_layers`, if input is int the a list of the same
element is created of length `n_layers`.
dilation_rate : int or list of int, default = 1
The dilation rate for convolution. If
input is set to a list, the lenght should be the same
input is set to a list, the length should be the same
as `n_layers`, if input is int the a list of the same
element is created of length `n_layers`.
strides : int or list of int, default = 1
The strides of the convolution filter. If
input is set to a list, the lenght should be the same
input is set to a list, the length should be the same
as `n_layers`, if input is int the a list of the same
element is created of length `n_layers`.
padding : str or list of str, default = "same"
The type of padding used for convolution. If
input is set to a list, the lenght should be the same
input is set to a list, the length should be the same
as `n_layers`, if input is int the a list of the same
element is created of length `n_layers`.
activation : str or list of str, default = "elu"
Activation used after the convolution. If
input is set to a list, the lenght should be the same
input is set to a list, the length should be the same
as `n_layers`, if input is int the a list of the same
element is created of length `n_layers`.
use_bias : bool or list of bool, default = True
Whether or not ot use bias in convolution. If
input is set to a list, the lenght should be the same
Whether or not to use bias in convolution. If
input is set to a list, the length should be the same
as `n_layers`, if input is int the a list of the same
element is created of length `n_layers`.
kernel_initializer: str or list of str, default = "he_uniform"
The initialization method of convolution layers. If
input is set to a list, the lenght should be the same
input is set to a list, the length should be the same
as `n_layers`, if input is int the a list of the same
element is created of length `n_layers`.
pool_size: int, default = 5
2 changes: 1 addition & 1 deletion aeon/classification/deep_learning/_fcn.py
@@ -36,7 +36,7 @@ class FCNClassifier(BaseDeepClassifier):
activation : str or list of str, default = "relu"
Activation used after the convolution.
use_bias : bool or list of bool, default = True
Whether or not ot use bias in convolution.
Whether or not to use bias in convolution.
n_epochs : int, default = 2000
The number of epochs to train the model.
batch_size : int, default = 16
2 changes: 1 addition & 1 deletion aeon/classification/deep_learning/_inception_time.py
@@ -354,7 +354,7 @@ def _predict_proba(self, X) -> np.ndarray:
def load_model(self, model_path, classes):
"""Load pre-trained classifiers instead of fitting.

When calling this function, all funcationalities can be used
When calling this function, all functionalities can be used
such as predict, predict_proba, etc. with the loaded models.

Parameters
4 changes: 2 additions & 2 deletions aeon/classification/deep_learning/_lite_time.py
@@ -286,7 +286,7 @@ def _predict_proba(self, X) -> np.ndarray:
def load_model(self, model_path, classes):
"""Load pre-trained classifiers instead of fitting.

When calling this function, all funcationalities can be used
When calling this function, all functionalities can be used
such as predict, predict_proba, etc. with the loaded models.

Parameters
@@ -448,7 +448,7 @@ class IndividualLITEClassifier(BaseDeepClassifier):
References
----------
..[1] Ismail-Fawaz et al. LITE: Light Inception with boosTing
tEchniques for Time Series Classificaion, IEEE International
tEchniques for Time Series Classification, IEEE International
Conference on Data Science and Advanced Analytics, 2023.
..[2] Ismail-Fawaz, Ali, et al. "Look Into the LITE
in Deep Learning for Time Series Classification."
4 changes: 2 additions & 2 deletions aeon/classification/dictionary_based/_boss.py
@@ -719,11 +719,11 @@ def boss_distance(X, Y, i, XX_all=None, XY_all=None):
"""Find the distance between two histograms.

This returns the distance between first and second dictionaries, using a non-
symmetric distance measure. It is used to find the distance between historgrams
symmetric distance measure. It is used to find the distance between histograms
of words.

This distance function is designed for sparse matrix, represented as either a
dictionary or an arrray. It only measures the distance between counts present in
dictionary or an array. It only measures the distance between counts present in
the first dictionary and the second. Hence dist(a,b) does not necessarily equal
dist(b,a).
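The asymmetry described in the docstring above can be sketched with plain dictionaries of word counts: only words present in the first histogram contribute, so `dist(a, b)` and `dist(b, a)` generally differ. This is an illustrative sketch of that property, not aeon's exact `boss_distance` implementation:

```python
def boss_dist(a: dict, b: dict) -> float:
    # Only words present in the first histogram contribute, which is
    # what makes the measure non-symmetric (illustrative sketch only).
    return float(sum((count - b.get(word, 0)) ** 2 for word, count in a.items()))

hist_a = {"aba": 3, "bab": 1}
hist_b = {"aba": 1}

print(boss_dist(hist_a, hist_b))  # (3-1)^2 + (1-0)^2 → 5.0
print(boss_dist(hist_b, hist_a))  # (1-3)^2 → 4.0
```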

2 changes: 1 addition & 1 deletion aeon/classification/dictionary_based/_cboss.py
@@ -28,7 +28,7 @@ class ContractableBOSS(BaseClassifier):
described in [2]_.

Overview: Input "n" series of length "m" and cBOSS randomly samples
`n_parameter_samples` parameter sets, evaluting each with LOOCV. It then
`n_parameter_samples` parameter sets, evaluating each with LOOCV. It then
retains `max_ensemble_size` classifiers with the highest accuracy
There are three primary parameters:
- alpha: alphabet size
12 changes: 6 additions & 6 deletions aeon/classification/dictionary_based/_redcomets.py
@@ -36,7 +36,7 @@ class REDCOMETS(BaseClassifier):
RED CoMETS variant to use from {1, 2, 3, 4, 5, 6, 7, 8, 9} to use as per [1]_.
Defaults to RED CoMETS-3. Variants 4-9 only support multivariate problems.
perc_length : int or float, default=5
Percentage of time series length used to determinne number of lenses during
Percentage of time series length used to determine number of lenses during
pair selection.
n_trees : int, default=100
Number of trees used by each random forest sub-classifier.
@@ -185,12 +185,12 @@ def _build_univariate_ensemble(self, X, y):
Returns
-------
sfa_transforms :
List of ``SFAFast()`` instances with random word length and alpabet size
List of ``SFAFast()`` instances with random word length and alphabet size
sfa_clfs :
List of ``(RandomForestClassifier(), weight)`` tuples fitted on `SFAFast`
transformed training data
sax_transforms :
List of ``SAX()`` instances with random word length and alpabet size
List of ``SAX()`` instances with random word length and alphabet size
sax_clfs :
List of ``(RandomForestClassifier(), weight)`` tuples fitted on `SAX`
transformed training data
@@ -321,13 +321,13 @@ def _build_dimension_ensemble(self, X, y):
Returns
-------
sfa_transforms : list
List of lists of ``SFAFast()`` instances with random word length and alpabet
size
List of lists of ``SFAFast()`` instances with random word length and
alphabet size
sfa_clfs : list
List of lists of ``(RandomForestClassifier(), weight)`` tuples fitted on
`SFAFast` transformed training data
sax_transforms : list
List of lists of ``SAX()`` instances with random word length and alpabet
List of lists of ``SAX()`` instances with random word length and alphabet
size
sax_clfs : list
List of lists ``(RandomForestClassifier(), weight)`` tuples fitted on `SAX`
2 changes: 1 addition & 1 deletion aeon/classification/dictionary_based/_tde.py
@@ -618,7 +618,7 @@ class IndividualTDE(BaseClassifier):
bigrams : bool, default=False
Whether to record word bigrams in the SFA transform.
dim_threshold : float, default=0.85
Accuracy threshold as a propotion of the highest accuracy dimension for words
Accuracy threshold as a proportion of the highest accuracy dimension for words
extracted from each dimensions. Only applicable for multivariate data.
max_dims : int, default=20
Maximum number of dimensions words are extracted from. Only applicable for
@@ -47,7 +47,7 @@ class ProbabilityThresholdEarlyClassifier(BaseEarlyClassifier):
List of integer time series time stamps to build classifiers and allow
predictions at. Early predictions must have a series length that matches a value
in the _classification_points List. Duplicate values will be removed, and the
full series length will be appeneded if not present.
full series length will be appended if not present.
If None, will use 20 thresholds linearly spaces from 0 to the series length.
n_jobs : int, default=1
The number of jobs to run in parallel for both `fit` and `predict`.
@@ -73,7 +73,7 @@ class ProbabilityThresholdEarlyClassifier(BaseEarlyClassifier):
state_info : 2d np.ndarray (4 columns)
Information stored about input instances after the decision-making process in
update/predict methods. Used in update methods to make decisions based on
the resutls of previous method calls.
the results of previous method calls.
Records in order: the time stamp index, the number of consecutive decisions
made, the predicted class and the series length.

4 changes: 2 additions & 2 deletions aeon/classification/early_classification/_teaser.py
@@ -56,7 +56,7 @@ class TEASER(BaseEarlyClassifier):
List of integer time series time stamps to build classifiers and allow
predictions at. Early predictions must have a series length that matches a value
in the _classification_points List. Duplicate values will be removed, and the
full series length will be appeneded if not present.
full series length will be appended if not present.
If None, will use 20 thresholds linearly spaces from 0 to the series length.
n_jobs : int, default=1
The number of jobs to run in parallel for both `fit` and `predict`.
@@ -82,7 +82,7 @@ class TEASER(BaseEarlyClassifier):
state_info : 2d np.ndarray (4 columns)
Information stored about input instances after the decision-making process in
update/predict methods. Used in update methods to make decisions based on
the resutls of previous method calls.
the results of previous method calls.
Records in order: the time stamp index, the number of consecutive decisions
made, the predicted class and the series length.

4 changes: 2 additions & 2 deletions aeon/classification/early_classification/base.py
@@ -66,7 +66,7 @@ def __init__(self):

"""
An array containing the state info for each decision in X from update and
predict methods. Contains classifier dependant information for future decisions
predict methods. Contains classifier dependent information for future decisions
on the data and information on when a cases decision has been made. Each row
contains information for a case from the latest decision on its safety made in
update/predict. Successive updates are likely to remove rows from the
@@ -308,7 +308,7 @@ def get_state_info(self):
Returns
-------
An array containing the state info for each decision in X from update and
predict methods. Contains classifier dependant information for future decisions
predict methods. Contains classifier dependent information for future decisions
on the data and information on when a cases decision has been made. Each row
contains information for a case from the latest decision on its safety made in
update/predict. Successive updates are likely to remove rows from the
@@ -592,7 +592,7 @@ class IndividualOrdinalTDE(BaseClassifier):
bigrams : bool, default=False
Whether to record word bigrams in the SFA transform.
dim_threshold : float, default=0.85
Accuracy threshold as a propotion of the highest accuracy dimension for words
Accuracy threshold as a proportion of the highest accuracy dimension for words
extracted from each dimensions. Only applicable for multivariate data.
max_dims : int, default=20
Maximum number of dimensions words are extracted from. Only applicable for
2 changes: 1 addition & 1 deletion aeon/classification/shapelet_based/_sast.py
@@ -28,7 +28,7 @@ class SASTClassifier(BaseClassifier):
----------
length_list : int[], default = None
an array containing the lengths of the subsequences to be generated.
If None, will be infered during fit as np.arange(3, X.shape[1])
If None, will be inferred during fit as np.arange(3, X.shape[1])
stride : int, default = 1
the stride used when generating subsquences
nb_inst_per_class : int default = 1