diff --git a/requirements.txt b/requirements.txt
index 619d35ebcd4..d46f799c221 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -16,7 +16,7 @@ pathy>=0.3.5
 numpy>=1.15.0
 requests>=2.13.0,<3.0.0
 tqdm>=4.38.0,<5.0.0
-pydantic>=1.7.4,!=1.8,!=1.8.1,<1.9.0
+pydantic>=1.9.0,<1.10.0
 jinja2
 langcodes>=3.2.0,<4.0.0
 # Official Python utilities
@@ -31,7 +31,7 @@ pytest-timeout>=1.3.0,<2.0.0
 mock>=2.0.0,<3.0.0
 flake8>=3.8.0,<3.10.0
 hypothesis>=3.27.0,<7.0.0
-mypy==0.910
+mypy>=0.910,<=0.950
 types-dataclasses>=0.1.3; python_version < "3.7"
 types-mock>=0.1.1
 types-requests
diff --git a/setup.cfg b/setup.cfg
index 2626de87e86..4b344af378d 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -57,7 +57,7 @@ install_requires =
     tqdm>=4.38.0,<5.0.0
     numpy>=1.15.0
     requests>=2.13.0,<3.0.0
-    pydantic>=1.7.4,!=1.8,!=1.8.1,<1.9.0
+    pydantic>=1.7.4,!=1.8,!=1.8.1,<1.10.0
     jinja2
     # Official Python utilities
     setuptools
diff --git a/spacy/displacy/__init__.py b/spacy/displacy/__init__.py
index 5d49b6eb758..427e07cdbd3 100644
--- a/spacy/displacy/__init__.py
+++ b/spacy/displacy/__init__.py
@@ -55,12 +55,12 @@ def render(
         raise ValueError(Errors.E096)
     renderer_func, converter = factories[style]
     renderer = renderer_func(options=options)
-    parsed = [converter(doc, options) for doc in docs] if not manual else docs  # type: ignore
+    parsed = [converter(doc, options) for doc in docs] if not manual else docs
     if manual:
         for doc in docs:
             if isinstance(doc, dict) and "ents" in doc:
                 doc["ents"] = sorted(doc["ents"], key=lambda x: (x["start"], x["end"]))
-    _html["parsed"] = renderer.render(parsed, page=page, minify=minify).strip()  # type: ignore
+    _html["parsed"] = renderer.render(parsed, page=page, minify=minify).strip()  # type: ignore[attr-defined]
     html = _html["parsed"]
     if RENDER_WRAPPER is not None:
         html = RENDER_WRAPPER(html)
diff --git a/spacy/errors.py b/spacy/errors.py
index b01afcb802f..d581da1a88f 100644
--- a/spacy/errors.py
+++ b/spacy/errors.py
@@ -1,3 +1,4 @@
 import warnings
+from .compat import Literal  # typing.Literal requires Python 3.8+
 
 
@@ -26,7 +27,10 @@ def setup_default_warnings():
     filter_warning("once", error_msg="[W114]")
 
 
-def filter_warning(action: str, error_msg: str):
+def filter_warning(
+    action: Literal["default", "error", "ignore", "always", "module", "once"],
+    error_msg: str,
+):
     """Customize how spaCy should handle a certain warning.
 
     error_msg (str): e.g. "W006", or a full error message
diff --git a/spacy/language.py b/spacy/language.py
index bab403f0eb4..8a556d3ec3e 100644
--- a/spacy/language.py
+++ b/spacy/language.py
@@ -1153,7 +1153,7 @@ def update(
         for name, proc in self.pipeline:
             # ignore statements are used here because mypy ignores hasattr
             if name not in exclude and hasattr(proc, "update"):
-                proc.update(examples, sgd=None, losses=losses, **component_cfg[name])  # type: ignore
+                proc.update(examples, sgd=None, losses=losses, **component_cfg[name])  # type: ignore[attr-defined]
             if sgd not in (None, False):
                 if (
                     name not in exclude
diff --git a/spacy/ml/extract_ngrams.py b/spacy/ml/extract_ngrams.py
index c9c82f36949..eda41e7adb9 100644
--- a/spacy/ml/extract_ngrams.py
+++ b/spacy/ml/extract_ngrams.py
@@ -1,5 +1,7 @@
+from typing import cast
 from thinc.api import Model
 
+from thinc.types import Ints1d
 from ..util import registry
 from ..attrs import LOWER
 
@@ -16,10 +18,10 @@ def forward(model: Model, docs, is_train: bool):
     batch_keys = []
     batch_vals = []
     for doc in docs:
-        unigrams = model.ops.asarray(doc.to_array([model.attrs["attr"]]))
+        unigrams = cast(Ints1d, model.ops.asarray(doc.to_array([model.attrs["attr"]])))
         ngrams = [unigrams]
         for n in range(2, model.attrs["ngram_size"] + 1):
-            ngrams.append(model.ops.ngrams(n, unigrams))  # type: ignore[arg-type]
+            ngrams.append(model.ops.ngrams(n, unigrams))
         keys = model.ops.xp.concatenate(ngrams)
         keys, vals = model.ops.xp.unique(keys, return_counts=True)
         batch_keys.append(keys)
diff --git a/spacy/ml/extract_spans.py b/spacy/ml/extract_spans.py
index d5e9bc07cba..2288dc2c870 100644
--- a/spacy/ml/extract_spans.py
+++ b/spacy/ml/extract_spans.py
@@ -1,6 +1,6 @@
-from typing import Tuple, Callable
+from typing import Tuple, Callable, cast
 from thinc.api import Model, to_numpy
-from thinc.types import Ragged, Ints1d
+from thinc.types import Ragged, Ints1d, FloatsXd
 
 from ..util import registry
 
@@ -29,7 +29,9 @@ def forward(
     assert spans.dataXd.ndim == 2
     indices = _get_span_indices(ops, spans, X.lengths)
     if len(indices) > 0:
-        Y = Ragged(X.dataXd[indices], spans.dataXd[:, 1] - spans.dataXd[:, 0])  # type: ignore[arg-type, index]
+        Y = Ragged(
+            X.dataXd[indices], cast(Ints1d, spans.dataXd[:, 1] - spans.dataXd[:, 0])
+        )
     else:
         Y = Ragged(
             ops.xp.zeros(X.dataXd.shape, dtype=X.dataXd.dtype),
@@ -40,7 +42,7 @@ def forward(
 
     def backprop_windows(dY: Ragged) -> Tuple[Ragged, Ragged]:
         dX = Ragged(ops.alloc2f(*x_shape), x_lengths)
-        ops.scatter_add(dX.dataXd, indices, dY.dataXd)  # type: ignore[arg-type]
+        ops.scatter_add(cast(FloatsXd, dX.dataXd), indices, cast(FloatsXd, dY.dataXd))
         return (dX, spans)
 
     return Y, backprop_windows
@@ -57,7 +59,7 @@ def _get_span_indices(ops, spans: Ragged, lengths: Ints1d) -> Ints1d:
     for i, length in enumerate(lengths):
         spans_i = spans[i].dataXd + offset
         for j in range(spans_i.shape[0]):
-            indices.append(ops.xp.arange(spans_i[j, 0], spans_i[j, 1]))  # type: ignore[call-overload, index]
+            indices.append(ops.xp.arange(spans_i[j, 0], spans_i[j, 1]))  # type: ignore[call-overload]
         offset += length
     return ops.flatten(indices, dtype="i", ndim_if_empty=1)
 
diff --git a/spacy/ml/models/entity_linker.py b/spacy/ml/models/entity_linker.py
index 0149bea89c6..fba4b485f10 100644
--- a/spacy/ml/models/entity_linker.py
+++ b/spacy/ml/models/entity_linker.py
@@ -23,7 +23,7 @@ def build_nel_encoder(
             ((tok2vec >> list2ragged()) & build_span_maker())
             >> extract_spans()
             >> reduce_mean()
-            >> residual(Maxout(nO=token_width, nI=token_width, nP=2, dropout=0.0))  # type: ignore[arg-type]
+            >> residual(Maxout(nO=token_width, nI=token_width, nP=2, dropout=0.0))
             >> output_layer
         )
         model.set_ref("output_layer", output_layer)
diff --git a/spacy/ml/models/multi_task.py b/spacy/ml/models/multi_task.py
index a7d67c6dda8..2d89336ff27 100644
--- a/spacy/ml/models/multi_task.py
+++ b/spacy/ml/models/multi_task.py
@@ -142,7 +142,7 @@ def build_cloze_characters_multi_task_model(
         cast(Model[List["Floats2d"], Floats2d], list2array()),
         Maxout(nO=hidden_size, nP=maxout_pieces),
         LayerNorm(nI=hidden_size),
-        MultiSoftmax([256] * nr_char, nI=hidden_size),  # type: ignore[arg-type]
+        MultiSoftmax(tuple([256] * nr_char), nI=hidden_size),
     )
     model = build_masked_language_model(vocab, chain(tok2vec, output_layer))
     model.set_ref("tok2vec", tok2vec)
diff --git a/spacy/ml/models/tagger.py b/spacy/ml/models/tagger.py
index 9f8ef7b2b9e..5fa0e6fff4f 100644
--- a/spacy/ml/models/tagger.py
+++ b/spacy/ml/models/tagger.py
@@ -22,7 +22,7 @@ def build_tagger_model(
     output_layer = Softmax_v2(
         nO, t2v_width, init_W=zero_init, normalize_outputs=normalize
     )
-    softmax = with_array(output_layer)  # type: ignore
+    softmax = with_array(output_layer)
     model = chain(tok2vec, softmax)
     model.set_ref("tok2vec", tok2vec)
     model.set_ref("softmax", output_layer)
diff --git a/spacy/ml/models/textcat.py b/spacy/ml/models/textcat.py
index c8c146f026b..e436d59de6f 100644
--- a/spacy/ml/models/textcat.py
+++ b/spacy/ml/models/textcat.py
@@ -1,5 +1,5 @@
 from functools import partial
-from typing import Optional, List
+from typing import Optional, List, cast
 
 from thinc.types import Floats2d
 from thinc.api import Model, reduce_mean, Linear, list2ragged, Logistic
@@ -59,7 +59,7 @@ def build_simple_cnn_text_classifier(
             resizable_layer=resizable_layer,
         )
     model.set_ref("tok2vec", tok2vec)
-    model.set_dim("nO", nO)  # type: ignore  # TODO: remove type ignore once Thinc has been updated
+    model.set_dim("nO", cast(int, nO))
     model.attrs["multi_label"] = not exclusive_classes
     return model
 
@@ -85,7 +85,7 @@ def build_bow_text_classifier(
         if not no_output_layer:
             fill_defaults["b"] = NEG_VALUE
             output_layer = softmax_activation() if exclusive_classes else Logistic()
-        resizable_layer = resizable(  # type: ignore[var-annotated]
+        resizable_layer: Model[Floats2d, Floats2d] = resizable(
             sparse_linear,
             resize_layer=partial(resize_linear_weighted, fill_defaults=fill_defaults),
         )
@@ -93,7 +93,7 @@ def build_bow_text_classifier(
         model = with_cpu(model, model.ops)
         if output_layer:
             model = model >> with_cpu(output_layer, output_layer.ops)
-    model.set_dim("nO", nO)  # type: ignore[arg-type]
+    model.set_dim("nO", cast(int, nO))
     model.set_ref("output_layer", sparse_linear)
     model.attrs["multi_label"] = not exclusive_classes
     model.attrs["resize_output"] = partial(
@@ -130,7 +130,7 @@ def build_text_classifier_v2(
         model = (linear_model | cnn_model) >> output_layer
         model.set_ref("tok2vec", tok2vec)
     if model.has_dim("nO") is not False:
-        model.set_dim("nO", nO)  # type: ignore[arg-type]
+        model.set_dim("nO", cast(int, nO))
     model.set_ref("output_layer", linear_model.get_ref("output_layer"))
     model.set_ref("attention_layer", attention_layer)
     model.set_ref("maxout_layer", maxout_layer)
@@ -164,7 +164,7 @@ def build_text_classifier_lowdata(
             >> list2ragged()
             >> ParametricAttention(width)
             >> reduce_sum()
-            >> residual(Relu(width, width)) ** 2  # type: ignore[arg-type]
+            >> residual(Relu(width, width)) ** 2
             >> Linear(nO, width)
         )
         if dropout:
diff --git a/spacy/ml/models/tok2vec.py b/spacy/ml/models/tok2vec.py
index ecdf6be27c6..84c9ef669ae 100644
--- a/spacy/ml/models/tok2vec.py
+++ b/spacy/ml/models/tok2vec.py
@@ -1,5 +1,5 @@
 from typing import Optional, List, Union, cast
-from thinc.types import Floats2d, Ints2d, Ragged
+from thinc.types import Floats2d, Ints1d, Ints2d, Ragged, ArrayXd
 from thinc.api import chain, clone, concatenate, with_array, with_padded
 from thinc.api import Model, noop, list2ragged, ragged2list, HashEmbed
 from thinc.api import expand_window, residual, Maxout, Mish, PyTorchLSTM
@@ -159,7 +159,7 @@ def make_hash_embed(index):
     embeddings = [make_hash_embed(i) for i in range(len(attrs))]
     concat_size = width * (len(embeddings) + include_static_vectors)
     max_out: Model[Ragged, Ragged] = with_array(
-        Maxout(width, concat_size, nP=3, dropout=0.0, normalize=True)  # type: ignore
+        Maxout(width, concat_size, nP=3, dropout=0.0, normalize=True)
     )
     if include_static_vectors:
         feature_extractor: Model[List[Doc], Ragged] = chain(
@@ -173,7 +173,7 @@ def make_hash_embed(index):
                 StaticVectors(width, dropout=0.0),
             ),
             max_out,
-            cast(Model[Ragged, List[Floats2d]], ragged2list()),
+            ragged2list(),
         )
     else:
         model = chain(
@@ -181,9 +181,9 @@ def make_hash_embed(index):
             cast(Model[List[Ints2d], Ragged], list2ragged()),
             with_array(concatenate(*embeddings)),
             max_out,
-            cast(Model[Ragged, List[Floats2d]], ragged2list()),
+            ragged2list(),
         )
-    return model
+    return cast(Model[List[Doc], List[Floats2d]], model)
 
 
 @registry.architectures("spacy.CharacterEmbed.v2")
@@ -231,13 +231,14 @@ def CharacterEmbed(
     )
     feature_extractor: Model[List[Doc], Ragged] = chain(
         FeatureExtractor([feature]),
-        cast(Model[List[Ints2d], Ragged], list2ragged()),
-        with_array(HashEmbed(nO=width, nV=rows, column=0, seed=5)),  # type: ignore
+        list2ragged(),
+        # TODO: the typing does not seem to make sense here
+        with_array(HashEmbed(nO=width, nV=rows, column=0, seed=5)),  # type: ignore
     )
     max_out: Model[Ragged, Ragged]
     if include_static_vectors:
         max_out = with_array(
-            Maxout(width, nM * nC + (2 * width), nP=3, normalize=True, dropout=0.0)  # type: ignore
+            Maxout(width, nM * nC + (2 * width), nP=3, normalize=True, dropout=0.0)
         )
         model = chain(
             concatenate(
@@ -246,11 +247,11 @@ def CharacterEmbed(
                 StaticVectors(width, dropout=0.0),
             ),
             max_out,
-            cast(Model[Ragged, List[Floats2d]], ragged2list()),
+            ragged2list(),
         )
     else:
         max_out = with_array(
-            Maxout(width, nM * nC + width, nP=3, normalize=True, dropout=0.0)  # type: ignore
+            Maxout(width, nM * nC + width, nP=3, normalize=True, dropout=0.0)
         )
         model = chain(
             concatenate(
@@ -258,9 +259,9 @@ def CharacterEmbed(
                 feature_extractor,
             ),
             max_out,
-            cast(Model[Ragged, List[Floats2d]], ragged2list()),
+            ragged2list(),
         )
-    return model
+    return cast(Model[List[Doc], List[Floats2d]], model)
 
 
 @registry.architectures("spacy.MaxoutWindowEncoder.v2")
@@ -280,7 +281,7 @@ def MaxoutWindowEncoder(
     depth (int): The number of convolutional layers. Recommended value is 4.
     """
     cnn = chain(
-        expand_window(window_size=window_size),
+        cast(Model[Floats2d, Floats2d], expand_window(window_size=window_size)),
         Maxout(
             nO=width,
             nI=width * ((window_size * 2) + 1),
@@ -289,10 +290,10 @@ def MaxoutWindowEncoder(
             normalize=True,
         ),
     )
-    model = clone(residual(cnn), depth)  # type: ignore[arg-type]
+    model = clone(residual(cnn), depth)
     model.set_dim("nO", width)
     receptive_field = window_size * depth
-    return with_array(model, pad=receptive_field)  # type: ignore[arg-type]
+    return cast(Model[List[Floats2d], List[Floats2d]], with_array(model, pad=receptive_field))
 
 
 @registry.architectures("spacy.MishWindowEncoder.v2")
@@ -310,12 +311,12 @@ def MishWindowEncoder(
     depth (int): The number of convolutional layers. Recommended value is 4.
     """
     cnn = chain(
-        expand_window(window_size=window_size),
+        cast(Model[Floats2d, Floats2d], expand_window(window_size=window_size)),
         Mish(nO=width, nI=width * ((window_size * 2) + 1), dropout=0.0, normalize=True),
     )
-    model = clone(residual(cnn), depth)  # type: ignore[arg-type]
+    model = clone(residual(cnn), depth)
     model.set_dim("nO", width)
-    return with_array(model)  # type: ignore[arg-type]
+    return cast(Model[List[Floats2d], List[Floats2d]], with_array(model))
 
 
 @registry.architectures("spacy.TorchBiLSTMEncoder.v1")
diff --git a/spacy/ml/staticvectors.py b/spacy/ml/staticvectors.py
index 8d9b1af9b6a..ca81e3d3ab8 100644
--- a/spacy/ml/staticvectors.py
+++ b/spacy/ml/staticvectors.py
@@ -41,12 +41,12 @@ def forward(
         return _handle_empty(model.ops, model.get_dim("nO"))
     key_attr: int = model.attrs["key_attr"]
     keys: Ints1d = model.ops.flatten(
-        cast(Sequence, [doc.to_array(key_attr) for doc in docs])
+        cast(List[Ints1d], [doc.to_array(key_attr) for doc in docs])
     )
     vocab: Vocab = docs[0].vocab
     W = cast(Floats2d, model.ops.as_contig(model.get_param("W")))
     if vocab.vectors.mode == Mode.default:
-        V = cast(Floats2d, model.ops.asarray(vocab.vectors.data))
+        V = model.ops.asarray2f(vocab.vectors.data)
         rows = vocab.vectors.find(keys=keys)
         V = model.ops.as_contig(V[rows])
     elif vocab.vectors.mode == Mode.floret:
@@ -63,7 +63,7 @@ def forward(
         # TODO: more options for UNK tokens
         vectors_data[rows < 0] = 0
     output = Ragged(
-        vectors_data, model.ops.asarray([len(doc) for doc in docs], dtype="i")  # type: ignore
+        vectors_data, model.ops.asarray1i([len(doc) for doc in docs], dtype="i")
     )
     mask = None
     if is_train:
@@ -115,5 +115,5 @@ def _handle_empty(ops: Ops, nO: int):
 def _get_drop_mask(ops: Ops, nO: int, rate: Optional[float]) -> Optional[Floats1d]:
     if rate is not None:
         mask = ops.get_dropout_mask((nO,), rate)
-        return mask  # type: ignore
+        return mask  # type: ignore[return-value]
     return None
diff --git a/spacy/pipeline/entity_linker.py b/spacy/pipeline/entity_linker.py
index 89e7576bf8d..ee4db94e49e 100644
--- a/spacy/pipeline/entity_linker.py
+++ b/spacy/pipeline/entity_linker.py
@@ -362,13 +362,12 @@ def get_loss(self, examples: Iterable[Example], sentence_encodings: Floats2d):
                 method="get_loss", msg="gold entities do not match up"
             )
             raise RuntimeError(err)
-        # TODO: fix typing issue here
-        gradients = self.distance.get_grad(selected_encodings, entity_encodings)  # type: ignore
+        gradients = self.distance.get_grad(selected_encodings, entity_encodings)
         # to match the input size, we need to give a zero gradient for items not in the kb
         out = self.model.ops.alloc2f(*sentence_encodings.shape)
         out[keep_ents] = gradients
 
-        loss = self.distance.get_loss(selected_encodings, entity_encodings)  # type: ignore
+        loss = self.distance.get_loss(selected_encodings, entity_encodings)
         loss = loss / len(entity_encodings)
         return float(loss), out
 
diff --git a/spacy/pipeline/spancat.py b/spacy/pipeline/spancat.py
index 0a6138fbc4d..1250b98a45b 100644
--- a/spacy/pipeline/spancat.py
+++ b/spacy/pipeline/spancat.py
@@ -1,7 +1,7 @@
 from typing import List, Dict, Callable, Tuple, Optional, Iterable, Any, cast
 from thinc.api import Config, Model, get_current_ops, set_dropout_rate, Ops
 from thinc.api import Optimizer
-from thinc.types import Ragged, Ints2d, Floats2d, Ints1d
+from thinc.types import Ragged, Ints2d, Floats2d, Ints1d, ArrayXd
 
 import numpy
 
@@ -75,7 +75,7 @@ def ngram_suggester(docs: Iterable[Doc], *, ops: Optional[Ops] = None) -> Ragged
                 if spans:
                     assert spans[-1].ndim == 2, spans[-1].shape
             lengths.append(length)
-        lengths_array = cast(Ints1d, ops.asarray(lengths, dtype="i"))
+        lengths_array = ops.asarray1i(lengths, dtype="i")
         if len(spans) > 0:
             output = Ragged(ops.xp.vstack(spans), lengths_array)
         else:
@@ -113,7 +113,7 @@ def make_spancat(
     nlp: Language,
     name: str,
     suggester: Suggester,
-    model: Model[Tuple[List[Doc], Ragged], Floats2d],
+    model: Model[Tuple[Iterable[Doc], Ragged], Floats2d],
     spans_key: str,
     scorer: Optional[Callable],
     threshold: float,
@@ -126,7 +126,7 @@ def make_spancat(
     suggester (Callable[[Iterable[Doc], Optional[Ops]], Ragged]): A function that suggests spans.
         Spans are returned as a ragged array with two integer columns, for the
         start and end positions.
-    model (Model[Tuple[List[Doc], Ragged], Floats2d]): A model instance that
+    model (Model[Tuple[Iterable[Doc], Ragged], Floats2d]): A model instance that
         is given a list of documents and (start, end) indices representing
         candidate span offsets. The model predicts a probability for each category
         for each span.
@@ -178,7 +178,7 @@ class SpanCategorizer(TrainablePipe):
     def __init__(
         self,
         vocab: Vocab,
-        model: Model[Tuple[List[Doc], Ragged], Floats2d],
+        model: Model[Tuple[Iterable[Doc], Ragged], Floats2d],
         suggester: Suggester,
         name: str = "spancat",
         *,
@@ -269,7 +269,7 @@ def predict(self, docs: Iterable[Doc]):
         DOCS: https://spacy.io/api/spancategorizer#predict
         """
         indices = self.suggester(docs, ops=self.model.ops)
-        scores = self.model.predict((docs, indices))  # type: ignore
+        scores = self.model.predict((docs, indices))
         return indices, scores
 
     def set_candidates(
@@ -343,6 +343,7 @@ def update(
         set_dropout_rate(self.model, drop)
         scores, backprop_scores = self.model.begin_update((docs, spans))
         loss, d_scores = self.get_loss(examples, (spans, scores))
+        # TODO: the types do not seem to make sense here
         backprop_scores(d_scores)  # type: ignore
         if sgd is not None:
             self.finish_update(sgd)
@@ -387,7 +388,7 @@ def get_loss(
             # The target is a flat array for all docs. Track the position
             # we're at within the flat array.
             offset += spans.lengths[i]
-        target = self.model.ops.asarray(target, dtype="f")  # type: ignore
+        target = self.model.ops.asarray(cast(ArrayXd, target), dtype="f")
         # The target will have the values 0 (for untrue predictions) or 1
         # (for true predictions).
         # The scores should be in the range [0, 1].
diff --git a/spacy/pipeline/textcat.py b/spacy/pipeline/textcat.py
index bc3f127fca8..d1676ecda5e 100644
--- a/spacy/pipeline/textcat.py
+++ b/spacy/pipeline/textcat.py
@@ -1,4 +1,4 @@
-from typing import Iterable, Tuple, Optional, Dict, List, Callable, Any
+from typing import Iterable, Tuple, Optional, Dict, List, Callable, Any, cast
 from thinc.api import get_array_module, Model, Optimizer, set_dropout_rate, Config
 from thinc.types import Floats2d
 import numpy
@@ -293,18 +293,20 @@ def rehearse(
 
     def _examples_to_truth(
         self, examples: Iterable[Example]
-    ) -> Tuple[numpy.ndarray, numpy.ndarray]:
+    ) -> Tuple[Floats2d, Floats2d]:
         nr_examples = len(list(examples))
-        truths = numpy.zeros((nr_examples, len(self.labels)), dtype="f")
-        not_missing = numpy.ones((nr_examples, len(self.labels)), dtype="f")
+        truths = cast(Floats2d, numpy.zeros((nr_examples, len(self.labels)), dtype="f"))
+        not_missing = cast(
+            Floats2d, numpy.ones((nr_examples, len(self.labels)), dtype="f")
+        )
         for i, eg in enumerate(examples):
             for j, label in enumerate(self.labels):
                 if label in eg.reference.cats:
                     truths[i, j] = eg.reference.cats[label]
                 elif self.support_missing_values:
                     not_missing[i, j] = 0.0
-        truths = self.model.ops.asarray(truths)  # type: ignore
-        return truths, not_missing  # type: ignore
+        truths = self.model.ops.asarray2f(truths)
+        return truths, not_missing
 
     def get_loss(self, examples: Iterable[Example], scores) -> Tuple[float, float]:
         """Find the loss and gradient of loss for the batch of documents and
@@ -319,7 +321,7 @@ def get_loss(self, examples: Iterable[Example], scores) -> Tuple[float, float]:
         validate_examples(examples, "TextCategorizer.get_loss")
         self._validate_categories(examples)
         truths, not_missing = self._examples_to_truth(examples)
-        not_missing = self.model.ops.asarray(not_missing)  # type: ignore
+        not_missing = self.model.ops.asarray(not_missing)
         d_scores = scores - truths
         d_scores *= not_missing
         mean_square_error = (d_scores**2).mean()
diff --git a/spacy/schemas.py b/spacy/schemas.py
index 1dfd8ee85c9..b85336e7a80 100644
--- a/spacy/schemas.py
+++ b/spacy/schemas.py
@@ -104,7 +104,7 @@ def get_arg_model(
         sig_args[param.name] = (annotation, default)
     is_strict = strict and not has_variable
     sig_args["__config__"] = ArgSchemaConfig if is_strict else ArgSchemaConfigExtra  # type: ignore[assignment]
-    return create_model(name, **sig_args)  # type: ignore[arg-type, return-value]
+    return create_model(name, **sig_args)  # type: ignore[call-overload]
 
 
 def validate_init_settings(
diff --git a/spacy/scorer.py b/spacy/scorer.py
index 8cd755ac40c..28338a25f18 100644
--- a/spacy/scorer.py
+++ b/spacy/scorer.py
@@ -131,7 +131,7 @@ def score(self, examples: Iterable[Example]) -> Dict[str, Any]:
         """
         scores = {}
         if hasattr(self.nlp.tokenizer, "score"):
-            scores.update(self.nlp.tokenizer.score(examples, **self.cfg))  # type: ignore
+            scores.update(self.nlp.tokenizer.score(examples, **self.cfg))  # type: ignore[union-attr]
         for name, component in self.nlp.pipeline:
             if hasattr(component, "score"):
                 scores.update(component.score(examples, **self.cfg))
diff --git a/spacy/tests/pipeline/test_textcat.py b/spacy/tests/pipeline/test_textcat.py
index 0bb036a334f..91cc7794c8b 100644
--- a/spacy/tests/pipeline/test_textcat.py
+++ b/spacy/tests/pipeline/test_textcat.py
@@ -816,6 +816,7 @@ def test_textcat_loss(multi_label: bool, expected_loss: float):
         textcat = nlp.add_pipe("textcat")
     textcat.initialize(lambda: train_examples)
     assert isinstance(textcat, TextCategorizer)
+    # TODO: the typing doesn't seem to make sense here
     scores = textcat.model.ops.asarray(
         [[0.0, 0.0, 0.0, 1.0], [0.0, 0.0, 1.0, 1.0]], dtype="f"  # type: ignore
     )
diff --git a/spacy/tests/vocab_vectors/test_vectors.py b/spacy/tests/vocab_vectors/test_vectors.py
index e3ad206f4e6..9ee8c9e948a 100644
--- a/spacy/tests/vocab_vectors/test_vectors.py
+++ b/spacy/tests/vocab_vectors/test_vectors.py
@@ -4,7 +4,7 @@
 from thinc.api import NumpyOps, get_current_ops
 
 from spacy.lang.en import English
-from spacy.strings import hash_string  # type: ignore
+from spacy.strings import hash_string  # type: ignore[attr-defined]
 from spacy.tokenizer import Tokenizer
 from spacy.tokens import Doc
 from spacy.training.initialize import convert_vectors
diff --git a/spacy/tokens/_serialize.py b/spacy/tokens/_serialize.py
index c4e8f26f408..7fd24c95a08 100644
--- a/spacy/tokens/_serialize.py
+++ b/spacy/tokens/_serialize.py
@@ -1,10 +1,11 @@
-from typing import List, Dict, Set, Iterable, Iterator, Union, Optional
+from typing import List, Dict, Set, Iterable, Iterator, Union, Optional, cast
 from pathlib import Path
 import numpy
 from numpy import ndarray
 import zlib
 import srsly
 from thinc.api import NumpyOps
+from thinc.types import Ints2d
 
 from .doc import Doc
 from ..vocab import Vocab
@@ -140,12 +141,12 @@ def get_docs(self, vocab: Vocab) -> Iterator[Doc]:
         orth_col = self.attrs.index(ORTH)
         for i in range(len(self.tokens)):
             flags = self.flags[i]
-            tokens = self.tokens[i]
+            tokens = cast(Ints2d, self.tokens[i])
             spaces: Optional[ndarray] = self.spaces[i]
             if flags.get("has_unknown_spaces"):
                 spaces = None
             doc = Doc(vocab, words=tokens[:, orth_col], spaces=spaces)  # type: ignore
-            doc = doc.from_array(self.attrs, tokens)  # type: ignore
+            doc = doc.from_array(self.attrs, tokens)
             doc.cats = self.cats[i]
             # backwards-compatibility: may be b'' or serialized empty list
             if self.span_groups[i] and self.span_groups[i] != SpanGroups._EMPTY_BYTES:
diff --git a/spacy/util.py b/spacy/util.py
index 66e257dd867..4767a68819f 100644
--- a/spacy/util.py
+++ b/spacy/util.py
@@ -1,4 +1,4 @@
-from typing import List, Mapping, NoReturn, Union, Dict, Any, Set
+from typing import List, Mapping, NoReturn, Union, Dict, Any, Set, cast
 from typing import Optional, Iterable, Callable, Tuple, Type
 from typing import Iterator, Type, Pattern, Generator, TYPE_CHECKING
 from types import ModuleType
@@ -391,7 +391,7 @@ def get_module_path(module: ModuleType) -> Path:
     """
     if not hasattr(module, "__module__"):
         raise ValueError(Errors.E169.format(module=repr(module)))
-    return Path(sys.modules[module.__module__].__file__).parent
+    return Path(cast(str, sys.modules[module.__module__].__file__)).parent
 
 
 def load_model(
@@ -878,7 +878,7 @@ def get_package_path(name: str) -> Path:
     # Here we're importing the module just to find it. This is worryingly
     # indirect, but it's otherwise very difficult to find the package.
     pkg = importlib.import_module(name)
-    return Path(pkg.__file__).parent
+    return Path(cast(str, pkg.__file__)).parent
 
 
 def replace_model_node(model: Model, target: Model, replacement: Model) -> None: