4 changes: 2 additions & 2 deletions requirements.txt
@@ -16,7 +16,7 @@ pathy>=0.3.5
numpy>=1.15.0
requests>=2.13.0,<3.0.0
tqdm>=4.38.0,<5.0.0
-pydantic>=1.7.4,!=1.8,!=1.8.1,<1.9.0
+pydantic>=1.9.0,<1.10.0
jinja2
langcodes>=3.2.0,<4.0.0
# Official Python utilities
@@ -31,7 +31,7 @@ pytest-timeout>=1.3.0,<2.0.0
mock>=2.0.0,<3.0.0
flake8>=3.8.0,<3.10.0
hypothesis>=3.27.0,<7.0.0
-mypy==0.910
+mypy>=0.910,<=0.950
types-dataclasses>=0.1.3; python_version < "3.7"
types-mock>=0.1.1
types-requests
2 changes: 1 addition & 1 deletion setup.cfg
@@ -57,7 +57,7 @@ install_requires =
tqdm>=4.38.0,<5.0.0
numpy>=1.15.0
requests>=2.13.0,<3.0.0
-pydantic>=1.7.4,!=1.8,!=1.8.1,<1.9.0
+pydantic>=1.9.0,<1.10.0
jinja2
# Official Python utilities
setuptools
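Note: the pydantic pin moves in lockstep in requirements.txt and setup.cfg, so development environments and installed packages agree. A minimal sanity check along these lines can confirm an environment matches the new range (the `packaging` helper is an assumption here, not something this PR adds):

```python
# Illustrative only: verify the installed pydantic satisfies the new pin.
import pydantic
from packaging.version import Version

assert Version("1.9.0") <= Version(pydantic.VERSION) < Version("1.10.0")
```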
4 changes: 2 additions & 2 deletions spacy/displacy/__init__.py
@@ -55,12 +55,12 @@ def render(
raise ValueError(Errors.E096)
renderer_func, converter = factories[style]
renderer = renderer_func(options=options)
-parsed = [converter(doc, options) for doc in docs] if not manual else docs # type: ignore
+parsed = [converter(doc, options) for doc in docs] if not manual else docs
if manual:
for doc in docs:
if isinstance(doc, dict) and "ents" in doc:
doc["ents"] = sorted(doc["ents"], key=lambda x: (x["start"], x["end"]))
_html["parsed"] = renderer.render(parsed, page=page, minify=minify).strip() # type: ignore
_html["parsed"] = renderer.render(parsed, page=page, minify=minify).strip() # type: ignore [attr-defined]
html = _html["parsed"]
if RENDER_WRAPPER is not None:
html = RENDER_WRAPPER(html)
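Note: both displacy changes narrow blanket ignores to error-code-scoped ones. A minimal sketch of the difference (the method here is hypothetical):

```python
def render_html(obj: object) -> str:
    # mypy reports attr-defined for obj.render(), and the scoped comment
    # suppresses only that code. A bare "# type: ignore" would also hide
    # any new, unrelated error that later appears on this line.
    return obj.render()  # type: ignore[attr-defined]
```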
6 changes: 5 additions & 1 deletion spacy/errors.py
@@ -1,3 +1,4 @@
+from typing import Literal, Union
import warnings


@@ -26,7 +27,10 @@ def setup_default_warnings():
filter_warning("once", error_msg="[W114]")


-def filter_warning(action: str, error_msg: str):
+def filter_warning(
+    action: Literal["default", "error", "ignore", "always", "module", "once"],
+    error_msg: str,
+):
"""Customize how spaCy should handle a certain warning.

error_msg (str): e.g. "W006", or a full error message
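Note: with the `Literal` signature, mypy rejects unsupported warning actions at the call site while runtime behaviour is unchanged. A self-contained sketch of the pattern (the body is loosely modelled on the helper, not copied from it):

```python
import warnings
from typing import Literal

WarningAction = Literal["default", "error", "ignore", "always", "module", "once"]

def filter_warning(action: WarningAction, error_msg: str) -> None:
    # Apply the action to warnings whose message contains the error code.
    warnings.filterwarnings(action, message=f".*{error_msg}.*")

filter_warning("once", "W114")        # accepted
# filter_warning("sometimes", "W114")  # mypy error: not a valid Literal value
```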
2 changes: 1 addition & 1 deletion spacy/language.py
@@ -1153,7 +1153,7 @@ def update(
for name, proc in self.pipeline:
# ignore statements are used here because mypy ignores hasattr
if name not in exclude and hasattr(proc, "update"):
-proc.update(examples, sgd=None, losses=losses, **component_cfg[name]) # type: ignore
+proc.update(examples, sgd=None, losses=losses, **component_cfg[name]) # type: ignore[attr-defined]
if sgd not in (None, False):
if (
name not in exclude
6 changes: 4 additions & 2 deletions spacy/ml/extract_ngrams.py
@@ -1,5 +1,7 @@
+from typing import cast
from thinc.api import Model

+from thinc.types import Ints1d
from ..util import registry
from ..attrs import LOWER

@@ -16,10 +18,10 @@ def forward(model: Model, docs, is_train: bool):
batch_keys = []
batch_vals = []
for doc in docs:
-unigrams = model.ops.asarray(doc.to_array([model.attrs["attr"]]))
+unigrams = cast(Ints1d, model.ops.asarray(doc.to_array([model.attrs["attr"]])))
ngrams = [unigrams]
for n in range(2, model.attrs["ngram_size"] + 1):
-ngrams.append(model.ops.ngrams(n, unigrams)) # type: ignore[arg-type]
+ngrams.append(model.ops.ngrams(n, unigrams))
keys = model.ops.xp.concatenate(ngrams)
keys, vals = model.ops.xp.unique(keys, return_counts=True)
batch_keys.append(keys)
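Note: the `cast` here replaces a per-line ignore. `ops.asarray` is typed to return a general array, and `cast` narrows that with no runtime effect. A small illustration:

```python
from typing import cast

import numpy
from thinc.types import Ints1d

arr = numpy.asarray([1, 2, 3], dtype="int32")
unigrams = cast(Ints1d, arr)  # same object at runtime; only mypy sees Ints1d
assert unigrams is arr
```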
12 changes: 7 additions & 5 deletions spacy/ml/extract_spans.py
@@ -1,6 +1,6 @@
-from typing import Tuple, Callable
+from typing import Tuple, Callable, cast
from thinc.api import Model, to_numpy
-from thinc.types import Ragged, Ints1d
+from thinc.types import Ragged, Ints1d, FloatsXd

from ..util import registry

@@ -29,7 +29,9 @@ def forward(
assert spans.dataXd.ndim == 2
indices = _get_span_indices(ops, spans, X.lengths)
if len(indices) > 0:
-Y = Ragged(X.dataXd[indices], spans.dataXd[:, 1] - spans.dataXd[:, 0]) # type: ignore[arg-type, index]
+Y = Ragged(
+    X.dataXd[indices], cast(Ints1d, spans.dataXd[:, 1] - spans.dataXd[:, 0])
+)
else:
Y = Ragged(
ops.xp.zeros(X.dataXd.shape, dtype=X.dataXd.dtype),
@@ -40,7 +42,7 @@

def backprop_windows(dY: Ragged) -> Tuple[Ragged, Ragged]:
dX = Ragged(ops.alloc2f(*x_shape), x_lengths)
-ops.scatter_add(dX.dataXd, indices, dY.dataXd) # type: ignore[arg-type]
+ops.scatter_add(cast(FloatsXd, dX.dataXd), indices, cast(FloatsXd, dY.dataXd))
return (dX, spans)

return Y, backprop_windows
@@ -57,7 +59,7 @@ def _get_span_indices(ops, spans: Ragged, lengths: Ints1d) -> Ints1d:
for i, length in enumerate(lengths):
spans_i = spans[i].dataXd + offset
for j in range(spans_i.shape[0]):
-indices.append(ops.xp.arange(spans_i[j, 0], spans_i[j, 1])) # type: ignore[call-overload, index]
+indices.append(ops.xp.arange(spans_i[j, 0], spans_i[j, 1])) # type: ignore[call-overload]
offset += length
return ops.flatten(indices, dtype="i", ndim_if_empty=1)

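Note: the new `Ragged` construction spells out the lengths computation. Spans are stored as (start, end) rows, so lengths are the column difference, cast to `Ints1d` because indexing the general `ArrayXd` type loses precision for mypy. For illustration:

```python
import numpy

# Each row is a (start, end) pair of offsets into the concatenated batch.
spans = numpy.asarray([[0, 3], [3, 5]], dtype="int32")
lengths = spans[:, 1] - spans[:, 0]  # array([3, 2], dtype=int32)
```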
2 changes: 1 addition & 1 deletion spacy/ml/models/entity_linker.py
@@ -23,7 +23,7 @@ def build_nel_encoder(
((tok2vec >> list2ragged()) & build_span_maker())
>> extract_spans()
>> reduce_mean()
->> residual(Maxout(nO=token_width, nI=token_width, nP=2, dropout=0.0)) # type: ignore[arg-type]
+>> residual(Maxout(nO=token_width, nI=token_width, nP=2, dropout=0.0))
>> output_layer
)
model.set_ref("output_layer", output_layer)
2 changes: 1 addition & 1 deletion spacy/ml/models/multi_task.py
@@ -142,7 +142,7 @@ def build_cloze_characters_multi_task_model(
cast(Model[List["Floats2d"], Floats2d], list2array()),
Maxout(nO=hidden_size, nP=maxout_pieces),
LayerNorm(nI=hidden_size),
-MultiSoftmax([256] * nr_char, nI=hidden_size), # type: ignore[arg-type]
+MultiSoftmax(tuple([256] * nr_char), nI=hidden_size),
)
model = build_masked_language_model(vocab, chain(tok2vec, output_layer))
model.set_ref("tok2vec", tok2vec)
2 changes: 1 addition & 1 deletion spacy/ml/models/tagger.py
@@ -22,7 +22,7 @@ def build_tagger_model(
output_layer = Softmax_v2(
nO, t2v_width, init_W=zero_init, normalize_outputs=normalize
)
-softmax = with_array(output_layer) # type: ignore
+softmax = with_array(output_layer)
model = chain(tok2vec, softmax)
model.set_ref("tok2vec", tok2vec)
model.set_ref("softmax", output_layer)
12 changes: 6 additions & 6 deletions spacy/ml/models/textcat.py
@@ -1,5 +1,5 @@
from functools import partial
-from typing import Optional, List
+from typing import Optional, List, cast

from thinc.types import Floats2d
from thinc.api import Model, reduce_mean, Linear, list2ragged, Logistic
@@ -59,7 +59,7 @@ def build_simple_cnn_text_classifier(
resizable_layer=resizable_layer,
)
model.set_ref("tok2vec", tok2vec)
-model.set_dim("nO", nO) # type: ignore # TODO: remove type ignore once Thinc has been updated
+model.set_dim("nO", cast(int, nO))
model.attrs["multi_label"] = not exclusive_classes
return model

@@ -85,15 +85,15 @@ def build_bow_text_classifier(
if not no_output_layer:
fill_defaults["b"] = NEG_VALUE
output_layer = softmax_activation() if exclusive_classes else Logistic()
-resizable_layer = resizable( # type: ignore[var-annotated]
+resizable_layer: Model[Floats2d, Floats2d] = resizable(
sparse_linear,
resize_layer=partial(resize_linear_weighted, fill_defaults=fill_defaults),
)
model = extract_ngrams(ngram_size, attr=ORTH) >> resizable_layer
model = with_cpu(model, model.ops)
if output_layer:
model = model >> with_cpu(output_layer, output_layer.ops)
-model.set_dim("nO", nO) # type: ignore[arg-type]
+model.set_dim("nO", cast(int, nO))
model.set_ref("output_layer", sparse_linear)
model.attrs["multi_label"] = not exclusive_classes
model.attrs["resize_output"] = partial(
@@ -130,7 +130,7 @@ def build_text_classifier_v2(
model = (linear_model | cnn_model) >> output_layer
model.set_ref("tok2vec", tok2vec)
if model.has_dim("nO") is not False:
-model.set_dim("nO", nO) # type: ignore[arg-type]
+model.set_dim("nO", cast(int, nO))
model.set_ref("output_layer", linear_model.get_ref("output_layer"))
model.set_ref("attention_layer", attention_layer)
model.set_ref("maxout_layer", maxout_layer)
@@ -164,7 +164,7 @@ def build_text_classifier_lowdata(
>> list2ragged()
>> ParametricAttention(width)
>> reduce_sum()
->> residual(Relu(width, width)) ** 2 # type: ignore[arg-type]
+>> residual(Relu(width, width)) ** 2
>> Linear(nO, width)
)
if dropout:
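Note: all three textcat constructors take `nO: Optional[int]`, while `Model.set_dim` expects a plain `int`; `cast(int, nO)` records that assumption for mypy without adding a runtime check. A sketch:

```python
from typing import Optional, cast

def set_output_dim(nO: Optional[int]) -> int:
    # cast() adds no runtime validation: if nO were None here, the failure
    # would surface later, exactly as with the old type-ignore comment.
    return cast(int, nO)
```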
37 changes: 19 additions & 18 deletions spacy/ml/models/tok2vec.py
@@ -1,5 +1,5 @@
from typing import Optional, List, Union, cast
-from thinc.types import Floats2d, Ints2d, Ragged
+from thinc.types import Floats2d, Ints1d, Ints2d, Ragged, ArrayXd
from thinc.api import chain, clone, concatenate, with_array, with_padded
from thinc.api import Model, noop, list2ragged, ragged2list, HashEmbed
from thinc.api import expand_window, residual, Maxout, Mish, PyTorchLSTM
@@ -159,7 +159,7 @@ def make_hash_embed(index):
embeddings = [make_hash_embed(i) for i in range(len(attrs))]
concat_size = width * (len(embeddings) + include_static_vectors)
max_out: Model[Ragged, Ragged] = with_array(
-Maxout(width, concat_size, nP=3, dropout=0.0, normalize=True) # type: ignore
+Maxout(width, concat_size, nP=3, dropout=0.0, normalize=True)
)
if include_static_vectors:
feature_extractor: Model[List[Doc], Ragged] = chain(
@@ -173,17 +173,17 @@ def make_hash_embed(index):
StaticVectors(width, dropout=0.0),
),
max_out,
-cast(Model[Ragged, List[Floats2d]], ragged2list()),
+ragged2list(),
)
else:
model = chain(
FeatureExtractor(list(attrs)),
cast(Model[List[Ints2d], Ragged], list2ragged()),
with_array(concatenate(*embeddings)),
max_out,
-cast(Model[Ragged, List[Floats2d]], ragged2list()),
+ragged2list(),
)
-return model
+return cast(Model[List[Doc], List[Floats2d]], model)


@registry.architectures("spacy.CharacterEmbed.v2")
@@ -231,13 +231,14 @@ def CharacterEmbed(
)
feature_extractor: Model[List[Doc], Ragged] = chain(
FeatureExtractor([feature]),
-cast(Model[List[Ints2d], Ragged], list2ragged()),
-with_array(HashEmbed(nO=width, nV=rows, column=0, seed=5)), # type: ignore
+list2ragged(),
+# TODO: the typing does not seem to make sense here
+with_array(HashEmbed(nO=width, nV=rows, column=0, seed=5)), # type:ignore
)
max_out: Model[Ragged, Ragged]
if include_static_vectors:
max_out = with_array(
-Maxout(width, nM * nC + (2 * width), nP=3, normalize=True, dropout=0.0) # type: ignore
+Maxout(width, nM * nC + (2 * width), nP=3, normalize=True, dropout=0.0)
)
model = chain(
concatenate(
Expand All @@ -246,21 +247,21 @@ def CharacterEmbed(
StaticVectors(width, dropout=0.0),
),
max_out,
-cast(Model[Ragged, List[Floats2d]], ragged2list()),
+ragged2list(),
)
else:
max_out = with_array(
-Maxout(width, nM * nC + width, nP=3, normalize=True, dropout=0.0) # type: ignore
+Maxout(width, nM * nC + width, nP=3, normalize=True, dropout=0.0)
)
model = chain(
concatenate(
char_embed,
feature_extractor,
),
max_out,
-cast(Model[Ragged, List[Floats2d]], ragged2list()),
+ragged2list(),
)
-return model
+return cast(Model[List[Doc], List[Floats2d]], model)


@registry.architectures("spacy.MaxoutWindowEncoder.v2")
@@ -280,7 +281,7 @@
depth (int): The number of convolutional layers. Recommended value is 4.
"""
cnn = chain(
-expand_window(window_size=window_size),
+cast(Model[Floats2d, Floats2d], expand_window(window_size=window_size)),
Maxout(
nO=width,
nI=width * ((window_size * 2) + 1),
@@ -289,10 +290,10 @@
normalize=True,
),
)
-model = clone(residual(cnn), depth) # type: ignore[arg-type]
+model = clone(residual(cnn), depth)
model.set_dim("nO", width)
receptive_field = window_size * depth
-return with_array(model, pad=receptive_field) # type: ignore[arg-type]
+return cast(Model[List[Floats2d], List[Floats2d]], with_array(model, pad=receptive_field))


@registry.architectures("spacy.MishWindowEncoder.v2")
@@ -310,12 +311,12 @@ def MishWindowEncoder(
depth (int): The number of convolutional layers. Recommended value is 4.
"""
cnn = chain(
-expand_window(window_size=window_size),
+cast(Model[Floats2d, Floats2d], expand_window(window_size=window_size)),
Mish(nO=width, nI=width * ((window_size * 2) + 1), dropout=0.0, normalize=True),
)
-model = clone(residual(cnn), depth) # type: ignore[arg-type]
+model = clone(residual(cnn), depth)
model.set_dim("nO", width)
-return with_array(model) # type: ignore[arg-type]
+return cast(Model[List[Floats2d], List[Floats2d]], with_array(model))


@registry.architectures("spacy.TorchBiLSTMEncoder.v1")
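Note: the recurring pattern in tok2vec.py is to drop per-layer ignores and instead cast once at the public boundary, where the intended end-to-end signature is known. A minimal sketch with generic thinc layers:

```python
from typing import cast

from thinc.api import Model, chain, Relu
from thinc.types import Floats2d

# mypy cannot always infer the composed generics through chain(),
# so the constructor asserts the overall signature once on return.
inner = chain(Relu(nO=8), Relu(nO=8))
model = cast(Model[Floats2d, Floats2d], inner)
```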
8 changes: 4 additions & 4 deletions spacy/ml/staticvectors.py
@@ -41,12 +41,12 @@ def forward(
return _handle_empty(model.ops, model.get_dim("nO"))
key_attr: int = model.attrs["key_attr"]
keys: Ints1d = model.ops.flatten(
-cast(Sequence, [doc.to_array(key_attr) for doc in docs])
+cast(List[Ints1d], [doc.to_array(key_attr) for doc in docs])
)
vocab: Vocab = docs[0].vocab
W = cast(Floats2d, model.ops.as_contig(model.get_param("W")))
if vocab.vectors.mode == Mode.default:
-V = cast(Floats2d, model.ops.asarray(vocab.vectors.data))
+V = model.ops.asarray2f(vocab.vectors.data)
rows = vocab.vectors.find(keys=keys)
V = model.ops.as_contig(V[rows])
elif vocab.vectors.mode == Mode.floret:
@@ -63,7 +63,7 @@
# TODO: more options for UNK tokens
vectors_data[rows < 0] = 0
output = Ragged(
-vectors_data, model.ops.asarray([len(doc) for doc in docs], dtype="i") # type: ignore
+vectors_data, model.ops.asarray1i([len(doc) for doc in docs], dtype="i")
)
mask = None
if is_train:
@@ -115,5 +115,5 @@ def _handle_empty(ops: Ops, nO: int):
def _get_drop_mask(ops: Ops, nO: int, rate: Optional[float]) -> Optional[Floats1d]:
if rate is not None:
mask = ops.get_dropout_mask((nO,), rate)
-return mask # type: ignore
+return mask # type: ignore[return-value]
return None
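Note: two of the staticvectors fixes swap `cast` around `ops.asarray` for thinc's typed convenience constructors, which return precisely typed arrays directly. For example:

```python
from thinc.api import NumpyOps

ops = NumpyOps()
V = ops.asarray2f([[1.0, 2.0], [3.0, 4.0]])  # returned as Floats2d
lengths = ops.asarray1i([2, 3])              # returned as Ints1d
```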
5 changes: 2 additions & 3 deletions spacy/pipeline/entity_linker.py
@@ -362,13 +362,12 @@ def get_loss(self, examples: Iterable[Example], sentence_encodings: Floats2d):
method="get_loss", msg="gold entities do not match up"
)
raise RuntimeError(err)
-# TODO: fix typing issue here
-gradients = self.distance.get_grad(selected_encodings, entity_encodings) # type: ignore
+gradients = self.distance.get_grad(selected_encodings, entity_encodings)
# to match the input size, we need to give a zero gradient for items not in the kb
out = self.model.ops.alloc2f(*sentence_encodings.shape)
out[keep_ents] = gradients

-loss = self.distance.get_loss(selected_encodings, entity_encodings) # type: ignore
+loss = self.distance.get_loss(selected_encodings, entity_encodings)
loss = loss / len(entity_encodings)
return float(loss), out

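Note: the ignores on the distance calls can go because updated thinc stubs type the loss API; the calls themselves are unchanged. A self-contained sketch of that API, assuming the cosine distance the entity linker typically uses (the sample data is illustrative):

```python
import numpy
from thinc.api import CosineDistance

distance = CosineDistance(normalize=False)
guesses = numpy.asarray([[0.5, 0.1], [0.2, 0.9]], dtype="f")
truths = numpy.asarray([[1.0, 0.0], [0.0, 1.0]], dtype="f")
d_scores = distance.get_grad(guesses, truths)  # gradient w.r.t. guesses
loss = distance.get_loss(guesses, truths)
```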