diff --git a/requirements.txt b/requirements.txt index 619d35ebcd4..d46f799c221 100644 --- a/requirements.txt +++ b/requirements.txt @@ -16,7 +16,7 @@ pathy>=0.3.5 numpy>=1.15.0 requests>=2.13.0,<3.0.0 tqdm>=4.38.0,<5.0.0 -pydantic>=1.7.4,!=1.8,!=1.8.1,<1.9.0 +pydantic>=1.9.0,<1.10.0 jinja2 langcodes>=3.2.0,<4.0.0 # Official Python utilities @@ -31,7 +31,7 @@ pytest-timeout>=1.3.0,<2.0.0 mock>=2.0.0,<3.0.0 flake8>=3.8.0,<3.10.0 hypothesis>=3.27.0,<7.0.0 -mypy==0.910 +mypy>=0.910,<=0.950 types-dataclasses>=0.1.3; python_version < "3.7" types-mock>=0.1.1 types-requests diff --git a/setup.cfg b/setup.cfg index 2626de87e86..4b344af378d 100644 --- a/setup.cfg +++ b/setup.cfg @@ -57,7 +57,7 @@ install_requires = tqdm>=4.38.0,<5.0.0 numpy>=1.15.0 requests>=2.13.0,<3.0.0 - pydantic>=1.7.4,!=1.8,!=1.8.1,<1.9.0 + pydantic>=1.9.0,<1.10.0 jinja2 # Official Python utilities setuptools diff --git a/spacy/displacy/__init__.py b/spacy/displacy/__init__.py index 5d49b6eb758..427e07cdbd3 100644 --- a/spacy/displacy/__init__.py +++ b/spacy/displacy/__init__.py @@ -55,12 +55,12 @@ def render( raise ValueError(Errors.E096) renderer_func, converter = factories[style] renderer = renderer_func(options=options) - parsed = [converter(doc, options) for doc in docs] if not manual else docs # type: ignore + parsed = [converter(doc, options) for doc in docs] if not manual else docs if manual: for doc in docs: if isinstance(doc, dict) and "ents" in doc: doc["ents"] = sorted(doc["ents"], key=lambda x: (x["start"], x["end"])) - _html["parsed"] = renderer.render(parsed, page=page, minify=minify).strip() # type: ignore + _html["parsed"] = renderer.render(parsed, page=page, minify=minify).strip() # type: ignore [attr-defined] html = _html["parsed"] if RENDER_WRAPPER is not None: html = RENDER_WRAPPER(html) diff --git a/spacy/errors.py b/spacy/errors.py index b01afcb802f..d581da1a88f 100644 --- a/spacy/errors.py +++ b/spacy/errors.py @@ -1,3 +1,4 @@ +from typing import Literal, Union import warnings @@ -26,7 +27,10 @@ def setup_default_warnings(): filter_warning("once", error_msg="[W114]") -def filter_warning(action: str, error_msg: str): +def filter_warning( + action: Literal["default", "error", "ignore", "always", "module", "once"], + error_msg: str, +): """Customize how spaCy should handle a certain warning. error_msg (str): e.g. "W006", or a full error message diff --git a/spacy/language.py b/spacy/language.py index bab403f0eb4..8a556d3ec3e 100644 --- a/spacy/language.py +++ b/spacy/language.py @@ -1153,7 +1153,7 @@ def update( for name, proc in self.pipeline: # ignore statements are used here because mypy ignores hasattr if name not in exclude and hasattr(proc, "update"): - proc.update(examples, sgd=None, losses=losses, **component_cfg[name]) # type: ignore + proc.update(examples, sgd=None, losses=losses, **component_cfg[name]) # type: ignore[attr-defined] if sgd not in (None, False): if ( name not in exclude diff --git a/spacy/ml/extract_ngrams.py b/spacy/ml/extract_ngrams.py index c9c82f36949..eda41e7adb9 100644 --- a/spacy/ml/extract_ngrams.py +++ b/spacy/ml/extract_ngrams.py @@ -1,5 +1,7 @@ +from typing import cast from thinc.api import Model +from thinc.types import Ints1d from ..util import registry from ..attrs import LOWER @@ -16,10 +18,10 @@ def forward(model: Model, docs, is_train: bool): batch_keys = [] batch_vals = [] for doc in docs: - unigrams = model.ops.asarray(doc.to_array([model.attrs["attr"]])) + unigrams = cast(Ints1d, model.ops.asarray(doc.to_array([model.attrs["attr"]]))) ngrams = [unigrams] for n in range(2, model.attrs["ngram_size"] + 1): - ngrams.append(model.ops.ngrams(n, unigrams)) # type: ignore[arg-type] + ngrams.append(model.ops.ngrams(n, unigrams)) keys = model.ops.xp.concatenate(ngrams) keys, vals = model.ops.xp.unique(keys, return_counts=True) batch_keys.append(keys) diff --git a/spacy/ml/extract_spans.py b/spacy/ml/extract_spans.py index d5e9bc07cba..2288dc2c870 100644 --- a/spacy/ml/extract_spans.py +++ b/spacy/ml/extract_spans.py @@ -1,6 +1,6 @@ -from typing import Tuple, Callable +from typing import Tuple, Callable, cast from thinc.api import Model, to_numpy -from thinc.types import Ragged, Ints1d +from thinc.types import Ragged, Ints1d, FloatsXd from ..util import registry @@ -29,7 +29,9 @@ def forward( assert spans.dataXd.ndim == 2 indices = _get_span_indices(ops, spans, X.lengths) if len(indices) > 0: - Y = Ragged(X.dataXd[indices], spans.dataXd[:, 1] - spans.dataXd[:, 0]) # type: ignore[arg-type, index] + Y = Ragged( + X.dataXd[indices], cast(Ints1d, spans.dataXd[:, 1] - spans.dataXd[:, 0]) + ) else: Y = Ragged( ops.xp.zeros(X.dataXd.shape, dtype=X.dataXd.dtype), @@ -40,7 +42,7 @@ def forward( def backprop_windows(dY: Ragged) -> Tuple[Ragged, Ragged]: dX = Ragged(ops.alloc2f(*x_shape), x_lengths) - ops.scatter_add(dX.dataXd, indices, dY.dataXd) # type: ignore[arg-type] + ops.scatter_add(cast(FloatsXd, dX.dataXd), indices, cast(FloatsXd, dY.dataXd)) return (dX, spans) return Y, backprop_windows @@ -57,7 +59,7 @@ def _get_span_indices(ops, spans: Ragged, lengths: Ints1d) -> Ints1d: for i, length in enumerate(lengths): spans_i = spans[i].dataXd + offset for j in range(spans_i.shape[0]): - indices.append(ops.xp.arange(spans_i[j, 0], spans_i[j, 1])) # type: ignore[call-overload, index] + indices.append(ops.xp.arange(spans_i[j, 0], spans_i[j, 1])) # type: ignore[call-overload] offset += length return ops.flatten(indices, dtype="i", ndim_if_empty=1) diff --git a/spacy/ml/models/entity_linker.py b/spacy/ml/models/entity_linker.py index 0149bea89c6..fba4b485f10 100644 --- a/spacy/ml/models/entity_linker.py +++ b/spacy/ml/models/entity_linker.py @@ -23,7 +23,7 @@ def build_nel_encoder( ((tok2vec >> list2ragged()) & build_span_maker()) >> extract_spans() >> reduce_mean() - >> residual(Maxout(nO=token_width, nI=token_width, nP=2, dropout=0.0)) # type: ignore[arg-type] + >> residual(Maxout(nO=token_width, nI=token_width, nP=2, dropout=0.0)) >> output_layer ) model.set_ref("output_layer", output_layer) diff --git a/spacy/ml/models/multi_task.py b/spacy/ml/models/multi_task.py index a7d67c6dda8..2d89336ff27 100644 --- a/spacy/ml/models/multi_task.py +++ b/spacy/ml/models/multi_task.py @@ -142,7 +142,7 @@ def build_cloze_characters_multi_task_model( cast(Model[List["Floats2d"], Floats2d], list2array()), Maxout(nO=hidden_size, nP=maxout_pieces), LayerNorm(nI=hidden_size), - MultiSoftmax([256] * nr_char, nI=hidden_size), # type: ignore[arg-type] + MultiSoftmax(tuple([256] * nr_char), nI=hidden_size), ) model = build_masked_language_model(vocab, chain(tok2vec, output_layer)) model.set_ref("tok2vec", tok2vec) diff --git a/spacy/ml/models/tagger.py b/spacy/ml/models/tagger.py index 9f8ef7b2b9e..5fa0e6fff4f 100644 --- a/spacy/ml/models/tagger.py +++ b/spacy/ml/models/tagger.py @@ -22,7 +22,7 @@ def build_tagger_model( output_layer = Softmax_v2( nO, t2v_width, init_W=zero_init, normalize_outputs=normalize ) - softmax = with_array(output_layer) # type: ignore + softmax = with_array(output_layer) model = chain(tok2vec, softmax) model.set_ref("tok2vec", tok2vec) model.set_ref("softmax", output_layer) diff --git a/spacy/ml/models/textcat.py b/spacy/ml/models/textcat.py index c8c146f026b..e436d59de6f 100644 --- a/spacy/ml/models/textcat.py +++ b/spacy/ml/models/textcat.py @@ -1,5 +1,5 @@ from functools import partial -from typing import Optional, List +from typing import Optional, List, cast from thinc.types import Floats2d from thinc.api import Model, reduce_mean, Linear, list2ragged, Logistic @@ -59,7 +59,7 @@ def build_simple_cnn_text_classifier( resizable_layer=resizable_layer, ) model.set_ref("tok2vec", tok2vec) - model.set_dim("nO", nO) # type: ignore # TODO: remove type ignore once Thinc has been updated + model.set_dim("nO", cast(int, nO)) model.attrs["multi_label"] = not exclusive_classes return model @@ -85,7 +85,7 @@ def build_bow_text_classifier( if not no_output_layer: fill_defaults["b"] = NEG_VALUE output_layer = softmax_activation() if exclusive_classes else Logistic() - resizable_layer = resizable( # type: ignore[var-annotated] + resizable_layer: Model[Floats2d, Floats2d] = resizable( sparse_linear, resize_layer=partial(resize_linear_weighted, fill_defaults=fill_defaults), ) @@ -93,7 +93,7 @@ def build_bow_text_classifier( model = with_cpu(model, model.ops) if output_layer: model = model >> with_cpu(output_layer, output_layer.ops) - model.set_dim("nO", nO) # type: ignore[arg-type] + model.set_dim("nO", cast(int, nO)) model.set_ref("output_layer", sparse_linear) model.attrs["multi_label"] = not exclusive_classes model.attrs["resize_output"] = partial( @@ -130,7 +130,7 @@ def build_text_classifier_v2( model = (linear_model | cnn_model) >> output_layer model.set_ref("tok2vec", tok2vec) if model.has_dim("nO") is not False: - model.set_dim("nO", nO) # type: ignore[arg-type] + model.set_dim("nO", cast(int, nO)) model.set_ref("output_layer", linear_model.get_ref("output_layer")) model.set_ref("attention_layer", attention_layer) model.set_ref("maxout_layer", maxout_layer) @@ -164,7 +164,7 @@ def build_text_classifier_lowdata( >> list2ragged() >> ParametricAttention(width) >> reduce_sum() - >> residual(Relu(width, width)) ** 2 # type: ignore[arg-type] + >> residual(Relu(width, width)) ** 2 >> Linear(nO, width) ) if dropout: diff --git a/spacy/ml/models/tok2vec.py b/spacy/ml/models/tok2vec.py index ecdf6be27c6..84c9ef669ae 100644 --- a/spacy/ml/models/tok2vec.py +++ b/spacy/ml/models/tok2vec.py @@ -1,5 +1,5 @@ from typing import Optional, List, Union, cast -from thinc.types import Floats2d, Ints2d, Ragged +from thinc.types import Floats2d, Ints1d, Ints2d, Ragged, ArrayXd from thinc.api import chain, clone, concatenate, with_array, with_padded from thinc.api import Model, noop, list2ragged, ragged2list, HashEmbed from thinc.api import expand_window, residual, Maxout, Mish, PyTorchLSTM @@ -159,7 +159,7 @@ def make_hash_embed(index): embeddings = [make_hash_embed(i) for i in range(len(attrs))] concat_size = width * (len(embeddings) + include_static_vectors) max_out: Model[Ragged, Ragged] = with_array( - Maxout(width, concat_size, nP=3, dropout=0.0, normalize=True) # type: ignore + Maxout(width, concat_size, nP=3, dropout=0.0, normalize=True) ) if include_static_vectors: feature_extractor: Model[List[Doc], Ragged] = chain( @@ -173,7 +173,7 @@ def make_hash_embed(index): StaticVectors(width, dropout=0.0), ), max_out, - cast(Model[Ragged, List[Floats2d]], ragged2list()), + ragged2list(), ) else: model = chain( @@ -181,9 +181,9 @@ def make_hash_embed(index): cast(Model[List[Ints2d], Ragged], list2ragged()), with_array(concatenate(*embeddings)), max_out, - cast(Model[Ragged, List[Floats2d]], ragged2list()), + ragged2list(), ) - return model + return cast(Model[List[Doc], List[Floats2d]], model) @registry.architectures("spacy.CharacterEmbed.v2") @@ -231,13 +231,14 @@ def CharacterEmbed( ) feature_extractor: Model[List[Doc], Ragged] = chain( FeatureExtractor([feature]), - cast(Model[List[Ints2d], Ragged], list2ragged()), - with_array(HashEmbed(nO=width, nV=rows, column=0, seed=5)), # type: ignore + list2ragged(), + # TODO: the typing does not seem to make sense here + with_array(HashEmbed(nO=width, nV=rows, column=0, seed=5)), # type:ignore ) max_out: Model[Ragged, Ragged] if include_static_vectors: max_out = with_array( - Maxout(width, nM * nC + (2 * width), nP=3, normalize=True, dropout=0.0) # type: ignore + Maxout(width, nM * nC + (2 * width), nP=3, normalize=True, dropout=0.0) ) model = chain( concatenate( @@ -246,11 +247,11 @@ def CharacterEmbed( StaticVectors(width, dropout=0.0), ), max_out, - cast(Model[Ragged, List[Floats2d]], ragged2list()), + ragged2list(), ) else: max_out = with_array( - Maxout(width, nM * nC + width, nP=3, normalize=True, dropout=0.0) # type: ignore + Maxout(width, nM * nC + width, nP=3, normalize=True, dropout=0.0) ) model = chain( concatenate( @@ -258,9 +259,9 @@ def CharacterEmbed( feature_extractor, ), max_out, - cast(Model[Ragged, List[Floats2d]], ragged2list()), + ragged2list(), ) - return model + return cast(Model[List[Doc], List[Floats2d]], model) @registry.architectures("spacy.MaxoutWindowEncoder.v2") @@ -280,7 +281,7 @@ def MaxoutWindowEncoder( depth (int): The number of convolutional layers. Recommended value is 4. """ cnn = chain( - expand_window(window_size=window_size), + cast(Model[Floats2d, Floats2d], expand_window(window_size=window_size)), Maxout( nO=width, nI=width * ((window_size * 2) + 1), @@ -289,10 +290,10 @@ def MaxoutWindowEncoder( normalize=True, ), ) - model = clone(residual(cnn), depth) # type: ignore[arg-type] + model = clone(residual(cnn), depth) model.set_dim("nO", width) receptive_field = window_size * depth - return with_array(model, pad=receptive_field) # type: ignore[arg-type] + return cast(Model[List[Floats2d], List[Floats2d]], with_array(model, pad=receptive_field)) @registry.architectures("spacy.MishWindowEncoder.v2") @@ -310,12 +311,12 @@ def MishWindowEncoder( depth (int): The number of convolutional layers. Recommended value is 4. """ cnn = chain( - expand_window(window_size=window_size), + cast(Model[Floats2d, Floats2d], expand_window(window_size=window_size)), Mish(nO=width, nI=width * ((window_size * 2) + 1), dropout=0.0, normalize=True), ) - model = clone(residual(cnn), depth) # type: ignore[arg-type] + model = clone(residual(cnn), depth) model.set_dim("nO", width) - return with_array(model) # type: ignore[arg-type] + return cast(Model[List[Floats2d], List[Floats2d]], with_array(model)) @registry.architectures("spacy.TorchBiLSTMEncoder.v1") diff --git a/spacy/ml/staticvectors.py b/spacy/ml/staticvectors.py index 8d9b1af9b6a..ca81e3d3ab8 100644 --- a/spacy/ml/staticvectors.py +++ b/spacy/ml/staticvectors.py @@ -41,12 +41,12 @@ def forward( return _handle_empty(model.ops, model.get_dim("nO")) key_attr: int = model.attrs["key_attr"] keys: Ints1d = model.ops.flatten( - cast(Sequence, [doc.to_array(key_attr) for doc in docs]) + cast(List[Ints1d], [doc.to_array(key_attr) for doc in docs]) ) vocab: Vocab = docs[0].vocab W = cast(Floats2d, model.ops.as_contig(model.get_param("W"))) if vocab.vectors.mode == Mode.default: - V = cast(Floats2d, model.ops.asarray(vocab.vectors.data)) + V = model.ops.asarray2f(vocab.vectors.data) rows = vocab.vectors.find(keys=keys) V = model.ops.as_contig(V[rows]) elif vocab.vectors.mode == Mode.floret: @@ -63,7 +63,7 @@ def forward( # TODO: more options for UNK tokens vectors_data[rows < 0] = 0 output = Ragged( - vectors_data, model.ops.asarray([len(doc) for doc in docs], dtype="i") # type: ignore + vectors_data, model.ops.asarray1i([len(doc) for doc in docs], dtype="i") ) mask = None if is_train: @@ -115,5 +115,5 @@ def _handle_empty(ops: Ops, nO: int): def _get_drop_mask(ops: Ops, nO: int, rate: Optional[float]) -> Optional[Floats1d]: if rate is not None: mask = ops.get_dropout_mask((nO,), rate) - return mask # type: ignore + return mask # type: ignore[return-value] return None diff --git a/spacy/pipeline/entity_linker.py b/spacy/pipeline/entity_linker.py index 89e7576bf8d..ee4db94e49e 100644 --- a/spacy/pipeline/entity_linker.py +++ b/spacy/pipeline/entity_linker.py @@ -362,13 +362,12 @@ def get_loss(self, examples: Iterable[Example], sentence_encodings: Floats2d): method="get_loss", msg="gold entities do not match up" ) raise RuntimeError(err) - # TODO: fix typing issue here - gradients = self.distance.get_grad(selected_encodings, entity_encodings) # type: ignore + gradients = self.distance.get_grad(selected_encodings, entity_encodings) # to match the input size, we need to give a zero gradient for items not in the kb out = self.model.ops.alloc2f(*sentence_encodings.shape) out[keep_ents] = gradients - loss = self.distance.get_loss(selected_encodings, entity_encodings) # type: ignore + loss = self.distance.get_loss(selected_encodings, entity_encodings) loss = loss / len(entity_encodings) return float(loss), out diff --git a/spacy/pipeline/spancat.py b/spacy/pipeline/spancat.py index 0a6138fbc4d..1250b98a45b 100644 --- a/spacy/pipeline/spancat.py +++ b/spacy/pipeline/spancat.py @@ -1,7 +1,7 @@ from typing import List, Dict, Callable, Tuple, Optional, Iterable, Any, cast from thinc.api import Config, Model, get_current_ops, set_dropout_rate, Ops from thinc.api import Optimizer -from thinc.types import Ragged, Ints2d, Floats2d, Ints1d +from thinc.types import Ragged, Ints2d, Floats2d, Ints1d, ArrayXd import numpy @@ -75,7 +75,7 @@ def ngram_suggester(docs: Iterable[Doc], *, ops: Optional[Ops] = None) -> Ragged if spans: assert spans[-1].ndim == 2, spans[-1].shape lengths.append(length) - lengths_array = cast(Ints1d, ops.asarray(lengths, dtype="i")) + lengths_array = ops.asarray1i(lengths, dtype="i") if len(spans) > 0: output = Ragged(ops.xp.vstack(spans), lengths_array) else: @@ -113,7 +113,7 @@ def make_spancat( nlp: Language, name: str, suggester: Suggester, - model: Model[Tuple[List[Doc], Ragged], Floats2d], + model: Model[Tuple[Iterable[Doc], Ragged], Floats2d], spans_key: str, scorer: Optional[Callable], threshold: float, @@ -126,7 +126,7 @@ def make_spancat( suggester (Callable[[Iterable[Doc], Optional[Ops]], Ragged]): A function that suggests spans. Spans are returned as a ragged array with two integer columns, for the start and end positions. - model (Model[Tuple[List[Doc], Ragged], Floats2d]): A model instance that + model (Model[Tuple[Iterable[Doc], Ragged], Floats2d]): A model instance that is given a list of documents and (start, end) indices representing candidate span offsets. The model predicts a probability for each category for each span. @@ -178,7 +178,7 @@ class SpanCategorizer(TrainablePipe): def __init__( self, vocab: Vocab, - model: Model[Tuple[List[Doc], Ragged], Floats2d], + model: Model[Tuple[Iterable[Doc], Ragged], Floats2d], suggester: Suggester, name: str = "spancat", *, @@ -269,7 +269,7 @@ def predict(self, docs: Iterable[Doc]): DOCS: https://spacy.io/api/spancategorizer#predict """ indices = self.suggester(docs, ops=self.model.ops) - scores = self.model.predict((docs, indices)) # type: ignore + scores = self.model.predict((docs, indices)) return indices, scores def set_candidates( @@ -343,6 +343,7 @@ def update( set_dropout_rate(self.model, drop) scores, backprop_scores = self.model.begin_update((docs, spans)) loss, d_scores = self.get_loss(examples, (spans, scores)) + # TODO: the types do not seem to make sense here backprop_scores(d_scores) # type: ignore if sgd is not None: self.finish_update(sgd) @@ -387,7 +388,7 @@ def get_loss( # The target is a flat array for all docs. Track the position # we're at within the flat array. offset += spans.lengths[i] - target = self.model.ops.asarray(target, dtype="f") # type: ignore + target = self.model.ops.asarray(cast(ArrayXd, target), dtype="f") # The target will have the values 0 (for untrue predictions) or 1 # (for true predictions). # The scores should be in the range [0, 1]. diff --git a/spacy/pipeline/textcat.py b/spacy/pipeline/textcat.py index bc3f127fca8..d1676ecda5e 100644 --- a/spacy/pipeline/textcat.py +++ b/spacy/pipeline/textcat.py @@ -1,4 +1,4 @@ -from typing import Iterable, Tuple, Optional, Dict, List, Callable, Any +from typing import Iterable, Tuple, Optional, Dict, List, Callable, Any, cast from thinc.api import get_array_module, Model, Optimizer, set_dropout_rate, Config from thinc.types import Floats2d import numpy @@ -293,18 +293,20 @@ def rehearse( def _examples_to_truth( self, examples: Iterable[Example] - ) -> Tuple[numpy.ndarray, numpy.ndarray]: + ) -> Tuple[Floats2d, Floats2d]: nr_examples = len(list(examples)) - truths = numpy.zeros((nr_examples, len(self.labels)), dtype="f") - not_missing = numpy.ones((nr_examples, len(self.labels)), dtype="f") + truths = cast(Floats2d, numpy.zeros((nr_examples, len(self.labels)), dtype="f")) + not_missing = cast( + Floats2d, numpy.ones((nr_examples, len(self.labels)), dtype="f") + ) for i, eg in enumerate(examples): for j, label in enumerate(self.labels): if label in eg.reference.cats: truths[i, j] = eg.reference.cats[label] elif self.support_missing_values: not_missing[i, j] = 0.0 - truths = self.model.ops.asarray(truths) # type: ignore - return truths, not_missing # type: ignore + truths = self.model.ops.asarray2f(truths) + return truths, not_missing def get_loss(self, examples: Iterable[Example], scores) -> Tuple[float, float]: """Find the loss and gradient of loss for the batch of documents and @@ -319,7 +321,7 @@ def get_loss(self, examples: Iterable[Example], scores) -> Tuple[float, float]: validate_examples(examples, "TextCategorizer.get_loss") self._validate_categories(examples) truths, not_missing = self._examples_to_truth(examples) - not_missing = self.model.ops.asarray(not_missing) # type: ignore + not_missing = self.model.ops.asarray(not_missing) d_scores = scores - truths d_scores *= not_missing mean_square_error = (d_scores**2).mean() diff --git a/spacy/schemas.py b/spacy/schemas.py index 1dfd8ee85c9..b85336e7a80 100644 --- a/spacy/schemas.py +++ b/spacy/schemas.py @@ -104,7 +104,7 @@ def get_arg_model( sig_args[param.name] = (annotation, default) is_strict = strict and not has_variable sig_args["__config__"] = ArgSchemaConfig if is_strict else ArgSchemaConfigExtra # type: ignore[assignment] - return create_model(name, **sig_args) # type: ignore[arg-type, return-value] + return create_model(name, **sig_args) # type:ignore[call-overload] def validate_init_settings( diff --git a/spacy/scorer.py b/spacy/scorer.py index 8cd755ac40c..28338a25f18 100644 --- a/spacy/scorer.py +++ b/spacy/scorer.py @@ -131,7 +131,7 @@ def score(self, examples: Iterable[Example]) -> Dict[str, Any]: """ scores = {} if hasattr(self.nlp.tokenizer, "score"): - scores.update(self.nlp.tokenizer.score(examples, **self.cfg)) # type: ignore + scores.update(self.nlp.tokenizer.score(examples, **self.cfg)) # type: ignore[union-attr] for name, component in self.nlp.pipeline: if hasattr(component, "score"): scores.update(component.score(examples, **self.cfg)) diff --git a/spacy/tests/pipeline/test_textcat.py b/spacy/tests/pipeline/test_textcat.py index 0bb036a334f..91cc7794c8b 100644 --- a/spacy/tests/pipeline/test_textcat.py +++ b/spacy/tests/pipeline/test_textcat.py @@ -816,6 +816,7 @@ def test_textcat_loss(multi_label: bool, expected_loss: float): textcat = nlp.add_pipe("textcat") textcat.initialize(lambda: train_examples) assert isinstance(textcat, TextCategorizer) + # TODO: the typing doesn't seem to make sense here scores = textcat.model.ops.asarray( [[0.0, 0.0, 0.0, 1.0], [0.0, 0.0, 1.0, 1.0]], dtype="f" # type: ignore ) diff --git a/spacy/tests/vocab_vectors/test_vectors.py b/spacy/tests/vocab_vectors/test_vectors.py index e3ad206f4e6..9ee8c9e948a 100644 --- a/spacy/tests/vocab_vectors/test_vectors.py +++ b/spacy/tests/vocab_vectors/test_vectors.py @@ -4,7 +4,7 @@ from thinc.api import NumpyOps, get_current_ops from spacy.lang.en import English -from spacy.strings import hash_string # type: ignore +from spacy.strings import hash_string # type:ignore[attr-defined] from spacy.tokenizer import Tokenizer from spacy.tokens import Doc from spacy.training.initialize import convert_vectors diff --git a/spacy/tokens/_serialize.py b/spacy/tokens/_serialize.py index c4e8f26f408..7fd24c95a08 100644 --- a/spacy/tokens/_serialize.py +++ b/spacy/tokens/_serialize.py @@ -1,10 +1,11 @@ -from typing import List, Dict, Set, Iterable, Iterator, Union, Optional +from typing import List, Dict, Set, Iterable, Iterator, Union, Optional, cast from pathlib import Path import numpy from numpy import ndarray import zlib import srsly from thinc.api import NumpyOps +from thinc.types import Ints2d from .doc import Doc from ..vocab import Vocab @@ -140,12 +141,12 @@ def get_docs(self, vocab: Vocab) -> Iterator[Doc]: orth_col = self.attrs.index(ORTH) for i in range(len(self.tokens)): flags = self.flags[i] - tokens = self.tokens[i] + tokens = cast(Ints2d, self.tokens[i]) spaces: Optional[ndarray] = self.spaces[i] if flags.get("has_unknown_spaces"): spaces = None doc = Doc(vocab, words=tokens[:, orth_col], spaces=spaces) # type: ignore - doc = doc.from_array(self.attrs, tokens) # type: ignore + doc = doc.from_array(self.attrs, tokens) doc.cats = self.cats[i] # backwards-compatibility: may be b'' or serialized empty list if self.span_groups[i] and self.span_groups[i] != SpanGroups._EMPTY_BYTES: diff --git a/spacy/util.py b/spacy/util.py index 66e257dd867..4767a68819f 100644 --- a/spacy/util.py +++ b/spacy/util.py @@ -1,4 +1,4 @@ -from typing import List, Mapping, NoReturn, Union, Dict, Any, Set +from typing import List, Mapping, NoReturn, Union, Dict, Any, Set, cast from typing import Optional, Iterable, Callable, Tuple, Type from typing import Iterator, Type, Pattern, Generator, TYPE_CHECKING from types import ModuleType @@ -391,7 +391,7 @@ def get_module_path(module: ModuleType) -> Path: """ if not hasattr(module, "__module__"): raise ValueError(Errors.E169.format(module=repr(module))) - return Path(sys.modules[module.__module__].__file__).parent + return Path(cast(str, sys.modules[module.__module__].__file__)).parent def load_model( @@ -878,7 +878,7 @@ def get_package_path(name: str) -> Path: # Here we're importing the module just to find it. This is worryingly # indirect, but it's otherwise very difficult to find the package. pkg = importlib.import_module(name) - return Path(pkg.__file__).parent + return Path(cast(str, pkg.__file__)).parent def replace_model_node(model: Model, target: Model, replacement: Model) -> None: