From ac06779feeb40d2087817945bd3c0a2538487fcf Mon Sep 17 00:00:00 2001
From: yonigozlan <yoni.gozlan@huggingface.co>
Date: Wed, 17 Sep 2025 23:43:33 +0000
Subject: [PATCH 1/2] Fix model cards and modalities in toctree

---
 docs/source/en/_toctree.yml                  | 28 ++++++++---------
 docs/source/en/model_doc/bert-generation.md  |  1 +
 docs/source/en/model_doc/hunyuan_v1_dense.md |  1 +
 docs/source/en/model_doc/hunyuan_v1_moe.md   |  1 +
 docs/source/en/model_doc/longcat_flash.md    |  5 ++-
 docs/source/en/model_doc/ministral.md        |  1 +
 docs/source/en/model_doc/olmo3.md            | 11 ++++---
 docs/source/en/model_doc/ovis2.md            |  1 +
 docs/source/en/model_doc/qwen3_next.md       | 12 ++++---
 docs/source/en/model_doc/qwen3_vl.md         |  2 +-
 docs/source/en/model_doc/qwen3_vl_moe.md     |  2 +-
 docs/source/en/model_doc/seed_oss.md         | 33 +++++++++++---------
 docs/source/en/model_doc/vaultgemma.md       |  3 +-
 13 files changed, 56 insertions(+), 45 deletions(-)
diff --git a/docs/source/en/_toctree.yml b/docs/source/en/_toctree.yml
index 65411024d4a3..432da370a9c6 100644
--- a/docs/source/en/_toctree.yml
+++ b/docs/source/en/_toctree.yml
@@ -441,6 +441,8 @@
         title: DeBERTa
       - local: model_doc/deberta-v2
         title: DeBERTa-v2
+      - local: model_doc/deepseek_v2
+        title: DeepSeek-V2
       - local: model_doc/deepseek_v3
         title: DeepSeek-V3
       - local: model_doc/dialogpt
@@ -761,12 +763,6 @@
         title: D-FINE
       - local: model_doc/dab-detr
         title: DAB-DETR
-      - local: model_doc/deepseek_v2
-        title: DeepSeek-V2
-      - local: model_doc/deepseek_vl
-        title: DeepseekVL
-      - local: model_doc/deepseek_vl_hybrid
-        title: DeepseekVLHybrid
       - local: model_doc/deformable_detr
         title: Deformable DETR
       - local: model_doc/deit
@@ -849,10 +845,16 @@
         title: RT-DETR
       - local: model_doc/rt_detr_v2
         title: RT-DETRv2
+      - local: model_doc/sam2
+        title: SAM2
       - local: model_doc/segformer
         title: SegFormer
       - local: model_doc/seggpt
         title: SegGpt
+      - local: model_doc/sam
+        title: Segment Anything
+      - local: model_doc/sam_hq
+        title: Segment Anything High Quality
       - local: model_doc/superglue
         title: SuperGlue
       - local: model_doc/superpoint
@@ -975,6 +977,8 @@
         title: XLSR-Wav2Vec2
       title: Audio models
     - sections:
+      - local: model_doc/sam2_video
+        title: SAM2 Video
       - local: model_doc/timesformer
         title: TimeSformer
       - local: model_doc/vjepa2
@@ -1019,6 +1023,10 @@
         title: ColQwen2
       - local: model_doc/data2vec
         title: Data2Vec
+      - local: model_doc/deepseek_vl
+        title: DeepseekVL
+      - local: model_doc/deepseek_vl_hybrid
+        title: DeepseekVLHybrid
       - local: model_doc/deplot
         title: DePlot
       - local: model_doc/donut
@@ -1137,14 +1145,6 @@
         title: Qwen3VL
       - local: model_doc/qwen3_vl_moe
         title: Qwen3VLMoe
-      - local: model_doc/sam2
-        title: SAM2
-      - local: model_doc/sam2_video
-        title: SAM2 Video
-      - local: model_doc/sam
-        title: Segment Anything
-      - local: model_doc/sam_hq
-        title: Segment Anything High Quality
       - local: model_doc/shieldgemma2
         title: ShieldGemma2
       - local: model_doc/siglip
diff --git a/docs/source/en/model_doc/bert-generation.md b/docs/source/en/model_doc/bert-generation.md
index 38cbe2137eb7..b5be3458db7d 100644
--- a/docs/source/en/model_doc/bert-generation.md
+++ b/docs/source/en/model_doc/bert-generation.md
@@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License.
 rendered properly in your Markdown viewer.
 
 -->
+*This model was released on 2019-07-29 and added to Hugging Face Transformers on 2020-11-16.*
 
 <div style="float: right;">
     <div class="flex flex-wrap space-x-1">
diff --git a/docs/source/en/model_doc/hunyuan_v1_dense.md b/docs/source/en/model_doc/hunyuan_v1_dense.md
index f87ca422c8ed..520c68b7fd9d 100644
--- a/docs/source/en/model_doc/hunyuan_v1_dense.md
+++ b/docs/source/en/model_doc/hunyuan_v1_dense.md
@@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License.
 rendered properly in your Markdown viewer.
 
 -->
+*This model was released on {release_date} and added to Hugging Face Transformers on 2025-08-22.*
 
 # HunYuanDenseV1
 
diff --git a/docs/source/en/model_doc/hunyuan_v1_moe.md b/docs/source/en/model_doc/hunyuan_v1_moe.md
index c66846cc0881..36a53742715d 100644
--- a/docs/source/en/model_doc/hunyuan_v1_moe.md
+++ b/docs/source/en/model_doc/hunyuan_v1_moe.md
@@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License.
 rendered properly in your Markdown viewer.
 
 -->
+*This model was released on {release_date} and added to Hugging Face Transformers on 2025-08-22.*
 
 # HunYuanMoEV1
 
diff --git a/docs/source/en/model_doc/longcat_flash.md b/docs/source/en/model_doc/longcat_flash.md
index b2c2d7a00646..d9a9a4a7f603 100644
--- a/docs/source/en/model_doc/longcat_flash.md
+++ b/docs/source/en/model_doc/longcat_flash.md
@@ -16,8 +16,7 @@ limitations under the License.
 ⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be rendered properly in your Markdown viewer.
 
 -->
-*This model was released on 2025-09-01 and added to Hugging Face Transformers on 2025-09-15.*
-
+*This model was released on 2025-09-01 and added to Hugging Face Transformers on 2025-09-17.*
 
 # LongCatFlash
 
@@ -70,7 +69,7 @@ outputs = model.generate(inputs, max_new_tokens=30)
 print(tokenizer.batch_decode(outputs))
 ```
 
-To run with TP, you will need torchrun: 
+To run with TP, you will need torchrun:
 
 ```bash
 torchrun  --nproc_per_node=8 --nnodes=2 --node_rank=0 | 1  --rdzv-id <an_id> --rdzv-backend c10d --rdzv-endpoint $NODE_ID:$NODE_PORT  --log-dir ./logs_longcat launch_longcat.py
diff --git a/docs/source/en/model_doc/ministral.md b/docs/source/en/model_doc/ministral.md
index 07692c6163e5..13b6f3d6c04b 100644
--- a/docs/source/en/model_doc/ministral.md
+++ b/docs/source/en/model_doc/ministral.md
@@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License.
 rendered properly in your Markdown viewer.
 
 -->
+*This model was released on {release_date} and added to Hugging Face Transformers on 2025-09-11.*
 
 <div style="float: right;">
     <div class="flex flex-wrap space-x-1">
diff --git a/docs/source/en/model_doc/olmo3.md b/docs/source/en/model_doc/olmo3.md
index e320181925ca..8e88a175d463 100644
--- a/docs/source/en/model_doc/olmo3.md
+++ b/docs/source/en/model_doc/olmo3.md
@@ -16,7 +16,8 @@ limitations under the License.
 ⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be rendered properly in your Markdown viewer.
 
 -->
-*This model was released on {release_date} and added to Hugging Face Transformers on 2025-09-08.*
+*This model was released on {release_date} and added to Hugging Face Transformers on 2025-09-16.*
+
 <div style="float: right;">
     <div class="flex flex-wrap space-x-1">
         <img alt="PyTorch" src="https://img.shields.io/badge/PyTorch-DE3412?style=flat&logo=pytorch&logoColor=white">
@@ -46,7 +47,7 @@ pipe = pipeline(
     dtype=torch.bfloat16,
     device=0,
 )
-    
+
 result = pipe("Plants create energy through a process known as")
 print(result)
 ```
@@ -119,11 +120,11 @@ print(tokenizer.decode(output[0], skip_special_tokens=True))
 
 ## Notes
 
-- Load specific intermediate checkpoints by adding the `revision` parameter to [`~PreTrainedModel.from_pretrained`]. 
+- Load specific intermediate checkpoints by adding the `revision` parameter to [`~PreTrainedModel.from_pretrained`].
 
     ```py
     from transformers import AutoModelForCausalLM
-    
+
     model = AutoModelForCausalLM.from_pretrained("allenai/TBA", revision="stage1-step140000-tokens294B")
     ```
 
@@ -144,4 +145,4 @@ print(tokenizer.decode(output[0], skip_special_tokens=True))
 ## Olmo3PreTrainedModel
 
 [[autodoc]] Olmo3PreTrainedModel
-    - forward
\ No newline at end of file
+    - forward
diff --git a/docs/source/en/model_doc/ovis2.md b/docs/source/en/model_doc/ovis2.md
index ab1d761f19ed..342e34ef7a1b 100644
--- a/docs/source/en/model_doc/ovis2.md
+++ b/docs/source/en/model_doc/ovis2.md
@@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License.
 rendered properly in your Markdown viewer.
 
 -->
+*This model was released on 2024-05-31 and added to Hugging Face Transformers on 2025-08-18.*
 
 # Ovis2
 
diff --git a/docs/source/en/model_doc/qwen3_next.md b/docs/source/en/model_doc/qwen3_next.md
index f2e003182ee7..737934136099 100644
--- a/docs/source/en/model_doc/qwen3_next.md
+++ b/docs/source/en/model_doc/qwen3_next.md
@@ -13,18 +13,20 @@ specific language governing permissions and limitations under the License.
 rendered properly in your Markdown viewer.
 
 -->
+*This model was released on {release_date} and added to Hugging Face Transformers on 2025-09-10.*
+
 ## Overview
 
-The Qwen3-Next series represents our next-generation foundation models, optimized for extreme context length and large-scale parameter efficiency. 
+The Qwen3-Next series represents our next-generation foundation models, optimized for extreme context length and large-scale parameter efficiency.
 The series introduces a suite of architectural innovations designed to maximize performance while minimizing computational cost:
-- **Hybrid Attention**: Replaces standard attention with the combination of **Gated DeltaNet** and **Gated Attention**, enabling efficient context modeling.  
+- **Hybrid Attention**: Replaces standard attention with the combination of **Gated DeltaNet** and **Gated Attention**, enabling efficient context modeling.
 - **High-Sparsity MoE**: Achieves an extreme low activation ratio as 1:50 in MoE layers — drastically reducing FLOPs per token while preserving model capacity.
 - **Multi-Token Prediction(MTP)**: Boosts pretraining model performance, and accelerates inference.
-- **Other Optimizations**: Includes techniques such as **zero-centered and weight-decayed layernorm**, **Gated Attention**, and other stabilizing enhancements for robust training.  
+- **Other Optimizations**: Includes techniques such as **zero-centered and weight-decayed layernorm**, **Gated Attention**, and other stabilizing enhancements for robust training.
 
 Built on this architecture, we trained and open-sourced Qwen3-Next-80B-A3B — 80B total parameters, only 3B active — achieving extreme sparsity and efficiency.
 
-Despite its ultra-efficiency, it outperforms Qwen3-32B on downstream tasks — while requiring **less than 1/10 of the training cost**. 
+Despite its ultra-efficiency, it outperforms Qwen3-32B on downstream tasks — while requiring **less than 1/10 of the training cost**.
 Moreover, it delivers over **10x higher inference throughput** than Qwen3-32B when handling contexts longer than 32K tokens.
 
 For more details, please visit our blog [Qwen3-Next](qwen3_next) ([blog post](https://qwenlm.github.io/blog/qwen3_next/)).
@@ -60,7 +62,7 @@ generated_ids = model.generate(
     **model_inputs,
     max_new_tokens=512
 )
-output_ids = generated_ids[0][len(model_inputs.input_ids[0]):].tolist() 
+output_ids = generated_ids[0][len(model_inputs.input_ids[0]):].tolist()
 
 content = tokenizer.decode(output_ids, skip_special_tokens=True)
 
diff --git a/docs/source/en/model_doc/qwen3_vl.md b/docs/source/en/model_doc/qwen3_vl.md
index 9e90363a1eba..c939d5da3cd9 100644
--- a/docs/source/en/model_doc/qwen3_vl.md
+++ b/docs/source/en/model_doc/qwen3_vl.md
@@ -13,7 +13,7 @@ specific language governing permissions and limitations under the License.
 rendered properly in your Markdown viewer.
 
 -->
-*This model was released on None and added to Hugging Face Transformers on 2025-08-16.*
+*This model was released on None and added to Hugging Face Transformers on 2025-09-15.*
 
 <div style="float: right;">
     <div class="flex flex-wrap space-x-1">
diff --git a/docs/source/en/model_doc/qwen3_vl_moe.md b/docs/source/en/model_doc/qwen3_vl_moe.md
index 76d046efff2d..6e27adf915d3 100644
--- a/docs/source/en/model_doc/qwen3_vl_moe.md
+++ b/docs/source/en/model_doc/qwen3_vl_moe.md
@@ -13,7 +13,7 @@ specific language governing permissions and limitations under the License.
 rendered properly in your Markdown viewer.
 
 -->
-*This model was released on None and added to Hugging Face Transformers on 2025-08-17.*
+*This model was released on None and added to Hugging Face Transformers on 2025-09-15.*
 
 <div style="float: right;">
     <div class="flex flex-wrap space-x-1">
diff --git a/docs/source/en/model_doc/seed_oss.md b/docs/source/en/model_doc/seed_oss.md
index 0f0dacb2be90..dbcddcb5f2c7 100644
--- a/docs/source/en/model_doc/seed_oss.md
+++ b/docs/source/en/model_doc/seed_oss.md
@@ -1,17 +1,20 @@
-<!-- 
-# Copyright 2025 Bytedance-Seed Ltd and the HuggingFace Inc. team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License. -->
+<!--
+Copyright 2025 Bytedance-Seed Ltd and the HuggingFace Inc. team. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
+the License. You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
+an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
+specific language governing permissions and limitations under the License.
+
+⚠️ Note that this file is in Markdown but contains specific syntax for our doc-builder (similar to MDX) that may not be
+rendered properly in your Markdown viewer.
+
+-->
+*This model was released on {release_date} and added to Hugging Face Transformers on 2025-08-22.*
 
 # SeedOss
 
@@ -54,4 +57,4 @@ To be released with the official model launch.
 ## SeedOssForQuestionAnswering
 
 [[autodoc]] SeedOssForQuestionAnswering
-    - forward
\ No newline at end of file
+    - forward
diff --git a/docs/source/en/model_doc/vaultgemma.md b/docs/source/en/model_doc/vaultgemma.md
index c9eb36124fca..94d28cc8afe2 100644
--- a/docs/source/en/model_doc/vaultgemma.md
+++ b/docs/source/en/model_doc/vaultgemma.md
@@ -16,6 +16,7 @@ limitations under the License.
 ⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be rendered properly in your Markdown viewer.
 
 -->
+*This model was released on {release_date} and added to Hugging Face Transformers on 2025-09-12.*
 
 # VaultGemma
 
@@ -30,7 +31,7 @@ sequence length.
 VaultGemma was trained from scratch with sequence-level differential privacy (DP). Its training data includes the same
 mixture as the [Gemma 2 models](https://huggingface.co/collections/google/gemma-2-release-667d6600fd5220e7b967f315),
 consisting of a number of documents of varying lengths. Additionally, it is trained using
-[DP stochastic gradient descent (DP-SGD)](https://arxiv.org/abs/1607.00133) and provides a
+[DP stochastic gradient descent (DP-SGD)](https://huggingface.co/papers/1607.00133) and provides a
 (ε ≤ 2.0, δ ≤ 1.1e-10)-sequence-level DP guarantee, where a sequence consists of 1024 consecutive tokens extracted from
 heterogeneous data sources. Specifically, the privacy unit of the guarantee is for the sequences after sampling and
 packing of the mixture.

From dde0af36ca230de99a0f6184f0ea30c379a8a619 Mon Sep 17 00:00:00 2001
From: yonigozlan <yoni.gozlan@huggingface.co>
Date: Thu, 18 Sep 2025 17:51:23 +0000
Subject: [PATCH 2/2] fix new models

---
 docs/source/en/model_doc/flex_olmo.md | 2 +-
 docs/source/en/model_doc/lfm2_vl.md   | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/docs/source/en/model_doc/flex_olmo.md b/docs/source/en/model_doc/flex_olmo.md
index b771fe526d06..418a660b6d23 100644
--- a/docs/source/en/model_doc/flex_olmo.md
+++ b/docs/source/en/model_doc/flex_olmo.md
@@ -16,7 +16,7 @@ limitations under the License.
 ⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be rendered properly in your Markdown viewer.
 
 -->
-*This model was released on 2025-07-09 and added to Hugging Face Transformers on 2025-09-15.*
+*This model was released on 2025-07-09 and added to Hugging Face Transformers on 2025-09-18.*
 <div style="float: right;">
     <div class="flex flex-wrap space-x-1">
         <img alt="PyTorch" src="https://img.shields.io/badge/PyTorch-DE3412?style=flat&logo=pytorch&logoColor=white">
diff --git a/docs/source/en/model_doc/lfm2_vl.md b/docs/source/en/model_doc/lfm2_vl.md
index 1607e3066905..3a93a8189a70 100644
--- a/docs/source/en/model_doc/lfm2_vl.md
+++ b/docs/source/en/model_doc/lfm2_vl.md
@@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License.
 rendered properly in your Markdown viewer.
 
 -->
+*This model was released on {release_date} and added to Hugging Face Transformers on 2025-09-18.*
 
 <div class="flex flex-wrap space-x-1">
 <img alt="PyTorch" src="https://img.shields.io/badge/PyTorch-DE3412?style=flat&logo=pytorch&logoColor=white">