File tree Expand file tree Collapse file tree 4 files changed +56
-1
lines changed
examples/distributed_inference Expand file tree Collapse file tree 4 files changed +56
-1
lines changed Original file line number Diff line number Diff line change @@ -111,6 +111,9 @@ Tutorials
111111 tutorials/_rendered_examples/dynamo/torch_compile_transformers_example
112112 tutorials/_rendered_examples/dynamo/torch_compile_advanced_usage
113113 tutorials/_rendered_examples/dynamo/torch_compile_stable_diffusion
114+ tutorials/_rendered_examples/distributed_inference/data_parallel_gpt2
115+ tutorials/_rendered_examples/distributed_inference/data_parallel_stable_diffusion
116+
114117
 115118Python API Documentation
116119------------------------
Original file line number Diff line number Diff line change 1+ """
2+ .. _data_parallel_gpt2:
3+
4+ Torch-TensorRT Distributed Inference
5+ ======================================================
6+
 7+ This interactive script is intended as a sample of distributed inference using data
 8+ parallelism with the Accelerate
 9+ library and the Torch-TensorRT workflow on the GPT-2 model.
10+
11+ """
12+
13+ # %%
14+ # Imports and Model Definition
15+ # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
16+
117import torch
218from accelerate import PartialState
319from transformers import AutoTokenizer , GPT2LMHeadModel
622
723tokenizer = AutoTokenizer .from_pretrained ("gpt2" )
824
25+ # Set input prompts for different devices
926prompt1 = "GPT2 is a model developed by."
1027prompt2 = "Llama is a model developed by "
1128
1431
1532distributed_state = PartialState ()
1633
34+ # Import GPT2 model and load to distributed devices
1735model = GPT2LMHeadModel .from_pretrained ("gpt2" ).eval ().to (distributed_state .device )
1836
37+
38+ # Instantiate model with Torch-TensorRT backend
1939model .forward = torch .compile (
2040 model .forward ,
2141 backend = "torch_tensorrt" ,
2747 dynamic = False ,
2848)
2949
50+ # %%
51+ # Inference
52+ # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
53+
54+ # Assume there are 2 processes (2 devices)
3055with distributed_state .split_between_processes ([input_id1 , input_id2 ]) as prompt :
3156 cur_input = torch .clone (prompt [0 ]).to (distributed_state .device )
3257
Original file line number Diff line number Diff line change 1+ """
2+ .. _data_parallel_stable_diffusion:
3+
4+ Torch-TensorRT Distributed Inference
5+ ======================================================
6+
 7+ This interactive script is intended as a sample of distributed inference using data
 8+ parallelism with the Accelerate
 9+ library and the Torch-TensorRT workflow on the Stable Diffusion model.
10+
11+ """
12+
13+ # %%
14+ # Imports and Model Definition
15+ # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
116import torch
217from accelerate import PartialState
318from diffusers import DiffusionPipeline
1732backend = "torch_tensorrt"
1833
1934# Optimize the UNet portion with Torch-TensorRT
20- pipe .unet = torch .compile (
35+ pipe .unet = torch .compile ( # %%
36+ # Inference
37+ # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
38+ # Assume there are 2 processes (2 devices)
2139 pipe .unet ,
2240 backend = backend ,
2341 options = {
3048)
3149torch_tensorrt .runtime .set_multi_device_safe_mode (True )
3250
51+
52+ # %%
53+ # Inference
54+ # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
55+
56+ # Assume there are 2 processes (2 devices)
3357with distributed_state .split_between_processes (["a dog" , "a cat" ]) as prompt :
3458 print ("before \n " )
3559 result = pipe (prompt ).images [0 ]
Original file line number Diff line number Diff line change 1+ accelerate
2+ transformers
3+ diffusers
You can’t perform that action at this time.
0 commit comments