9 changes: 4 additions & 5 deletions .buildkite/pipeline.yml
@@ -1,7 +1,6 @@
 steps:
-  - label: "Dummy pipeline"
-    if: "false"
-    agents:
-      queue: cpu_queue_premerge_us_east_1
+  - label: ":toolbox: Qwen2.5 omni offline example"
     commands:
-      - "echo 'This pipeline is intentionally a no-op.'"
+      - ".buildkite/scripts/run_qwen_offline_example.sh"
+    agents:
+      queue: "gpu_1_queue" # g6.4xlarge instance on AWS, has 1 L4 GPU
33 changes: 33 additions & 0 deletions .buildkite/scripts/run_qwen_offline_example.sh
@@ -0,0 +1,33 @@
+#!/usr/bin/env bash
+
+set -euo pipefail
+
+# Move to repo root (script lives in .buildkite/scripts/)
+ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
+cd "${ROOT_DIR}"
+
+UV_INSTALL_DIR="${ROOT_DIR}/.local/bin"
+case ":${PATH}:" in
+  *":${UV_INSTALL_DIR}:"*) ;;
+  *) export PATH="${UV_INSTALL_DIR}:${PATH}" ;;
+esac
+
+if ! command -v uv >/dev/null 2>&1; then
+  mkdir -p "${UV_INSTALL_DIR}"
+  curl -LsSf https://astral.sh/uv/install.sh | env UV_INSTALL_DIR="${UV_INSTALL_DIR}" sh
+fi
+
+UV_BIN=(uv)
+
+"${UV_BIN[@]}" python install 3.12
+"${UV_BIN[@]}" venv --python 3.12 .venv
+source .venv/bin/activate
+
+"${UV_BIN[@]}" pip install -e .
+"${UV_BIN[@]}" pip install vllm==0.11.0 --torch-backend=auto
+
+EXAMPLE_DIR="examples/offline_inference/qwen2_5_omni"
+cd "${EXAMPLE_DIR}"
+
+python end2end.py --query-type text \
+  --no-save-results
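
Note: the script pins vllm==0.11.0 alongside an editable install of the repo. A minimal post-install sanity check one could run before the example (a sketch, not part of this PR; it assumes the standard vllm.__version__ attribute):

# Hypothetical check, not part of the CI script: fail fast if the
# resolver replaced the pinned vllm build.
import vllm

assert vllm.__version__ == "0.11.0", f"unexpected vllm {vllm.__version__}"
print(f"vllm {vllm.__version__} ready")
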
55 changes: 35 additions & 20 deletions examples/offline_inference/qwen2_5_omni/end2end.py
@@ -5,6 +5,7 @@
 with the correct prompt format on Qwen2.5-Omni
 """
 
+import argparse
 import os
 from typing import NamedTuple
 
@@ -195,35 +196,43 @@ def main(args):
 
     omni_outputs = omni_llm.generate(prompts, sampling_params_list)
 
-    # Determine output directory: prefer --output-dir; fallback to --output-wav
-    output_dir = args.output_dir if getattr(args, "output_dir", None) else args.output_wav
-    os.makedirs(output_dir, exist_ok=True)
+    output_dir = None
+    if args.save_results:
+        # Determine output directory: prefer --output-dir; fallback to --output-wav
+        output_dir = args.output_dir if getattr(args, "output_dir", None) else args.output_wav
+        os.makedirs(output_dir, exist_ok=True)
     for stage_outputs in omni_outputs:
         if stage_outputs.final_output_type == "text":
             for output in stage_outputs.request_output:
                 request_id = int(output.request_id)
                 text_output = output.outputs[0].text
-                # Save aligned text file per request
-                prompt_text = prompts[request_id]["prompt"]
-                out_txt = os.path.join(output_dir, f"{request_id:05d}.txt")
-                lines = []
-                lines.append("Prompt:\n")
-                lines.append(str(prompt_text) + "\n")
-                lines.append("vllm_text_output:\n")
-                lines.append(str(text_output).strip() + "\n")
-                try:
-                    with open(out_txt, "w", encoding="utf-8") as f:
-                        f.writelines(lines)
-                except Exception as e:
-                    print(f"[Warn] Failed writing text file {out_txt}: {e}")
-                print(f"Request ID: {request_id}, Text saved to {out_txt}")
+                if args.save_results:
+                    # Save aligned text file per request
+                    prompt_text = prompts[request_id]["prompt"]
+                    out_txt = os.path.join(output_dir, f"{request_id:05d}.txt")
+                    lines = []
+                    lines.append("Prompt:\n")
+                    lines.append(str(prompt_text) + "\n")
+                    lines.append("vllm_text_output:\n")
+                    lines.append(str(text_output).strip() + "\n")
+                    try:
+                        with open(out_txt, "w", encoding="utf-8") as f:
+                            f.writelines(lines)
+                    except Exception as e:
+                        print(f"[Warn] Failed writing text file {out_txt}: {e}")
+                    print(f"Request ID: {request_id}, Text saved to {out_txt}")
+                else:
+                    print(f"Request ID: {request_id}, Text output:\n{text_output}")
         elif stage_outputs.final_output_type == "audio":
             for output in stage_outputs.request_output:
                 request_id = int(output.request_id)
                 audio_tensor = output.multimodal_output["audio"]
-                output_wav = os.path.join(output_dir, f"output_{output.request_id}.wav")
-                sf.write(output_wav, audio_tensor.detach().cpu().numpy(), samplerate=24000)
-                print(f"Request ID: {request_id}, Saved audio to {output_wav}")
+                if args.save_results:
+                    output_wav = os.path.join(output_dir, f"output_{output.request_id}.wav")
+                    sf.write(output_wav, audio_tensor.detach().cpu().numpy(), samplerate=24000)
+                    print(f"Request ID: {request_id}, Saved audio to {output_wav}")
+                else:
+                    print(f"Request ID: {request_id}, Received audio output (not saved).")
 
 
 def parse_args():
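
The rewritten loop defers creating output_dir until it is known that files will be written, so --no-save-results touches no files. Distilled to a standalone sketch (hypothetical helper name; the real code iterates vLLM stage outputs as above):

import os

def emit_text(request_id: int, prompt: str, text: str,
              output_dir: str | None) -> None:
    """Write one aligned prompt/output pair, or just print when not saving."""
    if output_dir is None:  # corresponds to --no-save-results
        print(f"Request ID: {request_id}, Text output:\n{text}")
        return
    out_txt = os.path.join(output_dir, f"{request_id:05d}.txt")
    with open(out_txt, "w", encoding="utf-8") as f:
        f.write(f"Prompt:\n{prompt}\nvllm_text_output:\n{text.strip()}\n")
    print(f"Request ID: {request_id}, Text saved to {out_txt}")
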
@@ -266,6 +275,12 @@ def parse_args():
         default=65536,
         help="Threshold for using shared memory in bytes (default: 65536)",
     )
+    parser.add_argument(
+        "--save-results",
+        action=argparse.BooleanOptionalAction,
+        default=True,
+        help="Save text/audio outputs to files (default: enabled). Use --no-save-results to disable.",
+    )
     parser.add_argument(
         "--output-wav",
         default="output_audio",
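
argparse.BooleanOptionalAction (available since Python 3.9) registers both --save-results and --no-save-results from this single declaration, which is what lets the CI script pass --no-save-results. A minimal sketch of the behavior:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument(
    "--save-results",
    action=argparse.BooleanOptionalAction,
    default=True,
)

# Both spellings are generated from the one flag:
print(parser.parse_args([]).save_results)                     # True (default)
print(parser.parse_args(["--save-results"]).save_results)     # True
print(parser.parse_args(["--no-save-results"]).save_results)  # False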