from sgl_jax.srt.entrypoints.engine import Engine
if __name__ == "__main__":
    # Reproduction script: build an offline engine, send one batched
    # generate() request, and print what comes back.
    engine = Engine(
        model_path="Qwen/Qwen-7B-Chat",
        trust_remote_code=True,
        dist_init_addr="0.0.0.0:10011",
        nnodes=1,
        tp_size=4,
        device="tpu",
        random_seed=3,
        node_rank=0,
        mem_fraction_static=0.4,
        chunked_prefill_size=8192,
        download_dir="/tmp",
        dtype="bfloat16",
        precompile_bs_paddings=[64],
        max_running_requests=64,
        skip_server_warmup=True,
        attention_backend="fa",
        precompile_token_paddings=[8192],
        page_size=64,
        log_requests=True,
        log_requests_level=3,
    )

    # Two prompts with n=2 samples each and logprobs requested.
    # NOTE(review): presumably this should yield 2 * 2 = 4 results; the
    # printed length below is what the report is checking -- confirm.
    output = engine.generate(
        prompt=["您好", "hello"],
        sampling_params={"n": 2, "temperature": 0.7},
        return_logprob=True,
    )

    # Print the number of returned items followed by the raw output.
    print(len(list(output)), output)
Checklist
Describe the bug
As described in the title.
Reproduction
Environment
v6e