We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
2 parents 828d0d0 + f56285b · commit da2bf89 (Copy full SHA for da2bf89)
evaluation/launch_deepseekr1_fp8.sh
@@ -0,0 +1,22 @@
# Launch sglang.launch_server for the model below as a background job,
# mirroring the server's combined stdout/stderr into log.server.log.

# Filesystem path handed to --model-path.
model=/data/models/Deepseek-R1
# Values forwarded to --tp-size and --ep-size respectively.
TP=8
EP=1

echo "launching ${model}"
echo "TP=${TP}"
echo "EP=${EP}"

# Expansions are quoted so a path containing spaces or glob characters is
# passed as a single argument. The trailing '&' backgrounds the whole
# pipeline, so this script returns immediately while the server keeps running;
# '2>&1' folds stderr into the stream that tee writes to log.server.log.
python3 -m sglang.launch_server \
  --model-path "${model}" \
  --host localhost \
  --port 9000 \
  --tp-size "${TP}" \
  --ep-size "${EP}" \
  --trust-remote-code \
  --chunked-prefill-size 196608 \
  --mem-fraction-static 0.9 \
  --disable-radix-cache \
  --num-continuous-decode-steps 4 \
  --max-prefill-tokens 196608 \
  --cuda-graph-max-bs 128 \
  2>&1 | tee log.server.log &
0 commit comments