huggingface · linkofrivia · Feb 22, 2026
diff --git a/chapters/en/chapter2/8.mdx b/chapters/en/chapter2/8.mdx
@@ -166,19 +166,14 @@ cd llama.cpp
 make
 
 # Download the SmolLM2-1.7B-Instruct-GGUF model
-curl -L -O https://huggingface.co/HuggingFaceTB/SmolLM2-1.7B-Instruct-GGUF/resolve/main/smollm2-1.7b-instruct.Q4_K_M.gguf
+curl -fL -o smollm2-1.7b-instruct-q4_k_m.gguf "https://huggingface.co/HuggingFaceTB/SmolLM2-1.7B-Instruct-GGUF/resolve/main/smollm2-1.7b-instruct-q4_k_m.gguf?download=1"
 ```
 
 Then, launch the server (with OpenAI API compatibility):
 
 ```sh
 # Start the server
-./server \
-    -m smollm2-1.7b-instruct.Q4_K_M.gguf \
-    --host 0.0.0.0 \
-    --port 8080 \
-    -c 4096 \
-    --n-gpu-layers 0  # Set to a higher number to use GPU
+./build/bin/llama-server -m smollm2-1.7b-instruct-q4_k_m.gguf --host 0.0.0.0 --port 8080 -c 4096 --n-gpu-layers 0  # Set --n-gpu-layers to a higher number to use GPU
 ```
 
 Interact with the server using Hugging Face's InferenceClient: