@@ -334,6 +334,7 @@ struct cmd_params {
334334 std::vector<std::vector<float >> tensor_split;
335335 std::vector<std::vector<llama_model_tensor_buft_override>> tensor_buft_overrides;
336336 std::vector<bool > use_mmap;
337+ std::vector<bool > use_direct_io;
337338 std::vector<bool > embeddings;
338339 std::vector<bool > no_op_offload;
339340 std::vector<bool > no_host;
@@ -372,6 +373,7 @@ static const cmd_params cmd_params_defaults = {
372373 /* tensor_split */ { std::vector<float >(llama_max_devices (), 0 .0f ) },
373374 /* tensor_buft_overrides*/ { std::vector<llama_model_tensor_buft_override>{ { nullptr , nullptr } } },
374375 /* use_mmap */ { true },
376+ /* use_direct_io */ { true },
375377 /* embeddings */ { false },
376378 /* no_op_offload */ { false },
377379 /* no_host */ { false },
@@ -449,6 +451,8 @@ static void print_usage(int /* argc */, char ** argv) {
449451 printf (" -dev, --device <dev0/dev1/...> (default: auto)\n " );
450452 printf (" -mmp, --mmap <0|1> (default: %s)\n " ,
451453 join (cmd_params_defaults.use_mmap , " ," ).c_str ());
454+ printf (" -dio, --direct-io <0|1> (default: %s)\n " ,
455+ join (cmd_params_defaults.use_direct_io , " ," ).c_str ());
452456 printf (" -embd, --embeddings <0|1> (default: %s)\n " ,
453457 join (cmd_params_defaults.embeddings , " ," ).c_str ());
454458 printf (" -ts, --tensor-split <ts0/ts1/..> (default: 0)\n " );
@@ -772,6 +776,13 @@ static cmd_params parse_cmd_params(int argc, char ** argv) {
772776 }
773777 auto p = string_split<bool >(argv[i], split_delim);
774778 params.use_mmap .insert (params.use_mmap .end (), p.begin (), p.end ());
779+ } else if (arg == " -dio" || arg == " --direct-io" ) {
780+ if (++i >= argc) {
781+ invalid_param = true ;
782+ break ;
783+ }
784+ auto p = string_split<bool >(argv[i], split_delim);
785+ params.use_direct_io .insert (params.use_direct_io .end (), p.begin (), p.end ());
775786 } else if (arg == " -embd" || arg == " --embeddings" ) {
776787 if (++i >= argc) {
777788 invalid_param = true ;
@@ -1008,6 +1019,9 @@ static cmd_params parse_cmd_params(int argc, char ** argv) {
10081019 if (params.use_mmap .empty ()) {
10091020 params.use_mmap = cmd_params_defaults.use_mmap ;
10101021 }
1022+ if (params.use_direct_io .empty ()) {
1023+ params.use_direct_io = cmd_params_defaults.use_direct_io ;
1024+ }
10111025 if (params.embeddings .empty ()) {
10121026 params.embeddings = cmd_params_defaults.embeddings ;
10131027 }
@@ -1056,6 +1070,7 @@ struct cmd_params_instance {
10561070 std::vector<float > tensor_split;
10571071 std::vector<llama_model_tensor_buft_override> tensor_buft_overrides;
10581072 bool use_mmap;
1073+ bool use_direct_io;
10591074 bool embeddings;
10601075 bool no_op_offload;
10611076 bool no_host;
@@ -1067,11 +1082,12 @@ struct cmd_params_instance {
10671082 if (!devices.empty ()) {
10681083 mparams.devices = const_cast <ggml_backend_dev_t *>(devices.data ());
10691084 }
1070- mparams.split_mode = split_mode;
1071- mparams.main_gpu = main_gpu;
1072- mparams.tensor_split = tensor_split.data ();
1073- mparams.use_mmap = use_mmap;
1074- mparams.no_host = no_host;
1085+ mparams.split_mode = split_mode;
1086+ mparams.main_gpu = main_gpu;
1087+ mparams.tensor_split = tensor_split.data ();
1088+ mparams.use_mmap = use_mmap;
1089+ mparams.use_direct_io = use_direct_io;
1090+ mparams.no_host = no_host;
10751091
10761092 if (n_cpu_moe <= 0 ) {
10771093 if (tensor_buft_overrides.empty ()) {
@@ -1115,7 +1131,8 @@ struct cmd_params_instance {
11151131 bool equal_mparams (const cmd_params_instance & other) const {
11161132 return model == other.model && n_gpu_layers == other.n_gpu_layers && n_cpu_moe == other.n_cpu_moe &&
11171133 split_mode == other.split_mode &&
1118- main_gpu == other.main_gpu && use_mmap == other.use_mmap && tensor_split == other.tensor_split &&
1134+ main_gpu == other.main_gpu && tensor_split == other.tensor_split &&
1135+ use_mmap == other.use_mmap && use_direct_io == other.use_direct_io &&
11191136 devices == other.devices &&
11201137 no_host == other.no_host &&
11211138 vec_tensor_buft_override_equal (tensor_buft_overrides, other.tensor_buft_overrides );
@@ -1153,6 +1170,7 @@ static std::vector<cmd_params_instance> get_cmd_params_instances(const cmd_param
11531170 for (const auto & ts : params.tensor_split )
11541171 for (const auto & ot : params.tensor_buft_overrides )
11551172 for (const auto & mmp : params.use_mmap )
1173+ for (const auto & dio : params.use_direct_io )
11561174 for (const auto & noh : params.no_host )
11571175 for (const auto & embd : params.embeddings )
11581176 for (const auto & nopo : params.no_op_offload )
@@ -1194,6 +1212,7 @@ static std::vector<cmd_params_instance> get_cmd_params_instances(const cmd_param
11941212 /* .tensor_split = */ ts,
11951213 /* .tensor_buft_overrides = */ ot,
11961214 /* .use_mmap = */ mmp,
1215+ /* .use_direct_io= */ dio,
11971216 /* .embeddings = */ embd,
11981217 /* .no_op_offload= */ nopo,
11991218 /* .no_host = */ noh,
@@ -1228,6 +1247,7 @@ static std::vector<cmd_params_instance> get_cmd_params_instances(const cmd_param
12281247 /* .tensor_split = */ ts,
12291248 /* .tensor_buft_overrides = */ ot,
12301249 /* .use_mmap = */ mmp,
1250+ /* .use_direct_io= */ dio,
12311251 /* .embeddings = */ embd,
12321252 /* .no_op_offload= */ nopo,
12331253 /* .no_host = */ noh,
@@ -1262,6 +1282,7 @@ static std::vector<cmd_params_instance> get_cmd_params_instances(const cmd_param
12621282 /* .tensor_split = */ ts,
12631283 /* .tensor_buft_overrides = */ ot,
12641284 /* .use_mmap = */ mmp,
1285+ /* .use_direct_io= */ dio,
12651286 /* .embeddings = */ embd,
12661287 /* .no_op_offload= */ nopo,
12671288 /* .no_host = */ noh,
@@ -1301,6 +1322,7 @@ struct test {
13011322 std::vector<float > tensor_split;
13021323 std::vector<llama_model_tensor_buft_override> tensor_buft_overrides;
13031324 bool use_mmap;
1325+ bool use_direct_io;
13041326 bool embeddings;
13051327 bool no_op_offload;
13061328 bool no_host;
@@ -1338,6 +1360,7 @@ struct test {
13381360 tensor_split = inst.tensor_split ;
13391361 tensor_buft_overrides = inst.tensor_buft_overrides ;
13401362 use_mmap = inst.use_mmap ;
1363+ use_direct_io = inst.use_direct_io ;
13411364 embeddings = inst.embeddings ;
13421365 no_op_offload = inst.no_op_offload ;
13431366 no_host = inst.no_host ;
@@ -1397,9 +1420,9 @@ struct test {
13971420 " n_ubatch" , " n_threads" , " cpu_mask" , " cpu_strict" , " poll" ,
13981421 " type_k" , " type_v" , " n_gpu_layers" , " n_cpu_moe" , " split_mode" ,
13991422 " main_gpu" , " no_kv_offload" , " flash_attn" , " devices" , " tensor_split" ,
1400- " tensor_buft_overrides" , " use_mmap" , " embeddings " , " no_op_offload " ,
1401- " no_host" , " n_prompt" , " n_gen" , " n_depth" , " test_time " ,
1402- " avg_ns" , " stddev_ns" , " avg_ts" , " stddev_ts"
1423+ " tensor_buft_overrides" , " use_mmap" , " use_direct_io " , " embeddings " ,
1424+ " no_op_offload " , " no_host" , " n_prompt" , " n_gen" , " n_depth" ,
1425+ " test_time " , " avg_ns" , " stddev_ns" , " avg_ts" , " stddev_ts"
14031426 };
14041427 return fields;
14051428 }
@@ -1414,7 +1437,7 @@ struct test {
14141437 return INT;
14151438 }
14161439 if (field == " f16_kv" || field == " no_kv_offload" || field == " cpu_strict" || field == " flash_attn" ||
1417- field == " use_mmap" || field == " embeddings" || field == " no_host" ) {
1440+ field == " use_mmap" || field == " use_direct_io " || field == " embeddings" || field == " no_host" ) {
14181441 return BOOL;
14191442 }
14201443 if (field == " avg_ts" || field == " stddev_ts" ) {
@@ -1487,6 +1510,7 @@ struct test {
14871510 tensor_split_str,
14881511 tensor_buft_overrides_str,
14891512 std::to_string (use_mmap),
1513+ std::to_string (use_direct_io),
14901514 std::to_string (embeddings),
14911515 std::to_string (no_op_offload),
14921516 std::to_string (no_host),
@@ -1672,6 +1696,9 @@ struct markdown_printer : public printer {
16721696 if (field == " use_mmap" ) {
16731697 return 4 ;
16741698 }
1699+ if (field == " use_direct_io" ) {
1700+ return 3 ;
1701+ }
16751702 if (field == " test" ) {
16761703 return 15 ;
16771704 }
@@ -1709,6 +1736,9 @@ struct markdown_printer : public printer {
17091736 if (field == " use_mmap" ) {
17101737 return " mmap" ;
17111738 }
1739+ if (field == " use_direct_io" ) {
1740+ return " dio" ;
1741+ }
17121742 if (field == " embeddings" ) {
17131743 return " embd" ;
17141744 }
@@ -1793,6 +1823,9 @@ struct markdown_printer : public printer {
17931823 if (params.use_mmap .size () > 1 || params.use_mmap != cmd_params_defaults.use_mmap ) {
17941824 fields.emplace_back (" use_mmap" );
17951825 }
1826+ if (params.use_direct_io .size () > 1 || params.use_direct_io != cmd_params_defaults.use_direct_io ) {
1827+ fields.emplace_back (" use_direct_io" );
1828+ }
17961829 if (params.embeddings .size () > 1 || params.embeddings != cmd_params_defaults.embeddings ) {
17971830 fields.emplace_back (" embeddings" );
17981831 }
0 commit comments