diff --git a/CMakeLists.txt b/CMakeLists.txt index 0f6c52b2a7f..13dd918282b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,8 +1,9 @@ # SPDX-FileCopyrightText: Copyright (c) 2023-present NVIDIA CORPORATION & AFFILIATES. # All rights reserved. # SPDX-License-Identifier: BSD-3-Clause -cmake_minimum_required(VERSION 3.18 FATAL_ERROR) +cmake_minimum_required(VERSION 3.18) project(nvfuser) + enable_language(CUDA) cmake_policy(SET CMP0063 NEW) # make symbol visibility always apply @@ -25,6 +26,7 @@ set(NVFUSER_THIRD_PARTY_DIR "${NVFUSER_ROOT}/third_party") option(NVFUSER_STANDALONE_BUILD_WITH_UCC "" OFF) option(NVFUSER_EXPLICIT_ERROR_CHECK "" OFF) +option(NVFUSER_ENABLE_DEPENDENCY_REPORT "Enable Python-based dependency reporting and log capture" ON) if(NVFUSER_EXPLICIT_ERROR_CHECK) add_compile_definitions(NVFUSER_EXPLICIT_ERROR_CHECK) @@ -41,13 +43,6 @@ if(NVFUSER_DISTRIBUTED) add_compile_definitions(NVFUSER_DISTRIBUTED) endif() -message(STATUS "Setting NVFUSER_DISTRIBUTED=${NVFUSER_DISTRIBUTED}") - -# We try to update which C++ standard we use together in lockstep across all -# built libraries, and these variables control which that is. Generally we are -# on C++20, but we still support a version of CUDA (11) that does not recognize -# C++20 and so we drop back to 17 there. Also, we allow all of these to be -# overridden by the user. # Note we do not use a global set_property on e.g. CXX_STANDARD. CMake globals # are footguns that should generally be avoided, because they are difficult to # target where and *only* where they are needed. 
See e.g.: @@ -56,144 +51,72 @@ set(NVFUSER_C_STANDARD 20 CACHE STRING "C standard to use for C code") set(NVFUSER_CPP_STANDARD 20 CACHE STRING "C++ standard to use for C++ code") set(NVFUSER_CUDA_STANDARD 17 CACHE STRING "C++ standard to use for CUDA code") -if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU") - # TODO: gcc 11.4 has been end of life according to https://gcc.gnu.org/ - # I believe we should bump up the version below to 12.x. - # However, because gcc 11.4 is well tested and stable, let's defer this - # rejection until the day that we find a bug in gcc 11.4. - if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 11.4) - message(FATAL_ERROR "GCC < 11.4 has compiler bugs and can not compile nvFuser.") - endif() -endif() - string(APPEND CMAKE_CXX_FLAGS " -Wno-psabi") -# -------------------------- -# nvMatmulHeuristics support -# -------------------------- -set(NVMMH_INCLUDE_DIR "NVMMH_INCLUDE_DIR-NOTFOUND" CACHE PATH "Directory containing nvMatmulHeuristics.h") -if (NOT IS_DIRECTORY "${NVMMH_INCLUDE_DIR}") - find_path(NVMMH_INCLUDE_DIR nvMatmulHeuristics.h - PATHS /usr/local/lib/python3.12/dist-packages/nvidia/nvMatmulHeuristics/include $ENV{HOME}/.local/lib/python3.12/dist-packages/nvidia/nvMatmulHeuristics/include) +# ----------------------------------------------------------------------------- +# nvFuser Requirements & Dependencies +# ----------------------------------------------------------------------------- +include(cmake/DependencyRequirements.cmake) +include(cmake/DependencyUtilities.cmake) +include(cmake/LogCapture.cmake) + +# Include all dependency handler functions +include(cmake/deps/handle_ninja.cmake) +include(cmake/deps/handle_compiler.cmake) +include(cmake/deps/handle_python.cmake) +include(cmake/deps/handle_cuda_toolkit.cmake) +include(cmake/deps/handle_torch.cmake) +include(cmake/deps/handle_pybind11.cmake) +include(cmake/deps/handle_llvm.cmake) +include(cmake/deps/handle_nvmmh.cmake) +include(cmake/deps/handle_git_submodules.cmake) + +# Initialize 
success flag +set(NVFUSER_DEPENDENCIES_OK TRUE) + +if(NVFUSER_ENABLE_DEPENDENCY_REPORT) + start_capture() endif() -if(IS_DIRECTORY "${NVMMH_INCLUDE_DIR}") - set(NVMMH_FOUND TRUE) - string(APPEND CMAKE_CXX_FLAGS " -DHAS_NVMMH=1") -else() - set(NVMMH_FOUND FALSE) - message(WARNING "nvMatmulHeuristics headers not found – building without nvMatmulHeuristics support") +# Handle each dependency explicitly +handle_compiler() +handle_git_submodules() +handle_ninja() +handle_cuda_toolkit() +handle_python() +handle_torch() # Must come AFTER python and cudatoolkit. +handle_pybind11() +handle_llvm() +handle_nvmmh() # Must come AFTER python to query correct site-packages + +if(NVFUSER_ENABLE_DEPENDENCY_REPORT) + stop_capture(DEP_LOGS) + + # Show the dependency report (Python-based) + report_dependencies() + + # Dump detailed logs + message("") + message(STATUS "******** Detailed Dependency Output ********") + dump_captured_logs("${DEP_LOGS}") + message(STATUS "******** End Dependency Output ********") + message("") endif() -# -------------------------- - -find_package(Torch REQUIRED) -find_package(Python REQUIRED Development.Module Interpreter) -find_package(pybind11 REQUIRED) -find_package(CUDAToolkit REQUIRED) - -# need this since the pytorch execution uses a different name -set(PYTHON_EXECUTABLE ${Python_EXECUTABLE}) - -# CXX flags is necessary since https://github.com/pytorch/pytorch/issues/98093 -string(APPEND CMAKE_CXX_FLAGS " ${TORCH_CXX_FLAGS}") -include(cmake/FlatBuffers.cmake) -include(cmake/Dependencies.cmake) - -# set CUDA_ARCH for cu tests. 
-if(TORCH_CUDA_ARCH_LIST) - set(ARCH_FLAGS) - cuda_select_nvcc_arch_flags(ARCH_FLAGS ${TORCH_CUDA_ARCH_LIST}) - list(APPEND CUDA_NVCC_FLAGS ${ARCH_FLAGS}) -endif() - -add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/lib/dynamic_type) - -set(CUTLASS_STATUS "N/A") -if(BUILD_CUTLASS) - enable_language(CUDA) - - if(CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 12.8) - message(WARNING "Skip building CUTLASS because of incompatible CUDA ${CMAKE_CUDA_COMPILER_VERSION}") - set(CUTLASS_STATUS "DISABLED") - else() - add_compile_definitions(NVFUSER_ENABLE_CUTLASS) - set(CUTLASS_STATUS "ENABLED") - - find_package(CUDAToolkit REQUIRED) - - set(NVF_CUTLASS_CUDA_FLAGS - "-DCUTE_USE_PACKED_TUPLE=1" - "-DCUTLASS_ENABLE_TENSOR_CORE_MMA=1" - "-DCUTLASS_VERSIONS_GENERATED" - "-DCUTLASS_TEST_LEVEL=0" - "-DCUTLASS_TEST_ENABLE_CACHED_RESULTS=1" - "-DCUTLASS_DEBUG_TRACE_LEVEL=0" - "--expt-relaxed-constexpr" - "--expt-extended-lambda" - # Equivalent to --threads= - "--threads=0" - # ----------------- - # Suppress warnings - # ----------------- - "-Xcompiler=-Wconversion" - "-Xcompiler=-fno-strict-aliasing" - # CUDA 13 has deprecated old vector types such as ulong4: https://developer.nvidia.com/blog/whats-new-and-important-in-cuda-toolkit-13-0 - "-Xcompiler=-Wno-deprecated-declarations" - ) - set(NVFUSER_CUTLASS_SRCS) - list(APPEND NVFUSER_CUTLASS_SRCS - ${NVFUSER_CUTLASS}/group_mm.cu - ${NVFUSER_CUTLASS}/mxfp8_scaled_mm.cu - ${NVFUSER_CUTLASS}/nvfp4_scaled_mm.cu - ${NVFUSER_CUTLASS}/nvfp4_scaled_mm_blockscale.cu - ${NVFUSER_CUTLASS}/nvfp4_scaled_group_mm.cu - ${NVFUSER_CUTLASS}/nvf_cutlass.cpp - ${NVFUSER_CUTLASS}/cutlass_utils.cpp - ) - add_library(nvf_cutlass SHARED ${NVFUSER_CUTLASS_SRCS}) +# Check if any required dependencies failed +if(NOT NVFUSER_DEPENDENCIES_OK) + message(FATAL_ERROR "Please install or upgrade the required dependencies listed above.") +endif() - target_include_directories(nvf_cutlass PRIVATE ${NVFUSER_THIRD_PARTY_DIR}/cutlass/include) - target_include_directories(nvf_cutlass 
PRIVATE ${NVFUSER_THIRD_PARTY_DIR}/cutlass/tools/util/include) - target_compile_options(nvf_cutlass PRIVATE $<$:${NVF_CUTLASS_CUDA_FLAGS}>) - if(NOT MSVC) - set(NVF_LIB_SUFFIX ".so") - else() - set(NVF_LIB_SUFFIX ".pyd") - endif() +# Setup submodule related logic +include(cmake/Submodules.cmake) +# ----------------------------------------------------------------------------- - target_include_directories(nvf_cutlass PUBLIC - "$" - "$" - "$" - ) - target_link_libraries(nvf_cutlass PRIVATE "${TORCH_LIBRARIES}" c10) - set_target_properties(nvf_cutlass PROPERTIES - C_STANDARD ${NVFUSER_C_STANDARD} - CUDA_STANDARD ${NVFUSER_CUDA_STANDARD} - CXX_STANDARD ${NVFUSER_CPP_STANDARD} - CXX_STANDARD_REQUIRED ON - CXX_VISIBILITY_PRESET hidden - INSTALL_RPATH - "$ORIGIN/../../nvidia/cuda_runtime/lib:$ORIGIN/../../nvidia/cuda_nvrtc/lib:$ORIGIN/../../nvidia/cuda_cupti/lib:$ORIGIN/../../torch/lib" - POSITION_INDEPENDENT_CODE Yes - VISIBILITY_INLINES_HIDDEN Yes - CUDA_ARCHITECTURES "100a" - ) - # Our CUTLASS kernels require substantially more memory to compile—up to 6 - # GB per file. To avoid exhausting system memory, it's helpful to limit - # concurrency specifically for these kernels, without throttling other - # compilation units. A pool of size 2 works well in practice: there are - # only a handful of CUTLASS kernel files, so this restriction has little - # impact on overall build time. 
- set(CUTLASS_MAX_JOBS 0 CACHE STRING "Max concurrent CUTLASS CUDA compiles (0 = no limit)") - if(CUTLASS_MAX_JOBS GREATER 0) - set_property(GLOBAL PROPERTY JOB_POOLS cutlass=${CUTLASS_MAX_JOBS}) - set_target_properties(nvf_cutlass PROPERTIES JOB_POOL_COMPILE cutlass) - endif() - install(TARGETS nvf_cutlass EXPORT NvfuserTargets DESTINATION lib) +add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/lib/dynamic_type) - endif() +if(NVFUSER_USE_CUTLASS) + add_compile_definitions(NVFUSER_ENABLE_CUTLASS) + add_subdirectory(cutlass) endif() # ------------------------------ @@ -439,25 +362,6 @@ list(APPEND NVFUSER_SRCS ${NVFUSER_SRCS_DIR}/validator_utils.cpp ) -# Add LLVM JIT related dependencies -set(LLVM_MINIMUM_VERSION "18.1") -find_package(LLVM REQUIRED CONFIG) -if(${LLVM_VERSION} VERSION_LESS ${LLVM_MINIMUM_VERSION}) - message(FATAL_ERROR "LLVM ${LLVM_VERSION} does not meet the minimum version required: ${LLVM_MINIMUM_VERSION}") -endif() -llvm_map_components_to_libnames(LLVM_LIBS - support - core - orcjit - executionengine - irreader - nativecodegen - Target - Analysis - JITLink - Demangle -) - add_library(LLVM_JIT INTERFACE) target_include_directories(LLVM_JIT SYSTEM INTERFACE ${LLVM_INCLUDE_DIRS}) target_compile_definitions(LLVM_JIT INTERFACE ${LLVM_DEFINITIONS}) @@ -559,7 +463,7 @@ target_link_libraries(codegen_internal PUBLIC ${TORCH_LIBRARIES} dl ) -if (BUILD_CUTLASS AND CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 12.8) +if (NVFUSER_USE_CUTLASS) target_link_libraries(codegen_internal PUBLIC nvf_cutlass) target_compile_definitions(codegen_internal PUBLIC "-DNVFUSER_CUTLASS_KERNEL_ENABLED") endif() @@ -586,13 +490,13 @@ if(NVFUSER_USE_PCH) target_precompile_headers(nvfuser_codegen REUSE_FROM codegen_internal) endif() -if (BUILD_CUTLASS AND CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 12.8) +if (NVFUSER_USE_CUTLASS) target_link_libraries(nvfuser_codegen PUBLIC nvf_cutlass) endif() # Conditionally link CUTLASS using generator expression to avoid export issues 
target_link_libraries(nvfuser_codegen PRIVATE - $<$,$>:nvf_cutlass> + $<$,$>:nvf_cutlass> ) if(NVFUSER_BUILD_WITH_ASAN) @@ -726,9 +630,9 @@ if(BUILD_PYTHON) add_custom_command( OUTPUT ${NVFUSER_PYTHON_DIR}/nvfuser/version.py COMMAND - "${PYTHON_EXECUTABLE}" -c \"from pathlib import Path\; Path('${NVFUSER_PYTHON_DIR}/tools/gen_nvfuser_version.py') .touch() \" + "${Python_EXECUTABLE}" -c \"from pathlib import Path\; Path('${NVFUSER_PYTHON_DIR}/tools/gen_nvfuser_version.py') .touch() \" COMMAND - "${PYTHON_EXECUTABLE}" ${NVFUSER_PYTHON_DIR}/tools/gen_nvfuser_version.py nvfuser + "${Python_EXECUTABLE}" ${NVFUSER_PYTHON_DIR}/tools/gen_nvfuser_version.py nvfuser DEPENDS ${NVFUSER_PYTHON_DIR}/tools/gen_nvfuser_version.py DEPENDS ${NVFUSER_PYTHON_DIR}/version.txt WORKING_DIRECTORY ${NVFUSER_PYTHON_DIR}/tools/ @@ -791,12 +695,12 @@ if(BUILD_PYTHON) VISIBILITY_INLINES_HIDDEN Yes ) - if (BUILD_CUTLASS AND CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 12.8) + if (NVFUSER_USE_CUTLASS) target_link_libraries(nvf_py_internal PRIVATE nvf_cutlass) endif() if (NOT MSVC) - target_link_libraries(nvf_py_internal PRIVATE CUDA::cupti) + target_link_libraries(nvf_py_internal PRIVATE CUDA::cupti) endif() target_link_libraries(nvf_py_internal PRIVATE @@ -853,9 +757,9 @@ if(BUILD_PYTHON) add_custom_command( OUTPUT ${NVFUSER_PYTHON_DIR}/nvfuser_direct/version.py COMMAND - "${PYTHON_EXECUTABLE}" -c \"from pathlib import Path\; Path('${NVFUSER_PYTHON_DIR}/tools/gen_nvfuser_version.py') .touch() \" + "${Python_EXECUTABLE}" -c \"from pathlib import Path\; Path('${NVFUSER_PYTHON_DIR}/tools/gen_nvfuser_version.py') .touch() \" COMMAND - "${PYTHON_EXECUTABLE}" ${NVFUSER_PYTHON_DIR}/tools/gen_nvfuser_version.py nvfuser_direct + "${Python_EXECUTABLE}" ${NVFUSER_PYTHON_DIR}/tools/gen_nvfuser_version.py nvfuser_direct DEPENDS ${NVFUSER_PYTHON_DIR}/tools/gen_nvfuser_version.py DEPENDS ${NVFUSER_PYTHON_DIR}/version.txt WORKING_DIRECTORY ${NVFUSER_PYTHON_DIR}/tools/ @@ -922,21 +826,14 @@ 
if(BUILD_PYTHON) target_include_directories(nvf_py_direct_internal PUBLIC ${NVFUSER_PYTHON_DIRECT_BINDINGS}) target_include_directories(nvf_py_direct_internal PUBLIC ${NVFUSER_PYTHON_COMMON}) - if (BUILD_CUTLASS AND CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 12.8) - target_link_libraries(nvf_py_direct_internal PRIVATE - nvfuser_codegen - nvf_cutlass - "${TORCH_INSTALL_PREFIX}/lib/libtorch_python.so" - pybind11::pybind11 pybind11::headers - CUDA::cupti - ) - else() - target_link_libraries(nvf_py_direct_internal PRIVATE - nvfuser_codegen - "${TORCH_INSTALL_PREFIX}/lib/libtorch_python.so" - pybind11::pybind11 pybind11::headers - CUDA::cupti - ) + target_link_libraries(nvf_py_direct_internal PRIVATE + nvfuser_codegen + "${TORCH_INSTALL_PREFIX}/lib/libtorch_python.so" + pybind11::pybind11 pybind11::headers + CUDA::cupti + ) + if (NVFUSER_USE_CUTLASS) + target_link_libraries(nvf_py_direct_internal PRIVATE nvf_cutlass) endif() target_link_libraries(nvfuser_direct PRIVATE @@ -1301,7 +1198,7 @@ if(BUILD_TEST) ${NVFUSER_ROOT}/tests/cpp/test_matmul_scheduler.cpp ${NVFUSER_ROOT}/tests/cpp/test_mma.cpp ) - if (BUILD_CUTLASS AND CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 12.8) + if (NVFUSER_USE_CUTLASS) list(APPEND MATMUL_TEST_SRCS ${NVFUSER_ROOT}/tests/cpp/test_cutlass_scheduler.cpp ) @@ -1518,7 +1415,7 @@ foreach(src ${NVFUSER_RUNTIME_FILES}) COMMENT "Stringify NVFUSER runtime source file ${src}" OUTPUT ${dst} DEPENDS ${src} "${NVFUSER_STRINGIFY_TOOL}" - COMMAND ${PYTHON_EXECUTABLE} ${NVFUSER_STRINGIFY_TOOL} -i ${src} -o ${dst} + COMMAND ${Python_EXECUTABLE} ${NVFUSER_STRINGIFY_TOOL} -i ${src} -o ${dst} ) add_custom_target(nvfuser_rt_${filename} DEPENDS ${dst}) add_dependencies(codegen_internal nvfuser_rt_${filename}) @@ -1528,7 +1425,7 @@ foreach(src ${NVFUSER_RUNTIME_FILES}) if(${src} IS_NEWER_THAN ${dst}) # also generate the resource headers during the configuration step # (so tools like clang-tidy can run w/o requiring a real build) - execute_process(COMMAND 
${PYTHON_EXECUTABLE} ${NVFUSER_STRINGIFY_TOOL} -i ${src} -o ${dst}) + execute_process(COMMAND ${Python_EXECUTABLE} ${NVFUSER_STRINGIFY_TOOL} -i ${src} -o ${dst}) endif() endforeach() @@ -1543,9 +1440,11 @@ file(CREATE_LINK "${CMAKE_BINARY_DIR}" "${NVFUSER_ROOT}/bin" SYMBOLIC) file(CREATE_LINK "${NVFUSER_ROOT}/python/nvfuser" "${NVFUSER_ROOT}/nvfuser" SYMBOLIC) file(CREATE_LINK "${NVFUSER_ROOT}/python/nvfuser_direct" "${NVFUSER_ROOT}/nvfuser_direct" SYMBOLIC) -message(STATUS "") message(STATUS "******** Nvfuser configuration summary ********") -message(STATUS " BUILD_CUTLASS: ${CUTLASS_STATUS}") +message(STATUS " NVFUSER_USE_CUTLASS : ${NVFUSER_USE_CUTLASS}") +if(NVFUSER_USE_CUTLASS) + message(STATUS " CUTLASS_MAX_JOBS : ${CUTLASS_MAX_JOBS}") +endif() message(STATUS " UCC_FOUND: ${UCC_FOUND}") if(NVFUSER_STANDALONE_BUILD_WITH_UCC) message(STATUS " UCC_HOME: $ENV{UCC_HOME}") @@ -1557,6 +1456,7 @@ message(STATUS " NVFUSER_STANDALONE_BUILD_WITH_UCC : ${NVFUSER_STANDALONE_BUIL message(STATUS " NVFUSER_BUILD_WITH_ASAN : ${NVFUSER_BUILD_WITH_ASAN}") message(STATUS " NVFUSER_DISTRIBUTED : ${NVFUSER_DISTRIBUTED}") message(STATUS " NVFUSER_CPP_STANDARD : ${NVFUSER_CPP_STANDARD}") -message(STATUS " NVMMH_INCLUDE_DIR : ${NVMMH_INCLUDE_DIR}") +message(STATUS " NVMMH_FOUND : ${NVMMH_FOUND}") message(STATUS " NVFUSER_USE_PCH : ${NVFUSER_USE_PCH}") message(STATUS "******** End of Nvfuser configuration summary ********") +message("") diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake deleted file mode 100644 index 0cdafaeedb7..00000000000 --- a/cmake/Dependencies.cmake +++ /dev/null @@ -1,36 +0,0 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023-present NVIDIA CORPORATION & AFFILIATES. -# All rights reserved. -# SPDX-License-Identifier: BSD-3-Clause - -# For gtest, we will simply embed it into our test binaries, so we will not need to install it. -set(INSTALL_GTEST OFF CACHE BOOL "Install gtest." FORCE) -set(BUILD_GMOCK ON CACHE BOOL "Build gmock." 
FORCE) -set(gtest_hide_internal_symbols ON CACHE BOOL "Use symbol visibility" FORCE) - -add_subdirectory(${CMAKE_SOURCE_DIR}/third_party/googletest) - -# We will not need to test benchmark lib itself. -set(BENCHMARK_ENABLE_TESTING OFF CACHE BOOL "Disable benchmark testing as we don't need it.") -# We will not need to install benchmark since we link it statically. -set(BENCHMARK_ENABLE_INSTALL OFF CACHE BOOL "Disable benchmark install to avoid overwriting vendor install.") - -if(NOT USE_SYSTEM_BENCHMARK) - add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/../third_party/benchmark) -else() - add_library(benchmark SHARED IMPORTED) - find_library(BENCHMARK_LIBRARY benchmark) - if(NOT BENCHMARK_LIBRARY) - message(FATAL_ERROR "Cannot find google benchmark library") - endif() - message("-- Found benchmark: ${BENCHMARK_LIBRARY}") - set_property(TARGET benchmark PROPERTY IMPORTED_LOCATION ${BENCHMARK_LIBRARY}) -endif() - -# Cacheing variables to enable incremental build. -# Without this is cross compiling we end up having to blow build directory -# and rebuild from scratch. -if(CMAKE_CROSSCOMPILING) - if(COMPILE_HAVE_STD_REGEX) - set(RUN_HAVE_STD_REGEX 0 CACHE INTERNAL "Cache RUN_HAVE_STD_REGEX output for cross-compile.") - endif() -endif() diff --git a/cmake/DependencyRequirements.cmake b/cmake/DependencyRequirements.cmake new file mode 100644 index 00000000000..6b941dc4c62 --- /dev/null +++ b/cmake/DependencyRequirements.cmake @@ -0,0 +1,45 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023-present NVIDIA CORPORATION & AFFILIATES. +# All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause + +# ============================================================================== +# nvFuser Dependency Requirements +# ============================================================================== +# +# This file centralizes all dependency requirement metadata for nvFuser. 
+# Each requirement entry contains: +# - VERSION_MIN: Minimum version required (can be empty for "any version") +# - OPTIONAL: TRUE/FALSE (default FALSE) +# - COMPONENTS: Components required (for find_package, semicolon-separated) +# +# ============================================================================== + +# Ninja +set(NVFUSER_REQUIREMENT_Ninja_OPTIONAL "TRUE") + +# Compiler (GCC or Clang) +set(NVFUSER_REQUIREMENT_GNU_VERSION_MIN "13.1") +set(NVFUSER_REQUIREMENT_Clang_VERSION_MIN "19") + +# Python +set(NVFUSER_REQUIREMENT_Python_VERSION_MIN "3.10") +set(NVFUSER_REQUIREMENT_Python_COMPONENTS "Interpreter;Development.Module") + +# Torch +set(NVFUSER_REQUIREMENT_Torch_VERSION_MIN "2.9") + +# pybind11 +set(NVFUSER_REQUIREMENT_pybind11_VERSION_MIN "3.0") + +# CUDAToolkit +set(NVFUSER_REQUIREMENT_CUDAToolkit_VERSION_MIN "12.6") +set(NVFUSER_REQUIREMENT_CUDAToolkit_COMPONENTS "Cupti;cuda_driver") + +# LLVM +set(NVFUSER_REQUIREMENT_LLVM_VERSION_MIN "18.1") + +# NVMMH +set(NVFUSER_REQUIREMENT_NVMMH_OPTIONAL "TRUE") + +# Git Submodules (required for build) +# No version requirement - just checks if submodules are initialized diff --git a/cmake/DependencyUtilities.cmake b/cmake/DependencyUtilities.cmake new file mode 100644 index 00000000000..45125b557e5 --- /dev/null +++ b/cmake/DependencyUtilities.cmake @@ -0,0 +1,139 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023-present NVIDIA CORPORATION & AFFILIATES. +# All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause + +# ============================================================================== +# nvFuser Dependency Utilities +# ============================================================================== +# +# This file provides utilities for dependency status tracking and reporting. 
+# Dependency metadata is defined in DependencyRequirements.cmake
+# Individual dependency handlers are in cmake/deps/
+#
+# ==============================================================================
+
+# --------------------------
+# Conditionally add REQUIRED to find_package calls
+# --------------------------
+
+# When dependency report is enabled, we don't use REQUIRED so we can collect
+# all failures and show a comprehensive Python report.
+# When disabled, we use REQUIRED for immediate failure with standard CMake errors.
+if(NVFUSER_ENABLE_DEPENDENCY_REPORT)
+  set(MAYBE_REQUIRED "")
+else()
+  set(MAYBE_REQUIRED "REQUIRED")
+endif()
+
+# --------------------------
+# Status Tracking for JSON Export
+# --------------------------
+
+# set_dependency_report_status(<name>)
+#
+# Set dependency status based on found state and version check.
+#
+# Reads:
+#   <name>_FOUND                          - whether the dependency was located
+#   <name>_VERSION                        - detected version
+#   NVFUSER_REQUIREMENT_<name>_VERSION_MIN - minimum version (empty = any version)
+#   NVFUSER_REQUIREMENT_<name>_OPTIONAL    - truthy when a failure must not abort
+# Writes:
+#   NVFUSER_REQUIREMENT_<name>_STATUS - "SUCCESS", "INCOMPATIBLE" or "NOT_FOUND"
+#   NVFUSER_DEPENDENCIES_OK           - set to FALSE when a non-optional
+#                                       dependency is missing or too old
+#
+# This is a macro, so every variable (including the scratch variables
+# `optional`, `min_version` and `version`) is set in the caller's scope.
+macro(set_dependency_report_status name)
+  set(optional "${NVFUSER_REQUIREMENT_${name}_OPTIONAL}")
+
+  if(${name}_FOUND)
+    # Check version compatibility
+    set(min_version "${NVFUSER_REQUIREMENT_${name}_VERSION_MIN}")
+    set(version "${${name}_VERSION}")
+
+    if(DEFINED min_version AND NOT "${min_version}" STREQUAL "")
+      if("${version}" VERSION_GREATER_EQUAL "${min_version}")
+        set(NVFUSER_REQUIREMENT_${name}_STATUS "SUCCESS")
+      else()
+        set(NVFUSER_REQUIREMENT_${name}_STATUS "INCOMPATIBLE")
+        # Mark dependencies as failed if this is a required dependency
+        if(NOT optional)
+          set(NVFUSER_DEPENDENCIES_OK FALSE)
+        endif()
+      endif()
+    else()
+      # No minimum version configured: being found is enough
+      set(NVFUSER_REQUIREMENT_${name}_STATUS "SUCCESS")
+    endif()
+  else()
+    set(NVFUSER_REQUIREMENT_${name}_STATUS "NOT_FOUND")
+    # Mark dependencies as failed if this is a required dependency
+    if(NOT optional)
+      set(NVFUSER_DEPENDENCIES_OK FALSE)
+    endif()
+  endif()
+endmacro()
+
+# --------------------------
+# Python Export for Dependency Reporting
+# --------------------------
+
+# export_dependency_json(<output_file>)
+#
+# Writes every CMake variable visible at the call site to <output_file> as
+#   { "cmake_vars": { "<name>": "<value>", ... } }
+# with names sorted and all values emitted as JSON strings.
+#
+# Values are escaped so the file is always valid JSON: backslash, double quote,
+# newline, tab and carriage return get their short escapes, and any other
+# control character (U+0001..U+001F, e.g. ESC from colour codes) is written as
+# \u00XX. The document is assembled in memory and written with a single
+# file(WRITE).
+function(export_dependency_json output_file)
+  # Snapshot the names before creating scratch variables so the `_edj_*`
+  # helpers below are neither exported nor able to shadow a caller variable.
+  get_cmake_property(_edj_names VARIABLES)
+  list(SORT _edj_names)
+
+  # Regex character class matching any raw control character U+0001..U+001F.
+  set(_edj_ctrl_class "")
+  foreach(_edj_code RANGE 1 31)
+    string(ASCII ${_edj_code} _edj_char)
+    string(APPEND _edj_ctrl_class "${_edj_char}")
+  endforeach()
+  set(_edj_ctrl_class "[${_edj_ctrl_class}]")
+
+  set(_edj_body "")
+  set(_edj_separator "")
+  foreach(_edj_name IN LISTS _edj_names)
+    set(_edj_value "${${_edj_name}}")
+    # Escape for JSON strings (backslash first so later escapes are not doubled)
+    string(REPLACE "\\" "\\\\" _edj_value "${_edj_value}")
+    string(REPLACE "\"" "\\\"" _edj_value "${_edj_value}")
+    string(REPLACE "\n" "\\n" _edj_value "${_edj_value}")
+    string(REPLACE "\t" "\\t" _edj_value "${_edj_value}")
+    string(REPLACE "\r" "\\r" _edj_value "${_edj_value}")
+
+    # Rare path: remaining control characters are illegal inside JSON strings.
+    if("${_edj_value}" MATCHES "${_edj_ctrl_class}")
+      foreach(_edj_code RANGE 1 31)
+        string(ASCII ${_edj_code} _edj_char)
+        math(EXPR _edj_high "${_edj_code} / 16")
+        math(EXPR _edj_low "${_edj_code} % 16")
+        string(SUBSTRING "0123456789abcdef" ${_edj_low} 1 _edj_low)
+        string(REPLACE "${_edj_char}" "\\u00${_edj_high}${_edj_low}" _edj_value "${_edj_value}")
+      endforeach()
+    endif()
+
+    # The separator is emitted before every entry except the first, so the
+    # last entry never carries a trailing comma.
+    string(APPEND _edj_body "${_edj_separator} \"${_edj_name}\": \"${_edj_value}\"")
+    set(_edj_separator ",\n")
+  endforeach()
+  if(NOT "${_edj_body}" STREQUAL "")
+    string(APPEND _edj_body "\n")
+  endif()
+
+  file(WRITE "${output_file}" "{\n \"cmake_vars\": {\n${_edj_body} }\n}\n")
+endfunction()
+
+# --------------------------
+# Report Dependencies (Python-based with fallback)
+# --------------------------
+
+# report_dependencies()
+#
+# Exports all variables to ${CMAKE_BINARY_DIR}/nvfuser_dependencies.json and
+# runs python/tools/check_dependencies.py on that file, printing the script's
+# stdout. Every failure path (export missing, script missing, Python not found,
+# non-zero exit) only emits a WARNING; this macro never aborts configuration.
+# As a macro it leaves `json_file`, `python_script`, `python_result`,
+# `python_output` and `python_error` set in the caller's scope.
+macro(report_dependencies)
+  # Export dependency data to JSON with error handling
+  set(json_file "${CMAKE_BINARY_DIR}/nvfuser_dependencies.json")
+  export_dependency_json("${json_file}")
+  if(NOT EXISTS "${json_file}")
+    message(WARNING "Failed to export dependency data to ${json_file} - skipping enhanced dependency report")
+  else()
+
+    # Try to use Python script for enhanced reporting
+    set(python_script "${CMAKE_SOURCE_DIR}/python/tools/check_dependencies.py")
+
+    if(NOT EXISTS "${python_script}")
+      message(WARNING "Python reporting script not found: ${python_script}")
+    elseif(NOT DEFINED Python_EXECUTABLE OR NOT Python_FOUND)
+      message(WARNING "Python is not available - skipping enhanced dependency report")
+    else()
+      # Run Python reporting script; arguments are quoted so each path stays
+      # a single argument.
+      execute_process(
+        COMMAND "${Python_EXECUTABLE}" "${python_script}" "${json_file}"
+        RESULT_VARIABLE python_result
+        OUTPUT_VARIABLE python_output
+        ERROR_VARIABLE python_error
+      )
+
+      if(NOT python_result EQUAL 0)
+        message(WARNING "Python reporting failed (${python_result}): ${python_error} To reproduce: ${Python_EXECUTABLE} ${python_script} ${json_file}")
+      else()
+        # Display Python output
+        message("${python_output}")
+      endif()
+    endif()
+
+  endif()
+
+endmacro()
diff --git a/cmake/FlatBuffers.cmake b/cmake/FlatBuffers.cmake
deleted file mode 100644
index e17728dec8d..00000000000
--- a/cmake/FlatBuffers.cmake
+++ /dev/null
@@ -1,10 +0,0 @@
-
-set(FlatBuffers_Src_Dir ${PROJECT_SOURCE_DIR}/third_party/flatbuffers)
-
-option(FLATBUFFERS_BUILD_TESTS "Enable the build of tests and samples." OFF)
-option(FLATBUFFERS_BUILD_FLATC "Enable the build of the flatbuffers compiler" ON)
-option(FLATBUFFERS_STATIC_FLATC "Build flatbuffers compiler with -static flag" OFF)
-option(FLATBUFFERS_BUILD_FLATHASH "Enable the build of flathash" OFF)
-
-# Add FlatBuffers directly to our build. This defines the `flatbuffers` target.
-add_subdirectory(${FlatBuffers_Src_Dir})
diff --git a/cmake/LogCapture.cmake b/cmake/LogCapture.cmake
new file mode 100644
index 00000000000..391f7f3362f
--- /dev/null
+++ b/cmake/LogCapture.cmake
@@ -0,0 +1,90 @@
+# SPDX-FileCopyrightText: Copyright (c) 2023-present NVIDIA CORPORATION & AFFILIATES.
+# All rights reserved.
+# SPDX-License-Identifier: BSD-3-Clause
+
+# ==============================================================================
+# nvFuser Log Capture Utilities
+# ==============================================================================
+
+# Global switch to control logging behavior
+set(LOG_CAPTURE_MODE FALSE CACHE INTERNAL "")
+set(GLOBAL_LOG_BUFFER "" CACHE INTERNAL "")
+
+# 1. Override message() ONCE for the whole project.
+# This acts as a "Gatekeeper" for BOTH capture and suppression.
+#
+# message([<mode>] <text>...)
+#
+# Replacement for the built-in message(); the original stays reachable as
+# _message(). Behaviour per call:
+#   * FATAL_ERROR / SEND_ERROR      -> always forwarded immediately
+#   * LOG_CAPTURE_MODE truthy       -> appended to the GLOBAL_LOG_BUFFER global
+#                                      property as "<first-arg><<>><rest>"
+#                                      (or "EMPTY_LINE" for a blank message)
+#   * SUPPRESS_MESSAGE_OUTPUT truthy -> dropped
+#   * otherwise                     -> forwarded unchanged
+#
+# Arguments are read through ARGV<n> rather than by re-expanding ${ARGV}, so
+# semicolons inside an argument survive, and text is concatenated without a
+# separator exactly like the built-in command. Semicolons are stored as "\;"
+# so one message stays one list element in the buffer.
+#
+# The guard keeps the override from being defined twice: a second definition
+# would rename this function to _message() and make it call itself forever.
+if(NOT COMMAND _message)
+  function(message)
+    # Blank-line request: message() or message(""). Compare as a string so
+    # texts such as "OFF" or "0" are not mistaken for "no arguments".
+    if("${ARGV}" STREQUAL "")
+      if(LOG_CAPTURE_MODE)
+        # Store empty line marker when capturing
+        set_property(GLOBAL APPEND PROPERTY GLOBAL_LOG_BUFFER "EMPTY_LINE")
+      elseif(NOT SUPPRESS_MESSAGE_OUTPUT)
+        # Print empty line when not suppressing
+        _message("")
+      endif()
+      return()
+    endif()
+
+    # First argument: the mode keyword (STATUS, WARNING, FATAL_ERROR, ...) or,
+    # for a mode-less call, the beginning of the text itself.
+    set(type "${ARGV0}")
+
+    # Everything after the first argument, concatenated verbatim.
+    set(msg_content "")
+    if(ARGC GREATER 1)
+      math(EXPR last_index "${ARGC} - 1")
+      foreach(index RANGE 1 ${last_index})
+        string(APPEND msg_content "${ARGV${index}}")
+      endforeach()
+    endif()
+
+    # Pass through FATAL_ERROR and SEND_ERROR immediately (Fail Fast)
+    if(type STREQUAL "FATAL_ERROR" OR type STREQUAL "SEND_ERROR")
+      _message("${type}" "${msg_content}")
+      return()
+    endif()
+
+    if(LOG_CAPTURE_MODE)
+      # CAPTURE MODE: store "<type><<>><content>" as a single list element.
+      set(entry "${type}<<>>${msg_content}")
+      string(REPLACE ";" "\\;" entry "${entry}")
+      set_property(GLOBAL APPEND PROPERTY GLOBAL_LOG_BUFFER "${entry}")
+    elseif(NOT SUPPRESS_MESSAGE_OUTPUT)
+      # NORMAL MODE: hand the call to the built-in message()
+      _message("${type}" "${msg_content}")
+    endif()
+    # SUPPRESS MODE falls through: non-critical messages are dropped.
+  endfunction()
+endif()
+
+# 2. Macros to control the switch
+
+# start_capture(): turn capture on in the caller's scope and empty the buffer.
+macro(start_capture)
+  set(LOG_CAPTURE_MODE TRUE)
+  set_property(GLOBAL PROPERTY GLOBAL_LOG_BUFFER "") # Clear buffer
+endmacro()
+
+# stop_capture(<target_var>): turn capture off and copy the buffered entries
+# into <target_var> in the caller's scope (also leaves `_logs` set there).
+macro(stop_capture target_var)
+  set(LOG_CAPTURE_MODE FALSE)
+  # Move global buffer to user variable
+  get_property(_logs GLOBAL PROPERTY GLOBAL_LOG_BUFFER)
+  set(${target_var} "${_logs}")
+endmacro()
+
+# 3. 
Helper to print the logs later
+#
+# dump_captured_logs(<log_list>)
+#
+# Replays entries recorded while capture was active, through the built-in
+# _message(). Each entry is either the marker "EMPTY_LINE" (printed as a blank
+# line) or "<type><<>><content>", printed as _message(<type> "<content>") so the
+# original mode (STATUS, WARNING, ...) is kept. Entries without the delimiter
+# are reported with a WARNING and skipped.
+function(dump_captured_logs log_list)
+  # Must be the same separator the message() override writes between type and
+  # content; its length is measured rather than hard-coded so the content
+  # offset can never drift from the delimiter.
+  set(delimiter "<<>>")
+  string(LENGTH "${delimiter}" delimiter_length)
+
+  foreach(entry ${log_list})
+    if("${entry}" STREQUAL "EMPTY_LINE")
+      _message("")
+    else()
+      # Split "TYPE<<>>CONTENT"
+      string(FIND "${entry}" "${delimiter}" pos)
+      if(pos EQUAL -1)
+        message(WARNING "Log entry missing delimiter: ${entry}")
+        continue()
+      endif()
+      string(SUBSTRING "${entry}" 0 ${pos} type)
+      math(EXPR content_start "${pos} + ${delimiter_length}")
+      string(SUBSTRING "${entry}" ${content_start} -1 content)
+
+      # Print using the original type (STATUS, WARNING, etc.)
+      # This preserves color and formatting!
+      _message(${type} "${content}")
+    endif()
+  endforeach()
+endfunction()
diff --git a/cmake/Submodules.cmake b/cmake/Submodules.cmake
new file mode 100644
index 00000000000..4542b5daf08
--- /dev/null
+++ b/cmake/Submodules.cmake
@@ -0,0 +1,84 @@
+# SPDX-FileCopyrightText: Copyright (c) 2023-present NVIDIA CORPORATION & AFFILIATES.
+# All rights reserved.
+# SPDX-License-Identifier: BSD-3-Clause
+
+# -----------------------------------------------------------------------------
+# gtest
+# -----------------------------------------------------------------------------
+
+message(STATUS "Configuring Google Test submodule ...")
+list(APPEND CMAKE_MESSAGE_INDENT " ")
+# For gtest, we will simply embed it into our test binaries, so we will not need to install it.
+set(INSTALL_GTEST OFF CACHE BOOL "Install gtest." FORCE)
+set(BUILD_GMOCK ON CACHE BOOL "Build gmock." 
FORCE)
+set(gtest_hide_internal_symbols ON CACHE BOOL "Use symbol visibility" FORCE)
+
+# The cache entries above are set before add_subdirectory() so they are
+# already in place when googletest's own CMakeLists.txt is processed.
+add_subdirectory(${CMAKE_SOURCE_DIR}/third_party/googletest)
+list(POP_BACK CMAKE_MESSAGE_INDENT)
+message(STATUS "End of Google Test configuration.")
+# NOTE(review): message() with no arguments is handled as an empty line by the
+# message() override in cmake/LogCapture.cmake; this presumably requires that
+# file to be included before this one - confirm.
+message()
+
+# -----------------------------------------------------------------------------
+# benchmark
+# -----------------------------------------------------------------------------
+
+message(STATUS "Setting up Google Benchmark submodule ...")
+list(APPEND CMAKE_MESSAGE_INDENT " ")
+# We will not need to test benchmark lib itself.
+set(BENCHMARK_ENABLE_TESTING OFF CACHE BOOL "Disable benchmark testing as we don't need it.")
+# We will not need to install benchmark since we link it statically.
+set(BENCHMARK_ENABLE_INSTALL OFF CACHE BOOL "Disable benchmark install to avoid overwriting vendor install.")
+
+# Either build the vendored copy, or (USE_SYSTEM_BENCHMARK) wrap a library
+# located by find_library() in an imported target named `benchmark`.
+if(NOT USE_SYSTEM_BENCHMARK)
+  add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/../third_party/benchmark)
+else()
+  add_library(benchmark SHARED IMPORTED)
+  find_library(BENCHMARK_LIBRARY benchmark)
+  if(NOT BENCHMARK_LIBRARY)
+    message(FATAL_ERROR "Cannot find google benchmark library")
+  endif()
+  message("-- Found benchmark: ${BENCHMARK_LIBRARY}")
+  set_property(TARGET benchmark PROPERTY IMPORTED_LOCATION ${BENCHMARK_LIBRARY})
+endif()
+list(POP_BACK CMAKE_MESSAGE_INDENT)
+message(STATUS "End of Google Benchmark configuration.")
+message()
+
+# -----------------------------------------------------------------------------
+# FlatBuffer
+# -----------------------------------------------------------------------------
+
+message(STATUS "Setting up FlatBuffer submodule ...")
+list(APPEND CMAKE_MESSAGE_INDENT " ")
+set(FlatBuffers_Src_Dir ${PROJECT_SOURCE_DIR}/third_party/flatbuffers)
+
+# Declared before add_subdirectory() so these defaults are already set when
+# the FlatBuffers project is configured.
+option(FLATBUFFERS_BUILD_TESTS "Enable the build of tests and samples." OFF)
+option(FLATBUFFERS_BUILD_FLATC "Enable the build of the flatbuffers compiler" ON)
+option(FLATBUFFERS_STATIC_FLATC "Build flatbuffers compiler with -static flag" OFF)
+option(FLATBUFFERS_BUILD_FLATHASH "Enable the build of flathash" OFF)
+
+# Add FlatBuffers directly to our build. This defines the `flatbuffers` target.
+add_subdirectory(${FlatBuffers_Src_Dir})
+list(POP_BACK CMAKE_MESSAGE_INDENT)
+message(STATUS "End of FlatBuffer configuration.")
+message()
+
+# -----------------------------------------------------------------------------
+# CUTLASS
+# -----------------------------------------------------------------------------
+
+message(STATUS "Setting up CUTLASS submodule ...")
+list(APPEND CMAKE_MESSAGE_INDENT " ")
+# Decide NVFUSER_USE_CUTLASS (a normal, non-cache variable): FALSE when
+# NVFUSER_DISABLE_CUTLASS is truthy or the CUDA compiler is older than 12.8,
+# TRUE otherwise. Only the version case is reported as a WARNING.
+if(NVFUSER_DISABLE_CUTLASS)
+  message(STATUS "CUTLASS Support DISABLED.")
+  set(NVFUSER_USE_CUTLASS FALSE)
+elseif(CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 12.8)
+  message(WARNING "CUTLASS Support DISABLED: Requires CUDA 12.8+")
+  set(NVFUSER_USE_CUTLASS FALSE)
+else()
+  message(STATUS "CUTLASS Support ENABLED.")
+  set(NVFUSER_USE_CUTLASS TRUE)
+endif()
+list(POP_BACK CMAKE_MESSAGE_INDENT)
+message(STATUS "End of CUTLASS configuration.")
+message()
diff --git a/cmake/deps/handle_compiler.cmake b/cmake/deps/handle_compiler.cmake
new file mode 100644
index 00000000000..ca78f82cca4
--- /dev/null
+++ b/cmake/deps/handle_compiler.cmake
@@ -0,0 +1,37 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025-present NVIDIA CORPORATION & AFFILIATES.
+# All rights reserved.
+# SPDX-License-Identifier: BSD-3-Clause
+
+# ------------------------------------------------------------------------------
+# C++ Compiler Handler
+# ------------------------------------------------------------------------------
+
+macro(handle_compiler)
+  # The compiler always counts as found: CMake has already selected one.
+  set(Compiler_FOUND TRUE)
+  set(Compiler_VERSION "${CMAKE_CXX_COMPILER_VERSION}")
+
+  set(NVFUSER_REQUIREMENT_Compiler_OPTIONAL FALSE)
+  if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
+    set(NVFUSER_REQUIREMENT_Compiler_VERSION_MIN ${NVFUSER_REQUIREMENT_GNU_VERSION_MIN})
+  elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
+    set(NVFUSER_REQUIREMENT_Compiler_VERSION_MIN ${NVFUSER_REQUIREMENT_Clang_VERSION_MIN})
+  else()
+    # We do not have minimum version requirements for other compilers,
+    # set optional to true to allow the build to continue in those cases.
+    set(NVFUSER_REQUIREMENT_Compiler_OPTIONAL TRUE)
+    message(WARNING "Unknown compiler '${CMAKE_CXX_COMPILER_ID}' - cannot validate")
+  endif()
+
+  set_dependency_report_status(Compiler)
+
+  # Cache RUN_HAVE_STD_REGEX so that incremental builds keep working.
+  # Without this, when cross compiling we end up having to blow away the build
+  # directory and rebuild from scratch.
+  if(CMAKE_CROSSCOMPILING)
+    if(COMPILE_HAVE_STD_REGEX)
+      set(RUN_HAVE_STD_REGEX 0 CACHE INTERNAL "Cache RUN_HAVE_STD_REGEX output for cross-compile.")
+    endif()
+  endif()
+
+endmacro()
diff --git a/cmake/deps/handle_cuda_toolkit.cmake b/cmake/deps/handle_cuda_toolkit.cmake
new file mode 100644
index 00000000000..dcb5bd26362
--- /dev/null
+++ b/cmake/deps/handle_cuda_toolkit.cmake
@@ -0,0 +1,18 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025-present NVIDIA CORPORATION & AFFILIATES.
+# All rights reserved.
+# SPDX-License-Identifier: BSD-3-Clause
+
+# ------------------------------------------------------------------------------
+# CUDA Toolkit Handler
+# ------------------------------------------------------------------------------
+
+macro(handle_cuda_toolkit)
+  message("")
+  message("Finding CUDAToolkit...")
+
+  # MAYBE_REQUIRED and the component list are not set in this file (presumably by the caller)
+  find_package(CUDAToolkit ${MAYBE_REQUIRED} COMPONENTS ${NVFUSER_REQUIREMENT_CUDAToolkit_COMPONENTS})
+
+  # Use common status function
+  set_dependency_report_status(CUDAToolkit)
+endmacro()
diff --git a/cmake/deps/handle_git_submodules.cmake b/cmake/deps/handle_git_submodules.cmake
new file mode 100644
index 00000000000..4135f5a84b6
--- /dev/null
+++ b/cmake/deps/handle_git_submodules.cmake
@@ -0,0 +1,55 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025-present NVIDIA CORPORATION & AFFILIATES.
+# All rights reserved.
+# SPDX-License-Identifier: BSD-3-Clause
+
+# ------------------------------------------------------------------------------
+# Git Submodules Handler
+# ------------------------------------------------------------------------------
+
+macro(handle_git_submodules)
+  message("")
+  message("Checking Git Submodules...")
+
+  # Find git executable
+  find_package(Git QUIET)
+
+  if(GIT_FOUND)
+    # Use 'git submodule status' which only reads state, never modifies
+    # This command shows:
+    # - (no prefix) = submodule is initialized and up to date
+    # - '-' prefix = submodule is not initialized
+    # - '+' prefix = submodule is initialized but checked out to different commit than expected
+    # - 'U' prefix = submodule has merge conflicts
+    execute_process(
+      COMMAND "${GIT_EXECUTABLE}" submodule status
+      WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}"
+      OUTPUT_VARIABLE SUBMODULE_STATUS
+      ERROR_VARIABLE SUBMODULE_ERROR
+      RESULT_VARIABLE SUBMODULE_RESULT
+      OUTPUT_STRIP_TRAILING_WHITESPACE
+    )
+
+    if(SUBMODULE_RESULT EQUAL 0)
+      # Find lines starting with '-'. Use the CMake escape \n: a regex-level \\n matches a literal 'n'.
+      string(REGEX MATCH "(^|\n)-" HAS_UNINITIALIZED_SUBMODULES "${SUBMODULE_STATUS}")
+
+      if(HAS_UNINITIALIZED_SUBMODULES)
+        set(GitSubmodules_FOUND FALSE)
+        message(STATUS "Git submodules: NOT initialized")
+        message(STATUS " Run: git submodule update --init --recursive")
+      else()
+        set(GitSubmodules_FOUND TRUE)
+        message(STATUS "Git submodules: initialized")
+      endif()
+    else()
+      message(WARNING "Failed to check git submodule status: ${SUBMODULE_ERROR}")
+      set(GitSubmodules_FOUND FALSE)
+    endif()
+  else()
+    message(WARNING "Git not found - cannot check submodule status")
+    set(GitSubmodules_FOUND FALSE)
+  endif()
+
+  # Use common status function
+  set_dependency_report_status(GitSubmodules)
+endmacro()
diff --git a/cmake/deps/handle_llvm.cmake b/cmake/deps/handle_llvm.cmake
new file mode 100644
index 00000000000..7842330df9b
--- /dev/null
+++ b/cmake/deps/handle_llvm.cmake
@@ -0,0 +1,34 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025-present NVIDIA CORPORATION & AFFILIATES.
+# All rights reserved.
+# SPDX-License-Identifier: BSD-3-Clause
+
+# ------------------------------------------------------------------------------
+# LLVM Handler with Component Mapping
+# ------------------------------------------------------------------------------
+
+macro(handle_llvm)
+  message("")
+  message("Finding LLVM...")
+
+  # Direct find_package call
+  find_package(LLVM ${MAYBE_REQUIRED})
+
+  # Use common status function
+  set_dependency_report_status(LLVM)
+
+  # Map the listed LLVM components to library names, stored in LLVM_LIBS
+  if(LLVM_FOUND)
+    llvm_map_components_to_libnames(LLVM_LIBS
+      support
+      core
+      orcjit
+      executionengine
+      irreader
+      nativecodegen
+      Target
+      Analysis
+      JITLink
+      Demangle
+    )
+  endif()
+endmacro()
diff --git a/cmake/deps/handle_ninja.cmake b/cmake/deps/handle_ninja.cmake
new file mode 100644
index 00000000000..6ade14ea4f6
--- /dev/null
+++ b/cmake/deps/handle_ninja.cmake
@@ -0,0 +1,25 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025-present NVIDIA CORPORATION & AFFILIATES.
+# All rights reserved.
+# SPDX-License-Identifier: BSD-3-Clause
+
+# ------------------------------------------------------------------------------
+# Ninja Build Tool Handler
+# ------------------------------------------------------------------------------
+#
+# Checks if the Ninja build system is being used as the CMake generator.
+# Note: This check happens after generator selection, so it only reports status.
+# To use Ninja, specify it when running CMake: cmake -G Ninja ..
+
+macro(handle_ninja)
+  message("")
+  message("Finding Ninja...")
+
+  # CMAKE_GENERATOR is already set here; accept both "Ninja" and "Ninja Multi-Config"
+  if(CMAKE_GENERATOR MATCHES "^Ninja")
+    set(Ninja_FOUND TRUE)
+  else()
+    set(Ninja_FOUND FALSE)
+  endif()
+
+  set_dependency_report_status(Ninja)
+endmacro()
diff --git a/cmake/deps/handle_nvmmh.cmake b/cmake/deps/handle_nvmmh.cmake
new file mode 100644
index 00000000000..14898648280
--- /dev/null
+++ b/cmake/deps/handle_nvmmh.cmake
@@ -0,0 +1,35 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025-present NVIDIA CORPORATION & AFFILIATES.
+# All rights reserved.
+# SPDX-License-Identifier: BSD-3-Clause
+
+# ------------------------------------------------------------------------------
+# nvidia-matmul-heuristics Handler
+# ------------------------------------------------------------------------------
+
+macro(handle_nvmmh)
+  message("")
+  message("Finding nvidia-matmul-heuristics...")
+
+  set(NVMMH_INCLUDE_DIR "NVMMH_INCLUDE_DIR-NOTFOUND" CACHE PATH "Directory containing nvMatmulHeuristics.h")
+
+  if (NOT IS_DIRECTORY "${NVMMH_INCLUDE_DIR}")
+    # Search only Python's site-packages; NO_DEFAULT_PATH disables every other location
+    find_path(NVMMH_INCLUDE_DIR nvMatmulHeuristics.h
+      PATHS
+        "${Python_SITELIB}/nvidia/nvMatmulHeuristics/include"
+      NO_DEFAULT_PATH
+    )
+  endif()
+
+  if(IS_DIRECTORY "${NVMMH_INCLUDE_DIR}")
+    set(NVMMH_FOUND TRUE)
+    string(APPEND CMAKE_CXX_FLAGS " -DHAS_NVMMH=1")
+    message(STATUS "Found nvidia-matmul-heuristics: ${NVMMH_INCLUDE_DIR}")
+  else()
+    set(NVMMH_FOUND FALSE)
+    message(WARNING "nvidia-matmul-heuristics headers not found – building without nvMatmulHeuristics support")
+  endif()
+
+  # Use common status function
+  set_dependency_report_status(NVMMH)
+endmacro()
diff --git a/cmake/deps/handle_pybind11.cmake b/cmake/deps/handle_pybind11.cmake
new file mode 100644
index 00000000000..3b2584fd2b2
--- /dev/null
+++ b/cmake/deps/handle_pybind11.cmake
@@ -0,0 +1,18 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025-present NVIDIA CORPORATION & AFFILIATES.
+# All rights reserved.
+# SPDX-License-Identifier: BSD-3-Clause
+
+# ------------------------------------------------------------------------------
+# pybind11 Handler
+# ------------------------------------------------------------------------------
+
+macro(handle_pybind11)
+  message("")
+  message("Finding pybind11...")
+
+  # MAYBE_REQUIRED is not set in this file; presumably the caller sets it - TODO confirm
+  find_package(pybind11 ${MAYBE_REQUIRED})
+
+  # Hand the result to set_dependency_report_status (defined outside this file)
+  set_dependency_report_status(pybind11)
+endmacro()
diff --git a/cmake/deps/handle_python.cmake b/cmake/deps/handle_python.cmake
new file mode 100644
index 00000000000..d624fa31615
--- /dev/null
+++ b/cmake/deps/handle_python.cmake
@@ -0,0 +1,18 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025-present NVIDIA CORPORATION & AFFILIATES.
+# All rights reserved.
+# SPDX-License-Identifier: BSD-3-Clause
+
+# ------------------------------------------------------------------------------
+# Python Handler
+# ------------------------------------------------------------------------------
+
+macro(handle_python)
+  message("")
+  message("Finding Python...")
+
+  # MAYBE_REQUIRED and the component list are not set in this file (presumably by the caller)
+  find_package(Python ${MAYBE_REQUIRED} COMPONENTS ${NVFUSER_REQUIREMENT_Python_COMPONENTS})
+
+  # Hand the result to set_dependency_report_status (defined outside this file)
+  set_dependency_report_status(Python)
+endmacro()
diff --git a/cmake/deps/handle_torch.cmake b/cmake/deps/handle_torch.cmake
new file mode 100644
index 00000000000..e15b75691ce
--- /dev/null
+++ b/cmake/deps/handle_torch.cmake
@@ -0,0 +1,94 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025-present NVIDIA CORPORATION & AFFILIATES.
+# All rights reserved.
+# SPDX-License-Identifier: BSD-3-Clause
+
+# ------------------------------------------------------------------------------
+# PyTorch Handler with CUDA Constraint Validation
+# ------------------------------------------------------------------------------
+
+macro(handle_torch)
+  message("")
+  message("Finding Torch...")
+
+  # Setup: Query Python for Torch path
+  if(Python_FOUND)
+    execute_process(
+      COMMAND "${Python_EXECUTABLE}" -c "import torch; print(torch.utils.cmake_prefix_path)"
+      OUTPUT_VARIABLE TORCH_CMAKE_PATH
+      OUTPUT_STRIP_TRAILING_WHITESPACE
+      RESULT_VARIABLE TORCH_FIND_RESULT
+    )
+
+    if(TORCH_FIND_RESULT EQUAL 0)
+      # Setup environment for Torch find_package
+      list(APPEND CMAKE_PREFIX_PATH "${TORCH_CMAKE_PATH}")
+
+      # Direct find_package call
+      find_package(Torch ${MAYBE_REQUIRED})
+
+    else()
+      set(Torch_FOUND FALSE)
+    endif()
+  else()
+    set(Torch_FOUND FALSE)
+  endif()
+
+  # Use common status function for basic version check
+  set_dependency_report_status(Torch)
+
+  if(Torch_FOUND)
+    # Set CUDA_ARCH for cu tests
+    if(TORCH_CUDA_ARCH_LIST)
+      set(ARCH_FLAGS)
+      cuda_select_nvcc_arch_flags(ARCH_FLAGS ${TORCH_CUDA_ARCH_LIST})
+      list(APPEND CUDA_NVCC_FLAGS ${ARCH_FLAGS})
+    endif()
+
+    # CXX flags necessary for https://github.com/pytorch/pytorch/issues/98093
+    string(APPEND CMAKE_CXX_FLAGS " ${TORCH_CXX_FLAGS}")
+  endif()
+
+  # Additional validation: Check CUDA constraint
+  # This must happen AFTER set_dependency_report_status since we need Torch to be found
+  if(Torch_FOUND AND CUDAToolkit_FOUND)
+    # Query torch Python package for CUDA version
+    execute_process(
+      COMMAND "${Python_EXECUTABLE}" -c "import torch; print(torch.version.cuda if torch.version.cuda else 'N/A')"
+      OUTPUT_VARIABLE torch_cuda_version
+      OUTPUT_STRIP_TRAILING_WHITESPACE
+      RESULT_VARIABLE torch_cuda_result
+    )
+
+    if(torch_cuda_result EQUAL 0 AND NOT torch_cuda_version STREQUAL "N/A" AND NOT torch_cuda_version STREQUAL "None")
+      # Get CUDAToolkit version (major.minor only for comparison)
+      set(cuda_toolkit_version "${CUDAToolkit_VERSION_MAJOR}.${CUDAToolkit_VERSION_MINOR}")
+
+      # Extract major.minor from Torch CUDA version
+      string(REGEX MATCH "^([0-9]+\\.[0-9]+)" torch_cuda_major_minor "${torch_cuda_version}")
+
+      # Check if regex succeeded
+      if(NOT torch_cuda_major_minor OR torch_cuda_major_minor STREQUAL "")
+        # Regex failed - unable to parse version
+        set(Torch_CUDA_constraint_status "not_available")
+        message(WARNING "Unable to parse Torch CUDA version: ${torch_cuda_version}")
+      elseif(NOT torch_cuda_major_minor STREQUAL cuda_toolkit_version)
+        # Version mismatch
+        set(Torch_CUDA_constraint_status "mismatch")
+        set(Torch_CUDA_constraint_found "${torch_cuda_major_minor}")
+        set(Torch_CUDA_constraint_required "${cuda_toolkit_version}")
+        # Mark dependencies as failed (Torch_CUDA constraint is required)
+        set(NVFUSER_DEPENDENCIES_OK FALSE)
+      else()
+        # Versions match!
+        set(Torch_CUDA_constraint_status "match")
+        set(Torch_CUDA_constraint_version "${torch_cuda_major_minor}")
+      endif()
+    else()
+      # Torch might not have CUDA support or query failed
+      set(Torch_CUDA_constraint_status "not_available")
+    endif()
+  else()
+    # Can't validate if CUDAToolkit wasn't found
+    set(Torch_CUDA_constraint_status "not_available")
+  endif()
+endmacro()
diff --git a/cutlass/CMakeLists.txt b/cutlass/CMakeLists.txt
new file mode 100644
index 00000000000..fd21d79f7fb
--- /dev/null
+++ b/cutlass/CMakeLists.txt
@@ -0,0 +1,72 @@
+set(NVF_CUTLASS_CUDA_FLAGS
+  "-DCUTE_USE_PACKED_TUPLE=1"
+  "-DCUTLASS_ENABLE_TENSOR_CORE_MMA=1"
+  "-DCUTLASS_VERSIONS_GENERATED"
+  "-DCUTLASS_TEST_LEVEL=0"
+  "-DCUTLASS_TEST_ENABLE_CACHED_RESULTS=1"
+  "-DCUTLASS_DEBUG_TRACE_LEVEL=0"
+  "--expt-relaxed-constexpr"
+  "--expt-extended-lambda"
+  # --threads=0 lets nvcc use as many threads as there are CPUs on the machine
+  "--threads=0"
+  # -----------------
+  # Suppress warnings
+  # -----------------
+  "-Xcompiler=-Wconversion"
+  "-Xcompiler=-fno-strict-aliasing"
+  # CUDA 13 has deprecated old vector types such as ulong4: https://developer.nvidia.com/blog/whats-new-and-important-in-cuda-toolkit-13-0
+  "-Xcompiler=-Wno-deprecated-declarations"
+)
+
+set(NVFUSER_CUTLASS_SRCS)
+list(APPEND NVFUSER_CUTLASS_SRCS
+  ${NVFUSER_CUTLASS}/group_mm.cu
+  ${NVFUSER_CUTLASS}/mxfp8_scaled_mm.cu
+  ${NVFUSER_CUTLASS}/nvfp4_scaled_mm.cu
+  ${NVFUSER_CUTLASS}/nvfp4_scaled_mm_blockscale.cu
+  ${NVFUSER_CUTLASS}/nvfp4_scaled_group_mm.cu
+  ${NVFUSER_CUTLASS}/nvf_cutlass.cpp
+  ${NVFUSER_CUTLASS}/cutlass_utils.cpp
+)
+add_library(nvf_cutlass SHARED ${NVFUSER_CUTLASS_SRCS})
+
+target_include_directories(nvf_cutlass PRIVATE ${NVFUSER_THIRD_PARTY_DIR}/cutlass/include)
+target_include_directories(nvf_cutlass PRIVATE ${NVFUSER_THIRD_PARTY_DIR}/cutlass/tools/util/include)
+target_compile_options(nvf_cutlass PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:${NVF_CUTLASS_CUDA_FLAGS}>)
+if(NOT MSVC)
+  set(NVF_LIB_SUFFIX ".so")
+else()
+  set(NVF_LIB_SUFFIX ".pyd")
+endif()
+
+target_include_directories(nvf_cutlass PUBLIC # NOTE(review): the "$" entries look like truncated generator expressions - restore from upstream
+  "$"
+  "$"
+  "$"
+)
+target_link_libraries(nvf_cutlass PRIVATE "${TORCH_LIBRARIES}" c10)
+set_target_properties(nvf_cutlass PROPERTIES
+  C_STANDARD ${NVFUSER_C_STANDARD}
+  CUDA_STANDARD ${NVFUSER_CUDA_STANDARD}
+  CXX_STANDARD ${NVFUSER_CPP_STANDARD}
+  CXX_STANDARD_REQUIRED ON
+  CXX_VISIBILITY_PRESET hidden
+  INSTALL_RPATH
+  "$ORIGIN/../../nvidia/cuda_runtime/lib:$ORIGIN/../../nvidia/cuda_nvrtc/lib:$ORIGIN/../../nvidia/cuda_cupti/lib:$ORIGIN/../../torch/lib"
+  LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}"
+  POSITION_INDEPENDENT_CODE Yes
+  VISIBILITY_INLINES_HIDDEN Yes
+  CUDA_ARCHITECTURES "100a"
+)
+# Our CUTLASS kernels require substantially more memory to compile—up to 6
+# GB per file. To avoid exhausting system memory, it's helpful to limit
+# concurrency specifically for these kernels, without throttling other
+# compilation units. A pool of size 2 works well in practice: there are
+# only a handful of CUTLASS kernel files, so this restriction has little
+# impact on overall build time.
+set(CUTLASS_MAX_JOBS 2 CACHE STRING "Max concurrent CUTLASS CUDA compiles (0 = no limit)" )
+if(CUTLASS_MAX_JOBS GREATER 0)
+  set_property(GLOBAL APPEND PROPERTY JOB_POOLS cutlass=${CUTLASS_MAX_JOBS})
+  set_target_properties(nvf_cutlass PROPERTIES JOB_POOL_COMPILE cutlass)
+endif()
+install(TARGETS nvf_cutlass EXPORT NvfuserTargets DESTINATION lib)
diff --git a/python/tools/check_dependencies.py b/python/tools/check_dependencies.py
new file mode 100644
index 00000000000..43628e52314
--- /dev/null
+++ b/python/tools/check_dependencies.py
@@ -0,0 +1,183 @@
+#!/usr/bin/env python3
+# SPDX-FileCopyrightText: Copyright (c) 2025-present NVIDIA CORPORATION & AFFILIATES.
+# All rights reserved.
+# SPDX-License-Identifier: BSD-3-Clause
+"""
+nvFuser Dependency Report Generator
+
+Reads dependency data from JSON (generated by CMake) and prints
+a comprehensive, user-friendly report with colored output and
+actionable installation instructions for missing dependencies.
+
+IMPORTANT: CMake is the source of truth for all dependency requirements.
+This script only formats output and provides help text.
+"""
+
+import json
+import sys
+from pathlib import Path
+from typing import Dict
+
+from prereqs import detect_platform, format_platform_info
+from prereqs.colors import colorize, Colors
+from prereqs.requirements import (
+    PythonRequirement,
+    TorchRequirement,
+    LLVMRequirement,
+    CUDAToolkitRequirement,
+    Pybind11Requirement,
+    CompilerRequirement,
+    NinjaRequirement,
+    NVMMHRequirement,
+    GitSubmodulesRequirement,
+)
+
+
+class DependencyReporter:
+    """
+    Generates formatted dependency reports from JSON data with help text.
+
+    CMake provides all dependency data via JSON. This class formats the output
+    and generates platform-specific installation instructions for failures.
+    """
+
+    def __init__(self, deps_path: Path):
+        # Load CMake variables
+        cmake_vars = self._load_cmake_vars(deps_path)
+
+        self.colors = Colors()
+        self.platform_info = detect_platform()
+
+        # Create requirement objects - each class defines its own name and variable names
+        self.requirements = []
+        self.requirements.append(NinjaRequirement(cmake_vars))
+        self.requirements.append(GitSubmodulesRequirement(cmake_vars))
+        self.requirements.append(CompilerRequirement(cmake_vars))
+        self.requirements.append(PythonRequirement(cmake_vars))
+        self.requirements.append(CUDAToolkitRequirement(cmake_vars))
+        self.requirements.append(TorchRequirement(cmake_vars))
+        self.requirements.append(Pybind11Requirement(cmake_vars))
+        self.requirements.append(LLVMRequirement(cmake_vars))
+        self.requirements.append(NVMMHRequirement(cmake_vars))
+
+    def _load_cmake_vars(self, deps_path: Path) -> Dict:
+        """Load CMake variables from JSON file"""
+        try:
+            with open(deps_path, "r") as f:
+                data = json.load(f)
+            return data.get("cmake_vars", {})
+        except FileNotFoundError:
+            print(f"Error: {deps_path} not found", file=sys.stderr)
+            sys.exit(1)
+        except Exception as e:
+            print(f"Error loading dependencies: {e}", file=sys.stderr)
+            sys.exit(1)
+
+    def generate_report(self):
+        """Main entry point - prints formatted report"""
+        print()  # Blank line before report
+        print("=" * 80)
+        self._print_header()
+        print("=" * 80)
+        print()  # Blank line after header
+        self._print_dependencies()
+        print()  # Blank line after dependencies
+        print("* Optional requirement")
+        print("=" * 80)
+
+        # Collect failures and issues to show help for
+        # Include required failures (is_failure=True) and optional issues (not SUCCESS)
+        failures = []
+        for req in self.requirements:
+            if hasattr(req, "is_failure") and req.is_failure():
+                # Required dependency that failed
+                failures.append(req)
+            elif (
+                hasattr(req, "status")
+                and hasattr(req, "optional")
+                and req.optional
+                and req.status != "SUCCESS"
+            ):
+                # Optional dependency with issues (NOT_FOUND or INCOMPATIBLE)
+                failures.append(req)
+
+        if failures:
+            self._print_help_section(failures)
+            # Only print failure summary if there are actual (non-optional) failures
+            required_failures = [
+                req
+                for req in self.requirements
+                if hasattr(req, "is_failure") and req.is_failure()
+            ]
+            if required_failures:
+                self._print_failure_summary()
+
+        print()  # Blank line at end
+
+    def _print_header(self):
+        """Print report header with platform information"""
+        platform_str = format_platform_info(self.platform_info)
+        print(
+            colorize(
+                self.colors.BOLD_GREEN, "[nvFuser] Validating build prerequisites..."
+            )
+        )
+        print(colorize(self.colors.CYAN, "Platform: ") + platform_str)
+
+    def _print_failure_summary(self):
+        """Print failure summary message"""
+        print()
+        print(colorize(self.colors.BOLD_RED, "Build prerequisite validation FAILED"))
+        print("See installation instructions above")
+
+    def _print_dependencies(self):
+        """Print status for each dependency using OOP requirement classes"""
+        for req in self.requirements:
+            if hasattr(req, "format_status_line"):
+                # OOP: use requirement's format method
+                print(req.format_status_line(self.colors))
+            else:
+                # Fallback: use legacy dict-based formatting (shouldn't happen)
+                print(f"[nvFuser] ? {getattr(req, 'name', 'Unknown')}")
+
+    def _print_help_section(self, failures):
+        """Print help section header and help for each failed dependency"""
+        print()
+        print("=" * 70)
+        print("Installation Instructions")
+        print("=" * 70)
+        print()
+
+        for req in failures:
+            self._print_help_for_requirement(req)
+
+    def _print_help_for_requirement(self, req):
+        """Call requirement's help generation method"""
+        if hasattr(req, "generate_help"):
+            req.generate_help(self.platform_info)
+        else:
+            # Fallback for requirements without help
+            print(f"{req.name} installation help not available")
+            print()
+
+
+def main():
+    if len(sys.argv) != 2:
+        print("Usage: check_dependencies.py <dependencies.json>", file=sys.stderr)
+        sys.exit(1)
+
+    json_path = Path(sys.argv[1])
+    if not json_path.exists():
+        print(f"Error: {json_path} not found", file=sys.stderr)
+        sys.exit(1)
+
+    # Generate report
+    reporter = DependencyReporter(json_path)
+    reporter.generate_report()
+
+    # Report generated successfully
+    sys.exit(0)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/python/tools/prereqs/__init__.py b/python/tools/prereqs/__init__.py
new file mode 100644
index 00000000000..6627bbd5f46
--- /dev/null
+++ b/python/tools/prereqs/__init__.py
@@ -0,0 +1,105 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025-present NVIDIA CORPORATION & AFFILIATES.
+# All rights reserved.
+# SPDX-License-Identifier: BSD-3-Clause
+"""
+nvFuser prerequisite utilities package.
+
+IMPORTANT DESIGN PRINCIPLE:
+===========================
+CMake is the source of truth for ALL dependency requirements and validation.
+
+- CMake defines version requirements in: cmake/DependencyRequirements.cmake
+- CMake finds dependencies and validates versions
+- CMake exports all data to JSON: build/nvfuser_dependencies.json
+- Python reads JSON and formats output with helpful instructions
+
+This package provides ONLY:
+- Platform detection utilities
+- Version parsing/formatting utilities
+- URL generators for downloads
+- Utilities for formatting help text
+
+This package does NOT:
+- Define version requirements (CMake does)
+- Find or validate dependencies (CMake does)
+- Determine build success/failure (CMake does)
+
+Usage:
+    from prereqs import detect_platform, pytorch_index_url, llvm_download_url
+
+    platform_info = detect_platform()
+    if platform_info["ubuntu_based"]:
+        print("Use apt for installation")
+
+    url = pytorch_index_url((13, 1))
+    print(f"Install PyTorch: pip install torch --index-url {url}")
+"""
+
+# Platform detection
+from .platform import detect_platform, format_platform_info
+
+# Version utilities
+from .requirement_utils import (
+    Requirement as RequirementUtil,
+    parse_version,
+    format_version,
+    CUDA_AVAILABLE,
+)
+
+# URL generators
+from .requirement_utils import (
+    pytorch_index_url,
+    pytorch_install_instructions,
+    llvm_download_url,
+    cuda_toolkit_download_url,
+)
+
+# Exception (included but not used in reporting)
+from .exceptions import PrerequisiteMissingError
+
+# Requirement classes (OOP abstraction)
+from .requirements import (
+    VersionRequirement,
+    BooleanRequirement,
+    RequirementStatus,
+    # Explicit requirement classes
+    PythonRequirement,
+    TorchRequirement,
+    LLVMRequirement,
+    CUDAToolkitRequirement,
+    Pybind11Requirement,
+    CompilerRequirement,
+    GitSubmodulesRequirement,
+    NinjaRequirement,
+)
+
+__all__ = [
+    # Platform
+    "detect_platform",
+    "format_platform_info",
+    # Requirements (legacy utility class)
+    "RequirementUtil",
+    "parse_version",
+    "format_version",
+    "CUDA_AVAILABLE",
+    # URL generators
+    "pytorch_index_url",
+    "pytorch_install_instructions",
+    "llvm_download_url",
+    "cuda_toolkit_download_url",
+    # Exception
+    "PrerequisiteMissingError",
+    # Base requirement classes
+    "VersionRequirement",
+    "BooleanRequirement",
+    "RequirementStatus",
+    # Explicit requirement classes
+    "PythonRequirement",
+    "TorchRequirement",
+    "LLVMRequirement",
+    "CUDAToolkitRequirement",
+    "Pybind11Requirement",
+    "CompilerRequirement",
+    "GitSubmodulesRequirement",
+    "NinjaRequirement",
+]
diff --git a/python/tools/prereqs/colors.py b/python/tools/prereqs/colors.py
new file mode 100644
index 00000000000..14d0352abe6
--- /dev/null
+++ b/python/tools/prereqs/colors.py
@@ -0,0 +1,51 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025-present NVIDIA CORPORATION & AFFILIATES.
+# All rights reserved.
+# SPDX-License-Identifier: BSD-3-Clause
+"""
+Color utilities for terminal output.
+"""
+
+import os
+
+
+class Colors:
+    """ANSI color codes for terminal output"""
+
+    _codes = {
+        "RESET": "\033[m",
+        "BOLD": "\033[1m",
+        # Regular colors
+        "GREEN": "\033[32m",
+        "YELLOW": "\033[33m",
+        "CYAN": "\033[36m",
+        "WHITE": "\033[37m",
+        # Bold colors
+        "BOLD_RED": "\033[1;31m",
+        "BOLD_GREEN": "\033[1;32m",
+        "BOLD_WHITE": "\033[1;37m",
+    }
+
+    def __init__(self):
+        use_colors = os.environ.get("NVFUSER_BUILD_DISABLE_COLOR") is None
+
+        for name, code in self._codes.items():
+            setattr(self, name, code if use_colors else "")
+
+
+def colorize(color: str, text: str) -> str:
+    """Helper to wrap text with color and reset codes.
+
+    Args:
+        color: The color code (e.g., colors.GREEN, colors.BOLD_RED)
+        text: The text to colorize
+
+    Returns:
+        Text wrapped with color codes: <color>text<reset> (just text if color is "")
+
+    Example:
+        >>> colors = Colors()
+        >>> print(colorize(colors.GREEN, "Success") + " - operation completed")
+        # Prints "Success" in green, followed by plain text
+    """
+    reset = "\033[m" if color else ""  # Colors() yields "" when colors are disabled
+    return f"{color}{text}{reset}"
diff --git a/python/tools/prereqs/exceptions.py b/python/tools/prereqs/exceptions.py
new file mode 100644
index 00000000000..2830ed5c48c
--- /dev/null
+++ b/python/tools/prereqs/exceptions.py
@@ -0,0 +1,23 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025-present NVIDIA CORPORATION & AFFILIATES.
+# All rights reserved.
+# SPDX-License-Identifier: BSD-3-Clause
+"""
+Custom exceptions for nvFuser prerequisite validation.
+
+These exceptions provide structured error handling for build prerequisite
+checks, enabling clear and actionable error messages.
+"""
+
+
+class PrerequisiteMissingError(Exception):
+    """
+    Raised when a prerequisite for building nvFuser is missing or has an incorrect version.
+
+    This exception should include:
+    - What prerequisite is missing or incorrect
+    - Why it's required
+    - Exact commands to install or fix it
+    - Platform-specific guidance when applicable
+    """
+
+    pass
diff --git a/python/tools/prereqs/platform.py b/python/tools/prereqs/platform.py
new file mode 100644
index 00000000000..18558c05b9d
--- /dev/null
+++ b/python/tools/prereqs/platform.py
@@ -0,0 +1,129 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025-present NVIDIA CORPORATION & AFFILIATES.
+# All rights reserved.
+# SPDX-License-Identifier: BSD-3-Clause
+"""
+Platform detection utilities for nvFuser build system.
+
+Detects OS, architecture, and Linux distribution to provide platform-specific
+error messages and installation guidance.
+"""
+
+import platform
+from typing import Dict, Optional
+
+
+def detect_platform() -> Dict[str, Optional[str]]:
+    """
+    Detect the current platform and return structured information.
+
+    Returns:
+        dict: Platform information with keys:
+            - 'os': Operating system (Linux, Darwin, Windows, etc.)
+            - 'arch': Architecture (x86_64, aarch64, arm64, etc.)
+            - 'distro': Linux distribution ID (ubuntu, debian, rhel, etc.) or None
+            - 'distro_version': Distribution version (22.04, 20.04, etc.) or None
+            - 'distro_name': Human-readable distribution name or None
+            - 'ubuntu_based': Boolean indicating if this is Ubuntu-based distro
+
+    Example:
+        >>> info = detect_platform()
+        >>> print(info['os'])
+        'Linux'
+        >>> print(info['distro'])
+        'ubuntu'
+    """
+    system = platform.system()
+    machine = platform.machine()
+
+    # Initialize distro information
+    distro_info = {}
+    distro_id = None
+    distro_version = None
+    distro_name = None
+    ubuntu_based = False
+
+    # Detect Linux distribution from /etc/os-release
+    if system == "Linux":
+        try:
+            with open("/etc/os-release") as f:
+                for line in f:
+                    line = line.strip()
+                    if "=" in line:
+                        key, value = line.split("=", 1)
+                        # Remove quotes from value
+                        distro_info[key] = value.strip('"').strip("'")
+
+            distro_id = distro_info.get("ID", "unknown")
+            distro_version = distro_info.get("VERSION_ID", "unknown")
+            distro_name = distro_info.get("NAME", "unknown")
+
+            # Check if Ubuntu-based (useful for PPA availability)
+            ubuntu_based = distro_id in (
+                "ubuntu",
+                "debian",
+                "linuxmint",
+                "pop",
+                "zorin",
+            )
+
+        except FileNotFoundError:
+            # /etc/os-release doesn't exist (not a standard Linux or very old system)
+            distro_id = "unknown"
+            distro_version = "unknown"
+            distro_name = "unknown"
+        except Exception as e:
+            # Other errors reading/parsing the file
+            distro_id = f"error: {e}"
+            distro_version = "unknown"
+            distro_name = "unknown"
+
+    return {
+        "os": system,
+        "arch": machine,
+        "distro": distro_id,
+        "distro_version": distro_version,
+        "distro_name": distro_name,
+        "ubuntu_based": ubuntu_based,
+    }
+
+
+def format_platform_info(
+    platform_info: Optional[Dict[str, Optional[str]]] = None
+) -> str:
+    """
+    Format platform information as a human-readable string.
+
+    Args:
+        platform_info: Platform information dict from detect_platform().
+            If None, will call detect_platform() automatically.
+
+    Returns:
+        str: Formatted platform string like "Linux x86_64 (Ubuntu 22.04)"
+
+    Example:
+        >>> print(format_platform_info())
+        'Linux x86_64 (Ubuntu 22.04)'
+    """
+    if platform_info is None:
+        platform_info = detect_platform()
+
+    os_name = platform_info["os"]
+    arch = platform_info["arch"]
+
+    # Build distro info if available
+    distro_parts = []
+    distro = platform_info.get("distro")
+    # detect_platform() reports a read failure as "error: <details>", so match
+    # on the prefix; an exact comparison against "error" would never trigger.
+    if distro and distro != "unknown" and not distro.startswith("error"):
+        distro_parts.append(distro.capitalize())
+    if (
+        platform_info.get("distro_version")
+        and platform_info["distro_version"] != "unknown"
+    ):
+        distro_parts.append(platform_info["distro_version"])
+
+    if distro_parts:
+        return f"{os_name} {arch} ({' '.join(distro_parts)})"
+    else:
+        return f"{os_name} {arch}"
diff --git a/python/tools/prereqs/requirement_utils.py b/python/tools/prereqs/requirement_utils.py
new file mode 100644
index 00000000000..7b29d3851e1
--- /dev/null
+++ b/python/tools/prereqs/requirement_utils.py
@@ -0,0 +1,257 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025-present NVIDIA CORPORATION & AFFILIATES.
+# All rights reserved.
+# SPDX-License-Identifier: BSD-3-Clause
+"""
+Utility functions for nvFuser dependency reporting.
+
+IMPORTANT: This module provides ONLY utility functions for formatting and URL generation.
+Version requirements and dependency validation are handled by CMake.
+
+CMake defines requirements in: cmake/DependencyRequirements.cmake
+CMake exports status to: build/nvfuser_dependencies.json
+Python reads JSON and uses these utilities to format help text.
+"""
+
+import platform
+import re
+from dataclasses import dataclass
+from typing import Optional, Tuple
+
+
+# =============================================================================
+# VERSION CONVERSION UTILITIES
+# =============================================================================
+
+
+def parse_version(version_str: str) -> Tuple[int, ...]:
+    """
+    Parse version string to tuple.
+
+    Args:
+        version_str: Version string like "3.8", "18.1.8", "13", "18.1.8git"
+
+    Returns:
+        Tuple of integers: (3, 8), (18, 1, 8), (13,), (18, 1, 8)
+
+    Examples:
+        >>> parse_version("3.8")
+        (3, 8)
+        >>> parse_version("18.1.8")
+        (18, 1, 8)
+        >>> parse_version("18.1.8git")  # strips non-numeric suffix
+        (18, 1, 8)
+
+    Raises:
+        ValueError: If version string cannot be parsed
+    """
+    # Strip suffixes like "git", "rc1", "+cu128"; the lookahead rejects dots-only input.
+    clean = re.match(r"^(?=\.*\d)[\d.]+", version_str.strip())
+    if not clean:
+        raise ValueError(f"Cannot parse version: {version_str}")
+
+    parts = clean.group().rstrip(".").split(".")
+    return tuple(int(p) for p in parts if p)
+
+
+def format_version(version: Tuple[int, ...]) -> str:
+    """
+    Format version tuple to string.
+
+    Args:
+        version: Tuple of integers like (3, 8), (18, 1, 8), (13,)
+
+    Returns:
+        Version string: "3.8", "18.1.8", "13"
+
+    Examples:
+        >>> format_version((3, 8))
+        '3.8'
+        >>> format_version((18, 1, 8))
+        '18.1.8'
+        >>> format_version((13,))
+        '13'
+    """
+    return ".".join(map(str, version))
+
+
+# =============================================================================
+# REQUIREMENT DATACLASS (Utility Only)
+# =============================================================================
+
+
+@dataclass
+class Requirement:
+    """
+    A version requirement utility class (NOT the source of truth).
+
+    NOTE: This is for utility methods only. Actual version requirements come from
+    CMake's DependencyRequirements.cmake and are exported via JSON.
+
+    Attributes:
+        name: Human-readable name (e.g., "CMake", "LLVM")
+        min_version: Minimum required version tuple, or None for "any version"
+        recommended: Recommended version tuple for download URLs (optional)
+    """
+
+    name: str
+    min_version: Optional[Tuple[int, ...]]
+    recommended: Optional[Tuple[int, ...]] = None
+
+    @property
+    def min_str(self) -> str:
+        """Minimum version as string: '3.18' or 'any'"""
+        if self.min_version is None:
+            return "any"
+        return format_version(self.min_version)
+
+    @property
+    def min_display(self) -> str:
+        """Minimum version for display: '3.18+' or 'any version'"""
+        if self.min_version is None:
+            return "any version"
+        return f"{self.min_str}+"
+
+    @property
+    def recommended_str(self) -> str:
+        """Recommended version as string, falls back to min_str"""
+        if self.recommended is None:
+            return self.min_str
+        return format_version(self.recommended)
+
+    def check(self, detected: Tuple[int, ...]) -> bool:
+        """
+        Check if detected version meets minimum requirement.
+
+        Args:
+            detected: Detected version tuple (e.g., from parse_version)
+
+        Returns:
+            True if detected >= min_version (or min_version is None)
+
+        Note:
+            Compares only as many parts as min_version specifies.
+            So (3, 22, 1) >= (3, 18) compares (3, 22) >= (3, 18) -> True
+        """
+        if self.min_version is None:
+            return True
+        width = len(self.min_version)  # zero-pad short versions: (3,) == (3, 0)
+        return (tuple(detected) + (0,) * width)[:width] >= self.min_version
+
+
+# =============================================================================
+# CUDA VERSIONS - For PyTorch wheel URLs
+# =============================================================================
+
+# PyTorch wheel CUDA versions currently available (newest first)
+CUDA_AVAILABLE = [(13, 1), (13, 0), (12, 8)]
+
+
+# =============================================================================
+# URL GENERATORS
+# =============================================================================
+
+
+def cuda_wheel_suffix(cuda: Tuple[int, int]) -> str:
+    """
+    Convert CUDA version tuple to PyTorch wheel suffix.
+
+    Examples:
+        >>> cuda_wheel_suffix((12, 8))
+        'cu128'
+        >>> cuda_wheel_suffix((13, 1))
+        'cu131'
+    """
+    return f"cu{cuda[0]}{cuda[1]}"
+
+
+def pytorch_index_url(cuda: Tuple[int, int]) -> str:
+    """
+    Generate PyTorch wheel index URL for a CUDA version.
+
+    Examples:
+        >>> pytorch_index_url((12, 8))
+        'https://download.pytorch.org/whl/cu128'
+        >>> pytorch_index_url((13, 1))
+        'https://download.pytorch.org/whl/cu131'
+    """
+    return f"https://download.pytorch.org/whl/{cuda_wheel_suffix(cuda)}"
+
+
+def pytorch_install_instructions(cuda_major: Optional[int] = None) -> str:
+    """
+    Generate PyTorch installation instructions.
+
+    Args:
+        cuda_major: If specified, only show instructions for this CUDA major version.
+            Otherwise show all available versions.
+ + Returns: + Multi-line string with pip install commands + """ + if cuda_major is not None: + # Filter to matching CUDA major version + matching = [cuda for cuda in CUDA_AVAILABLE if cuda[0] == cuda_major] + versions_to_show = matching if matching else CUDA_AVAILABLE + else: + versions_to_show = CUDA_AVAILABLE + + lines = [] + for cuda in versions_to_show: + lines.append(f" # For CUDA {format_version(cuda)}:") + lines.append(f" pip install torch --index-url {pytorch_index_url(cuda)}") + return "\n".join(lines) + + +def llvm_download_url(version: Optional[Tuple[int, ...]] = None) -> str: + """ + Generate LLVM prebuilt binary download URL. + + Args: + version: LLVM version tuple. If None, uses (18, 1, 8) as default. + + Returns: + GitHub release URL for prebuilt binary matching current platform + + Raises: + NotImplementedError: If platform is not supported + + Example: + >>> llvm_download_url((18, 1, 8)) # doctest: +SKIP + 'https://github.com/llvm/llvm-project/releases/download/llvmorg-18.1.8/clang+llvm-18.1.8-x86_64-linux-gnu-ubuntu-18.04.tar.xz' + """ + if version is None: + version = (18, 1, 8) + + v = format_version(version) + machine = platform.machine() + + if machine == "x86_64": + return ( + f"https://github.com/llvm/llvm-project/releases/download/" + f"llvmorg-{v}/clang+llvm-{v}-x86_64-linux-gnu-ubuntu-18.04.tar.xz" + ) + elif machine == "aarch64": + return ( + f"https://github.com/llvm/llvm-project/releases/download/" + f"llvmorg-{v}/clang+llvm-{v}-aarch64-linux-gnu.tar.xz" + ) + elif machine.startswith("arm64"): + # 64-bit ARM (macOS) + return ( + f"https://github.com/llvm/llvm-project/releases/download/" + f"llvmorg-{v}/clang+llvm-{v}-arm64-apple-macos11.tar.xz" + ) + else: + raise NotImplementedError( + f"LLVM prebuilt binaries not available for: {machine}" + ) + + +def cuda_toolkit_download_url() -> str: + """ + Return NVIDIA CUDA Toolkit download page URL. 
+ + Returns: + URL to CUDA downloads page + """ + return "https://developer.nvidia.com/cuda-downloads" diff --git a/python/tools/prereqs/requirements/__init__.py b/python/tools/prereqs/requirements/__init__.py new file mode 100644 index 00000000000..e5c6f0059e6 --- /dev/null +++ b/python/tools/prereqs/requirements/__init__.py @@ -0,0 +1,33 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-present NVIDIA CORPORATION & AFFILIATES. +# All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +"""Requirement class registry and factory.""" + +from .base import Requirement, VersionRequirement, BooleanRequirement, RequirementStatus +from .python import PythonRequirement +from .torch import TorchRequirement +from .llvm import LLVMRequirement +from .cuda_toolkit import CUDAToolkitRequirement +from .pybind11 import Pybind11Requirement +from .compiler import CompilerRequirement +from .git_submodules import GitSubmodulesRequirement +from .ninja import NinjaRequirement +from .nvmmh import NVMMHRequirement + +__all__ = [ + # Base classes + "Requirement", + "VersionRequirement", + "BooleanRequirement", + "RequirementStatus", + # Specific requirement classes + "PythonRequirement", + "TorchRequirement", + "LLVMRequirement", + "CUDAToolkitRequirement", + "Pybind11Requirement", + "CompilerRequirement", + "GitSubmodulesRequirement", + "NinjaRequirement", + "NVMMHRequirement", +] diff --git a/python/tools/prereqs/requirements/base.py b/python/tools/prereqs/requirements/base.py new file mode 100644 index 00000000000..5c46a7dfa0e --- /dev/null +++ b/python/tools/prereqs/requirements/base.py @@ -0,0 +1,274 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-present NVIDIA CORPORATION & AFFILIATES. +# All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +""" +Base classes for requirement types. + +This module provides the foundational classes for all dependency requirements. +Each requirement knows how to format its status and determine if it represents a failure. 
+""" + +from abc import ABC, abstractmethod +from typing import Optional, Dict +from dataclasses import dataclass + +from ..colors import colorize + + +@dataclass +class RequirementStatus: + """Validation status constants.""" + + SUCCESS = "SUCCESS" + NOT_FOUND = "NOT_FOUND" + INCOMPATIBLE = "INCOMPATIBLE" + + +class Requirement(ABC): + """ + Base class for all requirement types. + + All requirements must implement: + - format_status_line(): Format output for terminal + - is_failure(): Determine if this represents a build failure + """ + + def __init__( + self, + name: str, + cmake_vars: Dict, + found_var: str, + status_var: str, + optional_var: str, + location_var: Optional[str] = None, + ): + """ + Initialize from CMake variable names. + + Args: + name: Dependency name + cmake_vars: Dictionary of all CMake variables + found_var: Name of CMake variable for found status (e.g., "Python_FOUND") + status_var: Name of CMake variable for validation status (e.g., "Python_STATUS") + optional_var: Name of CMake variable for optional flag (e.g., "NVFUSER_REQUIREMENT_Python_OPTIONAL") + location_var: Optional name of CMake variable for location (e.g., "Python_EXECUTABLE") + """ + self.name = name + self.found = self._to_bool(cmake_vars.get(found_var, "FALSE")) + self.status = cmake_vars.get(status_var, "UNKNOWN") + self.optional = self._to_bool(cmake_vars.get(optional_var, "FALSE")) + + # Look up location if location_var is provided + self.location = cmake_vars.get(location_var) if location_var else None + + @staticmethod + def _to_bool(value) -> bool: + """Convert CMake boolean string to Python bool.""" + if isinstance(value, bool): + return value + if isinstance(value, str): + return value.upper() in ("TRUE", "ON", "YES", "1") + return bool(value) + + @abstractmethod + def format_status_line(self, colors) -> str: + """Format terminal status line for this requirement.""" + pass + + def is_failure(self) -> bool: + """Check if this requirement represents a failure.""" + 
return not self.optional and self.status != RequirementStatus.SUCCESS + + def get_failure_data(self): + """Get data for help text generation.""" + # Return a dict compatible with help system + return { + "name": self.name, + "status": self.status, + "found": self.found, + "optional": self.optional, + "location": self.location, + } + + @abstractmethod + def generate_help(self, platform_info): + """Generate help text for this requirement when it fails. + + Subclasses should override this to provide specific installation instructions. + """ + pass # Default: no help text + + +class VersionRequirement(Requirement): + """ + Base class for requirements with version checking. + + Provides standard version display formatting. + Subclasses inherit all version comparison logic. + """ + + def __init__( + self, + name: str, + cmake_vars: Dict, + found_var: str, + status_var: str, + optional_var: str, + version_found_var: Optional[str] = None, + version_required_var: Optional[str] = None, + location_var: Optional[str] = None, + ): + """ + Initialize version requirement. 
+ + Args: + name: Dependency name + cmake_vars: Dictionary of all CMake variables + found_var: Name of CMake variable for found status + status_var: Name of CMake variable for validation status + optional_var: Name of CMake variable for optional flag + version_found_var: Name of CMake variable for detected version (e.g., "Python_VERSION") + version_required_var: Name of CMake variable for minimum required version (e.g., "NVFUSER_REQUIREMENT_Python_VERSION_MIN") + location_var: Optional name of CMake variable for location + """ + super().__init__( + name, cmake_vars, found_var, status_var, optional_var, location_var + ) + self.version_found = ( + cmake_vars.get(version_found_var) if version_found_var else None + ) + self.version_required = ( + cmake_vars.get(version_required_var) if version_required_var else None + ) + + def format_status_line(self, colors) -> str: + """Format status line with version information.""" + if self.status == RequirementStatus.SUCCESS: + return self._format_success(colors) + elif self.status == RequirementStatus.NOT_FOUND: + return self._format_not_found(colors) + elif self.status == RequirementStatus.INCOMPATIBLE: + return self._format_incompatible(colors) + else: + return colorize(colors.BOLD_RED, f"[nvFuser] ✗ {self.name} unknown status") + + def _format_success(self, colors) -> str: + """For example: + Format success: [nvFuser] ✓ Python 3.12.3 >= 3.10 (/usr/bin/python3) + """ + # Add asterisk for optional requirements + name_with_marker = f"{self.name}*" if self.optional else self.name + # Status symbol and name in white/green with padding + name_padded = f"{name_with_marker:<15}" # Left-align with 15 char width + status_part = colorize(colors.GREEN, "[nvFuser] ✓ ") + name_padded + + # Version info in green + if self.version_found and self.version_required: + version_part = colorize( + colors.GREEN, f"{self.version_found} >= {self.version_required}" + ) + elif self.version_found: + version_part = colorize(colors.GREEN, 
self.version_found) + else: + version_part = "" + + # Combine parts + if version_part: + main_line = f"{status_part} {version_part}" + else: + main_line = status_part + + # Add location in cyan if available + if self.location: + main_line += " " + colorize(colors.CYAN, f"({self.location})") + + return main_line + + def _format_not_found(self, colors) -> str: + """Format not found line.""" + # Add asterisk for optional requirements + name_with_marker = f"{self.name}*" if self.optional else self.name + name_padded = f"{name_with_marker:<15}" # Left-align with 15 char width + + if self.optional: + status_part = colorize(colors.YELLOW, "[nvFuser] ○ ") + name_padded + if self.version_required: + return ( + status_part + + " " + + colorize( + colors.YELLOW, + f"Not found (optional, v{self.version_required}+ recommended)", + ) + ) + else: + return ( + status_part + " " + colorize(colors.YELLOW, "Not found (optional)") + ) + else: + status_part = colorize(colors.BOLD_RED, "[nvFuser] ✗ ") + name_padded + if self.version_required: + return ( + status_part + + " " + + colorize( + colors.BOLD_RED, + f"Not found (requires {self.version_required}+)", + ) + ) + else: + return status_part + " " + colorize(colors.BOLD_RED, "Not found") + + def _format_incompatible(self, colors) -> str: + """Format incompatible: [nvFuser] ✗ Python 3.7.0 < 3.8""" + # Add asterisk for optional requirements + name_with_marker = f"{self.name}*" if self.optional else self.name + name_padded = f"{name_with_marker:<15}" # Left-align with 15 char width + status_part = colorize(colors.BOLD_RED, "[nvFuser] ✗ ") + name_padded + + if self.version_found and self.version_required: + return ( + status_part + + " " + + colorize( + colors.BOLD_RED, f"{self.version_found} < {self.version_required}" + ) + ) + else: + return status_part + " " + colorize(colors.BOLD_RED, "incompatible") + + +class BooleanRequirement(Requirement): + """ + Base class for requirements without version checking. 
+ + Simple pass/fail validation (Git submodules, Ninja). + """ + + def format_status_line(self, colors) -> str: + """Format status line without version information.""" + # Add asterisk for optional requirements + name_with_marker = f"{self.name}*" if self.optional else self.name + name_padded = f"{name_with_marker:<15}" # Left-align with 15 char width + + if self.status == RequirementStatus.SUCCESS: + status_part = colorize(colors.GREEN, "[nvFuser] ✓ ") + name_padded + if self.location: + return status_part + " " + colorize(colors.CYAN, f"({self.location})") + return status_part + elif self.status == RequirementStatus.NOT_FOUND: + if self.optional: + status_part = colorize(colors.YELLOW, "[nvFuser] ○ ") + name_padded + return ( + status_part + " " + colorize(colors.YELLOW, "Not found (optional)") + ) + else: + status_part = colorize(colors.BOLD_RED, "[nvFuser] ✗ ") + name_padded + return status_part + " " + colorize(colors.BOLD_RED, "Not found") + else: + return ( + colorize(colors.BOLD_RED, "[nvFuser] ✗ ") + + name_padded + + " " + + colorize(colors.BOLD_RED, "validation failed") + ) diff --git a/python/tools/prereqs/requirements/compiler.py b/python/tools/prereqs/requirements/compiler.py new file mode 100644 index 00000000000..0cba454234b --- /dev/null +++ b/python/tools/prereqs/requirements/compiler.py @@ -0,0 +1,136 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-present NVIDIA CORPORATION & AFFILIATES. +# All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +"""Compiler dependency requirement (GNU/Clang).""" + +from typing import Dict +from .base import VersionRequirement + + +class CompilerRequirement(VersionRequirement): + """ + C++ compiler requirement with name mapping. 
+ + CMake variables used: + - CMAKE_CXX_COMPILER_ID: Compiler name (GNU or Clang) + - Compiler_FOUND: Whether compiler is available (always TRUE) + - CMAKE_CXX_COMPILER_VERSION: Detected compiler version + - CMAKE_CXX_COMPILER: Path to compiler executable + - NVFUSER_REQUIREMENT_Compiler_STATUS: Validation status + - NVFUSER_REQUIREMENT_Compiler_VERSION_MIN: Minimum required version (set based on compiler ID) + - NVFUSER_REQUIREMENT_Compiler_OPTIONAL: Whether compiler is optional + - NVFUSER_REQUIREMENT_GNU_VERSION_MIN: Minimum GNU version (from DependencyRequirements.cmake) + - NVFUSER_REQUIREMENT_Clang_VERSION_MIN: Minimum Clang version (from DependencyRequirements.cmake) + + Note: CMake exports "GNU" or "Clang" as the name, but variables are prefixed with "Compiler_" + + """ + + def __init__(self, cmake_vars: Dict): + """ + Initialize compiler requirement. + + Args: + cmake_vars: Dictionary of all CMake variables + """ + # Extract compiler name from CMake (GNU or Clang) + name = cmake_vars.get("CMAKE_CXX_COMPILER_ID", "Unknown") + + # Compiler uses "Compiler_" prefix for all variables, regardless of actual name + found_var = "Compiler_FOUND" + status_var = "NVFUSER_REQUIREMENT_Compiler_STATUS" + optional_var = "NVFUSER_REQUIREMENT_Compiler_OPTIONAL" + version_found_var = "CMAKE_CXX_COMPILER_VERSION" + version_required_var = "NVFUSER_REQUIREMENT_Compiler_VERSION_MIN" + location_var = "CMAKE_CXX_COMPILER" + + self.gnu_min_version = cmake_vars.get("NVFUSER_REQUIREMENT_GNU_VERSION_MIN") + self.clang_min_version = cmake_vars.get("NVFUSER_REQUIREMENT_Clang_VERSION_MIN") + + super().__init__( + name, + cmake_vars, + found_var, + status_var, + optional_var, + version_found_var, + version_required_var, + location_var, + ) + + def generate_help(self, platform_info): + """ + Generate compiler installation help. 
+ + Args: + platform_info: Platform detection dict from detect_platform() + """ + # Use the version requirement that was set during initialization + version_min = self.version_required + + print(f"{self.name} {version_min}+ Required") + print() + print("Why: nvFuser requires a modern C++ compiler with C++20 support,") + print(" including the header.") + print() + print(f"Install {self.name} {version_min} or higher:") + print() + + os_type = platform_info["os"] + + if self.name == "GNU": + if os_type == "Linux": + if platform_info.get("ubuntu_based"): + print(" Option 1: Ubuntu PPA (recommended):") + print() + print(" sudo add-apt-repository ppa:ubuntu-toolchain-r/test") + print(" sudo apt update") + print(f" sudo apt install gcc-{version_min} g++-{version_min}") + print() + print(" # Set as default:") + print( + f" sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-{version_min} 100" + ) + print( + f" sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-{version_min} 100" + ) + print() + else: + print(" Option 1: System package manager:") + print() + print(" # Example for RHEL/CentOS:") + print(f" # sudo yum install gcc-toolset-{version_min}") + print() + + elif os_type == "Darwin": + print(" On macOS, use Clang instead:") + print() + print(" # Xcode Command Line Tools (includes Clang):") + print(" xcode-select --install") + print() + + elif self.name == "Clang": + if os_type == "Linux": + if platform_info.get("ubuntu_based"): + print(" Option 1: LLVM APT repository:") + print() + print(" wget https://apt.llvm.org/llvm.sh") + print(" chmod +x llvm.sh") + print(f" sudo ./llvm.sh {version_min}") + print() + else: + print(" Option 1: System package manager:") + print() + print(f" # Check your distribution for clang-{version_min}") + print() + + elif os_type == "Darwin": + print(" Option 1: Xcode Command Line Tools:") + print() + print(" xcode-select --install") + print() + + print(" Option 2: Build from source:") + print() + print(" # See 
compiler documentation for build instructions") + print() diff --git a/python/tools/prereqs/requirements/cuda_toolkit.py b/python/tools/prereqs/requirements/cuda_toolkit.py new file mode 100644 index 00000000000..8e6de01d389 --- /dev/null +++ b/python/tools/prereqs/requirements/cuda_toolkit.py @@ -0,0 +1,73 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-present NVIDIA CORPORATION & AFFILIATES. +# All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +"""CUDA Toolkit dependency requirement.""" + +from typing import Dict +from .base import VersionRequirement + + +class CUDAToolkitRequirement(VersionRequirement): + """ + NVIDIA CUDA Toolkit requirement. + + CMake variables used: + - CUDAToolkit_FOUND: Whether CUDA was found + - CUDAToolkit_VERSION: Detected version (e.g., "13.1.80") + - CUDAToolkit_ROOT: Path to CUDA installation + - NVFUSER_REQUIREMENT_CUDAToolkit_STATUS: Validation status + - NVFUSER_REQUIREMENT_CUDAToolkit_VERSION_MIN: Minimum required version + - NVFUSER_REQUIREMENT_CUDAToolkit_OPTIONAL: Whether CUDA is optional + """ + + def __init__(self, cmake_vars: Dict): + """ + Initialize CUDAToolkit requirement. + + Args: + cmake_vars: Dictionary of all CMake variables + """ + # Define dependency name and CMake variable names for this requirement + name = "CUDAToolkit" + found_var = "CUDAToolkit_FOUND" + status_var = "NVFUSER_REQUIREMENT_CUDAToolkit_STATUS" + optional_var = "NVFUSER_REQUIREMENT_CUDAToolkit_OPTIONAL" + version_found_var = "CUDAToolkit_VERSION" + version_required_var = "NVFUSER_REQUIREMENT_CUDAToolkit_VERSION_MIN" + location_var = "CUDAToolkit_ROOT" + + super().__init__( + name, + cmake_vars, + found_var, + status_var, + optional_var, + version_found_var, + version_required_var, + location_var, + ) + + def generate_help(self, platform_info): + """ + Generate CUDA Toolkit installation help. 
+ + Args: + platform_info: Platform detection dict from detect_platform() + """ + version_min = self.version_required + + print(f"CUDA Toolkit {version_min}+ Required") + print() + print("Why: nvFuser needs the CUDA compiler (nvcc) for GPU kernel generation.") + print() + print(f"Install CUDA Toolkit {version_min} or higher:") + print() + print(" Download from NVIDIA:") + print() + print(" https://developer.nvidia.com/cuda-downloads") + print() + print(" After installation, ensure CUDA is in your PATH:") + print() + print(" export PATH=/usr/local/cuda/bin:$PATH") + print(" export LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH") + print() diff --git a/python/tools/prereqs/requirements/git_submodules.py b/python/tools/prereqs/requirements/git_submodules.py new file mode 100644 index 00000000000..3a48eda59d9 --- /dev/null +++ b/python/tools/prereqs/requirements/git_submodules.py @@ -0,0 +1,60 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-present NVIDIA CORPORATION & AFFILIATES. +# All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +"""Git submodules dependency requirement.""" + +from typing import Dict +from .base import BooleanRequirement + + +class GitSubmodulesRequirement(BooleanRequirement): + """ + Git submodules initialization check. + + CMake variables used: + - GitSubmodules_FOUND: Whether submodules are initialized + - NVFUSER_REQUIREMENT_GitSubmodules_STATUS: Validation status + - NVFUSER_REQUIREMENT_GitSubmodules_OPTIONAL: Whether submodules are optional + + No version checking - simple pass/fail. + """ + + def __init__(self, cmake_vars: Dict): + """ + Initialize Git submodules requirement. 
+ + Args: + cmake_vars: Dictionary of all CMake variables + """ + # Define dependency name and CMake variable names for this requirement + name = "Submodule status" + found_var = "GitSubmodules_FOUND" + status_var = "NVFUSER_REQUIREMENT_GitSubmodules_STATUS" + optional_var = "NVFUSER_REQUIREMENT_GitSubmodules_OPTIONAL" + location_var = "NVFUSER_REQUIREMENT_GitSubmodules_LOCATION_VAR" + + super().__init__( + name, cmake_vars, found_var, status_var, optional_var, location_var + ) + + def generate_help(self, platform_info): + """ + Generate Git submodules help. + + Args: + platform_info: Platform detection dict from detect_platform() + """ + print("Git Submodules Not Initialized") + print() + print( + "Why: nvFuser depends on third-party libraries included as Git submodules." + ) + print() + print("Initialize and update Git submodules:") + print() + print(" # From the repository root:") + print(" git submodule update --init --recursive") + print() + print(" # Or if you just cloned:") + print(" git clone --recursive ") + print() diff --git a/python/tools/prereqs/requirements/llvm.py b/python/tools/prereqs/requirements/llvm.py new file mode 100644 index 00000000000..9ec3d2d23c5 --- /dev/null +++ b/python/tools/prereqs/requirements/llvm.py @@ -0,0 +1,119 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-present NVIDIA CORPORATION & AFFILIATES. +# All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +"""LLVM dependency requirement.""" + +from typing import Dict +from .base import VersionRequirement + + +class LLVMRequirement(VersionRequirement): + """ + LLVM requirement for Host IR JIT compilation. 
+ + CMake variables used: + - LLVM_FOUND: Whether LLVM was found + - LLVM_VERSION: Detected version (e.g., "18.1.3") + - LLVM_DIR: Path to LLVM CMake config + - NVFUSER_REQUIREMENT_LLVM_STATUS: Validation status + - NVFUSER_REQUIREMENT_LLVM_VERSION_MIN: Minimum required version + - NVFUSER_REQUIREMENT_LLVM_OPTIONAL: Whether LLVM is optional + """ + + def __init__(self, cmake_vars: Dict): + """ + Initialize LLVM requirement. + + Args: + cmake_vars: Dictionary of all CMake variables + """ + # Define dependency name and CMake variable names for this requirement + name = "LLVM" + found_var = "LLVM_FOUND" + status_var = "NVFUSER_REQUIREMENT_LLVM_STATUS" + optional_var = "NVFUSER_REQUIREMENT_LLVM_OPTIONAL" + version_found_var = "LLVM_VERSION" + version_required_var = "NVFUSER_REQUIREMENT_LLVM_VERSION_MIN" + location_var = "LLVM_DIR" + + super().__init__( + name, + cmake_vars, + found_var, + status_var, + optional_var, + version_found_var, + version_required_var, + location_var, + ) + + def generate_help(self, platform_info): + """ + Generate LLVM installation help. 
+ + Args: + platform_info: Platform detection dict from detect_platform() + """ + import re + + version_min = self.version_required + + # Parse version to recommend a specific patch version + try: + clean = re.match(r"^[\d.]+", version_min.strip()) + if clean: + parts = clean.group().rstrip(".").split(".") + version_parts = tuple(int(p) for p in parts if p) + if len(version_parts) >= 2: + recommended = f"{version_parts[0]}.{version_parts[1]}.8" + major_version = version_parts[0] + else: + recommended = f"{version_parts[0]}.1.8" + major_version = version_parts[0] + else: + recommended = "18.1.8" + major_version = 18 + except Exception: + recommended = "18.1.8" + major_version = 18 + + print(f"LLVM {version_min}+ Required") + print() + print("Why: nvFuser uses LLVM for runtime Host IR JIT compilation.") + print() + print(f"Install LLVM {recommended} (recommended):") + print() + + print(" Option 1: Prebuilt binaries (recommended, no sudo needed):") + print() + print( + f" wget https://github.com/llvm/llvm-project/releases/download/llvmorg-{recommended}/clang+llvm-{recommended}-x86_64-linux-gnu-ubuntu-18.04.tar.xz" + ) + print(f" tar -xf clang+llvm-{recommended}-*.tar.xz") + print(f" mv clang+llvm-{recommended}-* ~/.llvm/{recommended}") + print() + print(" # Add to PATH:") + print(f" export PATH=$HOME/.llvm/{recommended}/bin:$PATH") + print() + + print(" Option 2: System package manager:") + print() + + os_type = platform_info["os"] + + if os_type == "Linux": + if platform_info.get("ubuntu_based"): + print(" # Ubuntu/Debian (LLVM APT repository):") + print(" wget https://apt.llvm.org/llvm.sh") + print(" chmod +x llvm.sh") + print(f" sudo ./llvm.sh {major_version}") + print() + else: + print(" # Check your distribution's package manager") + print() + elif os_type == "Darwin": + print(f" brew install llvm@{major_version}") + print() + print(" # Add to PATH:") + print(f" export PATH=/opt/homebrew/opt/llvm@{major_version}/bin:$PATH") + print() diff --git 
a/python/tools/prereqs/requirements/ninja.py b/python/tools/prereqs/requirements/ninja.py new file mode 100644 index 00000000000..514f24f2d11 --- /dev/null +++ b/python/tools/prereqs/requirements/ninja.py @@ -0,0 +1,86 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-present NVIDIA CORPORATION & AFFILIATES. +# All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +"""Ninja build system dependency requirement.""" + +from typing import Dict +from .base import BooleanRequirement + + +class NinjaRequirement(BooleanRequirement): + """ + Ninja build system check. + + CMake variables used: + - Ninja_FOUND: Whether Ninja is available + - NVFUSER_REQUIREMENT_Ninja_STATUS: Validation status + - NVFUSER_REQUIREMENT_Ninja_OPTIONAL: Whether Ninja is optional + + No version checking - just verifies Ninja is available. + """ + + def __init__(self, cmake_vars: Dict): + """ + Initialize Ninja requirement. + + Args: + cmake_vars: Dictionary of all CMake variables + """ + # Define dependency name and CMake variable names for this requirement + name = "Ninja" + found_var = "Ninja_FOUND" + status_var = "NVFUSER_REQUIREMENT_Ninja_STATUS" + optional_var = "NVFUSER_REQUIREMENT_Ninja_OPTIONAL" + location_var = "" + + super().__init__( + name, cmake_vars, found_var, status_var, optional_var, location_var + ) + + def generate_help(self, platform_info): + """ + Generate Ninja installation help. + + Args: + platform_info: Platform detection dict from detect_platform() + """ + print("Ninja Build System") + print() + print( + "Why: Ninja is a fast build system used by nvFuser for faster compilation." 
# ---------------------------------------------------------------------------
# nvidia-matmul-heuristics requirement
# (python/tools/prereqs/requirements/nvmmh.py)
# ---------------------------------------------------------------------------
from typing import Dict
from .base import BooleanRequirement


class NVMMHRequirement(BooleanRequirement):
    """
    Presence-only requirement for nvidia-matmul-heuristics (NVMMH).

    Only the availability of the nvidia-matmul-heuristics headers is tracked;
    no version comparison is involved.

    CMake variables consumed:
    - NVMMH_FOUND: whether nvidia-matmul-heuristics is available
    - NVMMH_INCLUDE_DIR: handed to the base class as the location variable
    - NVFUSER_REQUIREMENT_NVMMH_STATUS: validation status
    - NVFUSER_REQUIREMENT_NVMMH_OPTIONAL: whether NVMMH is optional
    """

    def __init__(self, cmake_vars: Dict):
        """
        Bind this requirement to the NVMMH-related CMake variable names.

        Args:
            cmake_vars: Dictionary of all CMake variables
        """
        super().__init__(
            "NVMMH",  # dependency name
            cmake_vars,
            "NVMMH_FOUND",  # "found" variable
            "NVFUSER_REQUIREMENT_NVMMH_STATUS",  # status variable
            "NVFUSER_REQUIREMENT_NVMMH_OPTIONAL",  # "optional" variable
            "NVMMH_INCLUDE_DIR",  # location variable
        )

    def generate_help(self, platform_info):
        """
        Print installation guidance for nvidia-matmul-heuristics.

        The text is the same on every platform; ``platform_info`` is accepted
        for signature parity with the other requirements but is not read.

        Args:
            platform_info: Platform detection dict from detect_platform()
        """
        # One entry per output line; "" produces an empty line.
        help_lines = (
            "nvidia-matmul-heuristics (NVMMH)",
            "",
            "Why: nvidia-matmul-heuristics provides optimized matrix multiplication heuristics for nvFuser.",
            "",
            "Install nvidia-matmul-heuristics:",
            "",
            " Recommended: pip installation:",
            "",
            " pip install nvidia-matmul-heuristics",
            "",
            " Note: This is an optional dependency. nvFuser will build without it,",
            " but matmul operations may not have access to optimized heuristics.",
            "",
        )
        for text in help_lines:
            print(text)


# ---------------------------------------------------------------------------
# pybind11 requirement
# (python/tools/prereqs/requirements/pybind11.py)
# ---------------------------------------------------------------------------
from typing import Dict
from .base import VersionRequirement


class Pybind11Requirement(VersionRequirement):
    """
    Versioned requirement describing pybind11 (used for the Python bindings).

    CMake variables consumed:
    - pybind11_FOUND: whether pybind11 was found
    - pybind11_VERSION: detected version (e.g., "3.0.1")
    - pybind11_DIR: path to the pybind11 CMake config
    - NVFUSER_REQUIREMENT_pybind11_STATUS: validation status
    - NVFUSER_REQUIREMENT_pybind11_VERSION_MIN: minimum required version
    - NVFUSER_REQUIREMENT_pybind11_OPTIONAL: whether pybind11 is optional
    """

    def __init__(self, cmake_vars: Dict):
        """
        Bind this requirement to the pybind11-related CMake variable names.

        Args:
            cmake_vars: Dictionary of all CMake variables
        """
        super().__init__(
            "pybind11",  # dependency name
            cmake_vars,
            "pybind11_FOUND",  # "found" variable
            "NVFUSER_REQUIREMENT_pybind11_STATUS",  # status variable
            "NVFUSER_REQUIREMENT_pybind11_OPTIONAL",  # "optional" variable
            "pybind11_VERSION",  # detected-version variable
            "NVFUSER_REQUIREMENT_pybind11_VERSION_MIN",  # required-version variable
            "pybind11_DIR",  # location variable
        )

    def generate_help(self, platform_info):
        """
        Print installation guidance for pybind11.

        The minimum version shown is taken from ``self.version_required``.
        ``platform_info`` is accepted but not read.

        Args:
            platform_info: Platform detection dict from detect_platform()
        """
        minimum = self.version_required

        # One entry per output line; "" produces an empty line.
        help_lines = (
            f"pybind11 {minimum}+ Required",
            "",
            "Why: pybind11 provides Python bindings for nvFuser's C++ code.",
            "",
            f"Install pybind11 {minimum} or higher:",
            "",
            f" pip install 'pybind11[global]>={minimum}'",
            "",
            " Note: The [global] extra provides CMake integration.",
            "",
        )
        for text in help_lines:
            print(text)
# ---------------------------------------------------------------------------
# Python interpreter requirement
# (python/tools/prereqs/requirements/python.py)
# ---------------------------------------------------------------------------
from typing import Dict
from .base import VersionRequirement


class PythonRequirement(VersionRequirement):
    """
    Versioned requirement describing the Python interpreter.

    CMake variables consumed:
    - Python_FOUND: whether Python was found
    - Python_VERSION: detected version (e.g., "3.12.3")
    - Python_EXECUTABLE: path to the python binary
    - NVFUSER_REQUIREMENT_Python_STATUS: validation status
    - NVFUSER_REQUIREMENT_Python_VERSION_MIN: minimum required version
    - NVFUSER_REQUIREMENT_Python_OPTIONAL: whether Python is optional
    """

    def __init__(self, cmake_vars: Dict):
        """
        Bind this requirement to the Python-related CMake variable names.

        Args:
            cmake_vars: Dictionary of all CMake variables
        """
        super().__init__(
            "Python",  # dependency name
            cmake_vars,
            "Python_FOUND",  # "found" variable
            "NVFUSER_REQUIREMENT_Python_STATUS",  # status variable
            "NVFUSER_REQUIREMENT_Python_OPTIONAL",  # "optional" variable
            "Python_VERSION",  # detected-version variable
            "NVFUSER_REQUIREMENT_Python_VERSION_MIN",  # required-version variable
            "Python_EXECUTABLE",  # location variable
        )

    def generate_help(self, platform_info):
        """
        Print installation guidance for Python.

        A platform-specific "Option 1" is printed for Linux (Ubuntu-based or
        other) and for Darwin; any other OS value gets no "Option 1". The
        Conda instructions ("Option 2") are always printed.

        Args:
            platform_info: Platform detection dict from detect_platform();
                ``platform_info["os"]`` is required, ``"ubuntu_based"`` is
                read only when the OS is "Linux".
        """
        minimum = self.version_required

        for text in (
            f"Python {minimum}+ Required",
            "",
            "Why: nvFuser requires modern Python with type hints and language features.",
            "",
            f"Install Python {minimum} or higher:",
            "",
        ):
            print(text)

        # The OS lookup intentionally happens after the header is printed.
        platform_name = platform_info["os"]

        if platform_name == "Darwin":
            first_option = (
                " Option 1: Homebrew:",
                "",
                f" brew install python@{minimum}",
                "",
            )
        elif platform_name == "Linux":
            if platform_info.get("ubuntu_based"):
                first_option = (
                    " Option 1: Ubuntu/Debian system package:",
                    "",
                    " sudo apt update",
                    f" sudo apt install python{minimum}",
                    "",
                )
            else:
                first_option = (
                    " Option 1: System package manager:",
                    "",
                    " # Example for RHEL/CentOS:",
                    f" # sudo yum install python{minimum}",
                    "",
                )
        else:
            # No platform-specific suggestion for other operating systems.
            first_option = ()

        conda_option = (
            " Option 2: Conda:",
            "",
            f" conda create -n nvfuser python={minimum}",
            " conda activate nvfuser",
            "",
        )
        for text in first_option + conda_option:
            print(text)


# ---------------------------------------------------------------------------
# PyTorch requirement with CUDA constraint validation
# (python/tools/prereqs/requirements/torch.py)
# ---------------------------------------------------------------------------
from typing import Dict
from .base import VersionRequirement
from ..colors import colorize


class TorchRequirement(VersionRequirement):
    """
    Versioned PyTorch requirement that additionally reports a CUDA constraint.

    CMake variables consumed:
    - Torch_FOUND: whether Torch was found
    - Torch_VERSION: detected PyTorch version
    - Torch_DIR: path to the Torch CMake config
    - NVFUSER_REQUIREMENT_Torch_STATUS: validation status
    - NVFUSER_REQUIREMENT_Torch_VERSION_MIN: minimum required version
    - NVFUSER_REQUIREMENT_Torch_OPTIONAL: whether Torch is optional
    - Torch_CUDA_constraint_status: CUDA constraint validation result
        - "match": Torch CUDA == CUDAToolkit version
        - "mismatch": versions differ (treated as a failure)
        - "not_available": Torch built without CUDA
    - Torch_CUDA_constraint_version: CUDA version (on match)
    - Torch_CUDA_constraint_found: Torch's CUDA version (on mismatch)
    - Torch_CUDA_constraint_required: system CUDA Toolkit version (on mismatch)
    """

    def __init__(self, cmake_vars: Dict):
        """
        Bind this requirement to the Torch-related CMake variable names and
        capture the CUDA constraint values.

        Args:
            cmake_vars: Dictionary of all CMake variables
        """
        super().__init__(
            "Torch",  # dependency name
            cmake_vars,
            "Torch_FOUND",  # "found" variable
            "NVFUSER_REQUIREMENT_Torch_STATUS",  # status variable
            "NVFUSER_REQUIREMENT_Torch_OPTIONAL",  # "optional" variable
            "Torch_VERSION",  # detected-version variable
            "NVFUSER_REQUIREMENT_Torch_VERSION_MIN",  # required-version variable
            "Torch_DIR",  # location variable
        )

        # CUDA constraint values; each is None when the variable is absent.
        self.constraint_status = cmake_vars.get("Torch_CUDA_constraint_status")
        self.constraint_version = cmake_vars.get("Torch_CUDA_constraint_version")
        self.constraint_found = cmake_vars.get("Torch_CUDA_constraint_found")
        self.constraint_required = cmake_vars.get("Torch_CUDA_constraint_required")

    def format_status_line(self, colors) -> str:
        """
        Return the base-class status line, followed on a new line by the CUDA
        constraint line when one is produced.
        """
        torch_line = super().format_status_line(colors)
        cuda_line = self._format_cuda_constraint(colors)
        return torch_line + "\n" + cuda_line if cuda_line else torch_line

    def _format_cuda_constraint(self, colors) -> str:
        """
        Build the status line for the Torch/CUDA Toolkit constraint.

        Returns an empty string when no constraint status is set or when the
        status is not one of "match", "mismatch" or "not_available".
        """
        outcome = self.constraint_status
        if not outcome:
            return ""

        # Left-align the label in a 15-character field.
        label = f"{'Torch_CUDA':<15}"

        if outcome == "match":
            matched_version = self.constraint_version or "unknown"
            marker = colorize(colors.GREEN, "[nvFuser] ✓") + " " + label
            detail = colorize(
                colors.CYAN, f"{matched_version} (Torch.CUDA == CUDAToolkit)"
            )
            return f"{marker} {detail}"

        if outcome == "mismatch":
            torch_side = self.constraint_found or "unknown"
            toolkit_side = self.constraint_required or "unknown"
            marker = colorize(colors.BOLD_RED, "[nvFuser] ✗") + " " + label
            detail = colorize(
                colors.BOLD_RED,
                f"mismatch (Torch: {torch_side}, CUDAToolkit: {toolkit_side})",
            )
            return f"{marker} {detail}"

        if outcome == "not_available":
            marker = colorize(colors.YELLOW, "[nvFuser] ○") + " " + label
            detail = colorize(colors.YELLOW, "Torch built without CUDA")
            return f"{marker} {detail}"

        # Unrecognized status value: emit nothing.
        return ""

    def is_failure(self) -> bool:
        """
        Report failure when the base-class check fails or when the CUDA
        constraint status is "mismatch".
        """
        return bool(super().is_failure() or self.constraint_status == "mismatch")

    def generate_help(self, platform_info):
        """
        Print installation guidance for PyTorch.

        When the CUDA constraint status is "mismatch", an extra section
        describing the two ways to resolve the mismatch is appended.
        ``platform_info`` is accepted but not read.

        Args:
            platform_info: Platform detection dict from detect_platform()
        """
        minimum = self.version_required

        for text in (
            f"PyTorch {minimum}+ Required",
            "",
            "Why: nvFuser is a PyTorch extension and requires PyTorch with CUDA support.",
            "",
            f"Install PyTorch {minimum} or higher with CUDA:",
            "",
            # Show common CUDA versions
            " # For CUDA 13.1:",
            " pip install torch --index-url https://download.pytorch.org/whl/cu131",
            " # For CUDA 13.0:",
            " pip install torch --index-url https://download.pytorch.org/whl/cu130",
            " # For CUDA 12.8:",
            " pip install torch --index-url https://download.pytorch.org/whl/cu128",
            "",
        ):
            print(text)

        if self.constraint_status != "mismatch":
            return

        # Additional guidance shown only for a Torch/CUDA Toolkit mismatch.
        for text in (
            "",
            "IMPORTANT: Torch CUDA Version Mismatch Detected",
            "",
            "Why: PyTorch was built with a different CUDA version than your system's",
            " CUDA Toolkit. This will cause runtime errors.",
            "",
            "Resolution:",
            "",
            " You have two options:",
            "",
            " Option 1: Install matching CUDA Toolkit (recommended)",
            "",
            " Install the CUDA Toolkit version that matches your PyTorch build.",
            " Check PyTorch CUDA version: python -c 'import torch; print(torch.version.cuda)'",
            "",
            " Option 2: Reinstall PyTorch for your CUDA version",
            "",
            " Reinstall PyTorch built for your system's CUDA Toolkit version.",
            " Check system CUDA version: nvcc --version",
            "",
        ):
            print(text)