hf_text-generation-inference/backends/trtllm/build.rs

use std::env;
use std::path::PathBuf;
use cxx_build::CFG;
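
// Static libraries to link in addition to the backend itself; debug builds link
// the "d"-suffixed variants emitted by the CMake dependency build (see the loop below).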
const ADDITIONAL_BACKEND_LINK_LIBRARIES: [&str; 2] = ["spdlog", "fmt"];

fn main() {
    // Misc variables
    let out_dir = PathBuf::from(env::var("OUT_DIR").unwrap());
    let build_profile = env::var("PROFILE").unwrap();
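
    // Cargo's `debug`/`release` profile drives both the CMake build type and the
    // debug suffix applied to the dependency library names below.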
    let is_debug = match build_profile.as_ref() {
        "debug" => true,
        _ => false,
    };

    // Build the backend implementation through CMake
    let backend_path = cmake::Config::new(".")
        .uses_cxx11()
        .generator("Ninja")
        .profile(match is_debug {
            true => "Debug",
            false => "Release",
        })
        .define("CMAKE_CUDA_COMPILER", "/usr/local/cuda/bin/nvcc")
        .build();
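
    // `backend_path` is the install destination returned by `cmake::Config::build()`;
    // its `lib/` subdirectory is added to the linker search paths further down.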

    // Additional transitive CMake dependencies
    let deps_folder = out_dir.join("build").join("_deps");
    for dependency in ADDITIONAL_BACKEND_LINK_LIBRARIES {
        let dep_name = match build_profile.as_ref() {
            "debug" => format!("{}d", dependency),
            _ => String::from(dependency),
        };
        println!("cargo:rustc-link-lib=static={}", dep_name);
    }

    // Build the FFI layer calling the backend above
    CFG.include_prefix = "backends/trtllm";
    cxx_build::bridge("src/lib.rs")
        .static_flag(true)
        .include(deps_folder.join("fmt-src").join("include"))
        .include(deps_folder.join("spdlog-src").join("include"))
        .include(deps_folder.join("json-src").join("include"))
        .include(deps_folder.join("trtllm-src").join("cpp").join("include"))
        .include("/usr/local/cuda/include")
        .include("/usr/local/tensorrt/include")
        .file("src/ffi.cpp")
        .std("c++20")
        .compile("tgi_trtllm_backend");
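
    // Note: `cxx_build::bridge("src/lib.rs")` generates and compiles the C++ side of
    // the `#[cxx::bridge]` module declared in src/lib.rs. A minimal sketch of such a
    // module (illustrative names only, not the actual TGI bridge declaration):
    //
    //     #[cxx::bridge]
    //     mod ffi {
    //         unsafe extern "C++" {
    //             include!("backends/trtllm/include/ffi.h");
    //             type TensorRtLlmBackendImpl;
    //         }
    //     }

    // Rebuild whenever the C++ backend sources or the CMake definition change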
println!("cargo:rerun-if-changed=CMakeLists.txt");
println!("cargo:rerun-if-changed=include/backend.h");
println!("cargo:rerun-if-changed=lib/backend.cpp");
println!("cargo:rerun-if-changed=include/ffi.h");
println!("cargo:rerun-if-changed=src/ffi.cpp");

    // Emit linkage information
    // - tgi_trtllm_backend (i.e. FFI layer - src/ffi.cpp)
    let trtllm_lib_path = deps_folder
        .join("trtllm-src")
        .join("cpp")
        .join("tensorrt_llm");
    let trtllm_executor_linker_search_path =
        trtllm_lib_path.join("executor").join("x86_64-linux-gnu");

    // TRTLLM libtensorrt_llm_nvrtc_wrapper.so
    let trtllm_nvrtc_linker_search_path = trtllm_lib_path
        .join("kernels")
        .join("decoderMaskedMultiheadAttention")
        .join("decoderXQAImplJIT")
        .join("nvrtcWrapper")
        .join("x86_64-linux-gnu");
println!(r"cargo:rustc-link-search=native=/usr/local/cuda/lib64");
println!(r"cargo:rustc-link-search=native=/usr/local/cuda/lib64/stubs");
println!(r"cargo:rustc-link-search=native=/usr/local/tensorrt/lib");
println!(r"cargo:rustc-link-search=native={}", backend_path.display());
// println!(
// r"cargo:rustc-link-search=native={}/build",
// backend_path.display()
// );
println!(
r"cargo:rustc-link-search=native={}",
backend_path.join("lib").display()
);
println!(
r"cargo:rustc-link-search=native={}",
trtllm_executor_linker_search_path.display()
);
println!(
r"cargo:rustc-link-search=native={}",
trtllm_nvrtc_linker_search_path.display()
);
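
    // Libraries to link: CUDA / MPI / NVML / TensorRT shared libraries, the
    // TensorRT-LLM runtime and static executor, and the backend libraries built above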
println!("cargo:rustc-link-lib=dylib=cuda");
println!("cargo:rustc-link-lib=dylib=cudart");
println!("cargo:rustc-link-lib=dylib=cublas");
println!("cargo:rustc-link-lib=dylib=cublasLt");
println!("cargo:rustc-link-lib=dylib=mpi");
println!("cargo:rustc-link-lib=dylib=nvidia-ml");
println!("cargo:rustc-link-lib=dylib=nvinfer");
println!("cargo:rustc-link-lib=dylib=nvinfer_plugin_tensorrt_llm");
println!("cargo:rustc-link-lib=dylib=tensorrt_llm_nvrtc_wrapper");
println!("cargo:rustc-link-lib=static=tensorrt_llm_executor_static");
println!("cargo:rustc-link-lib=dylib=tensorrt_llm");
println!("cargo:rustc-link-lib=static=tgi_trtllm_backend_impl");
println!("cargo:rustc-link-lib=static=tgi_trtllm_backend");
}