hf_text-generation-inference/backends/trtllm/build.rs

use std::env;
use std::path::PathBuf;
use cxx_build::CFG;
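
// Static libraries to link in addition to the backend itself; debug builds link
// the "d"-suffixed variants emitted by the CMake dependency build (see the loop below).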
const ADDITIONAL_BACKEND_LINK_LIBRARIES: [&str; 2] = ["spdlog", "fmt"];

fn main() {
    // Misc variables
    let out_dir = PathBuf::from(env::var("OUT_DIR").unwrap());
    let build_profile = env::var("PROFILE").unwrap();
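
    // Cargo's `debug`/`release` profile drives both the CMake build type and the
    // debug suffix applied to the dependency library names below.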
    let is_debug = match build_profile.as_ref() {
        "debug" => true,
        _ => false,
    };

    // Build the backend implementation through CMake
    let backend_path = cmake::Config::new(".")
        .uses_cxx11()
        .generator("Ninja")
        .profile(match is_debug {
            true => "Debug",
            false => "Release",
        })
        .define("CMAKE_CUDA_COMPILER", "/usr/local/cuda/bin/nvcc")
        .build();
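
    // `backend_path` is the install destination returned by `cmake::Config::build()`;
    // its `lib/` subdirectory is added to the linker search paths further down.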

    // Additional transitive CMake dependencies
    let deps_folder = out_dir.join("build").join("_deps");
    for dependency in ADDITIONAL_BACKEND_LINK_LIBRARIES {
        let dep_name = match build_profile.as_ref() {
            "debug" => format!("{}d", dependency),
            _ => String::from(dependency),
        };
        println!("cargo:rustc-link-lib=static={}", dep_name);
    }

    // Build the FFI layer calling the backend above
    CFG.include_prefix = "backends/trtllm";
    cxx_build::bridge("src/lib.rs")
        .static_flag(true)
        .include(deps_folder.join("fmt-src").join("include"))
        .include(deps_folder.join("spdlog-src").join("include"))
        .include(deps_folder.join("json-src").join("include"))
        .include(deps_folder.join("trtllm-src").join("cpp").join("include"))
        .include("/usr/local/cuda/include")
        .include("/usr/local/tensorrt/include")
        .file("src/ffi.cpp")
        .std("c++20")
        .compile("tgi_trtllm_backend");
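
    // Note: `cxx_build::bridge("src/lib.rs")` generates and compiles the C++ side of
    // the `#[cxx::bridge]` module declared in src/lib.rs. A minimal sketch of such a
    // module (illustrative names only, not the actual TGI bridge declaration):
    //
    //     #[cxx::bridge]
    //     mod ffi {
    //         unsafe extern "C++" {
    //             include!("backends/trtllm/include/ffi.h");
    //             type TensorRtLlmBackendImpl;
    //         }
    //     }

    // Rebuild whenever the C++ backend sources or the CMake definition change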
println!("cargo:rerun-if-changed=CMakeLists.txt");
println!("cargo:rerun-if-changed=include/backend.h");
println!("cargo:rerun-if-changed=lib/backend.cpp");
println!("cargo:rerun-if-changed=include/ffi.h");
println!("cargo:rerun-if-changed=src/ffi.cpp");

    // Emit linkage information
    // - tgi_trtllm_backend (i.e. FFI layer - src/ffi.cpp)
    let trtllm_lib_path = deps_folder
        .join("trtllm-src")
        .join("cpp")
        .join("tensorrt_llm");
    let trtllm_executor_linker_search_path =
        trtllm_lib_path.join("executor").join("x86_64-linux-gnu");

    // TRTLLM libtensorrt_llm_nvrtc_wrapper.so
    let trtllm_nvrtc_linker_search_path = trtllm_lib_path
        .join("kernels")
        .join("decoderMaskedMultiheadAttention")
        .join("decoderXQAImplJIT")
        .join("nvrtcWrapper")
        .join("x86_64-linux-gnu");
println!(r"cargo:rustc-link-search=native=/usr/local/cuda/lib64");
println!(r"cargo:rustc-link-search=native=/usr/local/cuda/lib64/stubs");
println!(r"cargo:rustc-link-search=native=/usr/local/tensorrt/lib");
println!(r"cargo:rustc-link-search=native={}", backend_path.display());
// println!(
// r"cargo:rustc-link-search=native={}/build",
// backend_path.display()
// );
println!(
r"cargo:rustc-link-search=native={}",
backend_path.join("lib").display()
);
println!(
r"cargo:rustc-link-search=native={}",
trtllm_executor_linker_search_path.display()
);
println!(
r"cargo:rustc-link-search=native={}",
trtllm_nvrtc_linker_search_path.display()
);
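
    // Libraries to link: CUDA / MPI / NVML / TensorRT shared libraries, the
    // TensorRT-LLM runtime and static executor, and the backend libraries built above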
println!("cargo:rustc-link-lib=dylib=cuda");
println!("cargo:rustc-link-lib=dylib=cudart");
println!("cargo:rustc-link-lib=dylib=cublas");
println!("cargo:rustc-link-lib=dylib=cublasLt");
println!("cargo:rustc-link-lib=dylib=mpi");
println!("cargo:rustc-link-lib=dylib=nvidia-ml");
println!("cargo:rustc-link-lib=dylib=nvinfer");
println!("cargo:rustc-link-lib=dylib=nvinfer_plugin_tensorrt_llm");
println!("cargo:rustc-link-lib=dylib=tensorrt_llm_nvrtc_wrapper");
println!("cargo:rustc-link-lib=static=tensorrt_llm_executor_static");
println!("cargo:rustc-link-lib=dylib=tensorrt_llm");
println!("cargo:rustc-link-lib=static=tgi_trtllm_backend_impl");
println!("cargo:rustc-link-lib=static=tgi_trtllm_backend");
}