# hf_text-generation-inference/backends/llamacpp/CMakeLists.txt

# Build script for the TGI llama.cpp backend.
cmake_minimum_required(VERSION 3.24)

# C and CXX are what CMake enables when no LANGUAGES are listed; they are
# spelled out here only to make that explicit.
project(
        tgi-llama-cpp-backend
        VERSION 1.0.0
        LANGUAGES C CXX
)

# Directory-wide C++ standard default, picked up by targets created after this line.
set(CMAKE_CXX_STANDARD 23)

include(FetchContent)

# llama.cpp release tag this backend is built against.
# The default matches the tag pinned in the FetchContent URL further down (b4048).
# NOTE(review): that URL is hard-coded and does not read this variable, so the two
# have to be kept in sync by hand until the URL is derived from it.
set(LLAMA_CPP_TARGET_VERSION "b4048" CACHE STRING "Version of llama.cpp to build against")

# Plain (non-cache) variable set before llama.cpp is added to the build.
# NOTE(review): presumably read by llama.cpp's own CMake to build the `common`
# library that is linked below - confirm against the pinned release.
set(LLAMA_BUILD_COMMON ON)

# Architecture list used for CMAKE_CUDA_ARCHITECTURES when CUDA is enabled and the
# caller did not provide one.
set(LLAMA_CPP_TARGET_CUDA_ARCHS "75-real;80-real;86-real;89-real;90-real" CACHE STRING "CUDA arch(s) to build")

# Feature switches; both are off unless the caller turns them on (-D<name>=ON).
option(LLAMA_CPP_BUILD_OFFLINE_RUNNER "Flag to build the standalone c++ backend runner" OFF)
option(LLAMA_CPP_BUILD_CUDA "Flag to build CUDA enabled inference through llama.cpp" OFF)

# Build against libc++ when the C++ compiler is Clang and the target system is Linux.
# Both operands are quoted so the comparison stays well-formed if either variable
# is empty, and so the expanded value is never re-interpreted as a variable name.
if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang" AND "${CMAKE_SYSTEM_NAME}" STREQUAL "Linux")
    message(STATUS "Targeting libc++")
    # CMAKE_CXX_FLAGS is one space-separated string. Passing the pieces as separate
    # unquoted arguments would join them with ';' and corrupt the compile command
    # whenever flags were already present, so build the value inside quotes.
    set(CMAKE_CXX_FLAGS "-stdlib=libc++ ${CMAKE_CXX_FLAGS}")
else ()
    message(STATUS "Not using libc++ ${CMAKE_CXX_COMPILER_ID} ${CMAKE_SYSTEM_NAME}")
endif ()

# Third-party dependencies: spdlog comes from the helper script cmake/spdlog.cmake.
include(cmake/spdlog.cmake)

# Optional CUDA build, driven by the LLAMA_CPP_BUILD_CUDA switch.
if (${LLAMA_CPP_BUILD_CUDA})
    message(STATUS "Enabling llama.cpp CUDA support")

    # NOTE(review): presumably picked up by the ggml/llama.cpp build added later in
    # this file to switch on its CUDA backend - confirm against the pinned release.
    set(GGML_CUDA ON)

    # An architecture list already defined by the caller wins; otherwise fall back
    # to the project's LLAMA_CPP_TARGET_CUDA_ARCHS value.
    if (NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
        set(CMAKE_CUDA_ARCHITECTURES ${LLAMA_CPP_TARGET_CUDA_ARCHS})
    endif ()
endif ()

# Fetch the llama.cpp sources as a release tarball and add them to this build.
# NOTE(review): the release tag (b4048) is written directly into the URL and is
# not derived from LLAMA_CPP_TARGET_VERSION - confirm whether that cache variable
# is meant to drive it.
fetchcontent_declare(
        llama
        URL "https://github.com/ggerganov/llama.cpp/archive/refs/tags/b4048.tar.gz"
)
fetchcontent_makeavailable(llama)

# Static library holding the C++ side of the TGI llama.cpp backend.
add_library(tgi_llamacpp_backend_impl STATIC csrc/backend.hpp csrc/backend.cpp)

# cxx_std_11 is only a lower bound on the language level for this target.
# NOTE(review): the file sets CMAKE_CXX_STANDARD to 23 near the top - confirm
# whether this feature requirement is still needed.
target_compile_features(tgi_llamacpp_backend_impl PRIVATE cxx_std_11)

# PUBLIC: anything linking the backend also links spdlog, llama and common.
target_link_libraries(tgi_llamacpp_backend_impl PUBLIC spdlog::spdlog llama common)
install(TARGETS tgi_llamacpp_backend_impl spdlog llama common)

# Define TGI_LLAMACPP_BACKEND_DEBUG=1 for Debug configurations only.
# A generator expression is used instead of comparing CMAKE_BUILD_TYPE at configure
# time: that variable is empty when no build type is given (and under multi-config
# generators), which turned the unquoted comparison into a malformed if() call.
target_compile_definitions(
        tgi_llamacpp_backend_impl
        PRIVATE
        $<$<CONFIG:Debug>:TGI_LLAMACPP_BACKEND_DEBUG=1>
)

# Optional standalone executable built from offline/main.cpp; only created when
# LLAMA_CPP_BUILD_OFFLINE_RUNNER is switched on.
if (${LLAMA_CPP_BUILD_OFFLINE_RUNNER})
    message(STATUS "Building llama.cpp offline runner")

    add_executable(tgi_llamacpp_offline_runner offline/main.cpp)
    target_link_libraries(
            tgi_llamacpp_offline_runner
            PUBLIC
            tgi_llamacpp_backend_impl
            llama
            common
            spdlog::spdlog
    )
endif ()
misc(cmake): add parameter to build specific cuda arch 2024-10-18 09:10:22 -06:00			`cmake_minimum_required(VERSION 3.24)`
feat(llamacpp): initial commit # Conflicts: # Cargo.lock 2024-10-03 06:00:17 -06:00
			`project(tgi-llama-cpp-backend VERSION 1.0.0)`
misc(cmake): add parameter to build specific cuda arch 2024-10-18 09:10:22 -06:00			`set(CMAKE_CXX_STANDARD 23)`
feat(llamacpp): initial commit # Conflicts: # Cargo.lock 2024-10-03 06:00:17 -06:00
			`include(FetchContent)`

misc(cmake): add parameter to build specific cuda arch 2024-10-18 09:10:22 -06:00			`set(LLAMA_CPP_TARGET_VERSION "b3837" CACHE STRING "Version of llama.cpp to build against")`
feat(backend): tell cmake to build llama-common and link to it 2024-10-22 07:23:16 -06:00			`set(LLAMA_BUILD_COMMON ON)`
misc(cmake): add parameter to build specific cuda arch 2024-10-18 09:10:22 -06:00			`set(LLAMA_CPP_TARGET_CUDA_ARCHS "75-real;80-real;86-real;89-real;90-real" CACHE STRING "CUDA arch(s) to build")`
feat(llamacpp): initial end2end build 2024-10-04 02:42:31 -06:00			`option(LLAMA_CPP_BUILD_OFFLINE_RUNNER "Flag to build the standalone c++ backend runner")`
			`option(LLAMA_CPP_BUILD_CUDA "Flag to build CUDA enabled inference through llama.cpp")`
feat(llamacpp): initial commit # Conflicts: # Cargo.lock 2024-10-03 06:00:17 -06:00
feat(backend): build and link through build.rs 2024-10-24 08:42:50 -06:00			`if (${CMAKE_CXX_COMPILER_ID} STREQUAL "Clang" AND ${CMAKE_SYSTEM_NAME} STREQUAL "Linux")`
feat(backend): wip Rust binding 2024-10-24 01:56:40 -06:00			`message(STATUS "Targeting libc++")`
			`set(CMAKE_CXX_FLAGS -stdlib=libc++ ${CMAKE_CXX_FLAGS})`
feat(backend): build and link through build.rs 2024-10-24 08:42:50 -06:00			`else ()`
feat(backend): wip Rust binding 2024-10-24 01:56:40 -06:00			`message(STATUS "Not using libc++ ${CMAKE_CXX_COMPILER_ID} ${CMAKE_SYSTEM_NAME}")`
feat(backend): build and link through build.rs 2024-10-24 08:42:50 -06:00			`endif ()`
feat(backend): wip Rust binding 2024-10-24 01:56:40 -06:00
feat(llamacpp): initial commit # Conflicts: # Cargo.lock 2024-10-03 06:00:17 -06:00			`# Add dependencies`
			`include(cmake/spdlog.cmake)`

misc(cmake): add parameter to build specific cuda arch 2024-10-18 09:10:22 -06:00			`if (${LLAMA_CPP_BUILD_CUDA})`
feat(llamacpp): initial end2end build 2024-10-04 02:42:31 -06:00			`message(STATUS "Enabling llama.cpp CUDA support")`
misc(cmake): add parameter to build specific cuda arch 2024-10-18 09:10:22 -06:00
			`if (NOT DEFINED CMAKE_CUDA_ARCHITECTURES)`
			`set(CMAKE_CUDA_ARCHITECTURES ${LLAMA_CPP_TARGET_CUDA_ARCHS})`
			`endif ()`
feat(llamacpp): initial end2end build 2024-10-04 02:42:31 -06:00			`set(GGML_CUDA ON)`
misc(cmake): add parameter to build specific cuda arch 2024-10-18 09:10:22 -06:00			`endif ()`
feat(llamacpp): initial end2end build 2024-10-04 02:42:31 -06:00
feat(llamacpp): initial commit # Conflicts: # Cargo.lock 2024-10-03 06:00:17 -06:00			`# Download llama.cpp repo at the specific version`
			`fetchcontent_declare(`
misc(cmake): add parameter to build specific cuda arch 2024-10-18 09:10:22 -06:00			`llama`
misc(cmake): use URL base llama.cpp repo 2024-11-07 16:54:05 -07:00			`URL https://github.com/ggerganov/llama.cpp/archive/refs/tags/b4048.tar.gz`
feat(llamacpp): initial commit # Conflicts: # Cargo.lock 2024-10-03 06:00:17 -06:00			`)`

			`fetchcontent_makeavailable(llama)`

feat(backend): build and link through build.rs 2024-10-24 08:42:50 -06:00			`add_library(tgi_llamacpp_backend_impl STATIC csrc/backend.hpp csrc/backend.cpp)`
			`target_compile_features(tgi_llamacpp_backend_impl PRIVATE cxx_std_11)`
misc(cmake): use URL base llama.cpp repo 2024-11-07 16:54:05 -07:00			`target_link_libraries(tgi_llamacpp_backend_impl PUBLIC spdlog::spdlog llama common)`
feat(backend): build and link through build.rs 2024-10-24 08:42:50 -06:00			`install(TARGETS tgi_llamacpp_backend_impl spdlog llama common)`
feat(llamacpp): initial end2end build 2024-10-04 02:42:31 -06:00
misc(build): refactor build type detection in cmake 2024-10-25 00:02:45 -06:00			`if (${CMAKE_BUILD_TYPE} STREQUAL "Debug")`
			`target_compile_definitions(tgi_llamacpp_backend_impl PRIVATE TGI_LLAMACPP_BACKEND_DEBUG=1)`
			`endif ()`

misc(cmake): add parameter to build specific cuda arch 2024-10-18 09:10:22 -06:00			`if (${LLAMA_CPP_BUILD_OFFLINE_RUNNER})`
feat(llamacpp): initial end2end build 2024-10-04 02:42:31 -06:00			`message(STATUS "Building llama.cpp offline runner")`
misc(build): refactor build type detection in cmake 2024-10-25 00:02:45 -06:00			`add_executable(tgi_llamacpp_offline_runner offline/main.cpp)`
feat(backend): tell cmake to build llama-common and link to it 2024-10-22 07:23:16 -06:00
misc(cmake): use URL base llama.cpp repo 2024-11-07 16:54:05 -07:00			`target_link_libraries(tgi_llamacpp_offline_runner PUBLIC tgi_llamacpp_backend_impl llama common spdlog::spdlog)`
misc(cmake): add parameter to build specific cuda arch 2024-10-18 09:10:22 -06:00			`endif ()`
feat(llamacpp): initial end2end build 2024-10-04 02:42:31 -06:00