cmake_minimum_required(VERSION 3.24)

project(tgi-llama-cpp-backend VERSION 1.0.0)
set(CMAKE_CXX_STANDARD 23)

include(FetchContent)
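
# User-configurable build options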
set(LLAMA_CPP_TARGET_VERSION "b4077" CACHE STRING "Version of llama.cpp to build against")
set(LLAMA_CPP_TARGET_CUDA_ARCHS "75-real;80-real;86-real;89-real;90-real" CACHE STRING "CUDA arch(s) to build")
option(LLAMA_CPP_BUILD_OFFLINE_RUNNER "Flag to build the standalone C++ backend runner" OFF)
option(LLAMA_CPP_BUILD_CUDA "Flag to build CUDA-enabled inference through llama.cpp" OFF)
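
# Example configuration (illustrative; adjust the build type and options to your setup):
#   cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DLLAMA_CPP_BUILD_CUDA=ON
#   cmake --build build --parallel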

if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang" AND CMAKE_SYSTEM_NAME STREQUAL "Linux")
    message(STATUS "Targeting libc++")
    set(CMAKE_CXX_FLAGS "-stdlib=libc++ ${CMAKE_CXX_FLAGS}")
else ()
    message(STATUS "Not using libc++ ${CMAKE_CXX_COMPILER_ID} ${CMAKE_SYSTEM_NAME}")
endif ()

# Add dependencies
include(cmake/numa.cmake)
include(cmake/spdlog.cmake)
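
# Optionally enable CUDA-accelerated inference in llama.cpp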
if (LLAMA_CPP_BUILD_CUDA)
    message(STATUS "Enabling llama.cpp CUDA support")

    if (NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
        set(CMAKE_CUDA_ARCHITECTURES ${LLAMA_CPP_TARGET_CUDA_ARCHS})
    endif ()

    set(GGML_CUDA ON)
endif ()

# Download the llama.cpp repository at the pinned version
FetchContent_Declare(
        llama
        URL https://github.com/ggerganov/llama.cpp/archive/refs/tags/${LLAMA_CPP_TARGET_VERSION}.tar.gz
)

FetchContent_MakeAvailable(llama)
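
# Build the backend implementation as a static library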
add_library(tgi_llamacpp_backend_impl STATIC csrc/backend.hpp csrc/backend.cpp)
target_compile_features(tgi_llamacpp_backend_impl PRIVATE cxx_std_23)
target_link_libraries(tgi_llamacpp_backend_impl PUBLIC spdlog::spdlog llama)

if (NUMA_FOUND)
    target_link_libraries(tgi_llamacpp_backend_impl PUBLIC numa)
endif ()
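
# Install the backend library together with spdlog and llama.cpp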
install(TARGETS tgi_llamacpp_backend_impl spdlog llama)
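
# Define TGI_LLAMACPP_BACKEND_DEBUG for Debug builds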
if (CMAKE_BUILD_TYPE STREQUAL "Debug")
    target_compile_definitions(tgi_llamacpp_backend_impl PRIVATE TGI_LLAMACPP_BACKEND_DEBUG=1)
endif ()
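
# Optionally build the standalone offline runner for the backend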
if (LLAMA_CPP_BUILD_OFFLINE_RUNNER)
    message(STATUS "Building llama.cpp offline runner")
    add_executable(tgi_llamacpp_offline_runner offline/main.cpp)

    target_link_libraries(tgi_llamacpp_offline_runner PUBLIC tgi_llamacpp_backend_impl llama spdlog::spdlog)
endif ()