# hf_text-generation-inference/server/Makefile-fbgemm

# Git ref of the pytorch/FBGEMM repository that build-fbgemm checks out
# (via `git checkout`) before building.
fbgemm_commit := v0.8.0

# build-fbgemm: fetch FBGEMM at $(fbgemm_commit) and build the fbgemm_gpu
# "genai" package variant.
#
# Steps, in order:
#   1. Clone https://github.com/pytorch/FBGEMM.git into ./fbgemm, unless a
#      directory of that name already exists.
#   2. Fetch, check out $(fbgemm_commit) and initialise every submodule
#      recursively.
#   3. Install fbgemm_gpu/requirements.txt with pip, then run
#      `setup.py ... build` with the CUDA architecture variables set to
#      8.0 and 9.0a.
#
# Steps 2 and 3 are one `&&` chain on a single logical recipe line: each
# recipe line runs in its own shell, so the `cd` must share a line with the
# commands that rely on it, and any failure stops the chain.
#
# Declared phony because the name is a command, not a file this rule creates;
# a stray file or directory called `build-fbgemm` must never make the target
# look up to date.
.PHONY: build-fbgemm
build-fbgemm:
	@if [ ! -d "fbgemm" ]; then \
		git clone https://github.com/pytorch/FBGEMM.git fbgemm; \
	fi
	cd fbgemm && git fetch && git checkout $(fbgemm_commit) && \
	git submodule update --init --recursive && \
	cd fbgemm_gpu && \
	pip install -r requirements.txt && \
	CUDA_ARCH_LIST="8.0;9.0a" NVCC_GENCODE="-gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_90a,code=sm_90a" TORCH_CUDA_ARCH_LIST="8.0;9.0a" python setup.py --package_variant genai build

# install-fbgemm: install the fbgemm_gpu "genai" package variant from the
# ./fbgemm checkout.
#
# Depends on build-fbgemm, so the clone/checkout/build steps run first. The
# install step then runs `setup.py ... install` from fbgemm/fbgemm_gpu with the
# same CUDA architecture variables (8.0 and 9.0a) that the build step uses.
#
# Declared phony because the name is a command, not a file this rule creates;
# a stray file or directory called `install-fbgemm` must never make the target
# look up to date.
.PHONY: install-fbgemm
install-fbgemm: build-fbgemm
	cd fbgemm/fbgemm_gpu && \
	CUDA_ARCH_LIST="8.0;9.0a" NVCC_GENCODE="-gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_90a,code=sm_90a" TORCH_CUDA_ARCH_LIST="8.0;9.0a" python setup.py --package_variant genai install
# Upgrade fbgemm (#2398) * Upgrade fbgemm * Fix fbgemm version 2024-08-12 06:08:38 -06:00			`fbgemm_commit := v0.8.0`
# feat(fp8): use fbgemm kernels and load fp8 weights directly (#2248) * feat(fp8): add support for fbgemm * allow loading fp8 weights directly * update outlines * fix makefile * build fbgemm * avoid circular import and fix dockerfile * add default dtype * refactored weights loader * fix auto conversion * fix quantization config parsing * force new nccl on install * missing get_weights implementation * increase timeout 2024-07-20 11:02:04 -06:00
# 			`build-fbgemm:`
# Add Directory Check to Prevent Redundant Cloning in Build Process (#2486) Update Makefile-fbgemm Added Directory check for FBGEMM repository cloning. 2024-09-07 05:19:43 -06:00			`@if [ ! -d "fbgemm" ]; then \`
# 			`git clone https://github.com/pytorch/FBGEMM.git fbgemm; \`
# 			`fi`
# chore: update to torch 2.4 (#2259) * chore: update to torch 2.4 * remove un-necessary patch * fix 2024-07-23 14:39:43 -06:00			`cd fbgemm && git fetch && git checkout $(fbgemm_commit) && \`
# feat(fp8): use fbgemm kernels and load fp8 weights directly (#2248) * feat(fp8): add support for fbgemm * allow loading fp8 weights directly * update outlines * fix makefile * build fbgemm * avoid circular import and fix dockerfile * add default dtype * refactored weights loader * fix auto conversion * fix quantization config parsing * force new nccl on install * missing get_weights implementation * increase timeout 2024-07-20 11:02:04 -06:00			`git submodule update --init --recursive && \`
# 			`cd fbgemm_gpu && \`
# 			`pip install -r requirements.txt && \`
# 			`CUDA_ARCH_LIST="8.0;9.0a" NVCC_GENCODE="-gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_90a,code=sm_90a" TORCH_CUDA_ARCH_LIST="8.0;9.0a" python setup.py --package_variant genai build`

# 			`install-fbgemm: build-fbgemm`
# 			`cd fbgemm/fbgemm_gpu && \`
# 			`CUDA_ARCH_LIST="8.0;9.0a" NVCC_GENCODE="-gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_90a,code=sm_90a" TORCH_CUDA_ARCH_LIST="8.0;9.0a" python setup.py --package_variant genai install`