# hf_text-generation-inference/server/Makefile-awq

# Commit of the llm-awq fork that `build-awq` checks out.
# The fork's only change is to use the correct stream in this kernel,
# which is needed to make CUDA graphs work.
awq_commit := bd1dc2d5254345cc76ab71894651fb821275bdd4

# Fetch a fresh copy of the llm-awq fork. Any existing `llm-awq` directory is
# removed first so the clone always starts from a clean tree.
# Declared phony because the recipe never creates a file named `awq`; without
# the declaration, a stray file or directory with that name would make the
# target look up to date and the clone would be silently skipped.
.PHONY: awq
awq:
	rm -rf llm-awq
	git clone https://github.com/huggingface/llm-awq

# Check out the pinned commit (variable `awq_commit`) in the cloned repository
# and build the kernels with `python setup.py build`.
# Each recipe line runs in its own shell, so both lines carry their own `cd`.
# Declared phony because no file named `build-awq` is produced by the recipe.
.PHONY: build-awq
build-awq: awq
	cd llm-awq/ && git fetch && git checkout $(awq_commit)
	cd llm-awq/awq/kernels && python setup.py build

# Install the kernels from llm-awq/awq/kernels with `python setup.py install`.
# The `awq_inference_engine` package is uninstalled first; the `|| true` makes
# that step best-effort so a failing uninstall does not abort the install.
# Declared phony because no file named `install-awq` is produced by the recipe.
.PHONY: install-awq
install-awq: build-awq
	pip uninstall awq_inference_engine -y || true
	cd llm-awq/awq/kernels && python setup.py install