From 5ba53d44a18983a4de32d122f4cb46f4a17d9ef6 Mon Sep 17 00:00:00 2001 From: Nicolas Patry Date: Fri, 29 Sep 2023 11:19:06 +0200 Subject: [PATCH] Fixing eetq dockerfile. (#1081) # What does this PR do? Fixes #1079 Fixes # (issue) ## Before submitting - [ ] This PR fixes a typo or improves the docs (you can dismiss the other checks if that's the case). - [ ] Did you read the [contributor guideline](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md#start-contributing-pull-requests), Pull Request section? - [ ] Was this discussed/approved via a Github issue or the [forum](https://discuss.huggingface.co/)? Please add a link to it if that's the case. - [ ] Did you make sure to update the documentation with your changes? Here are the [documentation guidelines](https://github.com/huggingface/transformers/tree/main/docs), and [here are tips on formatting docstrings](https://github.com/huggingface/transformers/tree/main/docs#writing-source-documentation). - [ ] Did you write any new necessary tests? ## Who can review? Anyone in the community is free to review the PR once the tests have passed. Feel free to tag members/contributors who may be interested in your PR. --- Dockerfile | 9 +++++++++ server/text_generation_server/models/__init__.py | 2 ++ 2 files changed, 11 insertions(+) diff --git a/Dockerfile b/Dockerfile index 56f4775b..9c15f023 100644 --- a/Dockerfile +++ b/Dockerfile @@ -123,6 +123,13 @@ COPY server/Makefile-awq Makefile # Build specific version of transformers RUN TORCH_CUDA_ARCH_LIST="8.0;8.6+PTX" make build-awq +# Build eetq kernels +FROM kernel-builder as eetq-kernels-builder +WORKDIR /usr/src +COPY server/Makefile-eetq Makefile +# Build specific version of transformers +RUN TORCH_CUDA_ARCH_LIST="8.0;8.6+PTX" make build-eetq + # Build Transformers CUDA kernels FROM kernel-builder as custom-kernels-builder WORKDIR /usr/src @@ -178,6 +185,8 @@ COPY --from=custom-kernels-builder /usr/src/build/lib.linux-x86_64-cpython-39 /o COPY --from=exllama-kernels-builder /usr/src/build/lib.linux-x86_64-cpython-39 /opt/conda/lib/python3.9/site-packages # Copy build artifacts from awq kernels builder COPY --from=awq-kernels-builder /usr/src/llm-awq/awq/kernels/build/lib.linux-x86_64-cpython-39 /opt/conda/lib/python3.9/site-packages +# Copy build artifacts from eetq kernels builder +COPY --from=eetq-kernels-builder /usr/src/eetq/build/lib.linux-x86_64-cpython-39 /opt/conda/lib/python3.9/site-packages # Copy builds artifacts from vllm builder COPY --from=vllm-builder /usr/src/vllm/build/lib.linux-x86_64-cpython-39 /opt/conda/lib/python3.9/site-packages diff --git a/server/text_generation_server/models/__init__.py b/server/text_generation_server/models/__init__.py index dca3612f..5b1b5715 100644 --- a/server/text_generation_server/models/__init__.py +++ b/server/text_generation_server/models/__init__.py @@ -297,6 +297,8 @@ def get_model( raise ValueError("awq quantization is not supported for AutoModel") elif (quantize == "bitsandbytes-fp4") or (quantize == "bitsandbytes-nf4"): raise ValueError("4bit quantization is not supported for AutoModel") + elif (quantize == "eetq"): + raise ValueError("Eetq quantization is not supported for AutoModel") if model_type in modeling_auto.MODEL_FOR_CAUSAL_LM_MAPPING_NAMES: return CausalLM( model_id,