Dummy CI run. (#1817)

# What does this PR do?

Fixes # (issue)

## Before submitting

- [ ] This PR fixes a typo or improves the docs (you can dismiss the other checks if that's the case).
- [ ] Did you read the [contributor guideline](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md#start-contributing-pull-requests), Pull Request section?
- [ ] Was this discussed/approved via a GitHub issue or the [forum](https://discuss.huggingface.co/)? Please add a link to it if that's the case.
- [ ] Did you make sure to update the documentation with your changes? Here are the [documentation guidelines](https://github.com/huggingface/transformers/tree/main/docs), and [here are tips on formatting docstrings](https://github.com/huggingface/transformers/tree/main/docs#writing-source-documentation).
- [ ] Did you write any new necessary tests?

## Who can review?

Anyone in the community is free to review the PR once the tests have passed. Feel free to tag members/contributors who may be interested in your PR.
2024-04-26 19:19:55 +02:00 · 2024-04-26 19:19:55 +02:00 · e9f03f822a
parent 8b8e8f6632
commit e9f03f822a
7 changed files with 17 additions and 3 deletions
--- a/server/text_generation_server/models/flash_causal_lm.py
+++ b/server/text_generation_server/models/flash_causal_lm.py
@ -33,7 +33,12 @@ from text_generation_server.utils import StoppingCriteria, HeterogeneousNextToke
 from text_generation_server.utils.dist import MEMORY_FRACTION
 tracer = trace.get_tracer(__name__)
-from text_generation_server.utils.import_utils import IS_CUDA_SYSTEM, IS_ROCM_SYSTEM, IS_XPU_SYSTEM
+from text_generation_server.utils.import_utils import (
    IS_CUDA_SYSTEM,
    IS_ROCM_SYSTEM,
    IS_XPU_SYSTEM,
 )
@dataclass
 class FlashCausalLMBatch(Batch):
@ -788,7 +793,9 @@ class FlashCausalLM(Model):
        if IS_CUDA_SYSTEM or IS_ROCM_SYSTEM:
            total_free_memory, _ = torch.cuda.mem_get_info(self.device)
-            total_gpu_memory = torch.cuda.get_device_properties(self.device).total_memory
+            total_gpu_memory = torch.cuda.get_device_properties(
                self.device
            ).total_memory
            free_memory = max(
                0, total_free_memory - (1 - MEMORY_FRACTION) * total_gpu_memory
--- a/server/text_generation_server/models/flash_llama.py
+++ b/server/text_generation_server/models/flash_llama.py
@ -20,6 +20,7 @@ tracer = trace.get_tracer(__name__)
 from text_generation_server.utils.import_utils import IS_XPU_SYSTEM
 class FlashLlama(FlashCausalLM):
    def __init__(
        self,
--- a/server/text_generation_server/models/flash_neox.py
+++ b/server/text_generation_server/models/flash_neox.py
@ -15,6 +15,7 @@ from text_generation_server.utils import (
    Weights,
 )
 from text_generation_server.utils.import_utils import IS_XPU_SYSTEM
 tracer = trace.get_tracer(__name__)
--- a/server/text_generation_server/models/flash_rw.py
+++ b/server/text_generation_server/models/flash_rw.py
@ -16,6 +16,7 @@ from text_generation_server.utils import (
    Weights,
 )
 from text_generation_server.utils.import_utils import IS_XPU_SYSTEM
 tracer = trace.get_tracer(__name__)
--- a/server/text_generation_server/models/flash_santacoder.py
+++ b/server/text_generation_server/models/flash_santacoder.py
@ -19,6 +19,7 @@ from text_generation_server.utils import (
 )
 from text_generation_server.utils.import_utils import IS_XPU_SYSTEM
 tracer = trace.get_tracer(__name__)
--- a/server/text_generation_server/utils/import_utils.py
+++ b/server/text_generation_server/utils/import_utils.py
@ -1,5 +1,6 @@
 import torch
 def is_xpu_available():
    try:
        import intel_extension_for_pytorch
@ -8,6 +9,7 @@ def is_xpu_available():
    return hasattr(torch, "xpu") and torch.xpu.is_available()
 IS_ROCM_SYSTEM = torch.version.hip is not None
 IS_CUDA_SYSTEM = torch.version.cuda is not None
 IS_XPU_SYSTEM = is_xpu_available()
--- a/server/text_generation_server/utils/layers.py
+++ b/server/text_generation_server/utils/layers.py
@ -8,6 +8,7 @@ from typing import List, Tuple, Optional
 from loguru import logger
 from functools import lru_cache
 # Dummy comment.
 HAS_BITS_AND_BYTES = True
 try:
    import bitsandbytes as bnb