From edc9ce9bebcf6fd9267c8e9faabf1ab494702e3c Mon Sep 17 00:00:00 2001
From: Nicolas Patry
Date: Mon, 15 May 2023 15:21:49 +0200
Subject: [PATCH] Cleanup.

---
 .../models/custom_modeling/flash_llama_modeling.py | 2 --
 .../models/custom_modeling/flash_neox_modeling.py  | 2 --
 server/text_generation_server/utils/layers.py      | 1 +
 3 files changed, 1 insertion(+), 4 deletions(-)

diff --git a/server/text_generation_server/models/custom_modeling/flash_llama_modeling.py b/server/text_generation_server/models/custom_modeling/flash_llama_modeling.py
index b078ff20..11f3766e 100644
--- a/server/text_generation_server/models/custom_modeling/flash_llama_modeling.py
+++ b/server/text_generation_server/models/custom_modeling/flash_llama_modeling.py
@@ -28,9 +28,7 @@ from transformers.activations import ACT2FN
 from typing import Optional
 
 # Flash attention imports
-import rotary_emb
 import flash_attn_cuda
-import dropout_layer_norm
 
 from flash_attn.layers.rotary import RotaryEmbedding
 from text_generation_server.utils.layers import (
diff --git a/server/text_generation_server/models/custom_modeling/flash_neox_modeling.py b/server/text_generation_server/models/custom_modeling/flash_neox_modeling.py
index fdeb4084..369e8d4f 100644
--- a/server/text_generation_server/models/custom_modeling/flash_neox_modeling.py
+++ b/server/text_generation_server/models/custom_modeling/flash_neox_modeling.py
@@ -30,9 +30,7 @@ from transformers.models.gpt_neox import GPTNeoXConfig
 from typing import Optional
 
 # Flash attention imports
-import rotary_emb
 import flash_attn_cuda
-import dropout_layer_norm
 
 from flash_attn.layers.rotary import RotaryEmbedding
 from text_generation_server.utils.layers import (
diff --git a/server/text_generation_server/utils/layers.py b/server/text_generation_server/utils/layers.py
index cbaf6d00..4c89e54e 100644
--- a/server/text_generation_server/utils/layers.py
+++ b/server/text_generation_server/utils/layers.py
@@ -1,6 +1,7 @@
 import torch
 
 from torch import nn
+import dropout_layer_norm
 
 HAS_BITS_AND_BYTES = True
 try:
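
This patch moves the `dropout_layer_norm` CUDA-extension import out of the two flash-attention modeling files, where it was presumably unused, and into `server/text_generation_server/utils/layers.py`, the module that calls into it (`rotary_emb` is likewise dropped in favor of the `flash_attn.layers.rotary.RotaryEmbedding` import that remains). For comparison, a minimal sketch of how the new import could instead be guarded in the style of the `HAS_BITS_AND_BYTES` check already visible in the `layers.py` hunk; the `HAS_DROPOUT_LAYER_NORM` flag is hypothetical and not part of this patch, which imports the extension unconditionally:

    # Hypothetical guard mirroring the existing HAS_BITS_AND_BYTES pattern
    # in layers.py; the patch itself imports dropout_layer_norm directly,
    # so environments without the compiled kernel would fail at import time.
    HAS_DROPOUT_LAYER_NORM = True
    try:
        import dropout_layer_norm
    except ImportError:
        HAS_DROPOUT_LAYER_NORM = False

One practical effect of the consolidation: CUDA-extension imports now live in a single utilities module, so a missing or broken kernel surfaces in one place rather than in every per-model file.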