re-enable xpu
Signed-off-by: Wang, Yi A <yi.a.wang@intel.com>
This commit is contained in:
parent
0b6c5fea7c
commit
bd10f790ed
|
@@ -21,8 +21,10 @@ from transformers.activations import ACT2FN
|
|||
from transformers.configuration_utils import PretrainedConfig
|
||||
from typing import Optional, List, Tuple, Any
|
||||
from loguru import logger
|
||||
from text_generation_server.utils.import_utils import IS_XPU_SYSTEM
|
||||
|
||||
from vllm.model_executor.layers.fused_moe import fused_moe
|
||||
if not IS_XPU_SYSTEM:
|
||||
    from vllm.model_executor.layers.fused_moe import fused_moe
|
||||
from text_generation_server.utils import paged_attention, flash_attn
|
||||
from text_generation_server.utils.layers import (
|
||||
FastLinear,
|
||||
|
|
|
@@ -24,7 +24,10 @@ import torch.distributed
|
|||
import numpy as np
|
||||
|
||||
from torch import nn
|
||||
from vllm.model_executor.layers.fused_moe import fused_moe
|
||||
from text_generation_server.utils.import_utils import IS_XPU_SYSTEM
|
||||
|
||||
if not IS_XPU_SYSTEM:
|
||||
    from vllm.model_executor.layers.fused_moe import fused_moe
|
||||
from transformers.activations import ACT2FN
|
||||
from transformers.configuration_utils import PretrainedConfig
|
||||
from typing import Optional, List, Tuple
|
||||
|
|
|
@@ -11,4 +11,4 @@ if cuda_graphs is not None:
|
|||
raise RuntimeError(
|
||||
f"Could not parse cuda graphs {cuda_graphs}, expected comma separated list for batch sizes to run on: {e}"
|
||||
)
|
||||
CUDA_GRAPHS = cuda_graphs
|
||||
CUDA_GRAPHS = cuda_graphs if torch.cuda.is_available() else None
|
||||
|
|
Loading…
Reference in New Issue