2023-12-11 06:43:40 -07:00
|
|
|
import torch
|
|
|
|
|
|
|
|
from typing import Optional
|
|
|
|
|
|
|
|
from text_generation_server.models.flash_mistral import BaseFlashMistral
|
2023-12-11 06:49:52 -07:00
|
|
|
from text_generation_server.models.custom_modeling.flash_mixtral_modeling import (
|
|
|
|
MixtralConfig,
|
|
|
|
FlashMixtralForCausalLM,
|
|
|
|
)
|
2023-12-11 06:43:40 -07:00
|
|
|
|
|
|
|
|
|
|
|
class FlashMixtral(BaseFlashMistral):
    """Mixtral variant of ``BaseFlashMistral``.

    The class adds no behavior of its own: it pins the configuration class
    to ``MixtralConfig`` and the model class to ``FlashMixtralForCausalLM``
    and delegates everything else to the base-class constructor.
    """

    def __init__(
        self,
        model_id: str,
        revision: Optional[str] = None,
        quantize: Optional[str] = None,
        speculator: Optional[str] = None,
        dtype: Optional[torch.dtype] = None,
        trust_remote_code: bool = False,
    ):
        """Initialize the base class with the Mixtral config/model classes.

        Every argument is forwarded unchanged, by keyword, to
        ``BaseFlashMistral.__init__``; how each one is interpreted is
        decided there, not in this class.

        Args:
            model_id: Forwarded as ``model_id``.
            revision: Forwarded as ``revision``.
            quantize: Forwarded as ``quantize``.
            speculator: Forwarded as ``speculator``.
            dtype: Forwarded as ``dtype``.
            trust_remote_code: Forwarded as ``trust_remote_code``.
        """
        super().__init__(
            # The only Mixtral-specific pieces: which config and model
            # classes the shared base implementation should use.
            model_cls=FlashMixtralForCausalLM,
            config_cls=MixtralConfig,
            # Caller-supplied options, passed straight through.
            model_id=model_id,
            revision=revision,
            quantize=quantize,
            speculator=speculator,
            dtype=dtype,
            trust_remote_code=trust_remote_code,
        )
|