fix(server): Fix Transformers fork version
parent 4236e41b0d
commit fa43fb71be
.gitignore
@@ -1,2 +1,3 @@
 aml
 target
+server/transformers
Dockerfile
@@ -2,6 +2,7 @@ FROM rust:1.64 as router-builder

 WORKDIR /usr/src

+COPY rust-toolchain.toml rust-toolchain.toml
 COPY proto proto
 COPY router router

@@ -13,6 +14,7 @@ FROM rust:1.64 as launcher-builder

 WORKDIR /usr/src

+COPY rust-toolchain.toml rust-toolchain.toml
 COPY launcher launcher

 WORKDIR /usr/src/launcher
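Both builder stages now copy rust-toolchain.toml before building, so rustup resolves the same pinned compiler for the router and the launcher instead of whatever the base image happens to ship. A minimal sketch of inspecting such a pin, assuming the usual [toolchain] layout (the file contents are illustrative, not taken from this commit):

# Illustrative only: print the channel pinned by rust-toolchain.toml.
# tomllib is in the Python standard library from 3.11 onward.
import tomllib

with open("rust-toolchain.toml", "rb") as f:
    pin = tomllib.load(f)

# A typical pin is a [toolchain] table with a "channel" key, e.g. "1.64.0".
print(pin["toolchain"]["channel"])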
aml/deployment.yaml
@@ -8,7 +8,7 @@ environment_variables:
   MODEL_NAME: bigscience/bloom
   NUM_GPUS: 8
 environment:
-  image: db4c2190dd824d1f950f5d1555fbadf0.azurecr.io/text-generation-inference:0.2
+  image: db4c2190dd824d1f950f5d1555fbadf0.azurecr.io/text-generation-inference:0.3.1
   inference_config:
     liveness_route:
       port: 3000
@@ -25,14 +25,14 @@ request_settings:
   max_concurrent_requests_per_instance: 256
 liveness_probe:
   initial_delay: 600
-  timeout: 20
+  timeout: 90
   period: 120
   success_threshold: 1
-  failure_threshold: 3
+  failure_threshold: 5
 readiness_probe:
   initial_delay: 600
-  timeout: 20
+  timeout: 90
   period: 120
   success_threshold: 1
-  failure_threshold: 3
+  failure_threshold: 5
 instance_count: 1
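Loading bigscience/bloom across 8 GPUs can keep the health routes unresponsive for many minutes, so the probes get a longer timeout (20 s to 90 s) and more tolerated failures (3 to 5). A rough sketch of the restart window this buys, assuming the usual initial_delay + failure_threshold * period probe semantics:

# Back-of-the-envelope: how long a hung container survives before the probe
# gives up, under the old and new deployment settings above.
initial_delay = 600  # seconds before the first probe fires
period = 120         # seconds between probes

for label, failure_threshold in (("old", 3), ("new", 5)):
    window = initial_delay + failure_threshold * period
    print(f"{label}: ~{window} s")  # old: ~960 s, new: ~1200 s

# Each individual probe now also waits up to 90 s (was 20 s) for a reply
# before counting the attempt as a failure.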
server/Makefile
@@ -7,13 +7,13 @@ gen-server:
	touch text_generation/pb/__init__.py

install-transformers:
-	# Install specific version of transformers
+	# Install specific version of transformers with custom cuda kernels
	rm transformers || true
-	rm transformers-7302a24535e8dc5637ea5b4e4572fc971d404098 || true
-	curl -L -O https://github.com/OlivierDehaene/transformers/archive/7302a24535e8dc5637ea5b4e4572fc971d404098.zip
-	unzip 7302a24535e8dc5637ea5b4e4572fc971d404098.zip
-	rm 7302a24535e8dc5637ea5b4e4572fc971d404098.zip
-	mv transformers-7302a24535e8dc5637ea5b4e4572fc971d404098 transformers
+	rm transformers-b55f16c5b71aeef47a66a4270e19c154f050a7a7 || true
+	curl -L -O https://github.com/OlivierDehaene/transformers/archive/b55f16c5b71aeef47a66a4270e19c154f050a7a7.zip
+	unzip b55f16c5b71aeef47a66a4270e19c154f050a7a7.zip
+	rm b55f16c5b71aeef47a66a4270e19c154f050a7a7.zip
+	mv transformers-b55f16c5b71aeef47a66a4270e19c154f050a7a7 transformers
	cd transformers && python setup.py install

install-torch:
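The recipe pins the OlivierDehaene/transformers fork to the exact commit carrying the custom CUDA kernels, replacing the stale 7302a245… hash with b55f16c5…. Not part of the commit, but a quick sanity check one might run after make install-transformers to confirm the fork is the copy Python actually imports:

# Hypothetical post-install check: verify which transformers installation
# `cd transformers && python setup.py install` put on the import path.
import transformers

print(transformers.__version__)  # the fork reports its upstream base version
print(transformers.__file__)     # should resolve to the freshly installed copy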
server/text_generation/models/bloom.py
@@ -38,7 +38,7 @@ class BLOOMSharded(CausalLM):
         self.master = self.rank == 0
         if torch.cuda.is_available():
             device = torch.device(f"cuda:{self.rank}")
-            dtype = torch.float16
+            dtype = torch.bfloat16
         else:
             device = torch.device("cpu")
             dtype = torch.float32
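Moving BLOOMSharded from float16 to bfloat16 keeps the 16-bit memory footprint but recovers float32's exponent range, which matters for a 176B-parameter model whose activations can overflow float16; it also matches the dtype BLOOM was trained in. A two-line comparison of the ranges (illustrative, not from the commit):

# bfloat16 trades mantissa precision for float32's exponent range, so large
# activation values no longer saturate to inf the way they can in float16.
import torch

print(torch.finfo(torch.float16).max)   # 65504.0
print(torch.finfo(torch.bfloat16).max)  # ~3.39e38, same exponent range as float32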