fix(server): Fix Transformers fork version
This commit is contained in:
parent
4236e41b0d
commit
fa43fb71be
|
@ -1,2 +1,3 @@
|
||||||
aml
|
aml
|
||||||
target
|
target
|
||||||
|
server/transformers
|
|
@ -2,6 +2,7 @@ FROM rust:1.64 as router-builder
|
||||||
|
|
||||||
WORKDIR /usr/src
|
WORKDIR /usr/src
|
||||||
|
|
||||||
|
COPY rust-toolchain.toml rust-toolchain.toml
|
||||||
COPY proto proto
|
COPY proto proto
|
||||||
COPY router router
|
COPY router router
|
||||||
|
|
||||||
|
@ -13,6 +14,7 @@ FROM rust:1.64 as launcher-builder
|
||||||
|
|
||||||
WORKDIR /usr/src
|
WORKDIR /usr/src
|
||||||
|
|
||||||
|
COPY rust-toolchain.toml rust-toolchain.toml
|
||||||
COPY launcher launcher
|
COPY launcher launcher
|
||||||
|
|
||||||
WORKDIR /usr/src/launcher
|
WORKDIR /usr/src/launcher
|
||||||
|
|
|
@ -8,7 +8,7 @@ environment_variables:
|
||||||
MODEL_NAME: bigscience/bloom
|
MODEL_NAME: bigscience/bloom
|
||||||
NUM_GPUS: 8
|
NUM_GPUS: 8
|
||||||
environment:
|
environment:
|
||||||
image: db4c2190dd824d1f950f5d1555fbadf0.azurecr.io/text-generation-inference:0.2
|
image: db4c2190dd824d1f950f5d1555fbadf0.azurecr.io/text-generation-inference:0.3.1
|
||||||
inference_config:
|
inference_config:
|
||||||
liveness_route:
|
liveness_route:
|
||||||
port: 3000
|
port: 3000
|
||||||
|
@ -25,14 +25,14 @@ request_settings:
|
||||||
max_concurrent_requests_per_instance: 256
|
max_concurrent_requests_per_instance: 256
|
||||||
liveness_probe:
|
liveness_probe:
|
||||||
initial_delay: 600
|
initial_delay: 600
|
||||||
timeout: 20
|
timeout: 90
|
||||||
period: 120
|
period: 120
|
||||||
success_threshold: 1
|
success_threshold: 1
|
||||||
failure_threshold: 3
|
failure_threshold: 5
|
||||||
readiness_probe:
|
readiness_probe:
|
||||||
initial_delay: 600
|
initial_delay: 600
|
||||||
timeout: 20
|
timeout: 90
|
||||||
period: 120
|
period: 120
|
||||||
success_threshold: 1
|
success_threshold: 1
|
||||||
failure_threshold: 3
|
failure_threshold: 5
|
||||||
instance_count: 1
|
instance_count: 1
|
||||||
|
|
|
@ -7,13 +7,13 @@ gen-server:
|
||||||
touch text_generation/pb/__init__.py
|
touch text_generation/pb/__init__.py
|
||||||
|
|
||||||
install-transformers:
|
install-transformers:
|
||||||
# Install specific version of transformers
|
# Install specific version of transformers with custom cuda kernels
|
||||||
rm transformers || true
|
rm transformers || true
|
||||||
rm transformers-7302a24535e8dc5637ea5b4e4572fc971d404098 || true
|
rm transformers-b55f16c5b71aeef47a66a4270e19c154f050a7a7 || true
|
||||||
curl -L -O https://github.com/OlivierDehaene/transformers/archive/7302a24535e8dc5637ea5b4e4572fc971d404098.zip
|
curl -L -O https://github.com/OlivierDehaene/transformers/archive/b55f16c5b71aeef47a66a4270e19c154f050a7a7.zip
|
||||||
unzip 7302a24535e8dc5637ea5b4e4572fc971d404098.zip
|
unzip b55f16c5b71aeef47a66a4270e19c154f050a7a7.zip
|
||||||
rm 7302a24535e8dc5637ea5b4e4572fc971d404098.zip
|
rm b55f16c5b71aeef47a66a4270e19c154f050a7a7.zip
|
||||||
mv transformers-7302a24535e8dc5637ea5b4e4572fc971d404098 transformers
|
mv transformers-b55f16c5b71aeef47a66a4270e19c154f050a7a7 transformers
|
||||||
cd transformers && python setup.py install
|
cd transformers && python setup.py install
|
||||||
|
|
||||||
install-torch:
|
install-torch:
|
||||||
|
|
|
@ -38,7 +38,7 @@ class BLOOMSharded(CausalLM):
|
||||||
self.master = self.rank == 0
|
self.master = self.rank == 0
|
||||||
if torch.cuda.is_available():
|
if torch.cuda.is_available():
|
||||||
device = torch.device(f"cuda:{self.rank}")
|
device = torch.device(f"cuda:{self.rank}")
|
||||||
dtype = torch.float16
|
dtype = torch.bfloat16
|
||||||
else:
|
else:
|
||||||
device = torch.device("cpu")
|
device = torch.device("cpu")
|
||||||
dtype = torch.float32
|
dtype = torch.float32
|
||||||
|
|
Loading…
Reference in New Issue