fix(server): Fix Transformers fork version

commit fa43fb71be
parent 4236e41b0d
Author: OlivierDehaene
Date: 2022-11-08 17:42:38 +01:00

5 changed files with 16 additions and 13 deletions

.gitignore
@@ -1,2 +1,3 @@
 aml
 target
+server/transformers

Dockerfile
@@ -2,6 +2,7 @@ FROM rust:1.64 as router-builder
 WORKDIR /usr/src
+COPY rust-toolchain.toml rust-toolchain.toml
 COPY proto proto
 COPY router router
@@ -13,6 +14,7 @@ FROM rust:1.64 as launcher-builder
 WORKDIR /usr/src
+COPY rust-toolchain.toml rust-toolchain.toml
 COPY launcher launcher
 WORKDIR /usr/src/launcher

aml/deployment.yaml
@@ -8,7 +8,7 @@ environment_variables:
   MODEL_NAME: bigscience/bloom
   NUM_GPUS: 8
 environment:
-  image: db4c2190dd824d1f950f5d1555fbadf0.azurecr.io/text-generation-inference:0.2
+  image: db4c2190dd824d1f950f5d1555fbadf0.azurecr.io/text-generation-inference:0.3.1
 inference_config:
   liveness_route:
     port: 3000
@@ -25,14 +25,14 @@ request_settings:
   max_concurrent_requests_per_instance: 256
 liveness_probe:
   initial_delay: 600
-  timeout: 20
+  timeout: 90
   period: 120
   success_threshold: 1
-  failure_threshold: 3
+  failure_threshold: 5
 readiness_probe:
   initial_delay: 600
-  timeout: 20
+  timeout: 90
   period: 120
   success_threshold: 1
-  failure_threshold: 3
+  failure_threshold: 5
 instance_count: 1
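The loosened probes give the container more headroom before it is declared failed. As a quick back-of-the-envelope check (a sketch; the exact restart semantics depend on how Azure ML schedules its probes), the worst-case window works out as follows:

    # Worst-case seconds before a never-responding container is declared
    # failed, using the new values from the deployment YAML above.
    initial_delay = 600        # seconds before the first probe fires
    period = 120               # seconds between probes
    failure_threshold = 5      # consecutive failures tolerated (was 3)
    print(initial_delay + failure_threshold * period)  # 1200s, up from 960s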

server/Makefile
@@ -7,13 +7,13 @@ gen-server:
 	touch text_generation/pb/__init__.py
 install-transformers:
-	# Install specific version of transformers
+	# Install specific version of transformers with custom cuda kernels
 	rm transformers || true
-	rm transformers-7302a24535e8dc5637ea5b4e4572fc971d404098 || true
-	curl -L -O https://github.com/OlivierDehaene/transformers/archive/7302a24535e8dc5637ea5b4e4572fc971d404098.zip
-	unzip 7302a24535e8dc5637ea5b4e4572fc971d404098.zip
-	rm 7302a24535e8dc5637ea5b4e4572fc971d404098.zip
-	mv transformers-7302a24535e8dc5637ea5b4e4572fc971d404098 transformers
+	rm transformers-b55f16c5b71aeef47a66a4270e19c154f050a7a7 || true
+	curl -L -O https://github.com/OlivierDehaene/transformers/archive/b55f16c5b71aeef47a66a4270e19c154f050a7a7.zip
+	unzip b55f16c5b71aeef47a66a4270e19c154f050a7a7.zip
+	rm b55f16c5b71aeef47a66a4270e19c154f050a7a7.zip
+	mv transformers-b55f16c5b71aeef47a66a4270e19c154f050a7a7 transformers
 	cd transformers && python setup.py install
 install-torch:
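The install-transformers target pins the fork to an exact commit SHA so the install is reproducible. A minimal Python sketch of the same download-and-install flow (the SHA and URL come from the Makefile above; everything else is illustrative and not part of this commit):

    import io
    import subprocess
    import sys
    import urllib.request
    import zipfile

    SHA = "b55f16c5b71aeef47a66a4270e19c154f050a7a7"
    URL = f"https://github.com/OlivierDehaene/transformers/archive/{SHA}.zip"

    # Download the archive for the pinned commit and extract it in place.
    with urllib.request.urlopen(URL) as resp:
        zipfile.ZipFile(io.BytesIO(resp.read())).extractall()

    # GitHub archives unpack to <repo>-<sha>; install from that directory.
    subprocess.run([sys.executable, "setup.py", "install"],
                   cwd=f"transformers-{SHA}", check=True)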

server/text_generation/models/bloom.py
@@ -38,7 +38,7 @@ class BLOOMSharded(CausalLM):
         self.master = self.rank == 0
         if torch.cuda.is_available():
             device = torch.device(f"cuda:{self.rank}")
-            dtype = torch.float16
+            dtype = torch.bfloat16
         else:
             device = torch.device("cpu")
             dtype = torch.float32
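Switching from float16 to bfloat16 trades mantissa precision for float32's exponent range, which avoids the overflow and NaN issues half precision can hit with large models like BLOOM. A minimal sketch of how the selection could be guarded; torch.cuda.is_bf16_supported() is a real PyTorch call, but the fallback logic is an assumption here, not part of this commit (bfloat16 requires Ampere or newer GPUs):

    import torch

    if torch.cuda.is_available():
        device = torch.device("cuda:0")
        # Prefer bfloat16 where the hardware supports it; fall back to
        # float16 on older GPUs. This guard is illustrative only.
        dtype = torch.bfloat16 if torch.cuda.is_bf16_supported() else torch.float16
    else:
        device = torch.device("cpu")
        dtype = torch.float32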