diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index d89f0982..8304c8d1 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -35,6 +35,12 @@ jobs: username: ${{ secrets.TAILSCALE_DOCKER_USERNAME }} password: ${{ secrets.TAILSCALE_DOCKER_PASSWORD }} registry: registry.internal.huggingface.tech + - name: Login to Azure Container Registry + uses: docker/login-action@v2.1.0 + with: + username: ${{ secrets.AZURE_DOCKER_USERNAME }} + password: ${{ secrets.AZURE_DOCKER_PASSWORD }} + registry: db4c2190dd824d1f950f5d1555fbadf0.azurecr.io - name: Extract metadata (tags, labels) for Docker id: meta uses: docker/metadata-action@v4.3.0 @@ -44,6 +50,7 @@ jobs: images: | ghcr.io/huggingface/text-generation-inference registry.internal.huggingface.tech/api-inference/community/text-generation-inference + db4c2190dd824d1f950f5d1555fbadf0.azurecr.io/text-generation-inference tags: | type=semver,pattern={{version}} type=semver,pattern={{major}}.{{minor}} diff --git a/aml/README.md b/aml/README.md index 959e2942..8e78b0ab 100644 --- a/aml/README.md +++ b/aml/README.md @@ -1,8 +1,15 @@ -```shell -docker build . -t db4c2190dd824d1f950f5d1555fbadf0.azurecr.io/text-generation:0.1 -docker push db4c2190dd824d1f950f5d1555fbadf0.azurecr.io/text-generation:0.1 +# Azure ML endpoint +## Create all resources + +```shell az ml model create -f model.yaml -g HuggingFace-BLOOM-ModelPage -w HuggingFace az ml online-endpoint create -f endpoint.yaml -g HuggingFace-BLOOM-ModelPage -w HuggingFace az ml online-deployment create -f deployment.yaml -g HuggingFace-BLOOM-ModelPage -w HuggingFace +``` + +## Update deployment + +```shell +az ml online-deployment update -f deployment.yaml -g HuggingFace-BLOOM-ModelPage -w HuggingFace ``` \ No newline at end of file diff --git a/aml/deployment.yaml b/aml/deployment.yaml index 16ef3dc7..320eba24 100644 --- a/aml/deployment.yaml +++ b/aml/deployment.yaml @@ -1,14 +1,14 @@ $schema: https://azuremlschemas.azureedge.net/latest/managedOnlineDeployment.schema.json name: bloom-deployment endpoint_name: bloom-inference -model: azureml:bloom:1 +model: azureml:bloom-safetensors:1 model_mount_path: /var/azureml-model environment_variables: - HUGGINGFACE_HUB_CACHE: /var/azureml-model/bloom + WEIGHTS_CACHE_OVERRIDE: /var/azureml-model/bloom-safetensors MODEL_ID: bigscience/bloom NUM_SHARD: 8 environment: - image: db4c2190dd824d1f950f5d1555fbadf0.azurecr.io/text-generation-inference:0.3.1 + image: db4c2190dd824d1f950f5d1555fbadf0.azurecr.io/text-generation-inference:0.2.0 inference_config: liveness_route: port: 80 diff --git a/aml/model.yaml b/aml/model.yaml index bd490f1a..bfcdd33f 100644 --- a/aml/model.yaml +++ b/aml/model.yaml @@ -1,5 +1,3 @@ $schema: https://azuremlschemas.azureedge.net/latest/model.schema.json name: bloom-safetensors -version: 1 -path: ./bloom-safetensors -type: custom_model +path: /data/bloom-safetensors