Fix AWS Sagemaker indentation, typo and header level
This commit is contained in:
parent
2358c2bb54
commit
47c01cb048
|
@ -139,11 +139,11 @@ for message in chat_completion:
|
||||||
|
|
||||||
TGI can be deployed on various cloud providers for scalable and robust text generation. One such provider is Amazon SageMaker, which has recently added support for TGI. Here's how you can deploy TGI on Amazon SageMaker:
|
TGI can be deployed on various cloud providers for scalable and robust text generation. One such provider is Amazon SageMaker, which has recently added support for TGI. Here's how you can deploy TGI on Amazon SageMaker:
|
||||||
|
|
||||||
## Amazon SageMaker
|
### Amazon SageMaker
|
||||||
|
|
||||||
To enable the Messages API in Amazon SageMaker you need to set the environment variable `MESSAGES_API_ENABLED=true`.
|
To enable the Messages API in Amazon SageMaker you need to set the environment variable `MESSAGES_API_ENABLED=true`.
|
||||||
|
|
||||||
This will modify the `/invocations` route to accept Messages dictonaries consisting out of role and content. See the example below on how to deploy Llama with the new Messages API.
|
This will modify the `/invocations` route to accept Messages dictionaries consisting of role and content. See the example below on how to deploy Llama with the new Messages API.
|
||||||
|
|
||||||
```python
|
```python
|
||||||
import json
|
import json
|
||||||
|
@ -152,35 +152,35 @@ import boto3
|
||||||
from sagemaker.huggingface import HuggingFaceModel, get_huggingface_llm_image_uri
|
from sagemaker.huggingface import HuggingFaceModel, get_huggingface_llm_image_uri
|
||||||
|
|
||||||
try:
|
try:
|
||||||
role = sagemaker.get_execution_role()
|
role = sagemaker.get_execution_role()
|
||||||
except ValueError:
|
except ValueError:
|
||||||
iam = boto3.client('iam')
|
iam = boto3.client('iam')
|
||||||
role = iam.get_role(RoleName='sagemaker_execution_role')['Role']['Arn']
|
role = iam.get_role(RoleName='sagemaker_execution_role')['Role']['Arn']
|
||||||
|
|
||||||
# Hub Model configuration. https://huggingface.co/models
|
# Hub Model configuration. https://huggingface.co/models
|
||||||
hub = {
|
hub = {
|
||||||
'HF_MODEL_ID':'HuggingFaceH4/zephyr-7b-beta',
|
'HF_MODEL_ID':'HuggingFaceH4/zephyr-7b-beta',
|
||||||
'SM_NUM_GPUS': json.dumps(1),
|
'SM_NUM_GPUS': json.dumps(1),
|
||||||
'MESSAGES_API_ENABLED': True
|
'MESSAGES_API_ENABLED': True
|
||||||
}
|
}
|
||||||
|
|
||||||
# create Hugging Face Model Class
|
# create Hugging Face Model Class
|
||||||
huggingface_model = HuggingFaceModel(
|
huggingface_model = HuggingFaceModel(
|
||||||
image_uri=get_huggingface_llm_image_uri("huggingface",version="1.4.0"),
|
image_uri=get_huggingface_llm_image_uri("huggingface",version="1.4.0"),
|
||||||
env=hub,
|
env=hub,
|
||||||
role=role,
|
role=role,
|
||||||
)
|
)
|
||||||
|
|
||||||
# deploy model to SageMaker Inference
|
# deploy model to SageMaker Inference
|
||||||
predictor = huggingface_model.deploy(
|
predictor = huggingface_model.deploy(
|
||||||
initial_instance_count=1,
|
initial_instance_count=1,
|
||||||
instance_type="ml.g5.2xlarge",
|
instance_type="ml.g5.2xlarge",
|
||||||
container_startup_health_check_timeout=300,
|
container_startup_health_check_timeout=300,
|
||||||
)
|
)
|
||||||
|
|
||||||
# send request
|
# send request
|
||||||
predictor.predict({
|
predictor.predict({
|
||||||
"messages": [
|
"messages": [
|
||||||
{"role": "system", "content": "You are a helpful assistant." },
|
{"role": "system", "content": "You are a helpful assistant." },
|
||||||
{"role": "user", "content": "What is deep learning?"}
|
{"role": "user", "content": "What is deep learning?"}
|
||||||
]
|
]
|
||||||
|
|
Loading…
Reference in New Issue