hf_text-generation-inference/clients/python/text_generation/inference_api.py

import os
import requests

from typing import Dict, Optional, List
from huggingface_hub.utils import build_hf_headers

from text_generation import Client, AsyncClient, __version__
from text_generation.types import DeployedModel
from text_generation.errors import NotSupportedError, parse_error

# Base URL of the Inference API. Can be overridden through the
# `HF_INFERENCE_ENDPOINT` environment variable; falls back to the public
# HuggingFace endpoint when the variable is not set.
INFERENCE_ENDPOINT = os.getenv(
    "HF_INFERENCE_ENDPOINT",
    "https://api-inference.huggingface.co",
)


def deployed_models(headers: Optional[Dict] = None) -> List[DeployedModel]:
    """
    Get all currently deployed models with text-generation-inference-support

    Args:
        headers (`Optional[Dict]`):
            HTTP headers (e.g. authorization) forwarded with the request

    Returns:
        List[DeployedModel]: list of all currently deployed models

    Raises:
        Exception: the error built by `parse_error` when the API does not
            answer with HTTP 200
    """
    resp = requests.get(
        "https://api-inference.huggingface.co/framework/text-generation-inference",
        headers=headers,
        timeout=5,
    )

    # Look at the status code before decoding the body: an error reply is not
    # guaranteed to be JSON (e.g. an HTML page from a gateway), and decoding it
    # first would hide the HTTP error behind a JSON decoding error.
    if resp.status_code != 200:
        try:
            payload = resp.json()
        except ValueError:
            # `Response.json()` decoding errors are `ValueError` subclasses.
            # NOTE(review): assumes `parse_error` reads the message from the
            # "error" key, like the JSON error bodies of the API — confirm.
            payload = {"error": resp.text}
        raise parse_error(resp.status_code, payload)

    return [
        DeployedModel(**raw_deployed_model) for raw_deployed_model in resp.json()
    ]


def check_model_support(repo_id: str, headers: Optional[Dict] = None) -> bool:
    """
    Check if a given model is supported by text-generation-inference

    Args:
        repo_id (`str`):
            Id of repository (e.g. `bigscience/bloom`)
        headers (`Optional[Dict]`):
            HTTP headers (e.g. authorization) forwarded with the request

    Returns:
        bool: whether the model is supported by this client

    Raises:
        Exception: the error built by `parse_error` when the API does not
            answer with HTTP 200
    """
    resp = requests.get(
        f"https://api-inference.huggingface.co/status/{repo_id}",
        headers=headers,
        timeout=5,
    )

    # Look at the status code before decoding the body: an error reply is not
    # guaranteed to be JSON (e.g. an HTML page from a gateway), and decoding it
    # first would hide the HTTP error behind a JSON decoding error.
    if resp.status_code != 200:
        try:
            payload = resp.json()
        except ValueError:
            # `Response.json()` decoding errors are `ValueError` subclasses.
            # NOTE(review): assumes `parse_error` reads the message from the
            # "error" key, like the JSON error bodies of the API — confirm.
            payload = {"error": resp.text}
        raise parse_error(resp.status_code, payload)

    # The status payload names the serving framework; only models served by
    # text-generation-inference can be used with this client.
    framework = resp.json()["framework"]
    return framework == "text-generation-inference"


class InferenceAPIClient(Client):
    """Client used to call the HuggingFace Inference API.

    Works only with the subset of text-generation or text2text-generation
    models that are served using text-generation-inference.

    Example:

    ```python
    >>> from text_generation import InferenceAPIClient

    >>> client = InferenceAPIClient("bigscience/bloomz")
    >>> client.generate("Why is the sky blue?").generated_text
    ' Rayleigh scattering'

    >>> result = ""
    >>> for response in client.generate_stream("Why is the sky blue?"):
    ...     if not response.token.special:
    ...         result += response.token.text
    >>> result
    ' Rayleigh scattering'
    ```
    """

    def __init__(self, repo_id: str, token: Optional[str] = None, timeout: int = 10):
        """
        Build the request headers, verify the model is supported and set up the client

        Args:
            repo_id (`str`):
                Id of repository (e.g. `bigscience/bloom`).
            token (`str`, `optional`):
                The API token to use as HTTP bearer authorization. This is not
                the authentication token. You can find the token in
                https://huggingface.co/settings/token. Alternatively, you can
                find both your organizations and personal API tokens using
                `HfApi().whoami(token)`.
            timeout (`int`):
                Timeout in seconds

        Raises:
            NotSupportedError: if `check_model_support` reports that `repo_id`
                is not served with text-generation-inference
        """
        auth_headers = build_hf_headers(
            token=token,
            library_name="text-generation",
            library_version=__version__,
        )

        # Only a subset of the hub models can be used with this client, so
        # reject unsupported ones before configuring anything.
        is_supported = check_model_support(repo_id, auth_headers)
        if not is_supported:
            raise NotSupportedError(repo_id)

        super().__init__(
            f"{INFERENCE_ENDPOINT}/models/{repo_id}",
            headers=auth_headers,
            timeout=timeout,
        )


class InferenceAPIAsyncClient(AsyncClient):
    """Asynchronous client used to call the HuggingFace Inference API.

    Works only with the subset of text-generation or text2text-generation
    models that are served using text-generation-inference.

    Example:

    ```python
    >>> from text_generation import InferenceAPIAsyncClient

    >>> client = InferenceAPIAsyncClient("bigscience/bloomz")
    >>> response = await client.generate("Why is the sky blue?")
    >>> response.generated_text
    ' Rayleigh scattering'

    >>> result = ""
    >>> async for response in client.generate_stream("Why is the sky blue?"):
    ...     if not response.token.special:
    ...         result += response.token.text
    >>> result
    ' Rayleigh scattering'
    ```
    """

    def __init__(self, repo_id: str, token: Optional[str] = None, timeout: int = 10):
        """
        Build the request headers, verify the model is supported and set up the client

        Args:
            repo_id (`str`):
                Id of repository (e.g. `bigscience/bloom`).
            token (`str`, `optional`):
                The API token to use as HTTP bearer authorization. This is not
                the authentication token. You can find the token in
                https://huggingface.co/settings/token. Alternatively, you can
                find both your organizations and personal API tokens using
                `HfApi().whoami(token)`.
            timeout (`int`):
                Timeout in seconds

        Raises:
            NotSupportedError: if `check_model_support` reports that `repo_id`
                is not served with text-generation-inference
        """
        auth_headers = build_hf_headers(
            token=token,
            library_name="text-generation",
            library_version=__version__,
        )

        # Only a subset of the hub models can be used with this client, so
        # reject unsupported ones before configuring anything.
        is_supported = check_model_support(repo_id, auth_headers)
        if not is_supported:
            raise NotSupportedError(repo_id)

        super().__init__(
            f"{INFERENCE_ENDPOINT}/models/{repo_id}",
            headers=auth_headers,
            timeout=timeout,
        )
feat(clients): Python client (#103) 2023-03-07 10:52:22 -07:00			`import os`
			`import requests`

fix(python-client): add auth headers to is supported requests (#234) 2023-04-25 05:55:26 -06:00			`from typing import Dict, Optional, List`
feat(clients): Python client (#103) 2023-03-07 10:52:22 -07:00			`from huggingface_hub.utils import build_hf_headers`

			`from text_generation import Client, AsyncClient, __version__`
feat(python-client): get list of currently deployed tgi models using the inference API (#191) 2023-04-17 10:43:24 -06:00			`from text_generation.types import DeployedModel`
			`from text_generation.errors import NotSupportedError, parse_error`
feat(clients): Python client (#103) 2023-03-07 10:52:22 -07:00
			`INFERENCE_ENDPOINT = os.environ.get(`
			`"HF_INFERENCE_ENDPOINT", "https://api-inference.huggingface.co"`
			`)`


fix(python-client): add auth headers to is supported requests (#234) 2023-04-25 05:55:26 -06:00			`def deployed_models(headers: Optional[Dict] = None) -> List[DeployedModel]:`
feat(clients): Python client (#103) 2023-03-07 10:52:22 -07:00			`"""`
feat(python-client): get list of currently deployed tgi models using the inference API (#191) 2023-04-17 10:43:24 -06:00			`Get all currently deployed models with text-generation-inference-support`
feat(clients): Python client (#103) 2023-03-07 10:52:22 -07:00
			`Returns:`
feat(python-client): get list of currently deployed tgi models using the inference API (#191) 2023-04-17 10:43:24 -06:00			`List[DeployedModel]: list of all currently deployed models`
feat(clients): Python client (#103) 2023-03-07 10:52:22 -07:00			`"""`
feat(python-client): get list of currently deployed tgi models using the inference API (#191) 2023-04-17 10:43:24 -06:00			`resp = requests.get(`
feat: add ruff and resolve issue (#2262) * feat: add ruff and resolve issue * fix: update client exports and adjust after rebase * fix: adjust syntax to avoid circular import * fix: adjust client ruff settings * fix: lint and refactor import check and avoid model enum as global names * fix: improve fbgemm_gpu check and lints * fix: update lints * fix: prefer comparing model enum over str * fix: adjust lints and ignore specific rules * fix: avoid unneeded quantize check 2024-07-26 08:29:09 -06:00			`"https://api-inference.huggingface.co/framework/text-generation-inference",`
fix(python-client): add auth headers to is supported requests (#234) 2023-04-25 05:55:26 -06:00			`headers=headers,`
feat(python-client): get list of currently deployed tgi models using the inference API (#191) 2023-04-17 10:43:24 -06:00			`timeout=5,`
			`)`

			`payload = resp.json()`
			`if resp.status_code != 200:`
			`raise parse_error(resp.status_code, payload)`

			`models = [DeployedModel(**raw_deployed_model) for raw_deployed_model in payload]`
			`return models`

feat(clients): Python client (#103) 2023-03-07 10:52:22 -07:00
fix(python-client): add auth headers to is supported requests (#234) 2023-04-25 05:55:26 -06:00			`def check_model_support(repo_id: str, headers: Optional[Dict] = None) -> bool:`
feat(python-client): get list of currently deployed tgi models using the inference API (#191) 2023-04-17 10:43:24 -06:00			`"""`
			`Check if a given model is supported by text-generation-inference`

			`Returns:`
			`bool: whether the model is supported by this client`
			`"""`
			`resp = requests.get(`
			`f"https://api-inference.huggingface.co/status/{repo_id}",`
fix(python-client): add auth headers to is supported requests (#234) 2023-04-25 05:55:26 -06:00			`headers=headers,`
feat(clients): Python client (#103) 2023-03-07 10:52:22 -07:00			`timeout=5,`
			`)`

feat(python-client): get list of currently deployed tgi models using the inference API (#191) 2023-04-17 10:43:24 -06:00			`payload = resp.json()`
			`if resp.status_code != 200:`
			`raise parse_error(resp.status_code, payload)`

			`framework = payload["framework"]`
			`supported = framework == "text-generation-inference"`
			`return supported`
feat(clients): Python client (#103) 2023-03-07 10:52:22 -07:00

			`class InferenceAPIClient(Client):`
			`"""Client to make calls to the HuggingFace Inference API.`

			`Only supports a subset of the available text-generation or text2text-generation models that are served using`
			`text-generation-inference`

			`Example:`

			```python
			`>>> from text_generation import InferenceAPIClient`

			`>>> client = InferenceAPIClient("bigscience/bloomz")`
			`>>> client.generate("Why is the sky blue?").generated_text`
			`' Rayleigh scattering'`

			`>>> result = ""`
			`>>> for response in client.generate_stream("Why is the sky blue?"):`
			`>>> if not response.token.special:`
			`>>> result += response.token.text`
			`>>> result`
			`' Rayleigh scattering'`
			```
			`"""`

			`def __init__(self, repo_id: str, token: Optional[str] = None, timeout: int = 10):`
			`"""`
			`Init headers and API information`

			`Args:`
			repo_id (`str`):
			Id of repository (e.g. `bigscience/bloom`).
			token (`str`, `optional`):
			`The API token to use as HTTP bearer authorization. This is not`
			`the authentication token. You can find the token in`
			`https://huggingface.co/settings/token. Alternatively, you can`
			`find both your organizations and personal API tokens using`
			`HfApi().whoami(token)`.
			timeout (`int`):
			`Timeout in seconds`
			`"""`

			`headers = build_hf_headers(`
			`token=token, library_name="text-generation", library_version=__version__`
			`)`
fix(python-client): add auth headers to is supported requests (#234) 2023-04-25 05:55:26 -06:00
			`# Text Generation Inference client only supports a subset of the available hub models`
			`if not check_model_support(repo_id, headers):`
			`raise NotSupportedError(repo_id)`

feat(clients): Python client (#103) 2023-03-07 10:52:22 -07:00			`base_url = f"{INFERENCE_ENDPOINT}/models/{repo_id}"`

feat(python-client): add cookies to Client constructors and requests (#132) I have a use case where we need to pass cookies (for auth reasons) to an internally hosted server. Note: I couldn't get the client tests to pass - do you need to have an HF token? ```python FAILED tests/test_client.py::test_generate - text_generation.errors.BadRequestError: Authorization header is correct, but the token seems invalid ``` 2023-03-23 11:01:01 -06:00			`super(InferenceAPIClient, self).__init__(`
			`base_url, headers=headers, timeout=timeout`
			`)`
feat(clients): Python client (#103) 2023-03-07 10:52:22 -07:00

			`class InferenceAPIAsyncClient(AsyncClient):`
			`"""Aynschronous Client to make calls to the HuggingFace Inference API.`

			`Only supports a subset of the available text-generation or text2text-generation models that are served using`
			`text-generation-inference`

			`Example:`

			```python
			`>>> from text_generation import InferenceAPIAsyncClient`

			`>>> client = InferenceAPIAsyncClient("bigscience/bloomz")`
			`>>> response = await client.generate("Why is the sky blue?")`
			`>>> response.generated_text`
			`' Rayleigh scattering'`

			`>>> result = ""`
			`>>> async for response in client.generate_stream("Why is the sky blue?"):`
			`>>> if not response.token.special:`
			`>>> result += response.token.text`
			`>>> result`
			`' Rayleigh scattering'`
			```
			`"""`

			`def __init__(self, repo_id: str, token: Optional[str] = None, timeout: int = 10):`
			`"""`
			`Init headers and API information`

			`Args:`
			repo_id (`str`):
			Id of repository (e.g. `bigscience/bloom`).
			token (`str`, `optional`):
			`The API token to use as HTTP bearer authorization. This is not`
			`the authentication token. You can find the token in`
			`https://huggingface.co/settings/token. Alternatively, you can`
			`find both your organizations and personal API tokens using`
			`HfApi().whoami(token)`.
			timeout (`int`):
			`Timeout in seconds`
			`"""`
fix(python-client): add auth headers to is supported requests (#234) 2023-04-25 05:55:26 -06:00			`headers = build_hf_headers(`
			`token=token, library_name="text-generation", library_version=__version__`
			`)`
feat(clients): Python client (#103) 2023-03-07 10:52:22 -07:00
			`# Text Generation Inference client only supports a subset of the available hub models`
fix(python-client): add auth headers to is supported requests (#234) 2023-04-25 05:55:26 -06:00			`if not check_model_support(repo_id, headers):`
feat(clients): Python client (#103) 2023-03-07 10:52:22 -07:00			`raise NotSupportedError(repo_id)`

			`base_url = f"{INFERENCE_ENDPOINT}/models/{repo_id}"`

feat(python-client): add cookies to Client constructors and requests (#132) I have a use case where we need to pass cookies (for auth reasons) to an internally hosted server. Note: I couldn't get the client tests to pass - do you need to have an HF token? ```python FAILED tests/test_client.py::test_generate - text_generation.errors.BadRequestError: Authorization header is correct, but the token seems invalid ``` 2023-03-23 11:01:01 -06:00			`super(InferenceAPIAsyncClient, self).__init__(`
			`base_url, headers=headers, timeout=timeout`
			`)`