From b927244eb57d69fd92ecf36525f97e48f5b8b54f Mon Sep 17 00:00:00 2001
From: OlivierDehaene
Date: Mon, 17 Apr 2023 18:43:24 +0200
Subject: [PATCH] feat(python-client): get list of currently deployed tgi models using the inference API (#191)

---
 README.md                                   |  2 +-
 clients/python/README.md                    | 13 ++++
 clients/python/pyproject.toml               |  2 +-
 clients/python/tests/test_inference_api.py  | 16 +++--
 .../python/text_generation/inference_api.py | 63 +++++++++++--------
 clients/python/text_generation/types.py     |  6 ++
 6 files changed, 70 insertions(+), 32 deletions(-)

diff --git a/README.md b/README.md
index 60c1a6ba..0c63f36b 100644
--- a/README.md
+++ b/README.md
@@ -22,7 +22,7 @@ to power LLMs api-inference widgets.
 ## Table of contents
 
 - [Features](#features)
-- [Officially Supported Models](#officially-supported-models)
+- [Optimized Architectures](#optimized-architectures)
 - [Get Started](#get-started)
   - [Docker](#docker)
   - [API Documentation](#api-documentation)
diff --git a/clients/python/README.md b/clients/python/README.md
index f509e65c..99ff185a 100644
--- a/clients/python/README.md
+++ b/clients/python/README.md
@@ -52,6 +52,14 @@ print(text)
 # ' Rayleigh scattering'
 ```
 
+Check all currently deployed models on the Hugging Face Inference API with `Text Generation` support:
+
+```python
+from text_generation.inference_api import deployed_models
+
+print(deployed_models())
+```
+
 ### Hugging Face Inference Endpoint usage
 
 ```python
@@ -193,4 +201,9 @@ class StreamResponse:
     # Generation details
     # Only available when the generation is finished
     details: Optional[StreamDetails]
+
+# Inference API currently deployed model
+class DeployedModel:
+    model_id: str
+    sha: str
 ```
\ No newline at end of file
diff --git a/clients/python/pyproject.toml b/clients/python/pyproject.toml
index d07f1dbc..1af5de99 100644
--- a/clients/python/pyproject.toml
+++ b/clients/python/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "text-generation"
-version = "0.4.1"
+version = "0.5.0"
 description = "Hugging Face Text Generation Python Client"
 license = "Apache-2.0"
 authors = ["Olivier Dehaene "]
diff --git a/clients/python/tests/test_inference_api.py b/clients/python/tests/test_inference_api.py
index 79e503a3..59297c26 100644
--- a/clients/python/tests/test_inference_api.py
+++ b/clients/python/tests/test_inference_api.py
@@ -6,12 +6,20 @@ from text_generation import (
     Client,
     AsyncClient,
 )
-from text_generation.errors import NotSupportedError
-from text_generation.inference_api import get_supported_models
+from text_generation.errors import NotSupportedError, NotFoundError
+from text_generation.inference_api import check_model_support, deployed_models
 
 
-def test_get_supported_models():
-    assert isinstance(get_supported_models(), list)
+def test_check_model_support(flan_t5_xxl, unsupported_model, fake_model):
+    assert check_model_support(flan_t5_xxl)
+    assert not check_model_support(unsupported_model)
+
+    with pytest.raises(NotFoundError):
+        check_model_support(fake_model)
+
+
+def test_deployed_models():
+    deployed_models()
 
 
 def test_client(flan_t5_xxl):
diff --git a/clients/python/text_generation/inference_api.py b/clients/python/text_generation/inference_api.py
index eb70b3d1..31635e6e 100644
--- a/clients/python/text_generation/inference_api.py
+++ b/clients/python/text_generation/inference_api.py
@@ -1,44 +1,57 @@
 import os
 import requests
-import base64
-import json
-import warnings
 
-from typing import List, Optional
+from typing import Optional, List
 from huggingface_hub.utils import build_hf_headers
 
 from text_generation import Client, AsyncClient, __version__
-from text_generation.errors import NotSupportedError
+from text_generation.types import DeployedModel
+from text_generation.errors import NotSupportedError, parse_error
 
 INFERENCE_ENDPOINT = os.environ.get(
     "HF_INFERENCE_ENDPOINT", "https://api-inference.huggingface.co"
 )
 
-SUPPORTED_MODELS = None
-
 
-def get_supported_models() -> Optional[List[str]]:
+def deployed_models() -> List[DeployedModel]:
     """
-    Get the list of supported text-generation models from GitHub
+    Get all currently deployed models with text-generation-inference support
 
     Returns:
-        Optional[List[str]]: supported models list or None if unable to get the list from GitHub
+        List[DeployedModel]: list of all currently deployed models
     """
-    global SUPPORTED_MODELS
-    if SUPPORTED_MODELS is not None:
-        return SUPPORTED_MODELS
-
-    response = requests.get(
-        "https://api.github.com/repos/huggingface/text-generation-inference/contents/supported_models.json",
+    resp = requests.get(
+        f"https://api-inference.huggingface.co/framework/text-generation-inference",
         timeout=5,
     )
 
-    if response.status_code == 200:
-        file_content = response.json()["content"]
-        SUPPORTED_MODELS = json.loads(base64.b64decode(file_content).decode("utf-8"))
-        return SUPPORTED_MODELS
-    warnings.warn("Could not retrieve list of supported models.")
-    return None
+    payload = resp.json()
+    if resp.status_code != 200:
+        raise parse_error(resp.status_code, payload)
+
+    models = [DeployedModel(**raw_deployed_model) for raw_deployed_model in payload]
+    return models
+
+
+def check_model_support(repo_id: str) -> bool:
+    """
+    Check if a given model is supported by text-generation-inference
+
+    Returns:
+        bool: whether the model is supported by this client
+    """
+    resp = requests.get(
+        f"https://api-inference.huggingface.co/status/{repo_id}",
+        timeout=5,
+    )
+
+    payload = resp.json()
+    if resp.status_code != 200:
+        raise parse_error(resp.status_code, payload)
+
+    framework = payload["framework"]
+    supported = framework == "text-generation-inference"
+    return supported
 
 
 class InferenceAPIClient(Client):
@@ -83,8 +96,7 @@ class InferenceAPIClient(Client):
         """
 
         # Text Generation Inference client only supports a subset of the available hub models
-        supported_models = get_supported_models()
-        if supported_models is not None and repo_id not in supported_models:
+        if not check_model_support(repo_id):
             raise NotSupportedError(repo_id)
 
         headers = build_hf_headers(
@@ -140,8 +152,7 @@ class InferenceAPIAsyncClient(AsyncClient):
         """
 
         # Text Generation Inference client only supports a subset of the available hub models
-        supported_models = get_supported_models()
-        if supported_models is not None and repo_id not in supported_models:
+        if not check_model_support(repo_id):
             raise NotSupportedError(repo_id)
 
         headers = build_hf_headers(
diff --git a/clients/python/text_generation/types.py b/clients/python/text_generation/types.py
index 21a9849b..f3f9dcb5 100644
--- a/clients/python/text_generation/types.py
+++ b/clients/python/text_generation/types.py
@@ -223,3 +223,9 @@ class StreamResponse(BaseModel):
     # Generation details
     # Only available when the generation is finished
     details: Optional[StreamDetails]
+
+
+# Inference API currently deployed model
+class DeployedModel(BaseModel):
+    model_id: str
+    sha: str
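
Not part of the patch above: a minimal usage sketch of how the helpers introduced by this change fit together. The model id is illustrative and assumes that model is currently served by text-generation-inference on the Inference API.

```python
from text_generation import InferenceAPIClient
from text_generation.inference_api import check_model_support, deployed_models

# List every model currently deployed behind text-generation-inference
for model in deployed_models():
    print(model.model_id, model.sha)

# Verify a single repository is supported before creating a client for it
if check_model_support("bigscience/bloomz"):
    client = InferenceAPIClient("bigscience/bloomz")
    print(client.generate("Why is the sky blue?", max_new_tokens=20).generated_text)
```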