Updating the schema thing + redocly.

The OpenAPI document is now produced from a module-level utoipa `ApiDoc` via a `print-schema` subcommand, and the documentation check lints the generated `docs/openapi.json` with `redocly lint` instead of `swagger-cli validate`.
parent fa687dd340
commit e3418c3340
@@ -0,0 +1,79 @@
+# This file instructs Redocly's linter to ignore the rules contained for specific parts of your API.
+# See https://redoc.ly/docs/cli/ for more information.
+docs/openapi.json:
+  no-empty-servers:
+    - '#/openapi'
+  spec:
+    - >-
+      #/components/schemas/GenerateParameters/properties/best_of/exclusiveMinimum
+    - >-
+      #/components/schemas/GenerateParameters/properties/frequency_penalty/exclusiveMinimum
+    - '#/components/schemas/GenerateParameters/properties/grammar/nullable'
+    - >-
+      #/components/schemas/GenerateParameters/properties/repetition_penalty/exclusiveMinimum
+    - '#/components/schemas/GenerateParameters/properties/seed/exclusiveMinimum'
+    - >-
+      #/components/schemas/GenerateParameters/properties/temperature/exclusiveMinimum
+    - '#/components/schemas/GenerateParameters/properties/top_k/exclusiveMinimum'
+    - >-
+      #/components/schemas/GenerateParameters/properties/top_n_tokens/exclusiveMinimum
+    - '#/components/schemas/GenerateParameters/properties/top_p/exclusiveMinimum'
+    - >-
+      #/components/schemas/GenerateParameters/properties/typical_p/exclusiveMinimum
+    - '#/components/schemas/GenerateResponse/properties/details/nullable'
+    - '#/components/schemas/StreamResponse/properties/details/nullable'
+    - '#/components/schemas/ChatRequest/properties/response_format/nullable'
+    - '#/components/schemas/ChatRequest/properties/tool_choice/nullable'
+    - '#/components/schemas/ToolChoice/nullable'
+    - '#/components/schemas/ChatCompletionComplete/properties/logprobs/nullable'
+    - '#/components/schemas/ChatCompletionChoice/properties/logprobs/nullable'
+  no-invalid-media-type-examples:
+    - '#/paths/~1/post/responses/422/content/application~1json/example'
+    - '#/paths/~1/post/responses/424/content/application~1json/example'
+    - '#/paths/~1/post/responses/429/content/application~1json/example'
+    - '#/paths/~1/post/responses/500/content/application~1json/example'
+    - '#/paths/~1generate/post/responses/422/content/application~1json/example'
+    - '#/paths/~1generate/post/responses/424/content/application~1json/example'
+    - '#/paths/~1generate/post/responses/429/content/application~1json/example'
+    - '#/paths/~1generate/post/responses/500/content/application~1json/example'
+    - >-
+      #/paths/~1generate_stream/post/responses/422/content/text~1event-stream/example
+    - >-
+      #/paths/~1generate_stream/post/responses/424/content/text~1event-stream/example
+    - >-
+      #/paths/~1generate_stream/post/responses/429/content/text~1event-stream/example
+    - >-
+      #/paths/~1generate_stream/post/responses/500/content/text~1event-stream/example
+    - '#/paths/~1tokenize/post/responses/404/content/application~1json/example'
+    - >-
+      #/paths/~1v1~1chat~1completions/post/responses/422/content/application~1json/example
+    - >-
+      #/paths/~1v1~1chat~1completions/post/responses/424/content/application~1json/example
+    - >-
+      #/paths/~1v1~1chat~1completions/post/responses/429/content/application~1json/example
+    - >-
+      #/paths/~1v1~1chat~1completions/post/responses/500/content/application~1json/example
+    - >-
+      #/paths/~1v1~1completions/post/responses/422/content/application~1json/example
+    - >-
+      #/paths/~1v1~1completions/post/responses/424/content/application~1json/example
+    - >-
+      #/paths/~1v1~1completions/post/responses/429/content/application~1json/example
+    - >-
+      #/paths/~1v1~1completions/post/responses/500/content/application~1json/example
+  operation-4xx-response:
+    - '#/paths/~1health/get/responses'
+    - '#/paths/~1info/get/responses'
+    - '#/paths/~1metrics/get/responses'
+  no-unused-components:
+    - '#/components/schemas/Completion'
+  security-defined:
+    - '#/paths/~1/post'
+    - '#/paths/~1generate/post'
+    - '#/paths/~1generate_stream/post'
+    - '#/paths/~1health/get'
+    - '#/paths/~1info/get'
+    - '#/paths/~1metrics/get'
+    - '#/paths/~1tokenize/post'
+    - '#/paths/~1v1~1chat~1completions/post'
+    - '#/paths/~1v1~1completions/post'
@@ -113,19 +113,14 @@ async fn main() -> Result<(), RouterError> {
         max_client_batch_size,
     } = args;

-    let print_schema_command = match command {
-        Some(Commands::PrintSchema) => true,
-        None => {
-            // only init logging if we are not running the print schema command
-            text_generation_router::logging::init_logging(
-                otlp_endpoint,
-                otlp_service_name,
-                json_output,
-            );
-            false
-        }
+    if let Some(Commands::PrintSchema) = command {
+        use utoipa::OpenApi;
+        let api_doc = text_generation_router::server::ApiDoc::openapi();
+        let api_doc = serde_json::to_string_pretty(&api_doc).unwrap();
+        println!("{}", api_doc);
+        std::process::exit(0);
     };
-    // Launch Tokio runtime
+    text_generation_router::logging::init_logging(otlp_endpoint, otlp_service_name, json_output);

     // Validate args
     if max_input_tokens >= max_total_tokens {
@@ -187,7 +182,6 @@ async fn main() -> Result<(), RouterError> {
         messages_api_enabled,
         disable_grammar_support,
         max_client_batch_size,
-        print_schema_command,
     )
     .await?;
     Ok(())
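In isolation, the new `print-schema` path boils down to deriving an OpenAPI document with utoipa, pretty-printing it, and exiting before any logging or server setup happens. Below is a minimal, self-contained sketch of that flow; it assumes the `utoipa` and `serde_json` crates are available, and the `health` handler is only a stand-in, not something taken from this diff.

// Minimal sketch of the print-schema flow (assumes utoipa and serde_json as dependencies).
use utoipa::OpenApi;

// Stand-in handler so the derived document has one path to describe.
#[allow(dead_code)]
#[utoipa::path(get, path = "/health", responses((status = 200, description = "Everything is working fine")))]
async fn health() {}

#[derive(OpenApi)]
#[openapi(paths(health))]
struct ApiDoc;

fn main() {
    // Same shape as the new branch in main(): build the spec, pretty-print it, exit.
    let api_doc = ApiDoc::openapi();
    let api_doc = serde_json::to_string_pretty(&api_doc).unwrap();
    println!("{}", api_doc);
    std::process::exit(0);
}

The local `use utoipa::OpenApi;` inside the new branch is what brings the derived `openapi()` constructor into scope; the printed JSON is presumably what the Python helper further down writes into docs/openapi.json before linting it.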
@@ -1580,16 +1580,11 @@
         "type": "object",
         "required": [
           "model_id",
-          "model_dtype",
-          "model_device_type",
           "max_concurrent_requests",
           "max_best_of",
           "max_stop_sequences",
           "max_input_tokens",
           "max_total_tokens",
-          "waiting_served_ratio",
-          "max_batch_total_tokens",
-          "max_waiting_tokens",
           "validation_workers",
           "max_client_batch_size",
           "router",
@@ -1601,18 +1596,6 @@
             "example": "null",
             "nullable": true
           },
-          "max_batch_size": {
-            "type": "integer",
-            "example": "null",
-            "nullable": true,
-            "minimum": 0
-          },
-          "max_batch_total_tokens": {
-            "type": "integer",
-            "format": "int32",
-            "example": "32000",
-            "minimum": 0
-          },
           "max_best_of": {
             "type": "integer",
             "example": "2",
@@ -1644,19 +1627,6 @@
             "example": "2048",
             "minimum": 0
           },
-          "max_waiting_tokens": {
-            "type": "integer",
-            "example": "20",
-            "minimum": 0
-          },
-          "model_device_type": {
-            "type": "string",
-            "example": "cuda"
-          },
-          "model_dtype": {
-            "type": "string",
-            "example": "torch.float16"
-          },
           "model_id": {
             "type": "string",
             "description": "Model info",
@@ -1690,11 +1660,6 @@
           "version": {
             "type": "string",
             "example": "0.5.0"
-          },
-          "waiting_served_ratio": {
-            "type": "number",
-            "format": "float",
-            "example": "1.2"
           }
         }
       },
@@ -1396,6 +1396,90 @@ async fn metrics(prom_handle: Extension<PrometheusHandle>) -> String {
 #[derive(Clone, Debug)]
 pub(crate) struct ComputeType(String);

+// OpenAPI documentation
+#[derive(OpenApi)]
+#[openapi(
+    paths(
+        health,
+        get_model_info,
+        compat_generate,
+        generate,
+        generate_stream,
+        chat_completions,
+        completions,
+        tokenize,
+        metrics,
+    ),
+    components(
+        schemas(
+            Info,
+            CompatGenerateRequest,
+            GenerateRequest,
+            GrammarType,
+            ChatRequest,
+            Message,
+            MessageContent,
+            MessageChunk,
+            Url,
+            FunctionName,
+            OutputMessage,
+            TextMessage,
+            ToolCallMessage,
+            ToolCallDelta,
+            ChatCompletionComplete,
+            ChatCompletionChoice,
+            ChatCompletionDelta,
+            ChatCompletionChunk,
+            ChatCompletionLogprob,
+            ChatCompletionLogprobs,
+            ChatCompletionTopLogprob,
+            ChatCompletion,
+            CompletionRequest,
+            CompletionComplete,
+            Chunk,
+            Completion,
+            CompletionFinal,
+            Prompt,
+            GenerateParameters,
+            PrefillToken,
+            Token,
+            GenerateResponse,
+            TokenizeResponse,
+            SimpleToken,
+            BestOfSequence,
+            Details,
+            FinishReason,
+            StreamResponse,
+            StreamDetails,
+            ErrorResponse,
+            GrammarType,
+            Usage,
+            DeltaToolCall,
+            ToolType,
+            Tool,
+            ToolCall,
+            Function,
+            FunctionDefinition,
+            ToolChoice,
+        )
+    ),
+    tags(
+        (name = "Text Generation Inference", description = "Hugging Face Text Generation Inference API")
+    ),
+    info(
+        title = "Text Generation Inference",
+        license(
+            name = "Apache 2.0",
+            url = "https://www.apache.org/licenses/LICENSE-2.0"
+        )
+    )
+)]
+pub struct ApiDoc;
+
+pub fn schema() -> ApiDoc {
+    ApiDoc
+}
+
 /// Serving method
 #[allow(clippy::too_many_arguments)]
 pub async fn run(
@@ -1420,95 +1504,7 @@ pub async fn run(
     messages_api_enabled: bool,
     grammar_support: bool,
     max_client_batch_size: usize,
-    print_schema_command: bool,
 ) -> Result<(), WebServerError> {
-    // OpenAPI documentation
-    #[derive(OpenApi)]
-    #[openapi(
-        paths(
-            health,
-            get_model_info,
-            compat_generate,
-            generate,
-            generate_stream,
-            chat_completions,
-            completions,
-            tokenize,
-            metrics,
-        ),
-        components(
-            schemas(
-                Info,
-                CompatGenerateRequest,
-                GenerateRequest,
-                GrammarType,
-                ChatRequest,
-                Message,
-                MessageContent,
-                MessageChunk,
-                Url,
-                FunctionName,
-                OutputMessage,
-                TextMessage,
-                ToolCallMessage,
-                ToolCallDelta,
-                ChatCompletionComplete,
-                ChatCompletionChoice,
-                ChatCompletionDelta,
-                ChatCompletionChunk,
-                ChatCompletionLogprob,
-                ChatCompletionLogprobs,
-                ChatCompletionTopLogprob,
-                ChatCompletion,
-                CompletionRequest,
-                CompletionComplete,
-                Chunk,
-                Completion,
-                CompletionFinal,
-                Prompt,
-                GenerateParameters,
-                PrefillToken,
-                Token,
-                GenerateResponse,
-                TokenizeResponse,
-                SimpleToken,
-                BestOfSequence,
-                Details,
-                FinishReason,
-                StreamResponse,
-                StreamDetails,
-                ErrorResponse,
-                GrammarType,
-                Usage,
-                DeltaToolCall,
-                ToolType,
-                Tool,
-                ToolCall,
-                Function,
-                FunctionDefinition,
-                ToolChoice,
-            )
-        ),
-        tags(
-            (name = "Text Generation Inference", description = "Hugging Face Text Generation Inference API")
-        ),
-        info(
-            title = "Text Generation Inference",
-            license(
-                name = "Apache 2.0",
-                url = "https://www.apache.org/licenses/LICENSE-2.0"
-            )
-        )
-    )]
-    struct ApiDoc;
-
-    // Create state
-    if print_schema_command {
-        let api_doc = ApiDoc::openapi();
-        let api_doc = serde_json::to_string_pretty(&api_doc).unwrap();
-        println!("{}", api_doc);
-        std::process::exit(0);
-    }
     // CORS allowed origins
     // map to go inside the option and then map to parse from String to HeaderValue
     // Finally, convert to AllowOrigin
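Hoisting `ApiDoc` to module scope and marking it `pub` lets both the HTTP server and the new `print-schema` subcommand consume the same derived document. The server-side wiring is not part of this diff; the sketch below is only an illustration of that design choice, assuming an axum router with the `utoipa-swagger-ui` axum feature, and the route paths shown are hypothetical.

// Hypothetical sketch: one module-level ApiDoc shared by the server and other code paths.
// Assumes axum and utoipa-swagger-ui (axum feature); the paths here are illustrative only.
use axum::{routing::get, Router};
use utoipa::OpenApi;
use utoipa_swagger_ui::SwaggerUi;

#[derive(OpenApi)]
#[openapi(info(title = "Text Generation Inference"))]
pub struct ApiDoc;

pub fn app() -> Router {
    Router::new()
        // The interactive docs and the raw spec are served from the derived struct...
        .merge(SwaggerUi::new("/docs").url("/api-doc/openapi.json", ApiDoc::openapi()))
        .route("/health", get(|| async { "ok" }))
    // ...while a CLI code path can call ApiDoc::openapi() directly, as main.rs now does,
    // without starting the server at all.
}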
@@ -167,22 +167,24 @@ def check_openapi(check: bool):
     else:
         os.rename(tmp_filename, filename)
         print("OpenAPI documentation updated.")
-    errors = subprocess.run(
+    p = subprocess.run(
         [
-            "swagger-cli",
+            "redocly",
             # allow for trailing whitespace since it's not significant
             # and the precommit hook will remove it
-            "validate",
+            "lint",
             filename,
         ],
         capture_output=True,
-    ).stderr.decode("utf-8")
+    )
+    errors = p.stderr.decode("utf-8")
     # The openapi specs fails on `exclusive_minimum` which is expected to be a boolean where
     # utoipa outputs a value instead: https://github.com/juhaku/utoipa/issues/969
-    if not errors.startswith("Swagger schema validation failed."):
+    print(errors)
+    if p.returncode != 0:
         print(errors)
         raise Exception(
-            f"OpenAPI documentation is invalid, `swagger-cli validate` showed some error:\n {errors}"
+            f"OpenAPI documentation is invalid, `redocly lint {filename}` showed some error:\n {errors}"
         )
     return True
