Update OpenAPI schema generation and switch spec validation from swagger-cli to Redocly.

This commit is contained in:
Nicolas Patry 2024-07-30 16:14:52 +02:00
parent fa687dd340
commit e3418c3340
No known key found for this signature in database
GPG Key ID: E939E8CC91A1C674
5 changed files with 178 additions and 142 deletions

79
.redocly.lint-ignore.yaml Normal file
View File

@ -0,0 +1,79 @@
# This file instructs Redocly's linter to ignore the rules contained for specific parts of your API.
# See https://redoc.ly/docs/cli/ for more information.
docs/openapi.json:
no-empty-servers:
- '#/openapi'
spec:
- >-
#/components/schemas/GenerateParameters/properties/best_of/exclusiveMinimum
- >-
#/components/schemas/GenerateParameters/properties/frequency_penalty/exclusiveMinimum
- '#/components/schemas/GenerateParameters/properties/grammar/nullable'
- >-
#/components/schemas/GenerateParameters/properties/repetition_penalty/exclusiveMinimum
- '#/components/schemas/GenerateParameters/properties/seed/exclusiveMinimum'
- >-
#/components/schemas/GenerateParameters/properties/temperature/exclusiveMinimum
- '#/components/schemas/GenerateParameters/properties/top_k/exclusiveMinimum'
- >-
#/components/schemas/GenerateParameters/properties/top_n_tokens/exclusiveMinimum
- '#/components/schemas/GenerateParameters/properties/top_p/exclusiveMinimum'
- >-
#/components/schemas/GenerateParameters/properties/typical_p/exclusiveMinimum
- '#/components/schemas/GenerateResponse/properties/details/nullable'
- '#/components/schemas/StreamResponse/properties/details/nullable'
- '#/components/schemas/ChatRequest/properties/response_format/nullable'
- '#/components/schemas/ChatRequest/properties/tool_choice/nullable'
- '#/components/schemas/ToolChoice/nullable'
- '#/components/schemas/ChatCompletionComplete/properties/logprobs/nullable'
- '#/components/schemas/ChatCompletionChoice/properties/logprobs/nullable'
no-invalid-media-type-examples:
- '#/paths/~1/post/responses/422/content/application~1json/example'
- '#/paths/~1/post/responses/424/content/application~1json/example'
- '#/paths/~1/post/responses/429/content/application~1json/example'
- '#/paths/~1/post/responses/500/content/application~1json/example'
- '#/paths/~1generate/post/responses/422/content/application~1json/example'
- '#/paths/~1generate/post/responses/424/content/application~1json/example'
- '#/paths/~1generate/post/responses/429/content/application~1json/example'
- '#/paths/~1generate/post/responses/500/content/application~1json/example'
- >-
#/paths/~1generate_stream/post/responses/422/content/text~1event-stream/example
- >-
#/paths/~1generate_stream/post/responses/424/content/text~1event-stream/example
- >-
#/paths/~1generate_stream/post/responses/429/content/text~1event-stream/example
- >-
#/paths/~1generate_stream/post/responses/500/content/text~1event-stream/example
- '#/paths/~1tokenize/post/responses/404/content/application~1json/example'
- >-
#/paths/~1v1~1chat~1completions/post/responses/422/content/application~1json/example
- >-
#/paths/~1v1~1chat~1completions/post/responses/424/content/application~1json/example
- >-
#/paths/~1v1~1chat~1completions/post/responses/429/content/application~1json/example
- >-
#/paths/~1v1~1chat~1completions/post/responses/500/content/application~1json/example
- >-
#/paths/~1v1~1completions/post/responses/422/content/application~1json/example
- >-
#/paths/~1v1~1completions/post/responses/424/content/application~1json/example
- >-
#/paths/~1v1~1completions/post/responses/429/content/application~1json/example
- >-
#/paths/~1v1~1completions/post/responses/500/content/application~1json/example
operation-4xx-response:
- '#/paths/~1health/get/responses'
- '#/paths/~1info/get/responses'
- '#/paths/~1metrics/get/responses'
no-unused-components:
- '#/components/schemas/Completion'
security-defined:
- '#/paths/~1/post'
- '#/paths/~1generate/post'
- '#/paths/~1generate_stream/post'
- '#/paths/~1health/get'
- '#/paths/~1info/get'
- '#/paths/~1metrics/get'
- '#/paths/~1tokenize/post'
- '#/paths/~1v1~1chat~1completions/post'
- '#/paths/~1v1~1completions/post'

View File

@ -113,19 +113,14 @@ async fn main() -> Result<(), RouterError> {
max_client_batch_size, max_client_batch_size,
} = args; } = args;
let print_schema_command = match command { if let Some(Commands::PrintSchema) = command {
Some(Commands::PrintSchema) => true, use utoipa::OpenApi;
None => { let api_doc = text_generation_router::server::ApiDoc::openapi();
// only init logging if we are not running the print schema command let api_doc = serde_json::to_string_pretty(&api_doc).unwrap();
text_generation_router::logging::init_logging( println!("{}", api_doc);
otlp_endpoint, std::process::exit(0);
otlp_service_name,
json_output,
);
false
}
}; };
// Launch Tokio runtime text_generation_router::logging::init_logging(otlp_endpoint, otlp_service_name, json_output);
// Validate args // Validate args
if max_input_tokens >= max_total_tokens { if max_input_tokens >= max_total_tokens {
@ -187,7 +182,6 @@ async fn main() -> Result<(), RouterError> {
messages_api_enabled, messages_api_enabled,
disable_grammar_support, disable_grammar_support,
max_client_batch_size, max_client_batch_size,
print_schema_command,
) )
.await?; .await?;
Ok(()) Ok(())

View File

@ -1580,16 +1580,11 @@
"type": "object", "type": "object",
"required": [ "required": [
"model_id", "model_id",
"model_dtype",
"model_device_type",
"max_concurrent_requests", "max_concurrent_requests",
"max_best_of", "max_best_of",
"max_stop_sequences", "max_stop_sequences",
"max_input_tokens", "max_input_tokens",
"max_total_tokens", "max_total_tokens",
"waiting_served_ratio",
"max_batch_total_tokens",
"max_waiting_tokens",
"validation_workers", "validation_workers",
"max_client_batch_size", "max_client_batch_size",
"router", "router",
@ -1601,18 +1596,6 @@
"example": "null", "example": "null",
"nullable": true "nullable": true
}, },
"max_batch_size": {
"type": "integer",
"example": "null",
"nullable": true,
"minimum": 0
},
"max_batch_total_tokens": {
"type": "integer",
"format": "int32",
"example": "32000",
"minimum": 0
},
"max_best_of": { "max_best_of": {
"type": "integer", "type": "integer",
"example": "2", "example": "2",
@ -1644,19 +1627,6 @@
"example": "2048", "example": "2048",
"minimum": 0 "minimum": 0
}, },
"max_waiting_tokens": {
"type": "integer",
"example": "20",
"minimum": 0
},
"model_device_type": {
"type": "string",
"example": "cuda"
},
"model_dtype": {
"type": "string",
"example": "torch.float16"
},
"model_id": { "model_id": {
"type": "string", "type": "string",
"description": "Model info", "description": "Model info",
@ -1690,11 +1660,6 @@
"version": { "version": {
"type": "string", "type": "string",
"example": "0.5.0" "example": "0.5.0"
},
"waiting_served_ratio": {
"type": "number",
"format": "float",
"example": "1.2"
} }
} }
}, },

View File

@ -1396,6 +1396,90 @@ async fn metrics(prom_handle: Extension<PrometheusHandle>) -> String {
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
pub(crate) struct ComputeType(String); pub(crate) struct ComputeType(String);
/// OpenAPI description of the router's HTTP API.
///
/// The `utoipa` derive gathers the handler functions listed under `paths` and
/// the types listed under `components(schemas(...))` into a single OpenAPI
/// document, which is produced by calling `ApiDoc::openapi()` (with the
/// `utoipa::OpenApi` trait in scope).
///
/// The type is public so that code outside this module can generate the
/// document directly, e.g. to print the schema without starting the server.
///
/// Each schema type is listed exactly once: the generated `components.schemas`
/// section is keyed by schema name, so repeating an entry adds nothing.
#[derive(OpenApi)]
#[openapi(
    paths(
        health,
        get_model_info,
        compat_generate,
        generate,
        generate_stream,
        chat_completions,
        completions,
        tokenize,
        metrics,
    ),
    components(
        schemas(
            Info,
            CompatGenerateRequest,
            GenerateRequest,
            GrammarType,
            ChatRequest,
            Message,
            MessageContent,
            MessageChunk,
            Url,
            FunctionName,
            OutputMessage,
            TextMessage,
            ToolCallMessage,
            ToolCallDelta,
            ChatCompletionComplete,
            ChatCompletionChoice,
            ChatCompletionDelta,
            ChatCompletionChunk,
            ChatCompletionLogprob,
            ChatCompletionLogprobs,
            ChatCompletionTopLogprob,
            ChatCompletion,
            CompletionRequest,
            CompletionComplete,
            Chunk,
            Completion,
            CompletionFinal,
            Prompt,
            GenerateParameters,
            PrefillToken,
            Token,
            GenerateResponse,
            TokenizeResponse,
            SimpleToken,
            BestOfSequence,
            Details,
            FinishReason,
            StreamResponse,
            StreamDetails,
            ErrorResponse,
            Usage,
            DeltaToolCall,
            ToolType,
            Tool,
            ToolCall,
            Function,
            FunctionDefinition,
            ToolChoice,
        )
    ),
    tags(
        (name = "Text Generation Inference", description = "Hugging Face Text Generation Inference API")
    ),
    info(
        title = "Text Generation Inference",
        license(
            name = "Apache 2.0",
            url = "https://www.apache.org/licenses/LICENSE-2.0"
        )
    )
)]
pub struct ApiDoc;

/// Returns the [`ApiDoc`] marker value that carries the OpenAPI derive.
pub fn schema() -> ApiDoc {
    ApiDoc
}
/// Serving method /// Serving method
#[allow(clippy::too_many_arguments)] #[allow(clippy::too_many_arguments)]
pub async fn run( pub async fn run(
@ -1420,95 +1504,7 @@ pub async fn run(
messages_api_enabled: bool, messages_api_enabled: bool,
grammar_support: bool, grammar_support: bool,
max_client_batch_size: usize, max_client_batch_size: usize,
print_schema_command: bool,
) -> Result<(), WebServerError> { ) -> Result<(), WebServerError> {
// OpenAPI documentation
#[derive(OpenApi)]
#[openapi(
paths(
health,
get_model_info,
compat_generate,
generate,
generate_stream,
chat_completions,
completions,
tokenize,
metrics,
),
components(
schemas(
Info,
CompatGenerateRequest,
GenerateRequest,
GrammarType,
ChatRequest,
Message,
MessageContent,
MessageChunk,
Url,
FunctionName,
OutputMessage,
TextMessage,
ToolCallMessage,
ToolCallDelta,
ChatCompletionComplete,
ChatCompletionChoice,
ChatCompletionDelta,
ChatCompletionChunk,
ChatCompletionLogprob,
ChatCompletionLogprobs,
ChatCompletionTopLogprob,
ChatCompletion,
CompletionRequest,
CompletionComplete,
Chunk,
Completion,
CompletionFinal,
Prompt,
GenerateParameters,
PrefillToken,
Token,
GenerateResponse,
TokenizeResponse,
SimpleToken,
BestOfSequence,
Details,
FinishReason,
StreamResponse,
StreamDetails,
ErrorResponse,
GrammarType,
Usage,
DeltaToolCall,
ToolType,
Tool,
ToolCall,
Function,
FunctionDefinition,
ToolChoice,
)
),
tags(
(name = "Text Generation Inference", description = "Hugging Face Text Generation Inference API")
),
info(
title = "Text Generation Inference",
license(
name = "Apache 2.0",
url = "https://www.apache.org/licenses/LICENSE-2.0"
)
)
)]
struct ApiDoc;
// Create state
if print_schema_command {
let api_doc = ApiDoc::openapi();
let api_doc = serde_json::to_string_pretty(&api_doc).unwrap();
println!("{}", api_doc);
std::process::exit(0);
}
// CORS allowed origins // CORS allowed origins
// map to go inside the option and then map to parse from String to HeaderValue // map to go inside the option and then map to parse from String to HeaderValue
// Finally, convert to AllowOrigin // Finally, convert to AllowOrigin

View File

@ -167,22 +167,24 @@ def check_openapi(check: bool):
else: else:
os.rename(tmp_filename, filename) os.rename(tmp_filename, filename)
print("OpenAPI documentation updated.") print("OpenAPI documentation updated.")
errors = subprocess.run( p = subprocess.run(
[ [
"swagger-cli", "redocly",
# allow for trailing whitespace since it's not significant # allow for trailing whitespace since it's not significant
# and the precommit hook will remove it # and the precommit hook will remove it
"validate", "lint",
filename, filename,
], ],
capture_output=True, capture_output=True,
).stderr.decode("utf-8") )
errors = p.stderr.decode("utf-8")
# The openapi specs fails on `exclusive_minimum` which is expected to be a boolean where # The openapi specs fails on `exclusive_minimum` which is expected to be a boolean where
# utoipa outputs a value instead: https://github.com/juhaku/utoipa/issues/969 # utoipa outputs a value instead: https://github.com/juhaku/utoipa/issues/969
if not errors.startswith("Swagger schema validation failed."): print(errors)
if p.returncode != 0:
print(errors) print(errors)
raise Exception( raise Exception(
f"OpenAPI documentation is invalid, `swagger-cli validate` showed some error:\n {errors}" f"OpenAPI documentation is invalid, `redocly lint {filename}` showed some error:\n {errors}"
) )
return True return True