Update the OpenAPI schema generation and validate it with Redocly.
This commit is contained in:
parent
fa687dd340
commit
e3418c3340
|
@ -0,0 +1,79 @@
|
|||
# This file instructs Redocly's linter to ignore the listed rules for specific parts of your API.
|
||||
# See https://redoc.ly/docs/cli/ for more information.
|
||||
docs/openapi.json:
|
||||
no-empty-servers:
|
||||
- '#/openapi'
|
||||
spec:
|
||||
- >-
|
||||
#/components/schemas/GenerateParameters/properties/best_of/exclusiveMinimum
|
||||
- >-
|
||||
#/components/schemas/GenerateParameters/properties/frequency_penalty/exclusiveMinimum
|
||||
- '#/components/schemas/GenerateParameters/properties/grammar/nullable'
|
||||
- >-
|
||||
#/components/schemas/GenerateParameters/properties/repetition_penalty/exclusiveMinimum
|
||||
- '#/components/schemas/GenerateParameters/properties/seed/exclusiveMinimum'
|
||||
- >-
|
||||
#/components/schemas/GenerateParameters/properties/temperature/exclusiveMinimum
|
||||
- '#/components/schemas/GenerateParameters/properties/top_k/exclusiveMinimum'
|
||||
- >-
|
||||
#/components/schemas/GenerateParameters/properties/top_n_tokens/exclusiveMinimum
|
||||
- '#/components/schemas/GenerateParameters/properties/top_p/exclusiveMinimum'
|
||||
- >-
|
||||
#/components/schemas/GenerateParameters/properties/typical_p/exclusiveMinimum
|
||||
- '#/components/schemas/GenerateResponse/properties/details/nullable'
|
||||
- '#/components/schemas/StreamResponse/properties/details/nullable'
|
||||
- '#/components/schemas/ChatRequest/properties/response_format/nullable'
|
||||
- '#/components/schemas/ChatRequest/properties/tool_choice/nullable'
|
||||
- '#/components/schemas/ToolChoice/nullable'
|
||||
- '#/components/schemas/ChatCompletionComplete/properties/logprobs/nullable'
|
||||
- '#/components/schemas/ChatCompletionChoice/properties/logprobs/nullable'
|
||||
no-invalid-media-type-examples:
|
||||
- '#/paths/~1/post/responses/422/content/application~1json/example'
|
||||
- '#/paths/~1/post/responses/424/content/application~1json/example'
|
||||
- '#/paths/~1/post/responses/429/content/application~1json/example'
|
||||
- '#/paths/~1/post/responses/500/content/application~1json/example'
|
||||
- '#/paths/~1generate/post/responses/422/content/application~1json/example'
|
||||
- '#/paths/~1generate/post/responses/424/content/application~1json/example'
|
||||
- '#/paths/~1generate/post/responses/429/content/application~1json/example'
|
||||
- '#/paths/~1generate/post/responses/500/content/application~1json/example'
|
||||
- >-
|
||||
#/paths/~1generate_stream/post/responses/422/content/text~1event-stream/example
|
||||
- >-
|
||||
#/paths/~1generate_stream/post/responses/424/content/text~1event-stream/example
|
||||
- >-
|
||||
#/paths/~1generate_stream/post/responses/429/content/text~1event-stream/example
|
||||
- >-
|
||||
#/paths/~1generate_stream/post/responses/500/content/text~1event-stream/example
|
||||
- '#/paths/~1tokenize/post/responses/404/content/application~1json/example'
|
||||
- >-
|
||||
#/paths/~1v1~1chat~1completions/post/responses/422/content/application~1json/example
|
||||
- >-
|
||||
#/paths/~1v1~1chat~1completions/post/responses/424/content/application~1json/example
|
||||
- >-
|
||||
#/paths/~1v1~1chat~1completions/post/responses/429/content/application~1json/example
|
||||
- >-
|
||||
#/paths/~1v1~1chat~1completions/post/responses/500/content/application~1json/example
|
||||
- >-
|
||||
#/paths/~1v1~1completions/post/responses/422/content/application~1json/example
|
||||
- >-
|
||||
#/paths/~1v1~1completions/post/responses/424/content/application~1json/example
|
||||
- >-
|
||||
#/paths/~1v1~1completions/post/responses/429/content/application~1json/example
|
||||
- >-
|
||||
#/paths/~1v1~1completions/post/responses/500/content/application~1json/example
|
||||
operation-4xx-response:
|
||||
- '#/paths/~1health/get/responses'
|
||||
- '#/paths/~1info/get/responses'
|
||||
- '#/paths/~1metrics/get/responses'
|
||||
no-unused-components:
|
||||
- '#/components/schemas/Completion'
|
||||
security-defined:
|
||||
- '#/paths/~1/post'
|
||||
- '#/paths/~1generate/post'
|
||||
- '#/paths/~1generate_stream/post'
|
||||
- '#/paths/~1health/get'
|
||||
- '#/paths/~1info/get'
|
||||
- '#/paths/~1metrics/get'
|
||||
- '#/paths/~1tokenize/post'
|
||||
- '#/paths/~1v1~1chat~1completions/post'
|
||||
- '#/paths/~1v1~1completions/post'
|
|
@ -113,19 +113,14 @@ async fn main() -> Result<(), RouterError> {
|
|||
max_client_batch_size,
|
||||
} = args;
|
||||
|
||||
let print_schema_command = match command {
|
||||
Some(Commands::PrintSchema) => true,
|
||||
None => {
|
||||
// only init logging if we are not running the print schema command
|
||||
text_generation_router::logging::init_logging(
|
||||
otlp_endpoint,
|
||||
otlp_service_name,
|
||||
json_output,
|
||||
);
|
||||
false
|
||||
}
|
||||
if let Some(Commands::PrintSchema) = command {
|
||||
use utoipa::OpenApi;
|
||||
let api_doc = text_generation_router::server::ApiDoc::openapi();
|
||||
let api_doc = serde_json::to_string_pretty(&api_doc).unwrap();
|
||||
println!("{}", api_doc);
|
||||
std::process::exit(0);
|
||||
};
|
||||
// Launch Tokio runtime
|
||||
text_generation_router::logging::init_logging(otlp_endpoint, otlp_service_name, json_output);
|
||||
|
||||
// Validate args
|
||||
if max_input_tokens >= max_total_tokens {
|
||||
|
@ -187,7 +182,6 @@ async fn main() -> Result<(), RouterError> {
|
|||
messages_api_enabled,
|
||||
disable_grammar_support,
|
||||
max_client_batch_size,
|
||||
print_schema_command,
|
||||
)
|
||||
.await?;
|
||||
Ok(())
|
||||
|
|
|
@ -1580,16 +1580,11 @@
|
|||
"type": "object",
|
||||
"required": [
|
||||
"model_id",
|
||||
"model_dtype",
|
||||
"model_device_type",
|
||||
"max_concurrent_requests",
|
||||
"max_best_of",
|
||||
"max_stop_sequences",
|
||||
"max_input_tokens",
|
||||
"max_total_tokens",
|
||||
"waiting_served_ratio",
|
||||
"max_batch_total_tokens",
|
||||
"max_waiting_tokens",
|
||||
"validation_workers",
|
||||
"max_client_batch_size",
|
||||
"router",
|
||||
|
@ -1601,18 +1596,6 @@
|
|||
"example": "null",
|
||||
"nullable": true
|
||||
},
|
||||
"max_batch_size": {
|
||||
"type": "integer",
|
||||
"example": "null",
|
||||
"nullable": true,
|
||||
"minimum": 0
|
||||
},
|
||||
"max_batch_total_tokens": {
|
||||
"type": "integer",
|
||||
"format": "int32",
|
||||
"example": "32000",
|
||||
"minimum": 0
|
||||
},
|
||||
"max_best_of": {
|
||||
"type": "integer",
|
||||
"example": "2",
|
||||
|
@ -1644,19 +1627,6 @@
|
|||
"example": "2048",
|
||||
"minimum": 0
|
||||
},
|
||||
"max_waiting_tokens": {
|
||||
"type": "integer",
|
||||
"example": "20",
|
||||
"minimum": 0
|
||||
},
|
||||
"model_device_type": {
|
||||
"type": "string",
|
||||
"example": "cuda"
|
||||
},
|
||||
"model_dtype": {
|
||||
"type": "string",
|
||||
"example": "torch.float16"
|
||||
},
|
||||
"model_id": {
|
||||
"type": "string",
|
||||
"description": "Model info",
|
||||
|
@ -1690,11 +1660,6 @@
|
|||
"version": {
|
||||
"type": "string",
|
||||
"example": "0.5.0"
|
||||
},
|
||||
"waiting_served_ratio": {
|
||||
"type": "number",
|
||||
"format": "float",
|
||||
"example": "1.2"
|
||||
}
|
||||
}
|
||||
},
|
||||
|
|
|
@ -1396,6 +1396,90 @@ async fn metrics(prom_handle: Extension<PrometheusHandle>) -> String {
|
|||
#[derive(Clone, Debug)]
|
||||
pub(crate) struct ComputeType(String);
|
||||
|
||||
// OpenAPI documentation
//
// `ApiDoc` is a unit struct that exists only to carry the `#[derive(OpenApi)]`
// attribute below. The attribute enumerates the route handlers (`paths`), the
// request/response types (`components(schemas(...))`), the tag and the
// title/license metadata of the specification. The struct is `pub`, so it can
// be named from outside this module.
|
||||
#[derive(OpenApi)]
|
||||
#[openapi(
|
||||
// Route handlers included in the specification.
paths(
|
||||
health,
|
||||
get_model_info,
|
||||
compat_generate,
|
||||
generate,
|
||||
generate_stream,
|
||||
chat_completions,
|
||||
completions,
|
||||
tokenize,
|
||||
metrics,
|
||||
),
|
||||
components(
|
||||
// Types registered as schemas of the specification.
schemas(
|
||||
Info,
|
||||
CompatGenerateRequest,
|
||||
GenerateRequest,
|
||||
GrammarType,
|
||||
ChatRequest,
|
||||
Message,
|
||||
MessageContent,
|
||||
MessageChunk,
|
||||
Url,
|
||||
FunctionName,
|
||||
OutputMessage,
|
||||
TextMessage,
|
||||
ToolCallMessage,
|
||||
ToolCallDelta,
|
||||
ChatCompletionComplete,
|
||||
ChatCompletionChoice,
|
||||
ChatCompletionDelta,
|
||||
ChatCompletionChunk,
|
||||
ChatCompletionLogprob,
|
||||
ChatCompletionLogprobs,
|
||||
ChatCompletionTopLogprob,
|
||||
ChatCompletion,
|
||||
CompletionRequest,
|
||||
CompletionComplete,
|
||||
Chunk,
|
||||
Completion,
|
||||
CompletionFinal,
|
||||
Prompt,
|
||||
GenerateParameters,
|
||||
PrefillToken,
|
||||
Token,
|
||||
GenerateResponse,
|
||||
TokenizeResponse,
|
||||
SimpleToken,
|
||||
BestOfSequence,
|
||||
Details,
|
||||
FinishReason,
|
||||
StreamResponse,
|
||||
StreamDetails,
|
||||
ErrorResponse,
|
||||
// NOTE(review): `GrammarType` already appears earlier in this list; this
// second entry looks redundant — confirm and drop one of them.
GrammarType,
|
||||
Usage,
|
||||
DeltaToolCall,
|
||||
ToolType,
|
||||
Tool,
|
||||
ToolCall,
|
||||
Function,
|
||||
FunctionDefinition,
|
||||
ToolChoice,
|
||||
)
|
||||
),
|
||||
tags(
|
||||
(name = "Text Generation Inference", description = "Hugging Face Text Generation Inference API")
|
||||
),
|
||||
info(
|
||||
title = "Text Generation Inference",
|
||||
license(
|
||||
name = "Apache 2.0",
|
||||
url = "https://www.apache.org/licenses/LICENSE-2.0"
|
||||
)
|
||||
)
|
||||
)]
|
||||
pub struct ApiDoc;
|
||||
|
||||
/// Returns a value of the [`ApiDoc`] unit struct, the type that carries the
/// `#[derive(OpenApi)]` definition of the API.
pub fn schema() -> ApiDoc {
|
||||
ApiDoc
|
||||
}
|
||||
|
||||
/// Serving method
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub async fn run(
|
||||
|
@ -1420,95 +1504,7 @@ pub async fn run(
|
|||
messages_api_enabled: bool,
|
||||
grammar_support: bool,
|
||||
max_client_batch_size: usize,
|
||||
print_schema_command: bool,
|
||||
) -> Result<(), WebServerError> {
|
||||
// OpenAPI documentation
|
||||
#[derive(OpenApi)]
|
||||
#[openapi(
|
||||
paths(
|
||||
health,
|
||||
get_model_info,
|
||||
compat_generate,
|
||||
generate,
|
||||
generate_stream,
|
||||
chat_completions,
|
||||
completions,
|
||||
tokenize,
|
||||
metrics,
|
||||
),
|
||||
components(
|
||||
schemas(
|
||||
Info,
|
||||
CompatGenerateRequest,
|
||||
GenerateRequest,
|
||||
GrammarType,
|
||||
ChatRequest,
|
||||
Message,
|
||||
MessageContent,
|
||||
MessageChunk,
|
||||
Url,
|
||||
FunctionName,
|
||||
OutputMessage,
|
||||
TextMessage,
|
||||
ToolCallMessage,
|
||||
ToolCallDelta,
|
||||
ChatCompletionComplete,
|
||||
ChatCompletionChoice,
|
||||
ChatCompletionDelta,
|
||||
ChatCompletionChunk,
|
||||
ChatCompletionLogprob,
|
||||
ChatCompletionLogprobs,
|
||||
ChatCompletionTopLogprob,
|
||||
ChatCompletion,
|
||||
CompletionRequest,
|
||||
CompletionComplete,
|
||||
Chunk,
|
||||
Completion,
|
||||
CompletionFinal,
|
||||
Prompt,
|
||||
GenerateParameters,
|
||||
PrefillToken,
|
||||
Token,
|
||||
GenerateResponse,
|
||||
TokenizeResponse,
|
||||
SimpleToken,
|
||||
BestOfSequence,
|
||||
Details,
|
||||
FinishReason,
|
||||
StreamResponse,
|
||||
StreamDetails,
|
||||
ErrorResponse,
|
||||
GrammarType,
|
||||
Usage,
|
||||
DeltaToolCall,
|
||||
ToolType,
|
||||
Tool,
|
||||
ToolCall,
|
||||
Function,
|
||||
FunctionDefinition,
|
||||
ToolChoice,
|
||||
)
|
||||
),
|
||||
tags(
|
||||
(name = "Text Generation Inference", description = "Hugging Face Text Generation Inference API")
|
||||
),
|
||||
info(
|
||||
title = "Text Generation Inference",
|
||||
license(
|
||||
name = "Apache 2.0",
|
||||
url = "https://www.apache.org/licenses/LICENSE-2.0"
|
||||
)
|
||||
)
|
||||
)]
|
||||
struct ApiDoc;
|
||||
|
||||
// Create state
|
||||
if print_schema_command {
|
||||
let api_doc = ApiDoc::openapi();
|
||||
let api_doc = serde_json::to_string_pretty(&api_doc).unwrap();
|
||||
println!("{}", api_doc);
|
||||
std::process::exit(0);
|
||||
}
|
||||
// CORS allowed origins
|
||||
// map to go inside the option and then map to parse from String to HeaderValue
|
||||
// Finally, convert to AllowOrigin
|
||||
|
|
|
@ -167,22 +167,24 @@ def check_openapi(check: bool):
|
|||
else:
|
||||
os.rename(tmp_filename, filename)
|
||||
print("OpenAPI documentation updated.")
|
||||
errors = subprocess.run(
|
||||
p = subprocess.run(
|
||||
[
|
||||
"swagger-cli",
|
||||
"redocly",
|
||||
# allow for trailing whitespace since it's not significant
|
||||
# and the precommit hook will remove it
|
||||
"validate",
|
||||
"lint",
|
||||
filename,
|
||||
],
|
||||
capture_output=True,
|
||||
).stderr.decode("utf-8")
|
||||
)
|
||||
errors = p.stderr.decode("utf-8")
|
||||
# The OpenAPI spec fails on `exclusiveMinimum`, which is expected to be a boolean, whereas
|
||||
# utoipa outputs a value instead: https://github.com/juhaku/utoipa/issues/969
|
||||
if not errors.startswith("Swagger schema validation failed."):
|
||||
print(errors)
|
||||
if p.returncode != 0:
|
||||
print(errors)
|
||||
raise Exception(
|
||||
f"OpenAPI documentation is invalid, `swagger-cli validate` showed some error:\n {errors}"
|
||||
f"OpenAPI documentation is invalid, `redocly lint {filename}` showed some error:\n {errors}"
|
||||
)
|
||||
return True
|
||||
|
||||
|
|
Loading…
Reference in New Issue