Updating the schema thing + redocly.

The OpenAPI document is now produced from a module-level utoipa `ApiDoc` via a `print-schema` subcommand, and the documentation check lints the generated `docs/openapi.json` with `redocly lint` instead of `swagger-cli validate`.
parent fa687dd340
commit e3418c3340
@@ -0,0 +1,79 @@
+# This file instructs Redocly's linter to ignore the rules contained for specific parts of your API.
+# See https://redoc.ly/docs/cli/ for more information.
+docs/openapi.json:
+  no-empty-servers:
+    - '#/openapi'
+  spec:
+    - >-
+      #/components/schemas/GenerateParameters/properties/best_of/exclusiveMinimum
+    - >-
+      #/components/schemas/GenerateParameters/properties/frequency_penalty/exclusiveMinimum
+    - '#/components/schemas/GenerateParameters/properties/grammar/nullable'
+    - >-
+      #/components/schemas/GenerateParameters/properties/repetition_penalty/exclusiveMinimum
+    - '#/components/schemas/GenerateParameters/properties/seed/exclusiveMinimum'
+    - >-
+      #/components/schemas/GenerateParameters/properties/temperature/exclusiveMinimum
+    - '#/components/schemas/GenerateParameters/properties/top_k/exclusiveMinimum'
+    - >-
+      #/components/schemas/GenerateParameters/properties/top_n_tokens/exclusiveMinimum
+    - '#/components/schemas/GenerateParameters/properties/top_p/exclusiveMinimum'
+    - >-
+      #/components/schemas/GenerateParameters/properties/typical_p/exclusiveMinimum
+    - '#/components/schemas/GenerateResponse/properties/details/nullable'
+    - '#/components/schemas/StreamResponse/properties/details/nullable'
+    - '#/components/schemas/ChatRequest/properties/response_format/nullable'
+    - '#/components/schemas/ChatRequest/properties/tool_choice/nullable'
+    - '#/components/schemas/ToolChoice/nullable'
+    - '#/components/schemas/ChatCompletionComplete/properties/logprobs/nullable'
+    - '#/components/schemas/ChatCompletionChoice/properties/logprobs/nullable'
+  no-invalid-media-type-examples:
+    - '#/paths/~1/post/responses/422/content/application~1json/example'
+    - '#/paths/~1/post/responses/424/content/application~1json/example'
+    - '#/paths/~1/post/responses/429/content/application~1json/example'
+    - '#/paths/~1/post/responses/500/content/application~1json/example'
+    - '#/paths/~1generate/post/responses/422/content/application~1json/example'
+    - '#/paths/~1generate/post/responses/424/content/application~1json/example'
+    - '#/paths/~1generate/post/responses/429/content/application~1json/example'
+    - '#/paths/~1generate/post/responses/500/content/application~1json/example'
+    - >-
+      #/paths/~1generate_stream/post/responses/422/content/text~1event-stream/example
+    - >-
+      #/paths/~1generate_stream/post/responses/424/content/text~1event-stream/example
+    - >-
+      #/paths/~1generate_stream/post/responses/429/content/text~1event-stream/example
+    - >-
+      #/paths/~1generate_stream/post/responses/500/content/text~1event-stream/example
+    - '#/paths/~1tokenize/post/responses/404/content/application~1json/example'
+    - >-
+      #/paths/~1v1~1chat~1completions/post/responses/422/content/application~1json/example
+    - >-
+      #/paths/~1v1~1chat~1completions/post/responses/424/content/application~1json/example
+    - >-
+      #/paths/~1v1~1chat~1completions/post/responses/429/content/application~1json/example
+    - >-
+      #/paths/~1v1~1chat~1completions/post/responses/500/content/application~1json/example
+    - >-
+      #/paths/~1v1~1completions/post/responses/422/content/application~1json/example
+    - >-
+      #/paths/~1v1~1completions/post/responses/424/content/application~1json/example
+    - >-
+      #/paths/~1v1~1completions/post/responses/429/content/application~1json/example
+    - >-
+      #/paths/~1v1~1completions/post/responses/500/content/application~1json/example
+  operation-4xx-response:
+    - '#/paths/~1health/get/responses'
+    - '#/paths/~1info/get/responses'
+    - '#/paths/~1metrics/get/responses'
+  no-unused-components:
+    - '#/components/schemas/Completion'
+  security-defined:
+    - '#/paths/~1/post'
+    - '#/paths/~1generate/post'
+    - '#/paths/~1generate_stream/post'
+    - '#/paths/~1health/get'
+    - '#/paths/~1info/get'
+    - '#/paths/~1metrics/get'
+    - '#/paths/~1tokenize/post'
+    - '#/paths/~1v1~1chat~1completions/post'
+    - '#/paths/~1v1~1completions/post'
@@ -113,19 +113,14 @@ async fn main() -> Result<(), RouterError> {
         max_client_batch_size,
     } = args;

-    let print_schema_command = match command {
-        Some(Commands::PrintSchema) => true,
-        None => {
-            // only init logging if we are not running the print schema command
-            text_generation_router::logging::init_logging(
-                otlp_endpoint,
-                otlp_service_name,
-                json_output,
-            );
-            false
-        }
+    if let Some(Commands::PrintSchema) = command {
+        use utoipa::OpenApi;
+        let api_doc = text_generation_router::server::ApiDoc::openapi();
+        let api_doc = serde_json::to_string_pretty(&api_doc).unwrap();
+        println!("{}", api_doc);
+        std::process::exit(0);
     };
-    // Launch Tokio runtime
+    text_generation_router::logging::init_logging(otlp_endpoint, otlp_service_name, json_output);

     // Validate args
     if max_input_tokens >= max_total_tokens {
@@ -187,7 +182,6 @@ async fn main() -> Result<(), RouterError> {
         messages_api_enabled,
         disable_grammar_support,
         max_client_batch_size,
-        print_schema_command,
     )
     .await?;
     Ok(())
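In isolation, the new `print-schema` path boils down to deriving an OpenAPI document with utoipa, pretty-printing it, and exiting before any logging or server setup happens. Below is a minimal, self-contained sketch of that flow; it assumes the `utoipa` and `serde_json` crates are available, and the `health` handler is only a stand-in, not something taken from this diff.

// Minimal sketch of the print-schema flow (assumes utoipa and serde_json as dependencies).
use utoipa::OpenApi;

// Stand-in handler so the derived document has one path to describe.
#[allow(dead_code)]
#[utoipa::path(get, path = "/health", responses((status = 200, description = "Everything is working fine")))]
async fn health() {}

#[derive(OpenApi)]
#[openapi(paths(health))]
struct ApiDoc;

fn main() {
    // Same shape as the new branch in main(): build the spec, pretty-print it, exit.
    let api_doc = ApiDoc::openapi();
    let api_doc = serde_json::to_string_pretty(&api_doc).unwrap();
    println!("{}", api_doc);
    std::process::exit(0);
}

The local `use utoipa::OpenApi;` inside the new branch is what brings the derived `openapi()` constructor into scope; the printed JSON is presumably what the Python helper further down writes into docs/openapi.json before linting it.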
@@ -1580,16 +1580,11 @@
         "type": "object",
         "required": [
           "model_id",
-          "model_dtype",
-          "model_device_type",
           "max_concurrent_requests",
           "max_best_of",
           "max_stop_sequences",
           "max_input_tokens",
           "max_total_tokens",
-          "waiting_served_ratio",
-          "max_batch_total_tokens",
-          "max_waiting_tokens",
           "validation_workers",
           "max_client_batch_size",
           "router",
@@ -1601,18 +1596,6 @@
             "example": "null",
             "nullable": true
           },
-          "max_batch_size": {
-            "type": "integer",
-            "example": "null",
-            "nullable": true,
-            "minimum": 0
-          },
-          "max_batch_total_tokens": {
-            "type": "integer",
-            "format": "int32",
-            "example": "32000",
-            "minimum": 0
-          },
           "max_best_of": {
             "type": "integer",
             "example": "2",
@@ -1644,19 +1627,6 @@
             "example": "2048",
             "minimum": 0
           },
-          "max_waiting_tokens": {
-            "type": "integer",
-            "example": "20",
-            "minimum": 0
-          },
-          "model_device_type": {
-            "type": "string",
-            "example": "cuda"
-          },
-          "model_dtype": {
-            "type": "string",
-            "example": "torch.float16"
-          },
           "model_id": {
             "type": "string",
             "description": "Model info",
@@ -1690,11 +1660,6 @@
           "version": {
             "type": "string",
             "example": "0.5.0"
-          },
-          "waiting_served_ratio": {
-            "type": "number",
-            "format": "float",
-            "example": "1.2"
           }
         }
       },
@@ -1396,6 +1396,90 @@ async fn metrics(prom_handle: Extension<PrometheusHandle>) -> String {
 #[derive(Clone, Debug)]
 pub(crate) struct ComputeType(String);

+// OpenAPI documentation
+#[derive(OpenApi)]
+#[openapi(
+    paths(
+        health,
+        get_model_info,
+        compat_generate,
+        generate,
+        generate_stream,
+        chat_completions,
+        completions,
+        tokenize,
+        metrics,
+    ),
+    components(
+        schemas(
+            Info,
+            CompatGenerateRequest,
+            GenerateRequest,
+            GrammarType,
+            ChatRequest,
+            Message,
+            MessageContent,
+            MessageChunk,
+            Url,
+            FunctionName,
+            OutputMessage,
+            TextMessage,
+            ToolCallMessage,
+            ToolCallDelta,
+            ChatCompletionComplete,
+            ChatCompletionChoice,
+            ChatCompletionDelta,
+            ChatCompletionChunk,
+            ChatCompletionLogprob,
+            ChatCompletionLogprobs,
+            ChatCompletionTopLogprob,
+            ChatCompletion,
+            CompletionRequest,
+            CompletionComplete,
+            Chunk,
+            Completion,
+            CompletionFinal,
+            Prompt,
+            GenerateParameters,
+            PrefillToken,
+            Token,
+            GenerateResponse,
+            TokenizeResponse,
+            SimpleToken,
+            BestOfSequence,
+            Details,
+            FinishReason,
+            StreamResponse,
+            StreamDetails,
+            ErrorResponse,
+            GrammarType,
+            Usage,
+            DeltaToolCall,
+            ToolType,
+            Tool,
+            ToolCall,
+            Function,
+            FunctionDefinition,
+            ToolChoice,
+        )
+    ),
+    tags(
+        (name = "Text Generation Inference", description = "Hugging Face Text Generation Inference API")
+    ),
+    info(
+        title = "Text Generation Inference",
+        license(
+            name = "Apache 2.0",
+            url = "https://www.apache.org/licenses/LICENSE-2.0"
+        )
+    )
+)]
+pub struct ApiDoc;
+
+pub fn schema() -> ApiDoc {
+    ApiDoc
+}
+
 /// Serving method
 #[allow(clippy::too_many_arguments)]
 pub async fn run(
@@ -1420,95 +1504,7 @@ pub async fn run(
     messages_api_enabled: bool,
     grammar_support: bool,
     max_client_batch_size: usize,
-    print_schema_command: bool,
 ) -> Result<(), WebServerError> {
-    // OpenAPI documentation
-    #[derive(OpenApi)]
-    #[openapi(
-        paths(
-            health,
-            get_model_info,
-            compat_generate,
-            generate,
-            generate_stream,
-            chat_completions,
-            completions,
-            tokenize,
-            metrics,
-        ),
-        components(
-            schemas(
-                Info,
-                CompatGenerateRequest,
-                GenerateRequest,
-                GrammarType,
-                ChatRequest,
-                Message,
-                MessageContent,
-                MessageChunk,
-                Url,
-                FunctionName,
-                OutputMessage,
-                TextMessage,
-                ToolCallMessage,
-                ToolCallDelta,
-                ChatCompletionComplete,
-                ChatCompletionChoice,
-                ChatCompletionDelta,
-                ChatCompletionChunk,
-                ChatCompletionLogprob,
-                ChatCompletionLogprobs,
-                ChatCompletionTopLogprob,
-                ChatCompletion,
-                CompletionRequest,
-                CompletionComplete,
-                Chunk,
-                Completion,
-                CompletionFinal,
-                Prompt,
-                GenerateParameters,
-                PrefillToken,
-                Token,
-                GenerateResponse,
-                TokenizeResponse,
-                SimpleToken,
-                BestOfSequence,
-                Details,
-                FinishReason,
-                StreamResponse,
-                StreamDetails,
-                ErrorResponse,
-                GrammarType,
-                Usage,
-                DeltaToolCall,
-                ToolType,
-                Tool,
-                ToolCall,
-                Function,
-                FunctionDefinition,
-                ToolChoice,
-            )
-        ),
-        tags(
-            (name = "Text Generation Inference", description = "Hugging Face Text Generation Inference API")
-        ),
-        info(
-            title = "Text Generation Inference",
-            license(
-                name = "Apache 2.0",
-                url = "https://www.apache.org/licenses/LICENSE-2.0"
-            )
-        )
-    )]
-    struct ApiDoc;
-
-    // Create state
-    if print_schema_command {
-        let api_doc = ApiDoc::openapi();
-        let api_doc = serde_json::to_string_pretty(&api_doc).unwrap();
-        println!("{}", api_doc);
-        std::process::exit(0);
-    }
     // CORS allowed origins
     // map to go inside the option and then map to parse from String to HeaderValue
     // Finally, convert to AllowOrigin
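Hoisting `ApiDoc` to module scope and marking it `pub` lets both the HTTP server and the new `print-schema` subcommand consume the same derived document. The server-side wiring is not part of this diff; the sketch below is only an illustration of that design choice, assuming an axum router with the `utoipa-swagger-ui` axum feature, and the route paths shown are hypothetical.

// Hypothetical sketch: one module-level ApiDoc shared by the server and other code paths.
// Assumes axum and utoipa-swagger-ui (axum feature); the paths here are illustrative only.
use axum::{routing::get, Router};
use utoipa::OpenApi;
use utoipa_swagger_ui::SwaggerUi;

#[derive(OpenApi)]
#[openapi(info(title = "Text Generation Inference"))]
pub struct ApiDoc;

pub fn app() -> Router {
    Router::new()
        // The interactive docs and the raw spec are served from the derived struct...
        .merge(SwaggerUi::new("/docs").url("/api-doc/openapi.json", ApiDoc::openapi()))
        .route("/health", get(|| async { "ok" }))
    // ...while a CLI code path can call ApiDoc::openapi() directly, as main.rs now does,
    // without starting the server at all.
}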
@@ -167,22 +167,24 @@ def check_openapi(check: bool):
     else:
         os.rename(tmp_filename, filename)
         print("OpenAPI documentation updated.")
-    errors = subprocess.run(
+    p = subprocess.run(
         [
-            "swagger-cli",
+            "redocly",
             # allow for trailing whitespace since it's not significant
             # and the precommit hook will remove it
-            "validate",
+            "lint",
             filename,
         ],
         capture_output=True,
-    ).stderr.decode("utf-8")
+    )
+    errors = p.stderr.decode("utf-8")
     # The openapi specs fails on `exclusive_minimum` which is expected to be a boolean where
     # utoipa outputs a value instead: https://github.com/juhaku/utoipa/issues/969
-    if not errors.startswith("Swagger schema validation failed."):
+    print(errors)
+    if p.returncode != 0:
         print(errors)
         raise Exception(
-            f"OpenAPI documentation is invalid, `swagger-cli validate` showed some error:\n {errors}"
+            f"OpenAPI documentation is invalid, `redocly lint {filename}` showed some error:\n {errors}"
         )
     return True
