preemo_text-generation-infe.../docs/openapi.json

1027 lines
27 KiB
JSON

{
"openapi": "3.0.3",
"info": {
"title": "Text Generation Inference",
"description": "Text Generation Webserver",
"contact": {
"name": "Olivier Dehaene"
},
"license": {
"name": "Apache 2.0",
"url": "https://www.apache.org/licenses/LICENSE-2.0"
},
"version": "0.9.4"
},
"paths": {
"/": {
"post": {
"tags": [
"Text Generation Inference"
],
"summary": "Generate tokens if `stream == false` or a stream of token if `stream == true`",
"description": "Generate tokens if `stream == false` or a stream of token if `stream == true`",
"operationId": "compat_generate",
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/CompatGenerateRequest"
}
}
},
"required": true
},
"responses": {
"200": {
"description": "Generated Text",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/GenerateResponse"
}
},
"text/event-stream": {
"schema": {
"$ref": "#/components/schemas/StreamResponse"
}
}
}
},
"422": {
"description": "Input validation error",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/ErrorResponse"
},
"example": {
"error": "Input validation error"
}
}
}
},
"424": {
"description": "Generation Error",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/ErrorResponse"
},
"example": {
"error": "Request failed during generation"
}
}
}
},
"429": {
"description": "Model is overloaded",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/ErrorResponse"
},
"example": {
"error": "Model is overloaded"
}
}
}
},
"500": {
"description": "Incomplete generation",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/ErrorResponse"
},
"example": {
"error": "Incomplete generation"
}
}
}
}
}
}
},
"/completions": {
"post": {
"tags": [
"Text Generation Inference"
],
"summary": "Completion request. Enable stream of token by setting `stream == true`",
"description": "Completion request. Enable stream of token by setting `stream == true`",
"operationId": "completions_generate",
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/CompatCompletionRequest"
}
}
},
"required": true
},
"responses": {
"200": {
"description": "Generated Completion",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/CompletionsResponse"
}
},
"text/event-stream": {
"schema": {
"$ref": "#/components/schemas/CompletionsResponse"
}
}
}
},
"422": {
"description": "Input validation error",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/ErrorResponse"
},
"example": {
"error": "Input validation error"
}
}
}
},
"424": {
"description": "Generation Error",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/ErrorResponse"
},
"example": {
"error": "Request failed during generation"
}
}
}
},
"429": {
"description": "Model is overloaded",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/ErrorResponse"
},
"example": {
"error": "Model is overloaded"
}
}
}
},
"500": {
"description": "Incomplete generation",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/ErrorResponse"
},
"example": {
"error": "Incomplete generation"
}
}
}
}
}
}
},
"/chat/completions": {
"post": {
"tags": [
"Text Generation Inference"
],
"summary": "Generate tokens via Chat",
"description": "Generate tokens via Chat",
"operationId": "chatcompletions_generate",
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/CompatChatCompletionRequest"
}
}
},
"required": true
},
"responses": {
"200": {
"description": "Generated Completion",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/ChatCompletionsResponse"
}
},
"text/event-stream": {
"schema": {
"$ref": "#/components/schemas/ChatCompletionsStreamResponse"
}
}
}
},
"422": {
"description": "Input validation error",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/ErrorResponse"
},
"example": {
"error": "Input validation error"
}
}
}
},
"424": {
"description": "Generation Error",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/ErrorResponse"
},
"example": {
"error": "Request failed during generation"
}
}
}
},
"429": {
"description": "Model is overloaded",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/ErrorResponse"
},
"example": {
"error": "Model is overloaded"
}
}
}
},
"500": {
"description": "Incomplete generation",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/ErrorResponse"
},
"example": {
"error": "Incomplete generation"
}
}
}
}
}
}
},
"/generate": {
"post": {
"tags": [
"Text Generation Inference"
],
"summary": "Generate tokens",
"description": "Generate tokens",
"operationId": "generate",
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/GenerateRequest"
}
}
},
"required": true
},
"responses": {
"200": {
"description": "Generated Text",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/GenerateResponse"
}
}
}
},
"422": {
"description": "Input validation error",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/ErrorResponse"
},
"example": {
"error": "Input validation error"
}
}
}
},
"424": {
"description": "Generation Error",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/ErrorResponse"
},
"example": {
"error": "Request failed during generation"
}
}
}
},
"429": {
"description": "Model is overloaded",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/ErrorResponse"
},
"example": {
"error": "Model is overloaded"
}
}
}
},
"500": {
"description": "Incomplete generation",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/ErrorResponse"
},
"example": {
"error": "Incomplete generation"
}
}
}
}
}
}
},
"/generate_stream": {
"post": {
"tags": [
"Text Generation Inference"
],
"summary": "Generate a stream of token using Server-Sent Events",
"description": "Generate a stream of token using Server-Sent Events",
"operationId": "generate_stream",
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/GenerateRequest"
}
}
},
"required": true
},
"responses": {
"200": {
"description": "Generated Text",
"content": {
"text/event-stream": {
"schema": {
"$ref": "#/components/schemas/StreamResponse"
}
}
}
},
"422": {
"description": "Input validation error",
"content": {
"text/event-stream": {
"schema": {
"$ref": "#/components/schemas/ErrorResponse"
},
"example": {
"error": "Input validation error"
}
}
}
},
"424": {
"description": "Generation Error",
"content": {
"text/event-stream": {
"schema": {
"$ref": "#/components/schemas/ErrorResponse"
},
"example": {
"error": "Request failed during generation"
}
}
}
},
"429": {
"description": "Model is overloaded",
"content": {
"text/event-stream": {
"schema": {
"$ref": "#/components/schemas/ErrorResponse"
},
"example": {
"error": "Model is overloaded"
}
}
}
},
"500": {
"description": "Incomplete generation",
"content": {
"text/event-stream": {
"schema": {
"$ref": "#/components/schemas/ErrorResponse"
},
"example": {
"error": "Incomplete generation"
}
}
}
}
}
}
},
"/health": {
"get": {
"tags": [
"Text Generation Inference"
],
"summary": "Health check method",
"description": "Health check method",
"operationId": "health",
"responses": {
"200": {
"description": "Everything is working fine"
},
"503": {
"description": "Text generation inference is down",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/ErrorResponse"
},
"example": {
"error": "unhealthy",
"error_type": "healthcheck"
}
}
}
}
}
}
},
"/info": {
"get": {
"tags": [
"Text Generation Inference"
],
"summary": "Text Generation Inference endpoint info",
"description": "Text Generation Inference endpoint info",
"operationId": "get_model_info",
"responses": {
"200": {
"description": "Served model info",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/Info"
}
}
}
}
}
}
},
"/metrics": {
"get": {
"tags": [
"Text Generation Inference"
],
"summary": "Prometheus metrics scrape endpoint",
"description": "Prometheus metrics scrape endpoint",
"operationId": "metrics",
"responses": {
"200": {
"description": "Prometheus Metrics",
"content": {
"text/plain": {
"schema": {
"type": "string"
}
}
}
}
}
}
}
},
"components": {
"schemas": {
"BestOfSequence": {
"type": "object",
"required": [
"generated_text",
"finish_reason",
"generated_tokens",
"prefill",
"tokens"
],
"properties": {
"finish_reason": {
"$ref": "#/components/schemas/FinishReason"
},
"generated_text": {
"type": "string",
"example": "test"
},
"generated_tokens": {
"type": "integer",
"format": "int32",
"example": 1,
"minimum": 0.0
},
"prefill": {
"type": "array",
"items": {
"$ref": "#/components/schemas/PrefillToken"
}
},
"seed": {
"type": "integer",
"format": "int64",
"example": 42,
"nullable": true,
"minimum": 0.0
},
"tokens": {
"type": "array",
"items": {
"$ref": "#/components/schemas/Token"
}
}
}
},
"CompatGenerateRequest": {
"type": "object",
"required": [
"inputs"
],
"properties": {
"inputs": {
"type": "string",
"example": "My name is Olivier and I"
},
"parameters": {
"$ref": "#/components/schemas/GenerateParameters"
},
"stream": {
"type": "boolean",
"default": "false"
}
}
},
"Details": {
"type": "object",
"required": [
"finish_reason",
"generated_tokens",
"prefill",
"tokens"
],
"properties": {
"best_of_sequences": {
"type": "array",
"items": {
"$ref": "#/components/schemas/BestOfSequence"
},
"nullable": true
},
"finish_reason": {
"$ref": "#/components/schemas/FinishReason"
},
"generated_tokens": {
"type": "integer",
"format": "int32",
"example": 1,
"minimum": 0.0
},
"prefill": {
"type": "array",
"items": {
"$ref": "#/components/schemas/PrefillToken"
}
},
"seed": {
"type": "integer",
"format": "int64",
"example": 42,
"nullable": true,
"minimum": 0.0
},
"tokens": {
"type": "array",
"items": {
"$ref": "#/components/schemas/Token"
}
}
}
},
"ErrorResponse": {
"type": "object",
"required": [
"error",
"error_type"
],
"properties": {
"error": {
"type": "string"
},
"error_type": {
"type": "string"
}
}
},
"FinishReason": {
"type": "string",
"enum": [
"length",
"eos_token",
"stop_sequence"
]
},
"GenerateParameters": {
"type": "object",
"properties": {
"best_of": {
"type": "integer",
"default": "null",
"example": 1,
"nullable": true,
"minimum": 0.0,
"exclusiveMinimum": 0.0
},
"decoder_input_details": {
"type": "boolean",
"default": "true"
},
"details": {
"type": "boolean",
"default": "true"
},
"do_sample": {
"type": "boolean",
"default": "false",
"example": true
},
"max_new_tokens": {
"type": "integer",
"format": "int32",
"default": "20",
"minimum": 0.0,
"exclusiveMaximum": 512.0,
"exclusiveMinimum": 0.0
},
"repetition_penalty": {
"type": "number",
"format": "float",
"default": "null",
"example": 1.03,
"nullable": true,
"exclusiveMinimum": 0.0
},
"return_full_text": {
"type": "boolean",
"default": "null",
"example": false,
"nullable": true
},
"seed": {
"type": "integer",
"format": "int64",
"default": "null",
"example": "null",
"nullable": true,
"minimum": 0.0,
"exclusiveMinimum": 0.0
},
"stop": {
"type": "array",
"items": {
"type": "string"
},
"example": [
"photographer"
],
"maxItems": 4
},
"temperature": {
"type": "number",
"format": "float",
"default": "null",
"example": 0.5,
"nullable": true,
"exclusiveMinimum": 0.0
},
"top_k": {
"type": "integer",
"format": "int32",
"default": "null",
"example": 10,
"nullable": true,
"exclusiveMinimum": 0.0
},
"top_p": {
"type": "number",
"format": "float",
"default": "null",
"example": 0.95,
"nullable": true,
"maximum": 1.0,
"exclusiveMinimum": 0.0
},
"truncate": {
"type": "integer",
"default": "null",
"example": "null",
"nullable": true,
"minimum": 0.0
},
"typical_p": {
"type": "number",
"format": "float",
"default": "null",
"example": 0.95,
"nullable": true,
"maximum": 1.0,
"exclusiveMinimum": 0.0
},
"watermark": {
"type": "boolean",
"default": "false",
"example": true
}
}
},
"GenerateRequest": {
"type": "object",
"required": [
"inputs"
],
"properties": {
"inputs": {
"type": "string",
"example": "My name is Olivier and I"
},
"parameters": {
"$ref": "#/components/schemas/GenerateParameters"
}
}
},
"GenerateResponse": {
"type": "object",
"required": [
"generated_text"
],
"properties": {
"details": {
"allOf": [
{
"$ref": "#/components/schemas/Details"
}
],
"nullable": true
},
"generated_text": {
"type": "string",
"example": "test"
}
}
},
"Info": {
"type": "object",
"required": [
"model_id",
"model_dtype",
"model_device_type",
"max_concurrent_requests",
"max_best_of",
"max_stop_sequences",
"max_input_length",
"max_total_tokens",
"waiting_served_ratio",
"max_batch_total_tokens",
"max_waiting_tokens",
"validation_workers",
"version"
],
"properties": {
"docker_label": {
"type": "string",
"example": "null",
"nullable": true
},
"max_batch_total_tokens": {
"type": "integer",
"format": "int32",
"example": "32000",
"minimum": 0.0
},
"max_best_of": {
"type": "integer",
"example": "2",
"minimum": 0.0
},
"max_concurrent_requests": {
"type": "integer",
"description": "Router Parameters",
"example": "128",
"minimum": 0.0
},
"max_input_length": {
"type": "integer",
"example": "1024",
"minimum": 0.0
},
"max_stop_sequences": {
"type": "integer",
"example": "4",
"minimum": 0.0
},
"max_total_tokens": {
"type": "integer",
"example": "2048",
"minimum": 0.0
},
"max_waiting_tokens": {
"type": "integer",
"example": "20",
"minimum": 0.0
},
"model_device_type": {
"type": "string",
"example": "cuda"
},
"model_dtype": {
"type": "string",
"example": "torch.float16"
},
"model_id": {
"type": "string",
"description": "Model info",
"example": "bigscience/blomm-560m"
},
"model_pipeline_tag": {
"type": "string",
"example": "text-generation",
"nullable": true
},
"model_sha": {
"type": "string",
"example": "e985a63cdc139290c5f700ff1929f0b5942cced2",
"nullable": true
},
"sha": {
"type": "string",
"example": "null",
"nullable": true
},
"validation_workers": {
"type": "integer",
"example": "2",
"minimum": 0.0
},
"version": {
"type": "string",
"description": "Router Info",
"example": "0.5.0"
},
"waiting_served_ratio": {
"type": "number",
"format": "float",
"example": "1.2"
}
}
},
"PrefillToken": {
"type": "object",
"required": [
"id",
"text",
"logprob"
],
"properties": {
"id": {
"type": "integer",
"format": "int32",
"example": 0,
"minimum": 0.0
},
"logprob": {
"type": "number",
"format": "float",
"example": -0.34,
"nullable": true
},
"text": {
"type": "string",
"example": "test"
}
}
},
"StreamDetails": {
"type": "object",
"required": [
"finish_reason",
"generated_tokens"
],
"properties": {
"finish_reason": {
"$ref": "#/components/schemas/FinishReason"
},
"generated_tokens": {
"type": "integer",
"format": "int32",
"example": 1,
"minimum": 0.0
},
"seed": {
"type": "integer",
"format": "int64",
"example": 42,
"nullable": true,
"minimum": 0.0
}
}
},
"StreamResponse": {
"type": "object",
"required": [
"token"
],
"properties": {
"details": {
"allOf": [
{
"$ref": "#/components/schemas/StreamDetails"
}
],
"nullable": true
},
"generated_text": {
"type": "string",
"default": "null",
"example": "test",
"nullable": true
},
"token": {
"$ref": "#/components/schemas/Token"
}
}
},
"Token": {
"type": "object",
"required": [
"id",
"text",
"logprob",
"special"
],
"properties": {
"id": {
"type": "integer",
"format": "int32",
"example": 0,
"minimum": 0.0
},
"logprob": {
"type": "number",
"format": "float",
"example": -0.34,
"nullable": true
},
"special": {
"type": "boolean",
"example": "false"
},
"text": {
"type": "string",
"example": "test"
}
}
}
}
},
"tags": [
{
"name": "Text Generation Inference",
"description": "Hugging Face Text Generation Inference API"
}
]
}