1474 lines
39 KiB
JSON
1474 lines
39 KiB
JSON
{
|
|
"openapi": "3.0.3",
|
|
"info": {
|
|
"title": "Text Generation Inference",
|
|
"description": "Text Generation Webserver",
|
|
"contact": {
|
|
"name": "Olivier Dehaene"
|
|
},
|
|
"license": {
|
|
"name": "Apache 2.0",
|
|
"url": "https://www.apache.org/licenses/LICENSE-2.0"
|
|
},
|
|
"version": "1.4.3"
|
|
},
|
|
"paths": {
|
|
"/": {
|
|
"post": {
|
|
"tags": [
|
|
"Text Generation Inference"
|
|
],
|
|
"summary": "Generate tokens if `stream == false` or a stream of tokens if `stream == true`",
|
|
"description": "Generate tokens if `stream == false` or a stream of tokens if `stream == true`",
|
|
"operationId": "compat_generate",
|
|
"requestBody": {
|
|
"content": {
|
|
"application/json": {
|
|
"schema": {
|
|
"$ref": "#/components/schemas/CompatGenerateRequest"
|
|
}
|
|
}
|
|
},
|
|
"required": true
|
|
},
|
|
"responses": {
|
|
"200": {
|
|
"description": "Generated Text",
|
|
"content": {
|
|
"application/json": {
|
|
"schema": {
|
|
"$ref": "#/components/schemas/GenerateResponse"
|
|
}
|
|
},
|
|
"text/event-stream": {
|
|
"schema": {
|
|
"$ref": "#/components/schemas/StreamResponse"
|
|
}
|
|
}
|
|
}
|
|
},
|
|
"422": {
|
|
"description": "Input validation error",
|
|
"content": {
|
|
"application/json": {
|
|
"schema": {
|
|
"$ref": "#/components/schemas/ErrorResponse"
|
|
},
|
|
"example": {
|
|
"error": "Input validation error"
|
|
}
|
|
}
|
|
}
|
|
},
|
|
"424": {
|
|
"description": "Generation Error",
|
|
"content": {
|
|
"application/json": {
|
|
"schema": {
|
|
"$ref": "#/components/schemas/ErrorResponse"
|
|
},
|
|
"example": {
|
|
"error": "Request failed during generation"
|
|
}
|
|
}
|
|
}
|
|
},
|
|
"429": {
|
|
"description": "Model is overloaded",
|
|
"content": {
|
|
"application/json": {
|
|
"schema": {
|
|
"$ref": "#/components/schemas/ErrorResponse"
|
|
},
|
|
"example": {
|
|
"error": "Model is overloaded"
|
|
}
|
|
}
|
|
}
|
|
},
|
|
"500": {
|
|
"description": "Incomplete generation",
|
|
"content": {
|
|
"application/json": {
|
|
"schema": {
|
|
"$ref": "#/components/schemas/ErrorResponse"
|
|
},
|
|
"example": {
|
|
"error": "Incomplete generation"
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
},
|
|
"/generate": {
|
|
"post": {
|
|
"tags": [
|
|
"Text Generation Inference"
|
|
],
|
|
"summary": "Generate tokens",
|
|
"description": "Generate tokens",
|
|
"operationId": "generate",
|
|
"requestBody": {
|
|
"content": {
|
|
"application/json": {
|
|
"schema": {
|
|
"$ref": "#/components/schemas/GenerateRequest"
|
|
}
|
|
}
|
|
},
|
|
"required": true
|
|
},
|
|
"responses": {
|
|
"200": {
|
|
"description": "Generated Text",
|
|
"content": {
|
|
"application/json": {
|
|
"schema": {
|
|
"$ref": "#/components/schemas/GenerateResponse"
|
|
}
|
|
}
|
|
}
|
|
},
|
|
"422": {
|
|
"description": "Input validation error",
|
|
"content": {
|
|
"application/json": {
|
|
"schema": {
|
|
"$ref": "#/components/schemas/ErrorResponse"
|
|
},
|
|
"example": {
|
|
"error": "Input validation error"
|
|
}
|
|
}
|
|
}
|
|
},
|
|
"424": {
|
|
"description": "Generation Error",
|
|
"content": {
|
|
"application/json": {
|
|
"schema": {
|
|
"$ref": "#/components/schemas/ErrorResponse"
|
|
},
|
|
"example": {
|
|
"error": "Request failed during generation"
|
|
}
|
|
}
|
|
}
|
|
},
|
|
"429": {
|
|
"description": "Model is overloaded",
|
|
"content": {
|
|
"application/json": {
|
|
"schema": {
|
|
"$ref": "#/components/schemas/ErrorResponse"
|
|
},
|
|
"example": {
|
|
"error": "Model is overloaded"
|
|
}
|
|
}
|
|
}
|
|
},
|
|
"500": {
|
|
"description": "Incomplete generation",
|
|
"content": {
|
|
"application/json": {
|
|
"schema": {
|
|
"$ref": "#/components/schemas/ErrorResponse"
|
|
},
|
|
"example": {
|
|
"error": "Incomplete generation"
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
},
|
|
"/generate_stream": {
|
|
"post": {
|
|
"tags": [
|
|
"Text Generation Inference"
|
|
],
|
|
"summary": "Generate a stream of tokens using Server-Sent Events",
|
|
"description": "Generate a stream of tokens using Server-Sent Events",
|
|
"operationId": "generate_stream",
|
|
"requestBody": {
|
|
"content": {
|
|
"application/json": {
|
|
"schema": {
|
|
"$ref": "#/components/schemas/GenerateRequest"
|
|
}
|
|
}
|
|
},
|
|
"required": true
|
|
},
|
|
"responses": {
|
|
"200": {
|
|
"description": "Generated Text",
|
|
"content": {
|
|
"text/event-stream": {
|
|
"schema": {
|
|
"$ref": "#/components/schemas/StreamResponse"
|
|
}
|
|
}
|
|
}
|
|
},
|
|
"422": {
|
|
"description": "Input validation error",
|
|
"content": {
|
|
"text/event-stream": {
|
|
"schema": {
|
|
"$ref": "#/components/schemas/ErrorResponse"
|
|
},
|
|
"example": {
|
|
"error": "Input validation error"
|
|
}
|
|
}
|
|
}
|
|
},
|
|
"424": {
|
|
"description": "Generation Error",
|
|
"content": {
|
|
"text/event-stream": {
|
|
"schema": {
|
|
"$ref": "#/components/schemas/ErrorResponse"
|
|
},
|
|
"example": {
|
|
"error": "Request failed during generation"
|
|
}
|
|
}
|
|
}
|
|
},
|
|
"429": {
|
|
"description": "Model is overloaded",
|
|
"content": {
|
|
"text/event-stream": {
|
|
"schema": {
|
|
"$ref": "#/components/schemas/ErrorResponse"
|
|
},
|
|
"example": {
|
|
"error": "Model is overloaded"
|
|
}
|
|
}
|
|
}
|
|
},
|
|
"500": {
|
|
"description": "Incomplete generation",
|
|
"content": {
|
|
"text/event-stream": {
|
|
"schema": {
|
|
"$ref": "#/components/schemas/ErrorResponse"
|
|
},
|
|
"example": {
|
|
"error": "Incomplete generation"
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
},
|
|
"/health": {
|
|
"get": {
|
|
"tags": [
|
|
"Text Generation Inference"
|
|
],
|
|
"summary": "Health check method",
|
|
"description": "Health check method",
|
|
"operationId": "health",
|
|
"responses": {
|
|
"200": {
|
|
"description": "Everything is working fine"
|
|
},
|
|
"503": {
|
|
"description": "Text generation inference is down",
|
|
"content": {
|
|
"application/json": {
|
|
"schema": {
|
|
"$ref": "#/components/schemas/ErrorResponse"
|
|
},
|
|
"example": {
|
|
"error": "unhealthy",
|
|
"error_type": "healthcheck"
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
},
|
|
"/info": {
|
|
"get": {
|
|
"tags": [
|
|
"Text Generation Inference"
|
|
],
|
|
"summary": "Text Generation Inference endpoint info",
|
|
"description": "Text Generation Inference endpoint info",
|
|
"operationId": "get_model_info",
|
|
"responses": {
|
|
"200": {
|
|
"description": "Served model info",
|
|
"content": {
|
|
"application/json": {
|
|
"schema": {
|
|
"$ref": "#/components/schemas/Info"
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
},
|
|
"/metrics": {
|
|
"get": {
|
|
"tags": [
|
|
"Text Generation Inference"
|
|
],
|
|
"summary": "Prometheus metrics scrape endpoint",
|
|
"description": "Prometheus metrics scrape endpoint",
|
|
"operationId": "metrics",
|
|
"responses": {
|
|
"200": {
|
|
"description": "Prometheus Metrics",
|
|
"content": {
|
|
"text/plain": {
|
|
"schema": {
|
|
"type": "string"
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
},
|
|
"/tokenize": {
|
|
"post": {
|
|
"tags": [
|
|
"Text Generation Inference"
|
|
],
|
|
"summary": "Tokenize inputs",
|
|
"description": "Tokenize inputs",
|
|
"operationId": "tokenize",
|
|
"requestBody": {
|
|
"content": {
|
|
"application/json": {
|
|
"schema": {
|
|
"$ref": "#/components/schemas/GenerateRequest"
|
|
}
|
|
}
|
|
},
|
|
"required": true
|
|
},
|
|
"responses": {
|
|
"200": {
|
|
"description": "Tokenized ids",
|
|
"content": {
|
|
"application/json": {
|
|
"schema": {
|
|
"$ref": "#/components/schemas/TokenizeResponse"
|
|
}
|
|
}
|
|
}
|
|
},
|
|
"404": {
|
|
"description": "No tokenizer found",
|
|
"content": {
|
|
"application/json": {
|
|
"schema": {
|
|
"$ref": "#/components/schemas/ErrorResponse"
|
|
},
|
|
"example": {
|
|
"error": "No fast tokenizer available"
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
},
|
|
"/v1/chat/completions": {
|
|
"post": {
|
|
"tags": [
|
|
"Text Generation Inference"
|
|
],
|
|
"summary": "Generate tokens",
|
|
"description": "Generate tokens",
|
|
"operationId": "chat_completions",
|
|
"requestBody": {
|
|
"content": {
|
|
"application/json": {
|
|
"schema": {
|
|
"$ref": "#/components/schemas/ChatRequest"
|
|
}
|
|
}
|
|
},
|
|
"required": true
|
|
},
|
|
"responses": {
|
|
"200": {
|
|
"description": "Generated Text",
|
|
"content": {
|
|
"application/json": {
|
|
"schema": {
|
|
"$ref": "#/components/schemas/ChatCompletionChunk"
|
|
}
|
|
}
|
|
}
|
|
},
|
|
"422": {
|
|
"description": "Input validation error",
|
|
"content": {
|
|
"application/json": {
|
|
"schema": {
|
|
"$ref": "#/components/schemas/ErrorResponse"
|
|
},
|
|
"example": {
|
|
"error": "Input validation error"
|
|
}
|
|
}
|
|
}
|
|
},
|
|
"424": {
|
|
"description": "Generation Error",
|
|
"content": {
|
|
"application/json": {
|
|
"schema": {
|
|
"$ref": "#/components/schemas/ErrorResponse"
|
|
},
|
|
"example": {
|
|
"error": "Request failed during generation"
|
|
}
|
|
}
|
|
}
|
|
},
|
|
"429": {
|
|
"description": "Model is overloaded",
|
|
"content": {
|
|
"application/json": {
|
|
"schema": {
|
|
"$ref": "#/components/schemas/ErrorResponse"
|
|
},
|
|
"example": {
|
|
"error": "Model is overloaded"
|
|
}
|
|
}
|
|
}
|
|
},
|
|
"500": {
|
|
"description": "Incomplete generation",
|
|
"content": {
|
|
"application/json": {
|
|
"schema": {
|
|
"$ref": "#/components/schemas/ErrorResponse"
|
|
},
|
|
"example": {
|
|
"error": "Incomplete generation"
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
},
|
|
"components": {
|
|
"schemas": {
|
|
"BestOfSequence": {
|
|
"type": "object",
|
|
"required": [
|
|
"generated_text",
|
|
"finish_reason",
|
|
"generated_tokens",
|
|
"prefill",
|
|
"tokens"
|
|
],
|
|
"properties": {
|
|
"finish_reason": {
|
|
"$ref": "#/components/schemas/FinishReason"
|
|
},
|
|
"generated_text": {
|
|
"type": "string",
|
|
"example": "test"
|
|
},
|
|
"generated_tokens": {
|
|
"type": "integer",
|
|
"format": "int32",
|
|
"example": 1,
|
|
"minimum": 0
|
|
},
|
|
"prefill": {
|
|
"type": "array",
|
|
"items": {
|
|
"$ref": "#/components/schemas/PrefillToken"
|
|
}
|
|
},
|
|
"seed": {
|
|
"type": "integer",
|
|
"format": "int64",
|
|
"example": 42,
|
|
"nullable": true,
|
|
"minimum": 0
|
|
},
|
|
"tokens": {
|
|
"type": "array",
|
|
"items": {
|
|
"$ref": "#/components/schemas/Token"
|
|
}
|
|
},
|
|
"top_tokens": {
|
|
"type": "array",
|
|
"items": {
|
|
"type": "array",
|
|
"items": {
|
|
"$ref": "#/components/schemas/Token"
|
|
}
|
|
}
|
|
}
|
|
}
|
|
},
|
|
"ChatCompletion": {
|
|
"type": "object",
|
|
"required": [
|
|
"id",
|
|
"object",
|
|
"created",
|
|
"model",
|
|
"system_fingerprint",
|
|
"choices",
|
|
"usage"
|
|
],
|
|
"properties": {
|
|
"choices": {
|
|
"type": "array",
|
|
"items": {
|
|
"$ref": "#/components/schemas/ChatCompletionComplete"
|
|
}
|
|
},
|
|
"created": {
|
|
"type": "integer",
|
|
"format": "int64",
|
|
"example": "1706270835",
|
|
"minimum": 0
|
|
},
|
|
"id": {
|
|
"type": "string"
|
|
},
|
|
"model": {
|
|
"type": "string",
|
|
"example": "mistralai/Mistral-7B-Instruct-v0.2"
|
|
},
|
|
"object": {
|
|
"type": "string"
|
|
},
|
|
"system_fingerprint": {
|
|
"type": "string"
|
|
},
|
|
"usage": {
|
|
"$ref": "#/components/schemas/Usage"
|
|
}
|
|
}
|
|
},
|
|
"ChatCompletionChoice": {
|
|
"type": "object",
|
|
"required": [
|
|
"index",
|
|
"delta"
|
|
],
|
|
"properties": {
|
|
"delta": {
|
|
"$ref": "#/components/schemas/ChatCompletionDelta"
|
|
},
|
|
"finish_reason": {
|
|
"type": "string",
|
|
"nullable": true
|
|
},
|
|
"index": {
|
|
"type": "integer",
|
|
"format": "int32",
|
|
"minimum": 0
|
|
},
|
|
"logprobs": {
|
|
"allOf": [
|
|
{
|
|
"$ref": "#/components/schemas/ChatCompletionLogprobs"
|
|
}
|
|
],
|
|
"nullable": true
|
|
}
|
|
}
|
|
},
|
|
"ChatCompletionChunk": {
|
|
"type": "object",
|
|
"required": [
|
|
"id",
|
|
"object",
|
|
"created",
|
|
"model",
|
|
"system_fingerprint",
|
|
"choices"
|
|
],
|
|
"properties": {
|
|
"choices": {
|
|
"type": "array",
|
|
"items": {
|
|
"$ref": "#/components/schemas/ChatCompletionChoice"
|
|
}
|
|
},
|
|
"created": {
|
|
"type": "integer",
|
|
"format": "int64",
|
|
"example": "1706270978",
|
|
"minimum": 0
|
|
},
|
|
"id": {
|
|
"type": "string"
|
|
},
|
|
"model": {
|
|
"type": "string",
|
|
"example": "mistralai/Mistral-7B-Instruct-v0.2"
|
|
},
|
|
"object": {
|
|
"type": "string"
|
|
},
|
|
"system_fingerprint": {
|
|
"type": "string"
|
|
}
|
|
}
|
|
},
|
|
"ChatCompletionComplete": {
|
|
"type": "object",
|
|
"required": [
|
|
"index",
|
|
"message",
|
|
"finish_reason"
|
|
],
|
|
"properties": {
|
|
"finish_reason": {
|
|
"type": "string"
|
|
},
|
|
"index": {
|
|
"type": "integer",
|
|
"format": "int32",
|
|
"minimum": 0
|
|
},
|
|
"logprobs": {
|
|
"allOf": [
|
|
{
|
|
"$ref": "#/components/schemas/ChatCompletionLogprobs"
|
|
}
|
|
],
|
|
"nullable": true
|
|
},
|
|
"message": {
|
|
"$ref": "#/components/schemas/Message"
|
|
}
|
|
}
|
|
},
|
|
"ChatCompletionDelta": {
|
|
"type": "object",
|
|
"required": [
|
|
"role",
|
|
"content"
|
|
],
|
|
"properties": {
|
|
"content": {
|
|
"type": "string",
|
|
"example": "What is Deep Learning?"
|
|
},
|
|
"role": {
|
|
"type": "string",
|
|
"example": "user"
|
|
}
|
|
}
|
|
},
|
|
"ChatCompletionLogprob": {
|
|
"type": "object",
|
|
"required": [
|
|
"token",
|
|
"logprob",
|
|
"top_logprobs"
|
|
],
|
|
"properties": {
|
|
"logprob": {
|
|
"type": "number",
|
|
"format": "float"
|
|
},
|
|
"token": {
|
|
"type": "string"
|
|
},
|
|
"top_logprobs": {
|
|
"type": "array",
|
|
"items": {
|
|
"$ref": "#/components/schemas/ChatCompletionTopLogprob"
|
|
}
|
|
}
|
|
}
|
|
},
|
|
"ChatCompletionLogprobs": {
|
|
"type": "object",
|
|
"required": [
|
|
"content"
|
|
],
|
|
"properties": {
|
|
"content": {
|
|
"type": "array",
|
|
"items": {
|
|
"$ref": "#/components/schemas/ChatCompletionLogprob"
|
|
}
|
|
}
|
|
}
|
|
},
|
|
"ChatCompletionTopLogprob": {
|
|
"type": "object",
|
|
"required": [
|
|
"token",
|
|
"logprob"
|
|
],
|
|
"properties": {
|
|
"logprob": {
|
|
"type": "number",
|
|
"format": "float"
|
|
},
|
|
"token": {
|
|
"type": "string"
|
|
}
|
|
}
|
|
},
|
|
"ChatRequest": {
|
|
"type": "object",
|
|
"required": [
|
|
"model"
|
|
],
|
|
"properties": {
|
|
"frequency_penalty": {
|
|
"type": "number",
|
|
"format": "float",
|
|
"description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far,\ndecreasing the model's likelihood to repeat the same line verbatim.",
|
|
"example": "1.0",
|
|
"nullable": true
|
|
},
|
|
"logit_bias": {
|
|
"type": "array",
|
|
"items": {
|
|
"type": "number",
|
|
"format": "float"
|
|
},
|
|
"description": "UNUSED\nModify the likelihood of specified tokens appearing in the completion. Accepts a JSON object that maps tokens\n(specified by their token ID in the tokenizer) to an associated bias value from -100 to 100. Mathematically,\nthe bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model,\nbut values between -1 and 1 should decrease or increase likelihood of selection; values like -100 or 100 should\nresult in a ban or exclusive selection of the relevant token.",
|
|
"nullable": true
|
|
},
|
|
"logprobs": {
|
|
"type": "boolean",
|
|
"description": "Whether to return log probabilities of the output tokens or not. If true, returns the log probabilities of each\noutput token returned in the content of message.",
|
|
"example": "false",
|
|
"nullable": true
|
|
},
|
|
"max_tokens": {
|
|
"type": "integer",
|
|
"format": "int32",
|
|
"description": "The maximum number of tokens that can be generated in the chat completion.",
|
|
"example": "32",
|
|
"nullable": true,
|
|
"minimum": 0
|
|
},
|
|
"messages": {
|
|
"type": "array",
|
|
"items": {
|
|
"$ref": "#/components/schemas/Message"
|
|
},
|
|
"description": "A list of messages comprising the conversation so far."
|
|
},
|
|
"model": {
|
|
"type": "string",
|
|
"description": "UNUSED\nID of the model to use. See the model endpoint compatibility table for details on which models work with the Chat API.",
|
|
"example": "mistralai/Mistral-7B-Instruct-v0.2"
|
|
},
|
|
"n": {
|
|
"type": "integer",
|
|
"format": "int32",
|
|
"description": "UNUSED\nHow many chat completion choices to generate for each input message. Note that you will be charged based on the\nnumber of generated tokens across all of the choices. Keep n as 1 to minimize costs.",
|
|
"example": "2",
|
|
"nullable": true,
|
|
"minimum": 0
|
|
},
|
|
"presence_penalty": {
|
|
"type": "number",
|
|
"format": "float",
|
|
"description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far,\nincreasing the model's likelihood to talk about new topics",
|
|
"example": 0.1,
|
|
"nullable": true
|
|
},
|
|
"seed": {
|
|
"type": "integer",
|
|
"format": "int64",
|
|
"example": 42,
|
|
"nullable": true,
|
|
"minimum": 0
|
|
},
|
|
"stream": {
|
|
"type": "boolean"
|
|
},
|
|
"temperature": {
|
|
"type": "number",
|
|
"format": "float",
|
|
"description": "What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while\nlower values like 0.2 will make it more focused and deterministic.\n\nWe generally recommend altering this or `top_p` but not both.",
|
|
"example": 1.0,
|
|
"nullable": true
|
|
},
|
|
"top_logprobs": {
|
|
"type": "integer",
|
|
"format": "int32",
|
|
"description": "An integer between 0 and 5 specifying the number of most likely tokens to return at each token position, each with\nan associated log probability. logprobs must be set to true if this parameter is used.",
|
|
"example": "5",
|
|
"nullable": true,
|
|
"minimum": 0
|
|
},
|
|
"top_p": {
|
|
"type": "number",
|
|
"format": "float",
|
|
"description": "An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the\ntokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.",
|
|
"example": 0.95,
|
|
"nullable": true
|
|
}
|
|
}
|
|
},
|
|
"CompatGenerateRequest": {
|
|
"type": "object",
|
|
"required": [
|
|
"inputs"
|
|
],
|
|
"properties": {
|
|
"inputs": {
|
|
"type": "string",
|
|
"example": "My name is Olivier and I"
|
|
},
|
|
"parameters": {
|
|
"$ref": "#/components/schemas/GenerateParameters"
|
|
},
|
|
"stream": {
|
|
"type": "boolean",
|
|
"default": "false"
|
|
}
|
|
}
|
|
},
|
|
"Details": {
|
|
"type": "object",
|
|
"required": [
|
|
"finish_reason",
|
|
"generated_tokens",
|
|
"prefill",
|
|
"tokens"
|
|
],
|
|
"properties": {
|
|
"best_of_sequences": {
|
|
"type": "array",
|
|
"items": {
|
|
"$ref": "#/components/schemas/BestOfSequence"
|
|
},
|
|
"nullable": true
|
|
},
|
|
"finish_reason": {
|
|
"$ref": "#/components/schemas/FinishReason"
|
|
},
|
|
"generated_tokens": {
|
|
"type": "integer",
|
|
"format": "int32",
|
|
"example": 1,
|
|
"minimum": 0
|
|
},
|
|
"prefill": {
|
|
"type": "array",
|
|
"items": {
|
|
"$ref": "#/components/schemas/PrefillToken"
|
|
}
|
|
},
|
|
"seed": {
|
|
"type": "integer",
|
|
"format": "int64",
|
|
"example": 42,
|
|
"nullable": true,
|
|
"minimum": 0
|
|
},
|
|
"tokens": {
|
|
"type": "array",
|
|
"items": {
|
|
"$ref": "#/components/schemas/Token"
|
|
}
|
|
},
|
|
"top_tokens": {
|
|
"type": "array",
|
|
"items": {
|
|
"type": "array",
|
|
"items": {
|
|
"$ref": "#/components/schemas/Token"
|
|
}
|
|
}
|
|
}
|
|
}
|
|
},
|
|
"ErrorResponse": {
|
|
"type": "object",
|
|
"required": [
|
|
"error",
|
|
"error_type"
|
|
],
|
|
"properties": {
|
|
"error": {
|
|
"type": "string"
|
|
},
|
|
"error_type": {
|
|
"type": "string"
|
|
}
|
|
}
|
|
},
|
|
"FinishReason": {
|
|
"type": "string",
|
|
"enum": [
|
|
"length",
|
|
"eos_token",
|
|
"stop_sequence"
|
|
],
|
|
"example": "length"
|
|
},
|
|
"GenerateParameters": {
|
|
"type": "object",
|
|
"properties": {
|
|
"best_of": {
|
|
"type": "integer",
|
|
"default": "null",
|
|
"example": 1,
|
|
"nullable": true,
|
|
"minimum": 0,
|
|
"exclusiveMinimum": 0
|
|
},
|
|
"decoder_input_details": {
|
|
"type": "boolean",
|
|
"default": "true"
|
|
},
|
|
"details": {
|
|
"type": "boolean",
|
|
"default": "true"
|
|
},
|
|
"do_sample": {
|
|
"type": "boolean",
|
|
"default": "false",
|
|
"example": true
|
|
},
|
|
"frequency_penalty": {
|
|
"type": "number",
|
|
"format": "float",
|
|
"default": "null",
|
|
"example": 0.1,
|
|
"nullable": true,
|
|
"exclusiveMinimum": -2
|
|
},
|
|
"grammar": {
|
|
"allOf": [
|
|
{
|
|
"$ref": "#/components/schemas/GrammarType"
|
|
}
|
|
],
|
|
"nullable": true
|
|
},
|
|
"max_new_tokens": {
|
|
"type": "integer",
|
|
"format": "int32",
|
|
"default": "100",
|
|
"example": "20",
|
|
"nullable": true,
|
|
"minimum": 0
|
|
},
|
|
"repetition_penalty": {
|
|
"type": "number",
|
|
"format": "float",
|
|
"default": "null",
|
|
"example": 1.03,
|
|
"nullable": true,
|
|
"exclusiveMinimum": 0
|
|
},
|
|
"return_full_text": {
|
|
"type": "boolean",
|
|
"default": "null",
|
|
"example": false,
|
|
"nullable": true
|
|
},
|
|
"seed": {
|
|
"type": "integer",
|
|
"format": "int64",
|
|
"default": "null",
|
|
"example": "null",
|
|
"nullable": true,
|
|
"minimum": 0,
|
|
"exclusiveMinimum": 0
|
|
},
|
|
"stop": {
|
|
"type": "array",
|
|
"items": {
|
|
"type": "string"
|
|
},
|
|
"example": [
|
|
"photographer"
|
|
],
|
|
"maxItems": 4
|
|
},
|
|
"temperature": {
|
|
"type": "number",
|
|
"format": "float",
|
|
"default": "null",
|
|
"example": 0.5,
|
|
"nullable": true,
|
|
"exclusiveMinimum": 0
|
|
},
|
|
"top_k": {
|
|
"type": "integer",
|
|
"format": "int32",
|
|
"default": "null",
|
|
"example": 10,
|
|
"nullable": true,
|
|
"exclusiveMinimum": 0
|
|
},
|
|
"top_n_tokens": {
|
|
"type": "integer",
|
|
"format": "int32",
|
|
"default": "null",
|
|
"example": 5,
|
|
"nullable": true,
|
|
"minimum": 0,
|
|
"exclusiveMinimum": 0
|
|
},
|
|
"top_p": {
|
|
"type": "number",
|
|
"format": "float",
|
|
"default": "null",
|
|
"example": 0.95,
|
|
"nullable": true,
|
|
"maximum": 1,
|
|
"exclusiveMinimum": 0
|
|
},
|
|
"truncate": {
|
|
"type": "integer",
|
|
"default": "null",
|
|
"example": "null",
|
|
"nullable": true,
|
|
"minimum": 0
|
|
},
|
|
"typical_p": {
|
|
"type": "number",
|
|
"format": "float",
|
|
"default": "null",
|
|
"example": 0.95,
|
|
"nullable": true,
|
|
"maximum": 1,
|
|
"exclusiveMinimum": 0
|
|
},
|
|
"watermark": {
|
|
"type": "boolean",
|
|
"default": "false",
|
|
"example": true
|
|
}
|
|
}
|
|
},
|
|
"GenerateRequest": {
|
|
"type": "object",
|
|
"required": [
|
|
"inputs"
|
|
],
|
|
"properties": {
|
|
"inputs": {
|
|
"type": "string",
|
|
"example": "My name is Olivier and I"
|
|
},
|
|
"parameters": {
|
|
"$ref": "#/components/schemas/GenerateParameters"
|
|
}
|
|
}
|
|
},
|
|
"GenerateResponse": {
|
|
"type": "object",
|
|
"required": [
|
|
"generated_text"
|
|
],
|
|
"properties": {
|
|
"details": {
|
|
"allOf": [
|
|
{
|
|
"$ref": "#/components/schemas/Details"
|
|
}
|
|
],
|
|
"nullable": true
|
|
},
|
|
"generated_text": {
|
|
"type": "string",
|
|
"example": "test"
|
|
}
|
|
}
|
|
},
|
|
"GrammarType": {
|
|
"oneOf": [
|
|
{
|
|
"type": "object",
|
|
"required": [
|
|
"type",
|
|
"value"
|
|
],
|
|
"properties": {
|
|
"type": {
|
|
"type": "string",
|
|
"enum": [
|
|
"json"
|
|
]
|
|
},
|
|
"value": {
|
|
"description": "A string that represents a [JSON Schema](https://json-schema.org/).\n\nJSON Schema is a declarative language that allows you to annotate JSON documents\nwith types and descriptions."
|
|
}
|
|
}
|
|
},
|
|
{
|
|
"type": "object",
|
|
"required": [
|
|
"type",
|
|
"value"
|
|
],
|
|
"properties": {
|
|
"type": {
|
|
"type": "string",
|
|
"enum": [
|
|
"regex"
|
|
]
|
|
},
|
|
"value": {
|
|
"type": "string"
|
|
}
|
|
}
|
|
}
|
|
],
|
|
"discriminator": {
|
|
"propertyName": "type"
|
|
}
|
|
},
|
|
"Info": {
|
|
"type": "object",
|
|
"required": [
|
|
"model_id",
|
|
"model_dtype",
|
|
"model_device_type",
|
|
"max_concurrent_requests",
|
|
"max_best_of",
|
|
"max_stop_sequences",
|
|
"max_input_length",
|
|
"max_total_tokens",
|
|
"waiting_served_ratio",
|
|
"max_batch_total_tokens",
|
|
"max_waiting_tokens",
|
|
"validation_workers",
|
|
"version"
|
|
],
|
|
"properties": {
|
|
"docker_label": {
|
|
"type": "string",
|
|
"example": "null",
|
|
"nullable": true
|
|
},
|
|
"max_batch_size": {
|
|
"type": "integer",
|
|
"example": "null",
|
|
"nullable": true,
|
|
"minimum": 0
|
|
},
|
|
"max_batch_total_tokens": {
|
|
"type": "integer",
|
|
"format": "int32",
|
|
"example": "32000",
|
|
"minimum": 0
|
|
},
|
|
"max_best_of": {
|
|
"type": "integer",
|
|
"example": "2",
|
|
"minimum": 0
|
|
},
|
|
"max_concurrent_requests": {
|
|
"type": "integer",
|
|
"description": "Router Parameters",
|
|
"example": "128",
|
|
"minimum": 0
|
|
},
|
|
"max_input_length": {
|
|
"type": "integer",
|
|
"example": "1024",
|
|
"minimum": 0
|
|
},
|
|
"max_stop_sequences": {
|
|
"type": "integer",
|
|
"example": "4",
|
|
"minimum": 0
|
|
},
|
|
"max_total_tokens": {
|
|
"type": "integer",
|
|
"example": "2048",
|
|
"minimum": 0
|
|
},
|
|
"max_waiting_tokens": {
|
|
"type": "integer",
|
|
"example": "20",
|
|
"minimum": 0
|
|
},
|
|
"model_device_type": {
|
|
"type": "string",
|
|
"example": "cuda"
|
|
},
|
|
"model_dtype": {
|
|
"type": "string",
|
|
"example": "torch.float16"
|
|
},
|
|
"model_id": {
|
|
"type": "string",
|
|
"description": "Model info",
|
|
"example": "bigscience/bloom-560m"
|
|
},
|
|
"model_pipeline_tag": {
|
|
"type": "string",
|
|
"example": "text-generation",
|
|
"nullable": true
|
|
},
|
|
"model_sha": {
|
|
"type": "string",
|
|
"example": "e985a63cdc139290c5f700ff1929f0b5942cced2",
|
|
"nullable": true
|
|
},
|
|
"sha": {
|
|
"type": "string",
|
|
"example": "null",
|
|
"nullable": true
|
|
},
|
|
"validation_workers": {
|
|
"type": "integer",
|
|
"example": "2",
|
|
"minimum": 0
|
|
},
|
|
"version": {
|
|
"type": "string",
|
|
"description": "Router Info",
|
|
"example": "0.5.0"
|
|
},
|
|
"waiting_served_ratio": {
|
|
"type": "number",
|
|
"format": "float",
|
|
"example": "1.2"
|
|
}
|
|
}
|
|
},
|
|
"Message": {
|
|
"type": "object",
|
|
"required": [
|
|
"role",
|
|
"content"
|
|
],
|
|
"properties": {
|
|
"content": {
|
|
"type": "string",
|
|
"example": "My name is David and I"
|
|
},
|
|
"name": {
|
|
"type": "string",
|
|
"example": "\"David\"",
|
|
"nullable": true
|
|
},
|
|
"role": {
|
|
"type": "string",
|
|
"example": "user"
|
|
}
|
|
}
|
|
},
|
|
"PrefillToken": {
|
|
"type": "object",
|
|
"required": [
|
|
"id",
|
|
"text",
|
|
"logprob"
|
|
],
|
|
"properties": {
|
|
"id": {
|
|
"type": "integer",
|
|
"format": "int32",
|
|
"example": 0,
|
|
"minimum": 0
|
|
},
|
|
"logprob": {
|
|
"type": "number",
|
|
"format": "float",
|
|
"example": -0.34,
|
|
"nullable": true
|
|
},
|
|
"text": {
|
|
"type": "string",
|
|
"example": "test"
|
|
}
|
|
}
|
|
},
|
|
"SimpleToken": {
|
|
"type": "object",
|
|
"required": [
|
|
"id",
|
|
"text",
|
|
"start",
|
|
"stop"
|
|
],
|
|
"properties": {
|
|
"id": {
|
|
"type": "integer",
|
|
"format": "int32",
|
|
"example": 0,
|
|
"minimum": 0
|
|
},
|
|
"start": {
|
|
"type": "integer",
|
|
"example": 0,
|
|
"minimum": 0
|
|
},
|
|
"stop": {
|
|
"type": "integer",
|
|
"example": 2,
|
|
"minimum": 0
|
|
},
|
|
"text": {
|
|
"type": "string",
|
|
"example": "test"
|
|
}
|
|
}
|
|
},
|
|
"StreamDetails": {
|
|
"type": "object",
|
|
"required": [
|
|
"finish_reason",
|
|
"generated_tokens"
|
|
],
|
|
"properties": {
|
|
"finish_reason": {
|
|
"$ref": "#/components/schemas/FinishReason"
|
|
},
|
|
"generated_tokens": {
|
|
"type": "integer",
|
|
"format": "int32",
|
|
"example": 1,
|
|
"minimum": 0
|
|
},
|
|
"seed": {
|
|
"type": "integer",
|
|
"format": "int64",
|
|
"example": 42,
|
|
"nullable": true,
|
|
"minimum": 0
|
|
}
|
|
}
|
|
},
|
|
"StreamResponse": {
|
|
"type": "object",
|
|
"required": [
|
|
"index",
|
|
"token"
|
|
],
|
|
"properties": {
|
|
"details": {
|
|
"allOf": [
|
|
{
|
|
"$ref": "#/components/schemas/StreamDetails"
|
|
}
|
|
],
|
|
"default": "null",
|
|
"nullable": true
|
|
},
|
|
"generated_text": {
|
|
"type": "string",
|
|
"default": "null",
|
|
"example": "test",
|
|
"nullable": true
|
|
},
|
|
"index": {
|
|
"type": "integer",
|
|
"format": "int32",
|
|
"minimum": 0
|
|
},
|
|
"token": {
|
|
"$ref": "#/components/schemas/Token"
|
|
},
|
|
"top_tokens": {
|
|
"type": "array",
|
|
"items": {
|
|
"$ref": "#/components/schemas/Token"
|
|
}
|
|
}
|
|
}
|
|
},
|
|
"Token": {
|
|
"type": "object",
|
|
"required": [
|
|
"id",
|
|
"text",
|
|
"logprob",
|
|
"special"
|
|
],
|
|
"properties": {
|
|
"id": {
|
|
"type": "integer",
|
|
"format": "int32",
|
|
"example": 0,
|
|
"minimum": 0
|
|
},
|
|
"logprob": {
|
|
"type": "number",
|
|
"format": "float",
|
|
"example": -0.34,
|
|
"nullable": true
|
|
},
|
|
"special": {
|
|
"type": "boolean",
|
|
"example": "false"
|
|
},
|
|
"text": {
|
|
"type": "string",
|
|
"example": "test"
|
|
}
|
|
}
|
|
},
|
|
"TokenizeResponse": {
|
|
"type": "array",
|
|
"items": {
|
|
"$ref": "#/components/schemas/SimpleToken"
|
|
}
|
|
},
|
|
"Usage": {
|
|
"type": "object",
|
|
"required": [
|
|
"prompt_tokens",
|
|
"completion_tokens",
|
|
"total_tokens"
|
|
],
|
|
"properties": {
|
|
"completion_tokens": {
|
|
"type": "integer",
|
|
"format": "int32",
|
|
"minimum": 0
|
|
},
|
|
"prompt_tokens": {
|
|
"type": "integer",
|
|
"format": "int32",
|
|
"minimum": 0
|
|
},
|
|
"total_tokens": {
|
|
"type": "integer",
|
|
"format": "int32",
|
|
"minimum": 0
|
|
}
|
|
}
|
|
}
|
|
}
|
|
},
|
|
"tags": [
|
|
{
|
|
"name": "Text Generation Inference",
|
|
"description": "Hugging Face Text Generation Inference API"
|
|
}
|
|
]
|
|
}
|