2023-02-03 04:43:37 -07:00
|
|
|
{
|
|
|
|
"openapi": "3.0.3",
|
|
|
|
"info": {
|
|
|
|
"title": "Text Generation Inference",
|
|
|
|
"description": "Text Generation Webserver",
|
|
|
|
"contact": {
|
|
|
|
"name": "Olivier Dehaene",
|
2023-02-07 07:40:25 -07:00
|
|
|
"email": "olivier@huggingface.co"
|
2023-02-03 04:43:37 -07:00
|
|
|
},
|
|
|
|
"license": {
|
|
|
|
"name": "Apache 2.0",
|
|
|
|
"url": "https://www.apache.org/licenses/LICENSE-2.0"
|
|
|
|
},
|
2023-02-16 09:28:29 -07:00
|
|
|
"version": "0.3.0"
|
2023-02-03 04:43:37 -07:00
|
|
|
},
|
|
|
|
"paths": {
|
|
|
|
"/generate": {
|
|
|
|
"post": {
|
|
|
|
"tags": [
|
|
|
|
"Text Generation Inference"
|
|
|
|
],
|
|
|
|
"summary": "Generate tokens",
|
|
|
|
"description": "Generate tokens",
|
|
|
|
"operationId": "generate",
|
|
|
|
"requestBody": {
|
|
|
|
"content": {
|
|
|
|
"application/json": {
|
|
|
|
"schema": {
|
|
|
|
"$ref": "#/components/schemas/GenerateRequest"
|
|
|
|
}
|
|
|
|
}
|
|
|
|
},
|
|
|
|
"required": true
|
|
|
|
},
|
|
|
|
"responses": {
|
|
|
|
"200": {
|
|
|
|
"description": "Generated Text",
|
|
|
|
"content": {
|
|
|
|
"application/json": {
|
|
|
|
"schema": {
|
2023-02-16 09:28:29 -07:00
|
|
|
"$ref": "#/components/schemas/GenerateResponse"
|
2023-02-03 04:43:37 -07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
},
|
|
|
|
"422": {
|
|
|
|
"description": "Input validation error",
|
|
|
|
"content": {
|
|
|
|
"application/json": {
|
|
|
|
"schema": {
|
2023-02-16 09:28:29 -07:00
|
|
|
"$ref": "#/components/schemas/ErrorResponse"
|
2023-02-03 04:43:37 -07:00
|
|
|
},
|
|
|
|
"example": {
|
|
|
|
"error": "Input validation error"
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
},
|
|
|
|
"424": {
|
|
|
|
"description": "Generation Error",
|
|
|
|
"content": {
|
|
|
|
"application/json": {
|
|
|
|
"schema": {
|
2023-02-16 09:28:29 -07:00
|
|
|
"$ref": "#/components/schemas/ErrorResponse"
|
2023-02-03 04:43:37 -07:00
|
|
|
},
|
|
|
|
"example": {
|
|
|
|
"error": "Request failed during generation"
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
},
|
|
|
|
"429": {
|
|
|
|
"description": "Model is overloaded",
|
|
|
|
"content": {
|
|
|
|
"application/json": {
|
|
|
|
"schema": {
|
2023-02-16 09:28:29 -07:00
|
|
|
"$ref": "#/components/schemas/ErrorResponse"
|
2023-02-03 04:43:37 -07:00
|
|
|
},
|
|
|
|
"example": {
|
|
|
|
"error": "Model is overloaded"
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
},
|
|
|
|
"500": {
|
|
|
|
"description": "Incomplete generation",
|
|
|
|
"content": {
|
|
|
|
"application/json": {
|
|
|
|
"schema": {
|
2023-02-16 09:28:29 -07:00
|
|
|
"$ref": "#/components/schemas/ErrorResponse"
|
2023-02-03 04:43:37 -07:00
|
|
|
},
|
|
|
|
"example": {
|
|
|
|
"error": "Incomplete generation"
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
},
|
|
|
|
"deprecated": false
|
|
|
|
}
|
|
|
|
},
|
|
|
|
"/generate_stream": {
|
|
|
|
"post": {
|
|
|
|
"tags": [
|
|
|
|
"Text Generation Inference"
|
|
|
|
],
|
2023-02-08 14:30:11 -07:00
|
|
|
"summary": "Generate a stream of tokens using Server-Sent Events",
|
|
|
|
"description": "Generate a stream of tokens using Server-Sent Events",
|
2023-02-03 04:43:37 -07:00
|
|
|
"operationId": "generate_stream",
|
|
|
|
"requestBody": {
|
|
|
|
"content": {
|
|
|
|
"application/json": {
|
|
|
|
"schema": {
|
|
|
|
"$ref": "#/components/schemas/GenerateRequest"
|
|
|
|
}
|
|
|
|
}
|
|
|
|
},
|
|
|
|
"required": true
|
|
|
|
},
|
|
|
|
"responses": {
|
|
|
|
"200": {
|
|
|
|
"description": "Generated Text",
|
|
|
|
"content": {
|
2023-02-16 09:28:29 -07:00
|
|
|
"text/event-stream": {
|
2023-02-03 04:43:37 -07:00
|
|
|
"schema": {
|
2023-02-16 09:28:29 -07:00
|
|
|
"$ref": "#/components/schemas/StreamResponse"
|
2023-02-03 04:43:37 -07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
},
|
|
|
|
"422": {
|
|
|
|
"description": "Input validation error",
|
|
|
|
"content": {
|
2023-02-16 09:28:29 -07:00
|
|
|
"text/event-stream": {
|
2023-02-03 04:43:37 -07:00
|
|
|
"schema": {
|
2023-02-16 09:28:29 -07:00
|
|
|
"$ref": "#/components/schemas/ErrorResponse"
|
2023-02-03 04:43:37 -07:00
|
|
|
},
|
|
|
|
"example": {
|
|
|
|
"error": "Input validation error"
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
},
|
|
|
|
"424": {
|
|
|
|
"description": "Generation Error",
|
|
|
|
"content": {
|
2023-02-16 09:28:29 -07:00
|
|
|
"text/event-stream": {
|
2023-02-03 04:43:37 -07:00
|
|
|
"schema": {
|
2023-02-16 09:28:29 -07:00
|
|
|
"$ref": "#/components/schemas/ErrorResponse"
|
2023-02-03 04:43:37 -07:00
|
|
|
},
|
|
|
|
"example": {
|
|
|
|
"error": "Request failed during generation"
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
},
|
|
|
|
"429": {
|
|
|
|
"description": "Model is overloaded",
|
|
|
|
"content": {
|
2023-02-16 09:28:29 -07:00
|
|
|
"text/event-stream": {
|
2023-02-03 04:43:37 -07:00
|
|
|
"schema": {
|
2023-02-16 09:28:29 -07:00
|
|
|
"$ref": "#/components/schemas/ErrorResponse"
|
2023-02-03 04:43:37 -07:00
|
|
|
},
|
|
|
|
"example": {
|
|
|
|
"error": "Model is overloaded"
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
},
|
|
|
|
"500": {
|
|
|
|
"description": "Incomplete generation",
|
|
|
|
"content": {
|
2023-02-16 09:28:29 -07:00
|
|
|
"text/event-stream": {
|
2023-02-03 04:43:37 -07:00
|
|
|
"schema": {
|
2023-02-16 09:28:29 -07:00
|
|
|
"$ref": "#/components/schemas/ErrorResponse"
|
2023-02-03 04:43:37 -07:00
|
|
|
},
|
|
|
|
"example": {
|
|
|
|
"error": "Incomplete generation"
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
},
|
|
|
|
"deprecated": false
|
|
|
|
}
|
2023-02-16 09:28:29 -07:00
|
|
|
},
|
|
|
|
"/metrics": {
|
|
|
|
"get": {
|
|
|
|
"tags": [
|
|
|
|
"Text Generation Inference"
|
|
|
|
],
|
|
|
|
"summary": "Prometheus metrics scrape endpoint",
|
|
|
|
"description": "Prometheus metrics scrape endpoint",
|
|
|
|
"operationId": "metrics",
|
|
|
|
"responses": {
|
|
|
|
"200": {
|
|
|
|
"description": "Prometheus Metrics",
|
|
|
|
"content": {
|
|
|
|
"text/plain": {
|
|
|
|
"schema": {
|
|
|
|
"type": "string"
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
},
|
|
|
|
"deprecated": false
|
|
|
|
}
|
2023-02-03 04:43:37 -07:00
|
|
|
}
|
|
|
|
},
|
|
|
|
"components": {
|
|
|
|
"schemas": {
|
|
|
|
"Details": {
|
|
|
|
"type": "object",
|
|
|
|
"required": [
|
|
|
|
"finish_reason",
|
|
|
|
"generated_tokens"
|
|
|
|
],
|
|
|
|
"properties": {
|
|
|
|
"finish_reason": {
|
|
|
|
"$ref": "#/components/schemas/FinishReason"
|
|
|
|
},
|
|
|
|
"generated_tokens": {
|
|
|
|
"type": "integer",
|
|
|
|
"format": "int32",
|
|
|
|
"example": 1
|
|
|
|
},
|
|
|
|
"prefill": {
|
|
|
|
"type": "array",
|
|
|
|
"items": {
|
|
|
|
"$ref": "#/components/schemas/Token"
|
|
|
|
}
|
|
|
|
},
|
|
|
|
"seed": {
|
|
|
|
"type": "integer",
|
|
|
|
"format": "int64",
|
|
|
|
"example": 42
|
|
|
|
},
|
|
|
|
"tokens": {
|
|
|
|
"type": "array",
|
|
|
|
"items": {
|
|
|
|
"$ref": "#/components/schemas/Token"
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
},
|
|
|
|
"ErrorResponse": {
|
|
|
|
"type": "object",
|
|
|
|
"required": [
|
|
|
|
"error"
|
|
|
|
],
|
|
|
|
"properties": {
|
|
|
|
"error": {
|
|
|
|
"type": "string"
|
|
|
|
}
|
|
|
|
}
|
|
|
|
},
|
|
|
|
"FinishReason": {
|
|
|
|
"type": "string",
|
|
|
|
"enum": [
|
|
|
|
"length",
|
|
|
|
"eos_token",
|
|
|
|
"stop_sequence"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
"GenerateParameters": {
|
|
|
|
"type": "object",
|
|
|
|
"properties": {
|
|
|
|
"details": {
|
|
|
|
"type": "boolean",
|
|
|
|
"default": true
|
|
|
|
},
|
|
|
|
"do_sample": {
|
|
|
|
"type": "boolean",
|
|
|
|
"default": false,
|
|
|
|
"example": true
|
|
|
|
},
|
|
|
|
"max_new_tokens": {
|
|
|
|
"type": "integer",
|
|
|
|
"format": "int32",
|
|
|
|
"default": 20,
|
|
|
|
"maximum": 512,
"exclusiveMaximum": true,
|
|
|
|
"minimum": 0,
"exclusiveMinimum": true
|
|
|
|
},
|
|
|
|
"repetition_penalty": {
|
|
|
|
"type": "number",
|
|
|
|
"format": "float",
|
|
|
|
"default": null,
|
|
|
|
"example": 1.03,
|
|
|
|
"nullable": true,
|
|
|
|
"minimum": 0.0,
"exclusiveMinimum": true
|
|
|
|
},
|
|
|
|
"seed": {
|
|
|
|
"type": "integer",
|
|
|
|
"format": "int64"
|
|
|
|
},
|
|
|
|
"stop": {
|
|
|
|
"type": "array",
|
|
|
|
"items": {
|
|
|
|
"type": "string"
|
|
|
|
},
|
|
|
|
"example": [
|
|
|
|
"photographer"
|
|
|
|
],
|
|
|
|
"maxItems": 4
|
|
|
|
},
|
|
|
|
"temperature": {
|
|
|
|
"type": "number",
|
|
|
|
"format": "float",
|
|
|
|
"default": null,
|
|
|
|
"example": 0.5,
|
|
|
|
"nullable": true,
|
|
|
|
"minimum": 0.0,
"exclusiveMinimum": true
|
|
|
|
},
|
|
|
|
"top_k": {
|
|
|
|
"type": "integer",
|
|
|
|
"format": "int32",
|
|
|
|
"default": null,
|
|
|
|
"example": 10,
|
|
|
|
"nullable": true,
|
|
|
|
"minimum": 0,
"exclusiveMinimum": true
|
|
|
|
},
|
|
|
|
"top_p": {
|
|
|
|
"type": "number",
|
|
|
|
"format": "float",
|
|
|
|
"default": null,
|
|
|
|
"example": 0.95,
|
|
|
|
"nullable": true,
|
|
|
|
"maximum": 1.0,
|
|
|
|
"minimum": 0.0,
"exclusiveMinimum": true
|
|
|
|
}
|
|
|
|
}
|
|
|
|
},
|
|
|
|
"GenerateRequest": {
|
|
|
|
"type": "object",
|
|
|
|
"required": [
|
|
|
|
"inputs"
|
|
|
|
],
|
|
|
|
"properties": {
|
|
|
|
"inputs": {
|
|
|
|
"type": "string",
|
|
|
|
"example": "My name is Olivier and I"
|
|
|
|
},
|
|
|
|
"parameters": {
|
|
|
|
"$ref": "#/components/schemas/GenerateParameters"
|
|
|
|
}
|
|
|
|
}
|
|
|
|
},
|
|
|
|
"GenerateResponse": {
|
|
|
|
"type": "object",
|
|
|
|
"required": [
|
|
|
|
"generated_text"
|
|
|
|
],
|
|
|
|
"properties": {
|
|
|
|
"details": {
|
|
|
|
"$ref": "#/components/schemas/Details"
|
|
|
|
},
|
|
|
|
"generated_text": {
|
|
|
|
"type": "string",
|
|
|
|
"example": "test"
|
|
|
|
}
|
|
|
|
}
|
|
|
|
},
|
|
|
|
"StreamDetails": {
|
|
|
|
"type": "object",
|
|
|
|
"required": [
|
|
|
|
"finish_reason",
|
|
|
|
"generated_tokens"
|
|
|
|
],
|
|
|
|
"properties": {
|
|
|
|
"finish_reason": {
|
|
|
|
"$ref": "#/components/schemas/FinishReason"
|
|
|
|
},
|
|
|
|
"generated_tokens": {
|
|
|
|
"type": "integer",
|
|
|
|
"format": "int32",
|
|
|
|
"example": 1
|
|
|
|
},
|
|
|
|
"seed": {
|
|
|
|
"type": "integer",
|
|
|
|
"format": "int64",
|
|
|
|
"example": 42
|
|
|
|
}
|
|
|
|
}
|
|
|
|
},
|
|
|
|
"StreamResponse": {
|
|
|
|
"type": "object",
|
|
|
|
"required": [
|
|
|
|
"token"
|
|
|
|
],
|
|
|
|
"properties": {
|
|
|
|
"details": {
|
|
|
|
"$ref": "#/components/schemas/StreamDetails"
|
|
|
|
},
|
|
|
|
"generated_text": {
|
|
|
|
"type": "string",
|
|
|
|
"default": null,
|
|
|
|
"example": "test",
|
|
|
|
"nullable": true
|
|
|
|
},
|
|
|
|
"token": {
|
|
|
|
"$ref": "#/components/schemas/Token"
|
|
|
|
}
|
|
|
|
}
|
|
|
|
},
|
|
|
|
"Token": {
|
|
|
|
"type": "object",
|
|
|
|
"required": [
|
|
|
|
"id",
|
|
|
|
"text",
|
|
|
|
"logprob"
|
|
|
|
],
|
|
|
|
"properties": {
|
|
|
|
"id": {
|
|
|
|
"type": "integer",
|
|
|
|
"format": "int32",
|
|
|
|
"example": 0
|
|
|
|
},
|
|
|
|
"logprob": {
|
|
|
|
"type": "number",
|
|
|
|
"format": "float",
|
|
|
|
"example": -0.34,
|
|
|
|
"nullable": true
|
|
|
|
},
|
|
|
|
"text": {
|
|
|
|
"type": "string",
|
|
|
|
"example": "test"
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
},
|
|
|
|
"tags": [
|
|
|
|
{
|
|
|
|
"name": "Text Generation Inference",
|
|
|
|
"description": "Hugging Face Text Generation Inference API"
|
|
|
|
}
|
|
|
|
]
|
|
|
|
}
|