diff --git a/Cargo.lock b/Cargo.lock index 944047e4..2284ef84 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -785,7 +785,7 @@ checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" [[package]] name = "grpc-metadata" -version = "0.1.0" +version = "0.3.0" dependencies = [ "opentelemetry", "tonic", @@ -2222,7 +2222,7 @@ dependencies = [ [[package]] name = "text-generation-client" -version = "0.2.1" +version = "0.3.0" dependencies = [ "futures", "grpc-metadata", @@ -2239,7 +2239,7 @@ dependencies = [ [[package]] name = "text-generation-launcher" -version = "0.2.1" +version = "0.3.0" dependencies = [ "clap 4.1.4", "ctrlc", @@ -2254,7 +2254,7 @@ dependencies = [ [[package]] name = "text-generation-router" -version = "0.2.1" +version = "0.3.0" dependencies = [ "async-stream", "axum", diff --git a/docs/openapi.json b/docs/openapi.json index b4ef3ba6..11e2eec0 100644 --- a/docs/openapi.json +++ b/docs/openapi.json @@ -11,7 +11,7 @@ "name": "Apache 2.0", "url": "https://www.apache.org/licenses/LICENSE-2.0" }, - "version": "0.2.1" + "version": "0.3.0" }, "paths": { "/generate": { @@ -38,10 +38,7 @@ "content": { "application/json": { "schema": { - "type": "array", - "items": { - "$ref": "#/components/schemas/GenerateResponse" - } + "$ref": "#/components/schemas/GenerateResponse" } } } @@ -51,10 +48,7 @@ "content": { "application/json": { "schema": { - "type": "array", - "items": { - "$ref": "#/components/schemas/ErrorResponse" - } + "$ref": "#/components/schemas/ErrorResponse" }, "example": { "error": "Input validation error" @@ -67,10 +61,7 @@ "content": { "application/json": { "schema": { - "type": "array", - "items": { - "$ref": "#/components/schemas/ErrorResponse" - } + "$ref": "#/components/schemas/ErrorResponse" }, "example": { "error": "Request failed during generation" @@ -83,10 +74,7 @@ "content": { "application/json": { "schema": { - "type": "array", - "items": { - "$ref": "#/components/schemas/ErrorResponse" - } + "$ref": "#/components/schemas/ErrorResponse" }, "example": { "error": "Model is overloaded" @@ -99,10 +87,7 @@ "content": { "application/json": { "schema": { - "type": "array", - "items": { - "$ref": "#/components/schemas/ErrorResponse" - } + "$ref": "#/components/schemas/ErrorResponse" }, "example": { "error": "Incomplete generation" @@ -136,12 +121,9 @@ "200": { "description": "Generated Text", "content": { - "text/event-stream ": { + "text/event-stream": { "schema": { - "type": "array", - "items": { - "$ref": "#/components/schemas/StreamResponse" - } + "$ref": "#/components/schemas/StreamResponse" } } } @@ -149,12 +131,9 @@ "422": { "description": "Input validation error", "content": { - "text/event-stream ": { + "text/event-stream": { "schema": { - "type": "array", - "items": { - "$ref": "#/components/schemas/ErrorResponse" - } + "$ref": "#/components/schemas/ErrorResponse" }, "example": { "error": "Input validation error" @@ -165,12 +144,9 @@ "424": { "description": "Generation Error", "content": { - "text/event-stream ": { + "text/event-stream": { "schema": { - "type": "array", - "items": { - "$ref": "#/components/schemas/ErrorResponse" - } + "$ref": "#/components/schemas/ErrorResponse" }, "example": { "error": "Request failed during generation" @@ -181,12 +157,9 @@ "429": { "description": "Model is overloaded", "content": { - "text/event-stream ": { + "text/event-stream": { "schema": { - "type": "array", - "items": { - "$ref": "#/components/schemas/ErrorResponse" - } + "$ref": "#/components/schemas/ErrorResponse" }, "example": { "error": "Model is overloaded" @@ -197,12 +170,9 @@ "500": { "description": "Incomplete generation", "content": { - "text/event-stream ": { + "text/event-stream": { "schema": { - "type": "array", - "items": { - "$ref": "#/components/schemas/ErrorResponse" - } + "$ref": "#/components/schemas/ErrorResponse" }, "example": { "error": "Incomplete generation" @@ -213,6 +183,29 @@ }, "deprecated": false } + }, + "/metrics": { + "get": { + "tags": [ + "Text Generation Inference" + ], + "summary": "Prometheus metrics scrape endpoint", + "description": "Prometheus metrics scrape endpoint", + "operationId": "metrics", + "responses": { + "200": { + "description": "Prometheus Metrics", + "content": { + "text/plain": { + "schema": { + "type": "string" + } + } + } + } + }, + "deprecated": false + } } }, "components": { diff --git a/launcher/Cargo.toml b/launcher/Cargo.toml index a0d2b089..b60ba97d 100644 --- a/launcher/Cargo.toml +++ b/launcher/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "text-generation-launcher" -version = "0.2.1" +version = "0.3.0" edition = "2021" authors = ["Olivier Dehaene"] description = "Text Generation Launcher" diff --git a/router/Cargo.toml b/router/Cargo.toml index 9ac500c9..156adad7 100644 --- a/router/Cargo.toml +++ b/router/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "text-generation-router" -version = "0.2.1" +version = "0.3.0" edition = "2021" authors = ["Olivier Dehaene"] description = "Text Generation Webserver" diff --git a/router/client/Cargo.toml b/router/client/Cargo.toml index 7c7ed48e..06ebedcc 100644 --- a/router/client/Cargo.toml +++ b/router/client/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "text-generation-client" -version = "0.2.1" +version = "0.3.0" edition = "2021" [dependencies] diff --git a/router/grpc-metadata/Cargo.toml b/router/grpc-metadata/Cargo.toml index 311092e3..66dfde26 100644 --- a/router/grpc-metadata/Cargo.toml +++ b/router/grpc-metadata/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "grpc-metadata" -version = "0.1.0" +version = "0.3.0" edition = "2021" [dependencies] diff --git a/server/pyproject.toml b/server/pyproject.toml index d3a8c112..4722f703 100644 --- a/server/pyproject.toml +++ b/server/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "text-generation" -version = "0.2.1" +version = "0.3.0" description = "Text Generation Inference Python gRPC Server" authors = ["Olivier Dehaene "]