hf_text-generation-inference/router/client/src/lib.rs

//! Text Generation gRPC client library

mod client;
#[allow(clippy::derive_partial_eq_without_eq)]
mod pb;
mod sharded_client;

pub use client::Client;
pub use pb::generate::v2::HealthResponse;
pub use pb::generate::v2::InfoResponse as ShardInfo;
pub use pb::generate::v2::{
    Batch, CachedBatch, FinishReason, GeneratedText, Generation, GrammarType,
    NextTokenChooserParameters, Request, StoppingCriteriaParameters, Tokens,
};
pub use sharded_client::ShardedClient;
use thiserror::Error;
use tonic::transport;
use tonic::Status;

/// Errors surfaced by the Text Generation gRPC client.
///
/// The payload-carrying variants hold the error text as an owned `String`
/// rather than the source error value.
#[derive(Error, Debug, Clone)]
pub enum ClientError {
    /// Produced by the `From<transport::Error>` conversion; the payload is the
    /// transport error's `Display` text.
    #[error("Could not connect to Text Generation server: {0}")]
    Connection(String),
    /// Produced by the `From<Status>` conversion; the payload is the message
    /// taken from the gRPC status.
    #[error("Server error: {0}")]
    Generation(String),
    /// Signals that sharded results were empty.
    // NOTE(review): not constructed anywhere in this file; presumably raised by
    // the sharded client when no shard yields a result — confirm at call sites.
    #[error("Sharded results are empty")]
    EmptyResults,
}

/// Converts a gRPC [`Status`] into a [`ClientError::Generation`].
///
/// Only the status message is carried over into the resulting error. The
/// conversion also emits the resulting error through `tracing::error!` before
/// returning it, so every converted status is logged once at this point.
impl From<Status> for ClientError {
    fn from(err: Status) -> Self {
        // Copy the message out of the status before shadowing `err`.
        let message = err.message().to_owned();
        let err = Self::Generation(message);
        tracing::error!("{err}");
        err
    }
}

/// Converts a tonic [`transport::Error`] into a [`ClientError::Connection`].
///
/// The transport error is flattened to its `Display` text. The conversion also
/// emits the resulting error through `tracing::error!` before returning it.
impl From<transport::Error> for ClientError {
    fn from(err: transport::Error) -> Self {
        // Render the transport error to text before shadowing `err`.
        let reason = err.to_string();
        let err = Self::Connection(reason);
        tracing::error!("{err}");
        err
    }
}

/// Shorthand for a [`std::result::Result`] whose error type is [`ClientError`].
pub type Result<T> = std::result::Result<T, ClientError>;
v0.1.0 2022-10-18 07:19:03 -06:00			`//! Text Generation gRPC client library`
Init 2022-10-08 04:30:12 -06:00
			`mod client;`
v0.1.0 2022-10-18 07:19:03 -06:00			`#[allow(clippy::derive_partial_eq_without_eq)]`
Init 2022-10-08 04:30:12 -06:00			`mod pb;`
			`mod sharded_client;`

			`pub use client::Client;`
Speculative (#1308) 2023-12-11 04:46:30 -07:00			`pub use pb::generate::v2::HealthResponse;`
			`pub use pb::generate::v2::InfoResponse as ShardInfo;`
			`pub use pb::generate::v2::{`
Outlines guided generation (#1539) This WIP PR starts to add grammar support via outlines, currently this PR supports very simple regex grammars and does not optimize for precompiling or caching grammar fsm's. todo: - [X] add simple outlines guidance to `NextTokenChooser` - [X] update protos for grammar - [X] update generation params API - [X] constrain simple grammar - [ ] support parsing more complex grammar into fsm - [ ] support all outline support grammar types - [ ] explore optimizations to avoid recompiling grammars guided request ```bash curl -s 'http://localhost:3000/generate' \ --header 'Content-Type: application/json' \ --data-raw '{ "inputs": "make an email for david: \n", "parameters": { "max_new_tokens": 6, "grammar": "[\\w-]+@([\\w-]+\\.)+[\\w-]+" } }' \| jq ``` response ```json { "generated_text": "david@example.com" } ``` unguided request ```bash curl -s 'http://localhost:3000/generate' \ --header 'Content-Type: application/json' \ --data '{ "inputs": "make an email for david: \n", "parameters": { "max_new_tokens": 6 } }' \| jq ``` response ```json { "generated_text": " email = 'david" } ``` 2024-02-15 02:28:10 -07:00			`Batch, CachedBatch, FinishReason, GeneratedText, Generation, GrammarType,`
			`NextTokenChooserParameters, Request, StoppingCriteriaParameters, Tokens,`
feat: Support stop sequences (#7) 2022-12-12 10:25:22 -07:00			`};`
Init 2022-10-08 04:30:12 -06:00			`pub use sharded_client::ShardedClient;`
			`use thiserror::Error;`
v0.1.0 2022-10-18 07:19:03 -06:00			`use tonic::transport;`
Init 2022-10-08 04:30:12 -06:00			`use tonic::Status;`

			`#[derive(Error, Debug, Clone)]`
feat: Improve error handling 2022-10-17 06:59:00 -06:00			`pub enum ClientError {`
feat: add distributed tracing (#62) 2023-02-13 05:02:45 -07:00			`#[error("Could not connect to Text Generation server: {0}")]`
feat: Improve error handling 2022-10-17 06:59:00 -06:00			`Connection(String),`
feat: add distributed tracing (#62) 2023-02-13 05:02:45 -07:00			`#[error("Server error: {0}")]`
feat: Improve error handling 2022-10-17 06:59:00 -06:00			`Generation(String),`
feat(server): shard token decode (#303) 2023-05-10 07:48:21 -06:00			`#[error("Sharded results are empty")]`
			`EmptyResults,`
Init 2022-10-08 04:30:12 -06:00			`}`

			`impl From<Status> for ClientError {`
			`fn from(err: Status) -> Self {`
feat: add distributed tracing (#62) 2023-02-13 05:02:45 -07:00			`let err = Self::Generation(err.message().to_string());`
			`tracing::error!("{err}");`
			`err`
feat: Improve error handling 2022-10-17 06:59:00 -06:00			`}`
			`}`

			`impl From<transport::Error> for ClientError {`
			`fn from(err: transport::Error) -> Self {`
feat: add distributed tracing (#62) 2023-02-13 05:02:45 -07:00			`let err = Self::Connection(err.to_string());`
			`tracing::error!("{err}");`
			`err`
Init 2022-10-08 04:30:12 -06:00			`}`
			`}`

			`pub type Result<T> = std::result::Result<T, ClientError>;`