/// Protocol Buffer definitions for the `generate.v1` text generation service.
syntax = "proto3";

package generate.v1;

/// gRPC service exposing service discovery, cache clearing and batched token
/// generation RPCs.
service TextGenerationService {
  /// Service discovery
  rpc ServiceDiscovery(ServiceDiscoveryRequest)
      returns (ServiceDiscoveryResponse);

  /// Empties batch cache
  rpc ClearCache(ClearCacheRequest) returns (ClearCacheResponse);

  /// Generate tokens for a batch
  rpc Generate(GenerateRequest) returns (GenerateResponse);

  /// Generate tokens for a list of cached batches
  rpc GenerateWithCache(GenerateWithCacheRequest)
      returns (GenerateWithCacheResponse);

  /// Generate tokens until the text of at least one request of the batch is
  /// generated
  rpc GenerateUntilFinished(GenerateUntilFinishedRequest)
      returns (GenerateUntilFinishedResponse);

  /// Generate tokens until the text of at least one request of the cached
  /// batches is finished
  rpc GenerateUntilFinishedWithCache(GenerateUntilFinishedWithCacheRequest)
      returns (GenerateUntilFinishedWithCacheResponse);
}

/// Empty request of the ServiceDiscovery RPC
message ServiceDiscoveryRequest {}

/// Response of the ServiceDiscovery RPC
message ServiceDiscoveryResponse {
  /// Other shards urls
  repeated string urls = 1;
}

/// Empty request of the ClearCache RPC
message ClearCacheRequest {}

/// Empty response of the ClearCache RPC
message ClearCacheResponse {}

/// Logits warper parameters carried by each Request (field `parameters`).
/// NOTE(review): the per-field descriptions below are inferred from the field
/// names only; confirm the exact semantics and valid ranges against the
/// server implementation.
message LogitsWarperParameters {
  /// Temperature value (presumably the sampling temperature; confirm)
  float temperature = 1;

  /// Top-k value (presumably the number of highest-scoring tokens kept;
  /// confirm)
  uint32 top_k = 2;

  /// Top-p value (presumably the cumulative probability threshold; confirm)
  float top_p = 3;

  /// Sampling flag (presumably enables sampling when true; confirm)
  bool do_sample = 4;
}

/// A single generation request: its inputs, logits warper parameters and
/// stopping criteria.
message Request {
  /// Request ID
  uint64 id = 1;

  /// The generation context
  string inputs = 2;

  /// The number of tokens inside inputs
  uint32 input_length = 3;

  /// Logits Warper Parameters
  LogitsWarperParameters parameters = 4;

  /// Stopping criteria
  /// NOTE(review): presumably the maximum number of tokens to generate for
  /// this request; confirm.
  uint32 max_new_tokens = 5;
}

/// A group of requests identified by a batch ID.
message Batch {
  /// Batch ID
  uint64 id = 1;

  /// Individual requests
  repeated Request requests = 2;

  /// Batch size (==len(requests))
  uint32 size = 3;

  /// Length of the longest sequence within the batch (used for padding)
  uint32 max_sequence_length = 4;
}

/// A request paired with its output text.
message GeneratedText {
  /// Request
  Request request = 1;

  /// Output
  string output = 2;
}

/// Request of the Generate RPC
message GenerateRequest {
  /// Batch
  Batch batch = 1;
}

/// Response of the Generate RPC
message GenerateResponse {
  /// Finished requests
  repeated GeneratedText generated_texts = 1;

  /// Next batch (cached); declared `optional`, so it may be unset
  optional Batch batch = 2;
}

/// Request of the GenerateWithCache RPC
message GenerateWithCacheRequest {
  /// Cached batches
  repeated Batch batches = 1;
}

/// Response of the GenerateWithCache RPC
message GenerateWithCacheResponse {
  /// Finished requests
  repeated GeneratedText generated_texts = 1;

  /// Next batch (cached); declared `optional`, so it may be unset
  optional Batch batch = 2;
}

/// Request of the GenerateUntilFinished RPC
message GenerateUntilFinishedRequest {
  /// Batch
  Batch batch = 1;
}

/// Response of the GenerateUntilFinished RPC
message GenerateUntilFinishedResponse {
  /// Finished requests
  repeated GeneratedText generated_texts = 1;

  /// Next batch (cached); declared `optional`, so it may be unset
  optional Batch batch = 2;
}

/// Request of the GenerateUntilFinishedWithCache RPC
message GenerateUntilFinishedWithCacheRequest {
  /// Cached batches
  repeated Batch batches = 1;
}

/// Response of the GenerateUntilFinishedWithCache RPC
message GenerateUntilFinishedWithCacheResponse {
  /// Finished requests
  repeated GeneratedText generated_texts = 1;

  /// Next batch (cached); declared `optional`, so it may be unset
  optional Batch batch = 2;
}
