hf_text-generation-inference/proto/generate.proto

syntax = "proto3";
package generate.v1;

service TextGeneration {
    /// Service discovery
    rpc ServiceDiscovery(Empty) returns (ServiceDiscoveryResponse) {}
    /// Empties batch cache
    rpc ClearCache(Empty) returns (Empty);
    /// Generate tokens for a batch without cache
    rpc Generate(Batch) returns (Response);
    /// Generate tokens for a batch with cache
    rpc GenerateWithCache(BatchCached) returns (Response);
}

message ServiceDiscoveryResponse {
    repeated string urls = 1;
}

message LogitsWarperParameters {
    float temperature = 1;
    uint32 top_k = 2;
    float top_p = 3;
    bool do_sample = 4;
}

message Request {
    /// Request ID
    uint64 id = 1;
    /// The generation context
    string inputs = 2;
    /// Logits Warper Parameters
    LogitsWarperParameters parameters = 3;
    /// Stopping criteria
    uint32 max_new_tokens = 4;
}

message Batch {
    /// Batch ID
    uint64 id = 1;
    /// Individual requests
    repeated Request requests = 2;
}

message BatchCached {
    /// Batch ID
    uint64 id = 1;
    /// Request IDs within cache
    repeated uint64 request_ids = 2;
    /// Cache IDs
    repeated uint64 batch_cached_ids = 3;
    /// Batch size (sum of all batch sizes)
    uint32 total_batch_size = 4;
    /// Max sequence length
    uint32 max_sequence_length = 5;
}

message FinishedGeneration {
    /// ID of the original request
    uint64 id = 1;
    /// Output
    string output = 2;
}

message CacheEntry {
    /// Cache ID; same as batch ID
    uint64 id = 1;
    /// Requests present in cache entry
    repeated uint64 request_ids = 2;
    /// Sequence length
    uint32 sequence_length = 3;
}

message Response {
    /// Finished requests (optional)
    repeated FinishedGeneration finished = 1;
    /// Cache entry (optional)
    optional CacheEntry cache_entry = 2;
}

/// Represents an empty message.
message Empty {}
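
The service implies a two-phase loop: a caller first sends a fresh Batch to Generate, then keeps feeding the returned CacheEntry back through GenerateWithCache as a BatchCached until every request shows up in `finished`. Below is a minimal client sketch of that loop, assuming Python stubs produced by grpcio-tools with default naming (generate_pb2 / generate_pb2_grpc, TextGenerationStub); the server address and the way ids are assigned to BatchCached are illustrative assumptions, not the project's actual router logic.

# client_sketch.py -- hypothetical driver for the protocol above
import grpc

import generate_pb2
import generate_pb2_grpc


def generate_batch(prompts, url="localhost:50051", max_new_tokens=32):
    channel = grpc.insecure_channel(url)  # address is an assumption
    stub = generate_pb2_grpc.TextGenerationStub(channel)

    # Prefill: no cache yet, so send the full Batch through Generate.
    batch = generate_pb2.Batch(
        id=0,
        requests=[
            generate_pb2.Request(
                id=i,
                inputs=prompt,
                parameters=generate_pb2.LogitsWarperParameters(
                    temperature=1.0, top_k=0, top_p=1.0, do_sample=False
                ),
                max_new_tokens=max_new_tokens,
            )
            for i, prompt in enumerate(prompts)
        ],
    )
    response = stub.Generate(batch)
    outputs = {f.id: f.output for f in response.finished}

    # Decode: keep calling GenerateWithCache with the cache entry returned
    # by the previous step until no requests remain in the cache.
    while response.HasField("cache_entry"):
        entry = response.cache_entry
        cached = generate_pb2.BatchCached(
            id=entry.id,  # id assignment here is a guess for illustration
            request_ids=list(entry.request_ids),
            batch_cached_ids=[entry.id],
            total_batch_size=len(entry.request_ids),
            max_sequence_length=entry.sequence_length,
        )
        response = stub.GenerateWithCache(cached)
        outputs.update({f.id: f.output for f in response.finished})

    stub.ClearCache(generate_pb2.Empty())
    return [outputs[i] for i in range(len(prompts))]

In a real deployment several cache entries from different batches could be merged into one BatchCached (total_batch_size and max_sequence_length exist for exactly that), but the single-batch case above is enough to show how the messages fit together.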