// Protocol Buffer definitions for the generate.v1 text generation service.
syntax = "proto3";

package generate.v1;
// Text generation service: service discovery, cache management and batched
// token generation.
service TextGeneration {
  // Service discovery. Takes no input and returns a list of URLs.
  rpc ServiceDiscovery(Empty) returns (ServiceDiscoveryResponse);

  // Empties the batch cache.
  rpc ClearCache(Empty) returns (Empty);

  // Generates tokens for a batch without cache.
  rpc Generate(Batch) returns (Response);

  // Generates tokens for a batch with cache. The request refers to cached
  // batches by ID instead of carrying the requests themselves.
  rpc GenerateWithCache(BatchCached) returns (Response);
}
// Response of the ServiceDiscovery RPC.
message ServiceDiscoveryResponse {
  // URLs reported by service discovery.
  // NOTE(review): presumably the addresses of the available servers; confirm
  // the expected URL format against the server implementation.
  repeated string urls = 1;
}
// Logits warper parameters attached to a Request.
//
// All fields are proto3 scalars with implicit presence: a value of 0 / false
// is not serialized and cannot be distinguished from "unset" by the receiver.
// NOTE(review): valid ranges and the meaning of 0 are not defined here;
// confirm against the server implementation.
message LogitsWarperParameters {
  // Temperature value.
  float temperature = 1;

  // Top-k value.
  uint32 top_k = 2;

  // Top-p value.
  float top_p = 3;

  // Whether to sample.
  // NOTE(review): presumably false selects greedy decoding; confirm.
  bool do_sample = 4;
}
// A single generation request; requests are grouped into a Batch.
message Request {
  // Request ID.
  uint64 id = 1;

  // The generation context.
  string inputs = 2;

  // Logits warper parameters.
  LogitsWarperParameters parameters = 3;

  // Stopping criteria: maximum number of new tokens.
  uint32 max_new_tokens = 4;
}
// A batch of requests; input of the Generate RPC.
message Batch {
  // Batch ID.
  uint64 id = 1;

  // Individual requests.
  repeated Request requests = 2;
}
// A batch that refers to cached state by ID; input of the GenerateWithCache
// RPC.
message BatchCached {
  // Batch ID.
  uint64 id = 1;

  // Request IDs within the cache.
  repeated uint64 request_ids = 2;

  // Cache IDs.
  // NOTE(review): CacheEntry documents its ID as "same as batch ID", so these
  // are presumably the IDs of previously returned cache entries; confirm.
  repeated uint64 batch_cached_ids = 3;

  // Batch size (sum of all batch sizes).
  uint32 total_batch_size = 4;

  // Max sequence length.
  uint32 max_sequence_length = 5;
}
// Result of a request whose generation has finished; carried in Response.
message FinishedGeneration {
  // ID of the original request.
  uint64 id = 1;

  // Output.
  string output = 2;
}
// Description of a cache entry; carried in Response.
message CacheEntry {
  // Cache ID; same as batch ID.
  uint64 id = 1;

  // Requests present in the cache entry.
  repeated uint64 request_ids = 2;

  // Sequence length.
  uint32 sequence_length = 3;
}
// Response of the Generate and GenerateWithCache RPCs.
message Response {
  // Finished requests (optional). Repeated fields carry no presence, so an
  // empty list is the same as unset.
  repeated FinishedGeneration finished = 1;

  // Cache entry (optional). The field has explicit presence, so receivers
  // can tell whether it was set.
  optional CacheEntry cache_entry = 2;
}
// Represents an empty message; used as the request of ServiceDiscovery and
// as both request and response of ClearCache.
message Empty {}