syntax = "proto3";

package generate.v1;

/// Text generation service: service discovery, cache clearing, and
/// token generation for batches (with or without cache).
service TextGeneration {
    /// Service discovery: takes no arguments, returns a list of URLs
    rpc ServiceDiscovery(Empty) returns (ServiceDiscoveryResponse);
    /// Empties the batch cache; neither request nor response carries data
    rpc ClearCache(Empty) returns (Empty);
    /// Generates tokens for a batch of full requests (no cache involved)
    rpc Generate(Batch) returns (Response);
    /// Generates tokens for a batch that is referenced by cache IDs
    rpc GenerateWithCache(BatchCached) returns (Response);
}

/// Response of the ServiceDiscovery RPC.
message ServiceDiscoveryResponse {
    /// URLs returned by service discovery; empty when none are reported.
    /// NOTE(review): presumably the addresses of the service instances — confirm.
    repeated string urls = 1;
}

/// Logits warper parameters attached to each Request.
/// All fields are proto3 scalars with implicit presence: an unset field
/// reads back as 0 / false and cannot be told apart from an explicit 0 / false.
/// NOTE(review): exact semantics and valid ranges are defined by the server
/// implementation, not by this file — confirm before relying on them.
message LogitsWarperParameters {
    /// Temperature value
    float temperature = 1;
    /// Top-k value
    uint32 top_k = 2;
    /// Top-p value
    float top_p = 3;
    /// Sampling switch (presumably sampling vs. greedy decoding — TODO confirm)
    bool do_sample = 4;
}

/// A single generation request; requests are grouped into a Batch.
message Request {
    /// Request ID
    uint64 id = 1;
    /// The generation context (input text)
    string inputs = 2;
    /// Logits Warper Parameters
    LogitsWarperParameters parameters = 3;
    /// Stopping criteria: limit on the number of new tokens (per the field name)
    uint32 max_new_tokens = 4;
}

/// A batch of requests; the input of the Generate RPC.
message Batch {
    /// Batch ID
    uint64 id = 1;
    /// Individual requests
    repeated Request requests = 2;
}

/// Input of the GenerateWithCache RPC. Carries only IDs and sizes,
/// not the requests themselves.
message BatchCached {
    /// Batch ID
    uint64 id = 1;
    /// Request ids within cache
    repeated uint64 request_ids = 2;
    /// Cache IDs
    /// NOTE(review): presumably the `id` values of previously returned
    /// CacheEntry messages — confirm.
    repeated uint64 batch_cached_ids = 3;
    /// Batch size (sum of all batch sizes)
    uint32 total_batch_size = 4;
    /// Max sequence length (unit presumably tokens — TODO confirm)
    uint32 max_sequence_length = 5;
}

/// Output of one finished request; reported in Response.finished.
message FinishedGeneration {
    /// ID of the original request
    uint64 id = 1;
    /// Output text
    string output = 2;
}

/// Description of a cache entry; reported in Response.cache_entry.
message CacheEntry {
    /// Cache ID; same as batch ID
    uint64 id = 1;
    /// Requests present in cache entry
    repeated uint64 request_ids = 2;
    /// Sequence length (unit presumably tokens — TODO confirm)
    uint32 sequence_length = 3;
}

/// Response of both the Generate and GenerateWithCache RPCs.
message Response {
    /// Finished requests; may be empty (a repeated field has no presence,
    /// so "none finished" is simply an empty list)
    repeated FinishedGeneration finished = 1;
    /// Cache entry; may be absent. The `optional` label gives the field
    /// explicit presence, so receivers should check it before reading.
    optional CacheEntry cache_entry = 2;
}


// Represents an empty message. Used as the request of ServiceDiscovery and
// ClearCache, and as the response of ClearCache.
message Empty {}