syntax = "proto3";

package generate.v1;

service TextGenerationService {
    /// Service discovery
    rpc ServiceDiscovery (ServiceDiscoveryRequest) returns (ServiceDiscoveryResponse) {}
    /// Empties batch cache
    rpc ClearCache (ClearCacheRequest) returns (ClearCacheResponse);
    /// Prefill batch and decode first token
    rpc Prefill (PrefillRequest) returns (PrefillResponse);
    /// Decode token for a list of prefilled batches
    rpc Decode (DecodeRequest) returns (DecodeResponse);
}
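
// A non-normative sketch of the intended call flow, inferred from the RPC and
// message definitions below (the exact orchestration is up to the caller):
//
//   Prefill(PrefillRequest { batch })            -> one Generation per request + optional cached Batch
//   Decode(DecodeRequest { batches: [cached] })  -> one Generation per request + optional cached Batch
//   ClearCache(ClearCacheRequest {})             -> drops the cached batches held by the server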

/// Empty request
message ServiceDiscoveryRequest {}

message ServiceDiscoveryResponse {
    /// Other shards' URLs
    repeated string urls = 1;
}

/// Empty request
message ClearCacheRequest {}

/// Empty response
message ClearCacheResponse {}

message NextTokenChooserParameters {
    /// exponential scaling of the output probability distribution
    float temperature = 1;
    /// restricting to the k highest probability elements
    uint32 top_k = 2;
    /// restricting to the smallest set of most likely tokens whose cumulative probability exceeds top_p (nucleus sampling)
    float top_p = 3;
    /// restricting to the most locally typical tokens whose cumulative probability exceeds typical_p (locally typical sampling)
    float typical_p = 4;
    /// apply sampling on the logits
    bool do_sample = 5;
    /// random seed for sampling
    uint64 seed = 6;
    /// repetition penalty
    float repetition_penalty = 7;
    /// token watermarking using "A Watermark for Large Language Models"
    bool watermark = 8;
}

message StoppingCriteriaParameters {
    /// Maximum number of generated tokens
    uint32 max_new_tokens = 1;
    /// Optional stopping sequences
    repeated string stop_sequences = 2;
}
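
// Illustrative example of the two parameter messages above, written in
// protobuf text format; the values are made up for illustration and are not
// defaults defined by this file:
//
//   NextTokenChooserParameters fields:
//     temperature: 0.8
//     top_k: 50
//     top_p: 0.95
//     typical_p: 1.0
//     do_sample: true
//     seed: 42
//     repetition_penalty: 1.1
//     watermark: false
//
//   StoppingCriteriaParameters fields:
//     max_new_tokens: 64
//     stop_sequences: "\n\n"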

message Request {
    /// Request ID
    uint64 id = 1;
    /// The generation context
    string inputs = 2;
    /// The number of tokens inside inputs
    uint32 input_length = 3;
    /// Next Token Chooser Parameters
    NextTokenChooserParameters parameters = 4;
    /// Stopping Criteria Parameters
    StoppingCriteriaParameters stopping_parameters = 5;
}

message Batch {
    /// Batch ID
    uint64 id = 1;
    /// Individual requests
    repeated Request requests = 2;
    /// Batch size (==len(requests))
    uint32 size = 3;
}
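
// An illustrative Batch in protobuf text format (values are made up; only a
// subset of the nested parameters is shown, the rest take proto3 defaults):
//
//   id: 0
//   size: 1
//   requests {
//     id: 7
//     inputs: "What is Deep Learning?"
//     input_length: 6
//     parameters { temperature: 0.8 do_sample: true }
//     stopping_parameters { max_new_tokens: 64 }
//   }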

enum FinishReason {
    FINISH_REASON_LENGTH = 0;
    FINISH_REASON_EOS_TOKEN = 1;
    FINISH_REASON_STOP_SEQUENCE = 2;
}
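
// As the names suggest: FINISH_REASON_LENGTH when max_new_tokens is reached,
// FINISH_REASON_EOS_TOKEN when the model emits its end-of-sequence token, and
// FINISH_REASON_STOP_SEQUENCE when one of the configured stop_sequences matches.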

message GeneratedText {
    /// Output
    string text = 1;
    /// Number of generated tokens
    uint32 generated_tokens = 2;
    /// Finish reason
    FinishReason finish_reason = 3;
    /// Seed
    optional uint64 seed = 4;
}

message PrefillTokens {
    /// Prefill Token IDs
    repeated uint32 ids = 1;
    /// Prefill Logprobs
    repeated float logprobs = 2;
    /// Prefill tokens
    repeated string texts = 3;
}
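
// The three repeated fields above appear to be parallel arrays: ids[i],
// logprobs[i] and texts[i] presumably describe the same prompt token.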

message Generation {
    /// Request ID
    uint64 request_id = 1;
    /// Prefill tokens (optional)
    PrefillTokens prefill_tokens = 2;
    /// Token ID
    uint32 token_id = 3;
    /// Logprob
    float token_logprob = 4;
    /// Text
    string token_text = 5;
    /// Is it a special token
    bool token_is_special = 6;
    /// Complete generated text
    GeneratedText generated_text = 7;
}
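
// Note (inferred, not normative): prefill_tokens is presumably only populated
// in PrefillResponse, and generated_text is presumably only set on the final
// Generation for a request, once a FinishReason applies.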

message PrefillRequest {
    /// Batch
    Batch batch = 1;
}

message PrefillResponse {
    /// Generation
    repeated Generation generations = 1;
    /// Next batch (cached)
    optional Batch batch = 2;
}

message DecodeRequest {
    /// Cached batches
    repeated Batch batches = 1;
}

message DecodeResponse {
    /// Generations
    repeated Generation generations = 1;
    /// Next batch (cached)
    optional Batch batch = 2;
}