feat(router): Add const parameters to validation logic (#15)

I noticed an opportunity to collapse some of the logic, in case you are interested.
Nick Hill 2023-01-03 01:41:22 -08:00 committed by GitHub
parent 3efa5bbbfd
commit 60472f9d2b
2 changed files with 51 additions and 70 deletions

View File

@@ -71,12 +71,8 @@ impl State {
                 id: *id,
                 inputs: entry.request.inputs.clone(),
                 input_length: entry.input_length as u32,
-                parameters: Some(NextTokenChooserParameters::from(
-                    entry.request.parameters.clone(),
-                )),
-                stopping_parameters: Some(StoppingCriteriaParameters::from(
-                    entry.request.parameters.clone(),
-                )),
+                parameters: Some((&entry.request.parameters).into()),
+                stopping_parameters: Some(entry.request.parameters.clone().into()),
             });
             ids.push(*id);
@ -162,8 +158,8 @@ impl Db {
} }
} }
impl From<GenerateParameters> for NextTokenChooserParameters { impl From<&GenerateParameters> for NextTokenChooserParameters {
fn from(parameters: GenerateParameters) -> Self { fn from(parameters: &GenerateParameters) -> Self {
Self { Self {
temperature: parameters.temperature, temperature: parameters.temperature,
top_k: parameters.top_k as u32, top_k: parameters.top_k as u32,
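
Side note (not part of the commit): `Some((&entry.request.parameters).into())` compiles because the struct field's type drives inference, so the compiler picks the new `From<&GenerateParameters>` impl and no clone of the parameters is needed. A minimal standalone sketch of the same pattern, using hypothetical stand-in types `Params` and `ChooserParams` rather than the real router types:

// Standalone sketch of the From<&T> + .into() pattern used in the hunk above.
// `Params` and `ChooserParams` are hypothetical stand-ins, not the real
// GenerateParameters / NextTokenChooserParameters types.
struct Params {
    temperature: f32,
    top_k: i32,
}

struct ChooserParams {
    temperature: f32,
    top_k: u32,
}

// Borrowing conversion: the source struct is read through a reference,
// so callers do not have to clone it first.
impl From<&Params> for ChooserParams {
    fn from(p: &Params) -> Self {
        Self {
            temperature: p.temperature,
            top_k: p.top_k as u32,
        }
    }
}

struct BatchRequest {
    parameters: Option<ChooserParams>,
}

fn main() {
    let request_params = Params { temperature: 0.7, top_k: 50 };
    // The field type Option<ChooserParams> tells the compiler which
    // From impl `.into()` should resolve to.
    let request = BatchRequest {
        parameters: Some((&request_params).into()),
    };
    assert_eq!(request.parameters.unwrap().top_k, 50);
}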

View File

@@ -4,12 +4,11 @@ use axum::http::StatusCode;
 use axum::Json;
 use thiserror::Error;
 use tokenizers::tokenizer::Tokenizer;
-use tokenizers::{
-    DecoderWrapper, ModelWrapper, NormalizerWrapper, PostProcessorWrapper, PreTokenizerWrapper,
-    TokenizerImpl,
-};
 use tokio::sync::{mpsc, oneshot};
 
+const MAX_MAX_NEW_TOKENS: u32 = 512;
+const MAX_STOP_SEQUENCES: usize = 4;
+
 /// Validation
 #[derive(Debug, Clone)]
 pub struct Validation {
@@ -63,7 +62,7 @@ async fn validation_task(
     // Create workers
     for _ in 0..workers {
-        let tokenizer_clone = tokenizer.clone();
+        let tokenizer_clone: Tokenizer = tokenizer.clone().into();
         // Create channel to communicate with worker
         let (worker_sender, worker_receiver) = mpsc::channel(workers);
         workers_senders.push(worker_sender);
@@ -89,68 +88,54 @@ async fn validation_task(
 /// Check the parameters inside the payload and get the number of tokens inside the input using
 /// the tokenizer
 fn validation_worker(
-    tokenizer: TokenizerImpl<
-        ModelWrapper,
-        NormalizerWrapper,
-        PreTokenizerWrapper,
-        PostProcessorWrapper,
-        DecoderWrapper,
-    >,
+    tokenizer: Tokenizer,
     max_input_length: usize,
     mut receiver: mpsc::Receiver<ValidationRequest>,
 ) {
     // Loop over requests
     while let Some((request, response_tx)) = receiver.blocking_recv() {
-        if request.parameters.temperature <= 0.0 {
-            response_tx
-                .send(Err(ValidationError::Temperature))
-                .unwrap_or(());
-            continue;
-        }
-        if request.parameters.top_p <= 0.0 || request.parameters.top_p > 1.0 {
-            response_tx.send(Err(ValidationError::TopP)).unwrap_or(());
-            continue;
-        }
-        if request.parameters.top_k < 0 {
-            response_tx.send(Err(ValidationError::TopK)).unwrap_or(());
-            continue;
-        }
-        if request.parameters.max_new_tokens > 512 {
-            response_tx
-                .send(Err(ValidationError::MaxNewTokens))
-                .unwrap_or(());
-            continue;
-        }
-        if request.parameters.stop.len() > 4 {
-            response_tx
-                .send(Err(ValidationError::StopSequence(
-                    request.parameters.stop.len(),
-                )))
-                .unwrap_or(());
-            continue;
-        }
-
-        // Get the number of tokens in the input
-        match tokenizer.encode(request.inputs.clone(), true) {
-            Ok(inputs) => {
-                let input_length = inputs.len();
-
-                if input_length > max_input_length {
-                    response_tx
-                        .send(Err(ValidationError::InputLength(
-                            input_length,
-                            max_input_length,
-                        )))
-                        .unwrap_or(());
-                    continue;
-                }
-
-                response_tx.send(Ok((input_length, request))).unwrap_or(());
-            }
-            Err(err) => response_tx
-                .send(Err(ValidationError::Tokenizer(err.to_string())))
-                .unwrap_or(()),
-        };
+        response_tx.send(validate(request, &tokenizer, max_input_length)).unwrap_or(())
+    }
+}
+
+fn validate(
+    request: GenerateRequest,
+    tokenizer: &Tokenizer,
+    max_input_length: usize,
+) -> Result<(usize, GenerateRequest), ValidationError> {
+    if request.parameters.temperature <= 0.0 {
+        return Err(ValidationError::Temperature);
+    }
+    if request.parameters.top_p <= 0.0 || request.parameters.top_p > 1.0 {
+        return Err(ValidationError::TopP);
+    }
+    if request.parameters.top_k < 0 {
+        return Err(ValidationError::TopK);
+    }
+    if request.parameters.max_new_tokens > MAX_MAX_NEW_TOKENS {
+        return Err(ValidationError::MaxNewTokens(MAX_MAX_NEW_TOKENS));
+    }
+    if request.parameters.stop.len() > MAX_STOP_SEQUENCES {
+        return Err(ValidationError::StopSequence(
+            MAX_STOP_SEQUENCES,
+            request.parameters.stop.len(),
+        ));
+    }
+
+    // Get the number of tokens in the input
+    match tokenizer.encode(request.inputs.clone(), true) {
+        Ok(inputs) => {
+            let input_length = inputs.len();
+
+            if input_length > max_input_length {
+                Err(ValidationError::InputLength(
+                    input_length,
+                    max_input_length,
+                ))
+            } else {
+                Ok((input_length, request))
+            }
+        }
+        Err(err) => Err(ValidationError::Tokenizer(err.to_string())),
     }
 }
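
For illustration only (this block is not part of the diff): the refactor above hinges on the worker forwarding whatever `validate` returns in a single `send`, instead of one `send`/`continue` pair per failed check. A self-contained sketch of that shape, using `std::sync::mpsc` and a trivial stand-in `validate` that only checks `max_new_tokens`:

use std::sync::mpsc;
use std::thread;

// Hypothetical stand-ins for the real request and error types.
#[derive(Debug)]
struct Request {
    max_new_tokens: u32,
}

#[derive(Debug)]
enum Error {
    MaxNewTokens(u32),
}

const MAX_MAX_NEW_TOKENS: u32 = 512;

// All checks live in one function that returns early on the first failure.
fn validate(request: Request) -> Result<Request, Error> {
    if request.max_new_tokens > MAX_MAX_NEW_TOKENS {
        return Err(Error::MaxNewTokens(MAX_MAX_NEW_TOKENS));
    }
    Ok(request)
}

fn main() {
    // Each message carries the request plus a channel for the response,
    // loosely mirroring the (request, response_tx) tuples in the router.
    let (req_tx, req_rx) = mpsc::channel::<(Request, mpsc::Sender<Result<Request, Error>>)>();

    // Worker loop: exactly one send per request, whatever validate returns.
    let worker = thread::spawn(move || {
        while let Ok((request, response_tx)) = req_rx.recv() {
            response_tx.send(validate(request)).unwrap_or(());
        }
    });

    let (resp_tx, resp_rx) = mpsc::channel();
    req_tx
        .send((Request { max_new_tokens: 1024 }, resp_tx))
        .unwrap();
    println!("{:?}", resp_rx.recv().unwrap()); // Err(MaxNewTokens(512))

    drop(req_tx); // close the request channel so the worker exits
    worker.join().unwrap();
}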
@@ -167,12 +152,12 @@ pub enum ValidationError {
     TopP,
     #[error("top_k must be strictly positive")]
     TopK,
-    #[error("max_new_tokens must be <= 512")]
-    MaxNewTokens,
+    #[error("max_new_tokens must be <= {0}")]
+    MaxNewTokens(u32),
     #[error("inputs must have less than {1} tokens. Given: {0}")]
     InputLength(usize, usize),
-    #[error("stop supports up to 4 stop sequences. Given: {0}")]
-    StopSequence(usize),
+    #[error("stop supports up to {0} stop sequences. Given: {1}")]
+    StopSequence(usize, usize),
     #[error("tokenizer error {0}")]
     Tokenizer(String),
 }
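
One more side note (assumption, not from the commit): because the new `MaxNewTokens(u32)` and `StopSequence(usize, usize)` variants carry the limits as data, thiserror's derived `Display` interpolates the const values into the messages. A minimal sketch, assuming `thiserror` is available as a dependency:

use thiserror::Error;

const MAX_MAX_NEW_TOKENS: u32 = 512;
const MAX_STOP_SEQUENCES: usize = 4;

// Simplified copy of the two parameterized variants from the hunk above.
#[derive(Debug, Error)]
enum ValidationError {
    #[error("max_new_tokens must be <= {0}")]
    MaxNewTokens(u32),
    #[error("stop supports up to {0} stop sequences. Given: {1}")]
    StopSequence(usize, usize),
}

fn main() {
    // Prints: max_new_tokens must be <= 512
    println!("{}", ValidationError::MaxNewTokens(MAX_MAX_NEW_TOKENS));
    // Prints: stop supports up to 4 stop sequences. Given: 7
    println!("{}", ValidationError::StopSequence(MAX_STOP_SEQUENCES, 7));
}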