From 8c74ee4498f71cebfb098a0332f14752304edfba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20de=20Kok?= Date: Tue, 3 Sep 2024 11:46:23 +0000 Subject: [PATCH] Simplify image token lookup --- router/src/config.rs | 6 +----- router/src/validation.rs | 12 +++++++----- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/router/src/config.rs b/router/src/config.rs index a28d0577..5d0be9c8 100644 --- a/router/src/config.rs +++ b/router/src/config.rs @@ -7,7 +7,6 @@ pub struct LlavaNext { pub(crate) text_config: TextConfig, pub(crate) vision_config: VisionConfig, pub(crate) image_grid_pinpoints: Vec<(usize, usize)>, - pub(crate) image_token_index: u32, } fn get_anyres_image_grid_shape( @@ -113,9 +112,7 @@ pub struct ClipVisionModel { #[derive(Clone, Debug, Serialize, Deserialize)] #[serde(rename_all = "snake_case")] -pub struct Idefics2 { - pub(crate) image_token_id: u32, -} +pub struct Idefics2 {} impl Idefics2 { pub fn get_number_of_features(&self, _height: usize, _width: usize) -> usize { @@ -132,7 +129,6 @@ pub struct PaliTextConfig { #[derive(Clone, Debug, Serialize, Deserialize)] #[serde(rename_all = "snake_case")] pub struct Paligemma { - pub(crate) image_token_index: u32, pub(crate) text_config: PaliTextConfig, } diff --git a/router/src/validation.rs b/router/src/validation.rs index 09e1375c..f82f9670 100644 --- a/router/src/validation.rs +++ b/router/src/validation.rs @@ -1,5 +1,5 @@ /// Payload validation logic -use crate::config::{Config, Idefics2}; +use crate::config::Config; use crate::validation::ValidationError::{BestOfSampling, BestOfSeed, EmptyInput}; use crate::{ GenerateParameters, GenerateRequest, GrammarType, HubPreprocessorConfig, Idefics2Preprocessor, @@ -605,10 +605,12 @@ fn image_tokens( fn image_id(config: &Config, tokenizer: &Tokenizer) -> u32 { use Config::*; match config { - Idefics => tokenizer.token_to_id("<image>").unwrap(), - Idefics2(idefics) => idefics.image_token_id, - LlavaNext(llava) => llava.image_token_index, - 
Paligemma(paligemma) => paligemma.image_token_index, + // The configuration key for the image token id does not seem to + // be standardized, but the image tag is. So let's use that to get + // the image token id. + Idefics | Idefics2(_) | LlavaNext(_) | Paligemma(_) => { + tokenizer.token_to_id("<image>").unwrap() + } _ => unimplemented!("Images tokens are not supported for this model configuration"), } }