From a9c2d28a3ac5fa74cd72032501e2cda500a069c8 Mon Sep 17 00:00:00 2001 From: David Holtz Date: Mon, 18 Nov 2024 21:16:21 +0000 Subject: [PATCH] feat: support video input chunks and enable qwen2 vl to process video --- backends/client/src/lib.rs | 7 +- backends/client/src/v3/mod.rs | 2 +- backends/v3/src/client/mod.rs | 2 +- backends/v3/src/queue.rs | 5 +- docs/openapi.json | 18 +++ docs/source/reference/launcher.md | 19 ++- router/src/lib.rs | 4 +- router/src/validation.rs | 122 ++++++++++++++++-- .../models/custom_modeling/qwen2_vl.py | 74 +++++++---- .../models/vlm_causal_lm.py | 120 +++++++++++++++-- 10 files changed, 310 insertions(+), 63 deletions(-) diff --git a/backends/client/src/lib.rs b/backends/client/src/lib.rs index 4fdd8f56..2c01c5b0 100644 --- a/backends/client/src/lib.rs +++ b/backends/client/src/lib.rs @@ -9,7 +9,7 @@ use thiserror::Error; use tonic::transport; use tonic::Status; -pub use v3::{Chunk, Image, Input, InputChunk}; +pub use v3::{Chunk, Image, Input, InputChunk, Video}; #[async_trait] pub trait Health { @@ -79,8 +79,9 @@ impl ChunksToString for Vec { let encoded = STANDARD.encode(data); output.push_str(&format!("![](data:{};base64,{})", mimetype, encoded)) } - Some(Chunk::Video(url)) => { - output.push_str(&format!("