From a3049f102e3e2a1e08bb945d5b6d8b514ec1d694 Mon Sep 17 00:00:00 2001 From: drbh Date: Mon, 9 Dec 2024 21:25:13 +0000 Subject: [PATCH] fix: address image resize and rebase changes --- backends/client/src/lib.rs | 2 +- backends/v2/src/client/mod.rs | 2 +- backends/v3/src/client/mod.rs | 2 +- .../models/flash_causal_lm.py | 18 +++++++++++------- .../models/vlm_causal_lm.py | 7 +++++++ 5 files changed, 21 insertions(+), 10 deletions(-) diff --git a/backends/client/src/lib.rs b/backends/client/src/lib.rs index e33d2348..fbe2e7e6 100644 --- a/backends/client/src/lib.rs +++ b/backends/client/src/lib.rs @@ -86,6 +86,6 @@ impl ChunksToString for Vec { } } -static WARMUP_IMAGE_BASE64 : &str = "iVBORw0KGgoAAAANSUhEUgAAACgAAAAoCAAAAACpleexAAAGc0lEQVR4nAFoBpf5AINHnT9oHGHwxejPBqNS161/mUe+CNEM2ZjIb1zZ+/ygXl5vkP9T6lgA+jwpw1IgNJCtWJkY7CNQhfuiZBrm+8cUVPW10g4TQIkAS4kJ+6A5qQEfgO5HoId0MVcs4gEcIILe/jMpXbwGCgORcpeGMh0ANY9Kk2EU3Wh6BMR39APvsV0KW8yzEbc5e8JtBWUhs4tHYZ4XhUg5RUVvh9Tl4/FuaOMEgANG4gnASCYBPa1f5p/+uFPqsTWWEqrrQfbI4HsbRHF0smMiTwTKvQJg0zxGRc8muP0ZnqI1cvgtySZP25FY606dWen/hyiwFpcHuy+6xCKCAszDRskWmbjzRyaAb3I5fR+GeB66N5AnNQJ2GCwhYsQJifZBCw7hsxQAcSxA0TMqUCwvxxa+WbWItKBdDUFuBj6TkgcC9letT2IKSPHMr0tLxwFK76OfHulksUmKDPKS+BgDhbKv88I6GqwdMxapuKaqXh04MKP/xuCOAIR2TZzfBY/vtEJO5hSCjtcVqGA+1VQAOocwBA41Vil7/xK32enA6cUCLM7864NqqADVS45Fc6M2/NjrxCcjwL+2tHI3Bs/sOtf3hjETiP0MGQJI/LP+3Cx9SGzwgF/gtNBJLROxfUkaX+Y3AvmMd/IEuQm0EWYv7c8TAGSNTNdgALrJdPUqCo7+B4lGDX+0mfiIVcUS/bQyRvKdBIxydsNe1fEAHhN2FlGUa1YlbdgQeLRW/rjMZc9YqsnGCIZE4krYv80pEVL+bUzMEAQ8HvAODK/TBsK7ba1Sn0HBh6IwLTVEb6idIwi2jKDfpyQx/U+obVDMBFbaVWPV/JkSWtqUqbon7XYOd8wCNJQWDglmxavw51+FRT1u/cwKKHwAM3cdszRLianvQ8QM3Dp+3iEpvT79x6Zcql+SGRHLAd+BUQAnve2eKwRkk7DB5P9iN/LIlXMgxzdcDii2nmYjMhyB3yVbP6bXHEtIo/WYsP+dBEuAtOQBeBgIpwHaFgYwVCEQ6pGU0lZC0iPh86uzzpPWe/I8q2Dc4CEA6Pnz6QnU1ujnzYsuJ0EptCUWTfcSxccu5tCSHH0jTQgrG6/nDlsrMAQCOj1FyhHoXS9GUWxwU/sV8AFHduFF6okKq5vlX4lRvj1+CNMcHOlnAF2Yui1hfSwSXt8MjPstlEj5TPf7li7JBRKvSjDgAaVcm4EFLmxMLmcCTyE/LivPSEOexyzKp8O/MIPLAiQ/4onPEb2s8
ESAQBILVhvlqSB1rwCGH1xEePawEple0P9023HYGrvujCDUMlSdgpB/71rfEfTjgY8uHFgPAPdla93mIt7/uWNathN5EUaD8QOzOcQ541UomYbKERPqRjqSBA7PK0sCz8zpLAeMNVI9+Rx/eQeo5A7FeI7wMQuyYCAaxx9u3GfRqMSXUqTwsQAlQ6dAnhSiTQqzy0ivVCrFhIoWciWl/8+6meceIuJ6rvRqJ/GswyeSBP5AMMelxb2MN7XoaXqL9HEvIC1FUhTrHk5xvA1GSOlTSvOHnvxb5c8Behci1Cburu3pF201hAMQNLcjHgmwnnpZFoPvBKpCDLxXVadIjdO4bwLMOtWuExNtJ3x0qJ1yMWIPuZOL20FJNGXcO2f51co5uhjTk52RSDDEAryv3E7i0xHi7eq3Mh/5u1/Vitqez8PoMq3b5/BVpCFvII4YrYqfi5EBaCSvfvWJ4L3E7/h4Fmk+EbAV4ZzqyAZjeNEa+FbDSplFljz49sIczQJBBHrd0OaqYGLf+nKY7SL6EWG1aBBuPayoNRNxflhTdPYLoz4N5EDVAgwNlJXD3/gcnpE9UizPIauZNhP/1rcnE7gNOdwJ4dyZZFQcJSLWH1kC5xOZ9ls7GIUv4BctR2o73VVy0zICLkRku+34Y/6YXywzG2t4adZW5QK7WqesdSGuIXXaws/lLujr2ujEgzRFdt9p/gyGFUgsY8YH2x3pGCEOAu5G62sOGAbM5vK88t8zqfDdNCPweZhVJ0cHNw5vC3Lims1435Q+DXwE+K7yjXFolURnHhsUUEoUV/u+9kJD0kPUf8vCjB/3IlJNjcrvEuu+NwKVe83Xd9r6ltsqgv614BHA9QOqO9Itnu7PiQPNzWqrV9r269A7rW1rAUmrqu5+w8XzfuT3s6wszZH5xPXovR85dhN0TXjzBo2PIhc38srv5yopvBBpp115NAAAAABJRU5ErkJggg=="; +static WARMUP_IMAGE_BASE64: &str = "iVBORw0KGgoAAAANSUhEUgAAABQAAAAUCAIAAAAC64paAAABg2lDQ1BJQ0MgcHJvZmlsZQAAKJF9kT1Iw0AcxV/TSotUROxQxCFDdbKLijjWKhShQqgVWnUwufQLmrQkKS6OgmvBwY/FqoOLs64OroIg+AHi7OCk6CIl/i8ptIjx4Lgf7+497t4BQqvKNDOQADTdMjKppJjLr4rBVwQQwhAERGVm1uckKQ3P8XUPH1/v4jzL+9yfY0AtmAzwicQJVjcs4g3imU2rznmfOMLKskp8Tjxh0AWJH7muuPzGueSwwDMjRjYzTxwhFks9rPQwKxsa8TRxTNV0yhdyLquctzhr1Qbr3JO/MFzQV5a5TnMUKSxiCRJEKGiggiosxGnVSTGRof2kh3/E8UvkUshVASPHAmrQIDt+8D/43a1ZnJp0k8JJoO/Ftj/GgOAu0G7a9vexbbdPAP8zcKV3/bUWMPtJerOrxY6AwW3g4rqrKXvA5Q4QfarLhuxIfppCsQi8n9E35YHhW6B/ze2ts4/TByBLXaVvgINDYLxE2ese7w719vbvmU5/PycecohsjayNAAAACXBIWXMAAC4jAAAuIwF4pT92AAAAB3RJTUUH6AQIEQMnlTSSjwAAABl0RVh0Q29tbWVudABDcmVhdGVkIHdpdGggR0lNUFeBDhcAAAASSURBVDjLY2AYBaNgFIyCoQsABMQAAeRw1DoAAAAASUVORK5CYII="; pub type Result = std::result::Result; diff --git a/backends/v2/src/client/mod.rs b/backends/v2/src/client/mod.rs index b463cc98..9fe114a2 100644 --- a/backends/v2/src/client/mod.rs +++ b/backends/v2/src/client/mod.rs @@ -63,6 +63,6 @@ impl From 
for ClientError { } } -static WARMUP_IMAGE_BASE64 : &str = "iVBORw0KGgoAAAANSUhEUgAAACgAAAAoCAAAAACpleexAAAGc0lEQVR4nAFoBpf5AINHnT9oHGHwxejPBqNS161/mUe+CNEM2ZjIb1zZ+/ygXl5vkP9T6lgA+jwpw1IgNJCtWJkY7CNQhfuiZBrm+8cUVPW10g4TQIkAS4kJ+6A5qQEfgO5HoId0MVcs4gEcIILe/jMpXbwGCgORcpeGMh0ANY9Kk2EU3Wh6BMR39APvsV0KW8yzEbc5e8JtBWUhs4tHYZ4XhUg5RUVvh9Tl4/FuaOMEgANG4gnASCYBPa1f5p/+uFPqsTWWEqrrQfbI4HsbRHF0smMiTwTKvQJg0zxGRc8muP0ZnqI1cvgtySZP25FY606dWen/hyiwFpcHuy+6xCKCAszDRskWmbjzRyaAb3I5fR+GeB66N5AnNQJ2GCwhYsQJifZBCw7hsxQAcSxA0TMqUCwvxxa+WbWItKBdDUFuBj6TkgcC9letT2IKSPHMr0tLxwFK76OfHulksUmKDPKS+BgDhbKv88I6GqwdMxapuKaqXh04MKP/xuCOAIR2TZzfBY/vtEJO5hSCjtcVqGA+1VQAOocwBA41Vil7/xK32enA6cUCLM7864NqqADVS45Fc6M2/NjrxCcjwL+2tHI3Bs/sOtf3hjETiP0MGQJI/LP+3Cx9SGzwgF/gtNBJLROxfUkaX+Y3AvmMd/IEuQm0EWYv7c8TAGSNTNdgALrJdPUqCo7+B4lGDX+0mfiIVcUS/bQyRvKdBIxydsNe1fEAHhN2FlGUa1YlbdgQeLRW/rjMZc9YqsnGCIZE4krYv80pEVL+bUzMEAQ8HvAODK/TBsK7ba1Sn0HBh6IwLTVEb6idIwi2jKDfpyQx/U+obVDMBFbaVWPV/JkSWtqUqbon7XYOd8wCNJQWDglmxavw51+FRT1u/cwKKHwAM3cdszRLianvQ8QM3Dp+3iEpvT79x6Zcql+SGRHLAd+BUQAnve2eKwRkk7DB5P9iN/LIlXMgxzdcDii2nmYjMhyB3yVbP6bXHEtIo/WYsP+dBEuAtOQBeBgIpwHaFgYwVCEQ6pGU0lZC0iPh86uzzpPWe/I8q2Dc4CEA6Pnz6QnU1ujnzYsuJ0EptCUWTfcSxccu5tCSHH0jTQgrG6/nDlsrMAQCOj1FyhHoXS9GUWxwU/sV8AFHduFF6okKq5vlX4lRvj1+CNMcHOlnAF2Yui1hfSwSXt8MjPstlEj5TPf7li7JBRKvSjDgAaVcm4EFLmxMLmcCTyE/LivPSEOexyzKp8O/MIPLAiQ/4onPEb2s8ESAQBILVhvlqSB1rwCGH1xEePawEple0P9023HYGrvujCDUMlSdgpB/71rfEfTjgY8uHFgPAPdla93mIt7/uWNathN5EUaD8QOzOcQ541UomYbKERPqRjqSBA7PK0sCz8zpLAeMNVI9+Rx/eQeo5A7FeI7wMQuyYCAaxx9u3GfRqMSXUqTwsQAlQ6dAnhSiTQqzy0ivVCrFhIoWciWl/8+6meceIuJ6rvRqJ/GswyeSBP5AMMelxb2MN7XoaXqL9HEvIC1FUhTrHk5xvA1GSOlTSvOHnvxb5c8Behci1Cburu3pF201hAMQNLcjHgmwnnpZFoPvBKpCDLxXVadIjdO4bwLMOtWuExNtJ3x0qJ1yMWIPuZOL20FJNGXcO2f51co5uhjTk52RSDDEAryv3E7i0xHi7eq3Mh/5u1/Vitqez8PoMq3b5/BVpCFvII4YrYqfi5EBaCSvfvWJ4L3E7/h4Fmk+EbAV4ZzqyAZjeNEa+FbDSplFljz49sIczQJBBHrd0OaqYGLf+nKY7SL6EWG1aBBuPayoNRNxflhTdPYLoz4N5EDVAgwNlJXD3/gcnpE9UizPIauZNhP/1rcnE7gNOdwJ4dyZZFQcJSLWH1kC5xOZ9ls7GIUv4Bct
R2o73VVy0zICLkRku+34Y/6YXywzG2t4adZW5QK7WqesdSGuIXXaws/lLujr2ujEgzRFdt9p/gyGFUgsY8YH2x3pGCEOAu5G62sOGAbM5vK88t8zqfDdNCPweZhVJ0cHNw5vC3Lims1435Q+DXwE+K7yjXFolURnHhsUUEoUV/u+9kJD0kPUf8vCjB/3IlJNjcrvEuu+NwKVe83Xd9r6ltsqgv614BHA9QOqO9Itnu7PiQPNzWqrV9r269A7rW1rAUmrqu5+w8XzfuT3s6wszZH5xPXovR85dhN0TXjzBo2PIhc38srv5yopvBBpp115NAAAAABJRU5ErkJggg=="; +static WARMUP_IMAGE_BASE64: &str = "iVBORw0KGgoAAAANSUhEUgAAABQAAAAUCAIAAAAC64paAAABg2lDQ1BJQ0MgcHJvZmlsZQAAKJF9kT1Iw0AcxV/TSotUROxQxCFDdbKLijjWKhShQqgVWnUwufQLmrQkKS6OgmvBwY/FqoOLs64OroIg+AHi7OCk6CIl/i8ptIjx4Lgf7+497t4BQqvKNDOQADTdMjKppJjLr4rBVwQQwhAERGVm1uckKQ3P8XUPH1/v4jzL+9yfY0AtmAzwicQJVjcs4g3imU2rznmfOMLKskp8Tjxh0AWJH7muuPzGueSwwDMjRjYzTxwhFks9rPQwKxsa8TRxTNV0yhdyLquctzhr1Qbr3JO/MFzQV5a5TnMUKSxiCRJEKGiggiosxGnVSTGRof2kh3/E8UvkUshVASPHAmrQIDt+8D/43a1ZnJp0k8JJoO/Ftj/GgOAu0G7a9vexbbdPAP8zcKV3/bUWMPtJerOrxY6AwW3g4rqrKXvA5Q4QfarLhuxIfppCsQi8n9E35YHhW6B/ze2ts4/TByBLXaVvgINDYLxE2ese7w719vbvmU5/PycecohsjayNAAAACXBIWXMAAC4jAAAuIwF4pT92AAAAB3RJTUUH6AQIEQMnlTSSjwAAABl0RVh0Q29tbWVudABDcmVhdGVkIHdpdGggR0lNUFeBDhcAAAASSURBVDjLY2AYBaNgFIyCoQsABMQAAeRw1DoAAAAASUVORK5CYII="; pub type Result = std::result::Result; diff --git a/backends/v3/src/client/mod.rs b/backends/v3/src/client/mod.rs index 86c783ea..ab4311c3 100644 --- a/backends/v3/src/client/mod.rs +++ b/backends/v3/src/client/mod.rs @@ -62,6 +62,6 @@ impl From for InputChunk { } } -static WARMUP_IMAGE_BASE64 : &str = 
"iVBORw0KGgoAAAANSUhEUgAAACgAAAAoCAAAAACpleexAAAGc0lEQVR4nAFoBpf5AINHnT9oHGHwxejPBqNS161/mUe+CNEM2ZjIb1zZ+/ygXl5vkP9T6lgA+jwpw1IgNJCtWJkY7CNQhfuiZBrm+8cUVPW10g4TQIkAS4kJ+6A5qQEfgO5HoId0MVcs4gEcIILe/jMpXbwGCgORcpeGMh0ANY9Kk2EU3Wh6BMR39APvsV0KW8yzEbc5e8JtBWUhs4tHYZ4XhUg5RUVvh9Tl4/FuaOMEgANG4gnASCYBPa1f5p/+uFPqsTWWEqrrQfbI4HsbRHF0smMiTwTKvQJg0zxGRc8muP0ZnqI1cvgtySZP25FY606dWen/hyiwFpcHuy+6xCKCAszDRskWmbjzRyaAb3I5fR+GeB66N5AnNQJ2GCwhYsQJifZBCw7hsxQAcSxA0TMqUCwvxxa+WbWItKBdDUFuBj6TkgcC9letT2IKSPHMr0tLxwFK76OfHulksUmKDPKS+BgDhbKv88I6GqwdMxapuKaqXh04MKP/xuCOAIR2TZzfBY/vtEJO5hSCjtcVqGA+1VQAOocwBA41Vil7/xK32enA6cUCLM7864NqqADVS45Fc6M2/NjrxCcjwL+2tHI3Bs/sOtf3hjETiP0MGQJI/LP+3Cx9SGzwgF/gtNBJLROxfUkaX+Y3AvmMd/IEuQm0EWYv7c8TAGSNTNdgALrJdPUqCo7+B4lGDX+0mfiIVcUS/bQyRvKdBIxydsNe1fEAHhN2FlGUa1YlbdgQeLRW/rjMZc9YqsnGCIZE4krYv80pEVL+bUzMEAQ8HvAODK/TBsK7ba1Sn0HBh6IwLTVEb6idIwi2jKDfpyQx/U+obVDMBFbaVWPV/JkSWtqUqbon7XYOd8wCNJQWDglmxavw51+FRT1u/cwKKHwAM3cdszRLianvQ8QM3Dp+3iEpvT79x6Zcql+SGRHLAd+BUQAnve2eKwRkk7DB5P9iN/LIlXMgxzdcDii2nmYjMhyB3yVbP6bXHEtIo/WYsP+dBEuAtOQBeBgIpwHaFgYwVCEQ6pGU0lZC0iPh86uzzpPWe/I8q2Dc4CEA6Pnz6QnU1ujnzYsuJ0EptCUWTfcSxccu5tCSHH0jTQgrG6/nDlsrMAQCOj1FyhHoXS9GUWxwU/sV8AFHduFF6okKq5vlX4lRvj1+CNMcHOlnAF2Yui1hfSwSXt8MjPstlEj5TPf7li7JBRKvSjDgAaVcm4EFLmxMLmcCTyE/LivPSEOexyzKp8O/MIPLAiQ/4onPEb2s8ESAQBILVhvlqSB1rwCGH1xEePawEple0P9023HYGrvujCDUMlSdgpB/71rfEfTjgY8uHFgPAPdla93mIt7/uWNathN5EUaD8QOzOcQ541UomYbKERPqRjqSBA7PK0sCz8zpLAeMNVI9+Rx/eQeo5A7FeI7wMQuyYCAaxx9u3GfRqMSXUqTwsQAlQ6dAnhSiTQqzy0ivVCrFhIoWciWl/8+6meceIuJ6rvRqJ/GswyeSBP5AMMelxb2MN7XoaXqL9HEvIC1FUhTrHk5xvA1GSOlTSvOHnvxb5c8Behci1Cburu3pF201hAMQNLcjHgmwnnpZFoPvBKpCDLxXVadIjdO4bwLMOtWuExNtJ3x0qJ1yMWIPuZOL20FJNGXcO2f51co5uhjTk52RSDDEAryv3E7i0xHi7eq3Mh/5u1/Vitqez8PoMq3b5/BVpCFvII4YrYqfi5EBaCSvfvWJ4L3E7/h4Fmk+EbAV4ZzqyAZjeNEa+FbDSplFljz49sIczQJBBHrd0OaqYGLf+nKY7SL6EWG1aBBuPayoNRNxflhTdPYLoz4N5EDVAgwNlJXD3/gcnpE9UizPIauZNhP/1rcnE7gNOdwJ4dyZZFQcJSLWH1kC5xOZ9ls7GIUv4BctR2o73VVy0zICLkRku+34Y/6YXywzG2t4adZW5QK7WqesdSGuIXXaws/lLuj
r2ujEgzRFdt9p/gyGFUgsY8YH2x3pGCEOAu5G62sOGAbM5vK88t8zqfDdNCPweZhVJ0cHNw5vC3Lims1435Q+DXwE+K7yjXFolURnHhsUUEoUV/u+9kJD0kPUf8vCjB/3IlJNjcrvEuu+NwKVe83Xd9r6ltsqgv614BHA9QOqO9Itnu7PiQPNzWqrV9r269A7rW1rAUmrqu5+w8XzfuT3s6wszZH5xPXovR85dhN0TXjzBo2PIhc38srv5yopvBBpp115NAAAAABJRU5ErkJggg=="; +static WARMUP_IMAGE_BASE64: &str = "iVBORw0KGgoAAAANSUhEUgAAABQAAAAUCAIAAAAC64paAAABg2lDQ1BJQ0MgcHJvZmlsZQAAKJF9kT1Iw0AcxV/TSotUROxQxCFDdbKLijjWKhShQqgVWnUwufQLmrQkKS6OgmvBwY/FqoOLs64OroIg+AHi7OCk6CIl/i8ptIjx4Lgf7+497t4BQqvKNDOQADTdMjKppJjLr4rBVwQQwhAERGVm1uckKQ3P8XUPH1/v4jzL+9yfY0AtmAzwicQJVjcs4g3imU2rznmfOMLKskp8Tjxh0AWJH7muuPzGueSwwDMjRjYzTxwhFks9rPQwKxsa8TRxTNV0yhdyLquctzhr1Qbr3JO/MFzQV5a5TnMUKSxiCRJEKGiggiosxGnVSTGRof2kh3/E8UvkUshVASPHAmrQIDt+8D/43a1ZnJp0k8JJoO/Ftj/GgOAu0G7a9vexbbdPAP8zcKV3/bUWMPtJerOrxY6AwW3g4rqrKXvA5Q4QfarLhuxIfppCsQi8n9E35YHhW6B/ze2ts4/TByBLXaVvgINDYLxE2ese7w719vbvmU5/PycecohsjayNAAAACXBIWXMAAC4jAAAuIwF4pT92AAAAB3RJTUUH6AQIEQMnlTSSjwAAABl0RVh0Q29tbWVudABDcmVhdGVkIHdpdGggR0lNUFeBDhcAAAASSURBVDjLY2AYBaNgFIyCoQsABMQAAeRw1DoAAAAASUVORK5CYII="; pub type Result = std::result::Result; diff --git a/server/text_generation_server/models/flash_causal_lm.py b/server/text_generation_server/models/flash_causal_lm.py index 07b7604d..34ed93b3 100644 --- a/server/text_generation_server/models/flash_causal_lm.py +++ b/server/text_generation_server/models/flash_causal_lm.py @@ -56,11 +56,13 @@ from text_generation_server.models.globals import ( MEM_POOL, ATTENTION, BLOCK_SIZE, - CUDA_GRAPHS, REQUEST_LOGPROBS, TGI_WIGGLE_ROOM, get_adapter_to_index, ) + +# avoid copying CUDA_GRAPHS value by importing globals as a module +import text_generation_server.models.globals as globals from text_generation_server.layers.attention import KVCache, Seqlen from text_generation_server.utils import StoppingCriteria, HeterogeneousNextTokenChooser from text_generation_server.utils.dist import MEMORY_FRACTION @@ -1629,8 +1631,8 @@ class FlashCausalLM(Model): int(val) for val in 
os.environ["PYTORCH_TUNABLEOP_SEQLENS"].split(",") ] - elif CUDA_GRAPHS is not None: - tuning_sequences = CUDA_GRAPHS + elif globals.CUDA_GRAPHS is not None: + tuning_sequences = globals.CUDA_GRAPHS else: tuning_sequences = [1, 2, 3, 4, 5, 6, 7] @@ -1669,13 +1671,14 @@ class FlashCausalLM(Model): "PyTorch ROCm TunableOp (https://github.com/pytorch/pytorch/tree/main/aten/src/ATen/cuda/tunable) is disabled. TunableOp brings an additional 5-8% latency improvement for small sequence lengths but requires a warmup. If necessary, please use the environment variable PYTORCH_TUNABLEOP_ENABLED=1 to enable TunableOp.", ) - if CUDA_GRAPHS: + if globals.CUDA_GRAPHS: try: log_master( - logger.info, f"Cuda Graphs are enabled for sizes {CUDA_GRAPHS}" + logger.info, + f"Cuda Graphs are enabled for sizes {globals.CUDA_GRAPHS}", ) # Warmup cuda graphs - for bs in CUDA_GRAPHS: + for bs in globals.CUDA_GRAPHS: synchronize(self.device) free_memory = get_free_memory( self.device, MEMORY_FRACTION * TGI_WIGGLE_ROOM @@ -1699,7 +1702,8 @@ class FlashCausalLM(Model): logger.exception("Decode cuda graph warmup failed") else: log_master( - logger.info, f"Cuda Graphs are disabled (CUDA_GRAPHS={CUDA_GRAPHS})." 
+ logger.info, + f"Cuda Graphs are disabled (CUDA_GRAPHS={globals.CUDA_GRAPHS}).", ) assert max_input_tokens is not None diff --git a/server/text_generation_server/models/vlm_causal_lm.py b/server/text_generation_server/models/vlm_causal_lm.py index 657b5608..59f36db9 100644 --- a/server/text_generation_server/models/vlm_causal_lm.py +++ b/server/text_generation_server/models/vlm_causal_lm.py @@ -177,6 +177,13 @@ class VlmCausalLMBatch(FlashCausalLMBatch): pass elif chunk_type == "image": image = Image.open(BytesIO(chunk.image.data)) + # qwen2_vl expects images to be greater than 20 pixels, this is for warmup since the + # default warmup image is 20x20 + if config.model_type == "qwen2_vl": + if image.width <= 20: + w = image.width * 2 + h = image.height * 2 + image = image.resize((w, h)) if config.model_type == "llava_next": images.append(image) else: