From 2fe58798168f13146fe6a3fea130d9697ed90296 Mon Sep 17 00:00:00 2001 From: Nicolas Patry Date: Tue, 20 Aug 2024 15:12:41 +0200 Subject: [PATCH] Updating integration tests with new values with FI/FD. Remove paged as a default too, and using FD everywhere. --- ..._llama_completion_many_prompts_stream.json | 172 +++++++++--------- .../test_flash_deepseek_v2.json | 44 ++--- .../test_flash_deepseek_v2_load.json | 152 ++++++++-------- .../test_flash_llama_fp8_all_params.json | 44 ++--- .../test_flash_starcoder2_default_params.json | 118 ++++++------ .../test_flash_idefics2_next_all_params.json | 8 +- launcher/src/main.rs | 2 +- .../text_generation_server/models/globals.py | 2 +- 8 files changed, 271 insertions(+), 271 deletions(-) diff --git a/integration-tests/models/__snapshots__/test_completion_prompts/test_flash_llama_completion_many_prompts_stream.json b/integration-tests/models/__snapshots__/test_completion_prompts/test_flash_llama_completion_many_prompts_stream.json index d87071cf..b7d5cfe4 100644 --- a/integration-tests/models/__snapshots__/test_completion_prompts/test_flash_llama_completion_many_prompts_stream.json +++ b/integration-tests/models/__snapshots__/test_completion_prompts/test_flash_llama_completion_many_prompts_stream.json @@ -8,11 +8,11 @@ "text": "\n" } ], - "created": 1713284431, + "created": 1724158270, "id": "", "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "object": "text_completion", - "system_fingerprint": "2.0.1-native" + "system_fingerprint": "2.2.1-dev0-native" }, { "choices": [ @@ -23,11 +23,11 @@ "text": "\n" } ], - "created": 1713284431, + "created": 1724158270, "id": "", "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "object": "text_completion", - "system_fingerprint": "2.0.1-native" + "system_fingerprint": "2.2.1-dev0-native" }, { "choices": [ @@ -38,11 +38,11 @@ "text": "\n" } ], - "created": 1713284431, + "created": 1724158270, "id": "", "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "object": "text_completion", - "system_fingerprint": "2.0.1-native" + "system_fingerprint": "2.2.1-dev0-native" }, { "choices": [ @@ -53,11 +53,11 @@ "text": "hd" } ], - "created": 1713284431, + "created": 1724158270, "id": "", "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "object": "text_completion", - "system_fingerprint": "2.0.1-native" + "system_fingerprint": "2.2.1-dev0-native" }, { "choices": [ @@ -68,11 +68,11 @@ "text": "\n" } ], - "created": 1713284431, + "created": 1724158270, "id": "", "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "object": "text_completion", - "system_fingerprint": "2.0.1-native" + "system_fingerprint": "2.2.1-dev0-native" }, { "choices": [ @@ -83,11 +83,11 @@ "text": "\n" } ], - "created": 1713284431, + "created": 1724158270, "id": "", "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "object": "text_completion", - "system_fingerprint": "2.0.1-native" + "system_fingerprint": "2.2.1-dev0-native" }, { "choices": [ @@ -98,11 +98,11 @@ "text": "\n" } ], - "created": 1713284431, + "created": 1724158270, "id": "", "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "object": "text_completion", - "system_fingerprint": "2.0.1-native" + "system_fingerprint": "2.2.1-dev0-native" }, { "choices": [ @@ -113,11 +113,11 @@ "text": "aho" } ], - "created": 1713284431, + "created": 1724158270, "id": "", "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "object": "text_completion", - "system_fingerprint": "2.0.1-native" + "system_fingerprint": "2.2.1-dev0-native" }, { "choices": [ @@ -128,11 +128,11 @@ "text": "2" } ], - "created": 1713284431, + "created": 1724158270, "id": "", "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "object": "text_completion", - "system_fingerprint": "2.0.1-native" + "system_fingerprint": "2.2.1-dev0-native" }, { "choices": [ @@ -143,11 +143,11 @@ "text": "2" } ], - "created": 1713284431, + "created": 1724158270, "id": "", "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "object": "text_completion", - "system_fingerprint": "2.0.1-native" + "system_fingerprint": "2.2.1-dev0-native" }, { "choices": [ @@ -158,11 +158,11 @@ "text": "2" } ], - "created": 1713284431, + "created": 1724158270, "id": "", "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "object": "text_completion", - "system_fingerprint": "2.0.1-native" + "system_fingerprint": "2.2.1-dev0-native" }, { "choices": [ @@ -173,11 +173,11 @@ "text": "ima" } ], - "created": 1713284431, + "created": 1724158270, "id": "", "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "object": "text_completion", - "system_fingerprint": "2.0.1-native" + "system_fingerprint": "2.2.1-dev0-native" }, { "choices": [ @@ -188,11 +188,11 @@ "text": "." } ], - "created": 1713284431, + "created": 1724158270, "id": "", "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "object": "text_completion", - "system_fingerprint": "2.0.1-native" + "system_fingerprint": "2.2.1-dev0-native" }, { "choices": [ @@ -203,11 +203,11 @@ "text": "." } ], - "created": 1713284431, + "created": 1724158270, "id": "", "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "object": "text_completion", - "system_fingerprint": "2.0.1-native" + "system_fingerprint": "2.2.1-dev0-native" }, { "choices": [ @@ -218,11 +218,11 @@ "text": "." } ], - "created": 1713284431, + "created": 1724158270, "id": "", "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "object": "text_completion", - "system_fingerprint": "2.0.1-native" + "system_fingerprint": "2.2.1-dev0-native" }, { "choices": [ @@ -233,11 +233,11 @@ "text": "\n" } ], - "created": 1713284431, + "created": 1724158270, "id": "", "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "object": "text_completion", - "system_fingerprint": "2.0.1-native" + "system_fingerprint": "2.2.1-dev0-native" }, { "choices": [ @@ -248,11 +248,11 @@ "text": " Sarah" } ], - "created": 1713284431, + "created": 1724158270, "id": "", "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "object": "text_completion", - "system_fingerprint": "2.0.1-native" + "system_fingerprint": "2.2.1-dev0-native" }, { "choices": [ @@ -263,11 +263,11 @@ "text": " Yes" } ], - "created": 1713284431, + "created": 1724158270, "id": "", "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "object": "text_completion", - "system_fingerprint": "2.0.1-native" + "system_fingerprint": "2.2.1-dev0-native" }, { "choices": [ @@ -278,11 +278,11 @@ "text": " And" } ], - "created": 1713284431, + "created": 1724158270, "id": "", "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "object": "text_completion", - "system_fingerprint": "2.0.1-native" + "system_fingerprint": "2.2.1-dev0-native" }, { "choices": [ @@ -293,11 +293,11 @@ "text": "i" } ], - "created": 1713284431, + "created": 1724158270, "id": "", "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "object": "text_completion", - "system_fingerprint": "2.0.1-native" + "system_fingerprint": "2.2.1-dev0-native" }, { "choices": [ @@ -308,11 +308,11 @@ "text": "'" } ], - "created": 1713284431, + "created": 1724158270, "id": "", "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "object": "text_completion", - "system_fingerprint": "2.0.1-native" + "system_fingerprint": "2.2.1-dev0-native" }, { "choices": [ @@ -323,11 +323,11 @@ "text": "," } ], - "created": 1713284431, + "created": 1724158270, "id": "", "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "object": "text_completion", - "system_fingerprint": "2.0.1-native" + "system_fingerprint": "2.2.1-dev0-native" }, { "choices": [ @@ -338,11 +338,11 @@ "text": " what" } ], - "created": 1713284431, + "created": 1724158270, "id": "", "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "object": "text_completion", - "system_fingerprint": "2.0.1-native" + "system_fingerprint": "2.2.1-dev0-native" }, { "choices": [ @@ -353,11 +353,11 @@ "text": "'" } ], - "created": 1713284431, + "created": 1724158270, "id": "", "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "object": "text_completion", - "system_fingerprint": "2.0.1-native" + "system_fingerprint": "2.2.1-dev0-native" }, { "choices": [ @@ -368,11 +368,11 @@ "text": "s" } ], - "created": 1713284431, + "created": 1724158270, "id": "", "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "object": "text_completion", - "system_fingerprint": "2.0.1-native" + "system_fingerprint": "2.2.1-dev0-native" }, { "choices": [ @@ -383,11 +383,11 @@ "text": " Moh" } ], - "created": 1713284431, + "created": 1724158270, "id": "", "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "object": "text_completion", - "system_fingerprint": "2.0.1-native" + "system_fingerprint": "2.2.1-dev0-native" }, { "choices": [ @@ -398,11 +398,11 @@ "text": " is" } ], - "created": 1713284431, + "created": 1724158270, "id": "", "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "object": "text_completion", - "system_fingerprint": "2.0.1-native" + "system_fingerprint": "2.2.1-dev0-native" }, { "choices": [ @@ -413,11 +413,11 @@ "text": "m" } ], - "created": 1713284431, + "created": 1724158270, "id": "", "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "object": "text_completion", - "system_fingerprint": "2.0.1-native" + "system_fingerprint": "2.2.1-dev0-native" }, { "choices": [ @@ -428,11 +428,11 @@ "text": " Room" } ], - "created": 1713284431, + "created": 1724158270, "id": "", "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "object": "text_completion", - "system_fingerprint": "2.0.1-native" + "system_fingerprint": "2.2.1-dev0-native" }, { "choices": [ @@ -443,11 +443,11 @@ "text": "s" } ], - "created": 1713284431, + "created": 1724158270, "id": "", "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "object": "text_completion", - "system_fingerprint": "2.0.1-native" + "system_fingerprint": "2.2.1-dev0-native" }, { "choices": [ @@ -458,11 +458,11 @@ "text": " the" } ], - "created": 1713284431, + "created": 1724158270, "id": "", "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "object": "text_completion", - "system_fingerprint": "2.0.1-native" + "system_fingerprint": "2.2.1-dev0-native" }, { "choices": [ @@ -473,11 +473,11 @@ "text": " tired" } ], - "created": 1713284431, + "created": 1724158270, "id": "", "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "object": "text_completion", - "system_fingerprint": "2.0.1-native" + "system_fingerprint": "2.2.1-dev0-native" }, { "choices": [ @@ -488,11 +488,11 @@ "text": ":" } ], - "created": 1713284431, + "created": 1724158270, "id": "", "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "object": "text_completion", - "system_fingerprint": "2.0.1-native" + "system_fingerprint": "2.2.1-dev0-native" }, { "choices": [ @@ -503,11 +503,11 @@ "text": "'" } ], - "created": 1713284431, + "created": 1724158270, "id": "", "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "object": "text_completion", - "system_fingerprint": "2.0.1-native" + "system_fingerprint": "2.2.1-dev0-native" }, { "choices": [ @@ -518,11 +518,11 @@ "text": " capital" } ], - "created": 1713284431, + "created": 1724158270, "id": "", "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "object": "text_completion", - "system_fingerprint": "2.0.1-native" + "system_fingerprint": "2.2.1-dev0-native" }, { "choices": [ @@ -530,73 +530,73 @@ "finish_reason": "", "index": 3, "logprobs": null, - "text": " of" + "text": "," } ], - "created": 1713284431, + "created": 1724158270, "id": "", "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "object": "text_completion", - "system_fingerprint": "2.0.1-native" + "system_fingerprint": "2.2.1-dev0-native" }, { "choices": [ { - "finish_reason": "", + "finish_reason": "length", "index": 0, "logprobs": null, "text": " She" } ], - "created": 1713284431, + "created": 1724158270, "id": "", "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "object": "text_completion", - "system_fingerprint": "2.0.1-native" + "system_fingerprint": "2.2.1-dev0-native" }, { "choices": [ { - "finish_reason": "", + "finish_reason": "length", "index": 1, "logprobs": null, "text": " scale" } ], - "created": 1713284431, + "created": 1724158270, "id": "", "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "object": "text_completion", - "system_fingerprint": "2.0.1-native" + "system_fingerprint": "2.2.1-dev0-native" }, { "choices": [ { - "finish_reason": "", + "finish_reason": "length", "index": 2, "logprobs": null, "text": " of" } ], - "created": 1713284431, + "created": 1724158270, "id": "", "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "object": "text_completion", - "system_fingerprint": "2.0.1-native" + "system_fingerprint": "2.2.1-dev0-native" }, { "choices": [ { - "finish_reason": "", + "finish_reason": "length", "index": 3, "logprobs": null, - "text": " being" + "text": " its" } ], - "created": 1713284431, + "created": 1724158270, "id": "", "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "object": "text_completion", - "system_fingerprint": "2.0.1-native" + "system_fingerprint": "2.2.1-dev0-native" } ] diff --git a/integration-tests/models/__snapshots__/test_flash_deepseek_v2/test_flash_deepseek_v2.json b/integration-tests/models/__snapshots__/test_flash_deepseek_v2/test_flash_deepseek_v2.json index 03f90367..732b0c49 100644 --- a/integration-tests/models/__snapshots__/test_flash_deepseek_v2/test_flash_deepseek_v2.json +++ b/integration-tests/models/__snapshots__/test_flash_deepseek_v2/test_flash_deepseek_v2.json @@ -16,7 +16,7 @@ }, { "id": 3102, - "logprob": -11.1875, + "logprob": -11.25, "text": " request" } ], @@ -24,66 +24,66 @@ "tokens": [ { "id": 185, - "logprob": -1.5546875, + "logprob": -1.546875, "special": false, "text": "\n" }, { "id": 549, - "logprob": -2.84375, + "logprob": -2.859375, "special": false, "text": "The" }, { "id": 1727, - "logprob": -2.34375, + "logprob": -2.484375, "special": false, "text": " test" }, { "id": 3102, - "logprob": -0.8359375, + "logprob": -0.83203125, "special": false, "text": " request" }, { "id": 317, - "logprob": -1.0859375, + "logprob": -1.1484375, "special": false, "text": " is" }, { - "id": 254, - "logprob": -1.5390625, + "id": 245, + "logprob": -1.578125, "special": false, - "text": " the" + "text": " a" }, { - "id": 1022, - "logprob": -1.1875, + "id": 3412, + "logprob": -2.578125, "special": false, - "text": " first" + "text": " document" }, { - "id": 3458, - "logprob": -0.35546875, + "id": 344, + "logprob": -1.125, "special": false, - "text": " step" + "text": " that" }, { - "id": 279, - "logprob": -0.8828125, + "id": 317, + "logprob": -1.6953125, "special": false, - "text": " in" + "text": " is" }, { - "id": 254, - "logprob": -0.71484375, + "id": 1222, + "logprob": -1.71875, "special": false, - "text": " the" + "text": " used" } ], "top_tokens": null }, - "generated_text": "\nThe test request is the first step in the" + "generated_text": "\nThe test request is a document that is used" } diff --git a/integration-tests/models/__snapshots__/test_flash_deepseek_v2/test_flash_deepseek_v2_load.json b/integration-tests/models/__snapshots__/test_flash_deepseek_v2/test_flash_deepseek_v2_load.json index e365829a..f1eeab25 100644 --- a/integration-tests/models/__snapshots__/test_flash_deepseek_v2/test_flash_deepseek_v2_load.json +++ b/integration-tests/models/__snapshots__/test_flash_deepseek_v2/test_flash_deepseek_v2_load.json @@ -37,56 +37,56 @@ }, { "id": 1727, - "logprob": -2.359375, + "logprob": -2.4375, "special": false, "text": " test" }, { "id": 3102, - "logprob": -0.83203125, + "logprob": -0.83984375, "special": false, "text": " request" }, { "id": 317, - "logprob": -1.125, + "logprob": -1.1328125, "special": false, "text": " is" }, { - "id": 245, - "logprob": -1.5703125, + "id": 254, + "logprob": -1.515625, "special": false, - "text": " a" + "text": " the" }, { - "id": 3412, - "logprob": -2.578125, + "id": 1022, + "logprob": -1.15625, "special": false, - "text": " document" + "text": " first" }, { - "id": 344, - "logprob": -1.125, + "id": 3458, + "logprob": -0.3671875, "special": false, - "text": " that" + "text": " step" }, { - "id": 317, - "logprob": -1.6953125, + "id": 279, + "logprob": -0.88671875, "special": false, - "text": " is" + "text": " in" }, { - "id": 1222, - "logprob": -1.75, + "id": 254, + "logprob": -0.69140625, "special": false, - "text": " used" + "text": " the" } ], "top_tokens": null }, - "generated_text": "\nThe test request is a document that is used" + "generated_text": "\nThe test request is the first step in the" }, { "details": { @@ -126,56 +126,56 @@ }, { "id": 1727, - "logprob": -2.359375, + "logprob": -2.4375, "special": false, "text": " test" }, { "id": 3102, - "logprob": -0.83203125, + "logprob": -0.83984375, "special": false, "text": " request" }, { "id": 317, - "logprob": -1.125, + "logprob": -1.1328125, "special": false, "text": " is" }, { - "id": 245, - "logprob": -1.5703125, + "id": 254, + "logprob": -1.515625, "special": false, - "text": " a" + "text": " the" }, { - "id": 3412, - "logprob": -2.578125, + "id": 1022, + "logprob": -1.15625, "special": false, - "text": " document" + "text": " first" }, { - "id": 344, - "logprob": -1.125, + "id": 3458, + "logprob": -0.3671875, "special": false, - "text": " that" + "text": " step" }, { - "id": 317, - "logprob": -1.6953125, + "id": 279, + "logprob": -0.88671875, "special": false, - "text": " is" + "text": " in" }, { - "id": 1222, - "logprob": -1.75, + "id": 254, + "logprob": -0.69140625, "special": false, - "text": " used" + "text": " the" } ], "top_tokens": null }, - "generated_text": "\nThe test request is a document that is used" + "generated_text": "\nThe test request is the first step in the" }, { "details": { @@ -215,56 +215,56 @@ }, { "id": 1727, - "logprob": -2.359375, + "logprob": -2.4375, "special": false, "text": " test" }, { "id": 3102, - "logprob": -0.83203125, + "logprob": -0.83984375, "special": false, "text": " request" }, { "id": 317, - "logprob": -1.125, + "logprob": -1.1328125, "special": false, "text": " is" }, { - "id": 245, - "logprob": -1.5703125, + "id": 254, + "logprob": -1.515625, "special": false, - "text": " a" + "text": " the" }, { - "id": 3412, - "logprob": -2.578125, + "id": 1022, + "logprob": -1.15625, "special": false, - "text": " document" + "text": " first" }, { - "id": 344, - "logprob": -1.125, + "id": 3458, + "logprob": -0.3671875, "special": false, - "text": " that" + "text": " step" }, { - "id": 317, - "logprob": -1.6953125, + "id": 279, + "logprob": -0.88671875, "special": false, - "text": " is" + "text": " in" }, { - "id": 1222, - "logprob": -1.75, + "id": 254, + "logprob": -0.69140625, "special": false, - "text": " used" + "text": " the" } ], "top_tokens": null }, - "generated_text": "\nThe test request is a document that is used" + "generated_text": "\nThe test request is the first step in the" }, { "details": { @@ -304,55 +304,55 @@ }, { "id": 1727, - "logprob": -2.359375, + "logprob": -2.4375, "special": false, "text": " test" }, { "id": 3102, - "logprob": -0.83203125, + "logprob": -0.83984375, "special": false, "text": " request" }, { "id": 317, - "logprob": -1.125, + "logprob": -1.1328125, "special": false, "text": " is" }, { - "id": 245, - "logprob": -1.5703125, + "id": 254, + "logprob": -1.515625, "special": false, - "text": " a" + "text": " the" }, { - "id": 3412, - "logprob": -2.578125, + "id": 1022, + "logprob": -1.15625, "special": false, - "text": " document" + "text": " first" }, { - "id": 344, - "logprob": -1.125, + "id": 3458, + "logprob": -0.3671875, "special": false, - "text": " that" + "text": " step" }, { - "id": 317, - "logprob": -1.6953125, + "id": 279, + "logprob": -0.88671875, "special": false, - "text": " is" + "text": " in" }, { - "id": 1222, - "logprob": -1.75, + "id": 254, + "logprob": -0.69140625, "special": false, - "text": " used" + "text": " the" } ], "top_tokens": null }, - "generated_text": "\nThe test request is a document that is used" + "generated_text": "\nThe test request is the first step in the" } ] diff --git a/integration-tests/models/__snapshots__/test_flash_llama_fp8/test_flash_llama_fp8_all_params.json b/integration-tests/models/__snapshots__/test_flash_llama_fp8/test_flash_llama_fp8_all_params.json index bf981e4f..e94197ba 100644 --- a/integration-tests/models/__snapshots__/test_flash_llama_fp8/test_flash_llama_fp8_all_params.json +++ b/integration-tests/models/__snapshots__/test_flash_llama_fp8/test_flash_llama_fp8_all_params.json @@ -16,7 +16,7 @@ }, { "id": 1715, - "logprob": -10.375, + "logprob": -10.4375, "text": " request" } ], @@ -30,7 +30,7 @@ }, { "id": 2209, - "logprob": -2.78125, + "logprob": -2.609375, "special": false, "text": " Is" }, @@ -42,48 +42,48 @@ }, { "id": 734, - "logprob": -2.703125, + "logprob": -2.828125, "special": false, "text": " function" }, { "id": 330, - "logprob": -0.34179688, + "logprob": -0.36523438, "special": false, "text": " \"" }, { "id": 4110, - "logprob": -2.359375, + "logprob": -2.453125, "special": false, "text": "Create" }, { - "id": 7575, - "logprob": -2.1875, + "id": 264, + "logprob": -0.20117188, "special": false, - "text": "Process" + "text": " a" + }, + { + "id": 502, + "logprob": -0.29882812, + "special": false, + "text": " new" + }, + { + "id": 1052, + "logprob": -1.1953125, + "special": false, + "text": " file" }, { "id": 1, - "logprob": -0.07910156, + "logprob": -1.265625, "special": false, "text": "\"" - }, - { - "id": 304, - "logprob": -0.83203125, - "special": false, - "text": " in" - }, - { - "id": 12468, - "logprob": -1.8203125, - "special": false, - "text": " Win" } ], "top_tokens": null }, - "generated_text": "Test request: Is the function \"CreateProcess\" in Win" + "generated_text": "Test request: Is the function \"Create a new file\"" } diff --git a/integration-tests/models/__snapshots__/test_flash_starcoder2/test_flash_starcoder2_default_params.json b/integration-tests/models/__snapshots__/test_flash_starcoder2/test_flash_starcoder2_default_params.json index d882b82a..05f173a2 100644 --- a/integration-tests/models/__snapshots__/test_flash_starcoder2/test_flash_starcoder2_default_params.json +++ b/integration-tests/models/__snapshots__/test_flash_starcoder2/test_flash_starcoder2_default_params.json @@ -16,7 +16,7 @@ }, { "id": 100, - "logprob": -0.38549805, + "logprob": -0.38305664, "text": "_" }, { @@ -29,7 +29,7 @@ "tokens": [ { "id": 2284, - "logprob": -0.31323242, + "logprob": -0.296875, "special": false, "text": "():" }, @@ -59,19 +59,19 @@ }, { "id": 10914, - "logprob": -0.7817383, + "logprob": -0.7734375, "special": false, "text": " World" }, { "id": 16013, - "logprob": -0.6328125, + "logprob": -0.61816406, "special": false, "text": "!\")" }, { "id": 222, - "logprob": -0.0619812, + "logprob": -0.054870605, "special": false, "text": "\n" }, @@ -83,7 +83,7 @@ }, { "id": 610, - "logprob": -0.4086914, + "logprob": -0.4152832, "special": false, "text": "def" }, @@ -113,7 +113,7 @@ }, { "id": 444, - "logprob": -0.21826172, + "logprob": -0.21313477, "special": false, "text": "name" }, @@ -160,28 +160,16 @@ "text": "Hello" }, { - "id": 925, - "logprob": -3.3476562, + "id": 332, + "logprob": -0.034698486, "special": false, - "text": " %" + "text": " \"" }, { - "id": 120, + "id": 494, "logprob": 0.0, "special": false, - "text": "s" - }, - { - "id": 11571, - "logprob": -0.10021973, - "special": false, - "text": "!\"" - }, - { - "id": 925, - "logprob": 0.0, - "special": false, - "text": " %" + "text": " +" }, { "id": 655, @@ -190,10 +178,22 @@ "text": " name" }, { - "id": 46, + "id": 494, + "logprob": -0.20141602, + "special": false, + "text": " +" + }, + { + "id": 332, "logprob": 0.0, "special": false, - "text": ")" + "text": " \"" + }, + { + "id": 16013, + "logprob": 0.0, + "special": false, + "text": "!\")" }, { "id": 222, @@ -251,7 +251,7 @@ }, { "id": 400, - "logprob": -0.074279785, + "logprob": 0.0, "special": false, "text": "age" }, @@ -310,22 +310,34 @@ "text": "Hello" }, { - "id": 925, + "id": 332, "logprob": 0.0, "special": false, - "text": " %" + "text": " \"" }, { - "id": 120, + "id": 494, "logprob": 0.0, "special": false, - "text": "s" + "text": " +" }, { - "id": 49, - "logprob": -0.07891846, + "id": 655, + "logprob": 0.0, "special": false, - "text": "," + "text": " name" + }, + { + "id": 494, + "logprob": 0.0, + "special": false, + "text": " +" + }, + { + "id": 3021, + "logprob": -0.5761719, + "special": false, + "text": " \"," }, { "id": 863, @@ -340,55 +352,43 @@ "text": " are" }, { - "id": 925, + "id": 332, "logprob": 0.0, "special": false, - "text": " %" + "text": " \"" }, { - "id": 105, + "id": 494, "logprob": 0.0, "special": false, - "text": "d" + "text": " +" }, { - "id": 11339, + "id": 615, "logprob": 0.0, "special": false, - "text": " years" + "text": " str" }, { - "id": 3627, + "id": 45, "logprob": 0.0, "special": false, - "text": " old" + "text": "(" }, { - "id": 11571, + "id": 400, "logprob": 0.0, "special": false, - "text": "!\"" + "text": "age" }, { - "id": 925, + "id": 46, "logprob": 0.0, "special": false, - "text": " %" - }, - { - "id": 327, - "logprob": 0.0, - "special": false, - "text": " (" - }, - { - "id": 444, - "logprob": 0.0, - "special": false, - "text": "name" + "text": ")" } ], "top_tokens": null }, - "generated_text": "():\n print(\"Hello World!\")\n\ndef print_hello_name(name):\n print(\"Hello %s!\" % name)\n\ndef print_hello_name_age(name, age):\n print(\"Hello %s, you are %d years old!\" % (name" + "generated_text": "():\n print(\"Hello World!\")\n\ndef print_hello_name(name):\n print(\"Hello \" + name + \"!\")\n\ndef print_hello_name_age(name, age):\n print(\"Hello \" + name + \", you are \" + str(age)" } diff --git a/integration-tests/models/__snapshots__/test_idefics2/test_flash_idefics2_next_all_params.json b/integration-tests/models/__snapshots__/test_idefics2/test_flash_idefics2_next_all_params.json index 1fad0b96..dab437b9 100644 --- a/integration-tests/models/__snapshots__/test_idefics2/test_flash_idefics2_next_all_params.json +++ b/integration-tests/models/__snapshots__/test_idefics2/test_flash_idefics2_next_all_params.json @@ -30,19 +30,19 @@ }, { "id": 264, - "logprob": -0.37573242, + "logprob": -0.38061523, "special": false, "text": " a" }, { "id": 633, - "logprob": -0.09161377, + "logprob": -0.09301758, "special": false, "text": " new" }, { "id": 4480, - "logprob": -0.26171875, + "logprob": -0.26782227, "special": false, "text": " feature" }, @@ -78,7 +78,7 @@ }, { "id": 13, - "logprob": 0.0, + "logprob": -0.10632324, "special": false, "text": "\n" } diff --git a/launcher/src/main.rs b/launcher/src/main.rs index b4e1a6b7..e16fa09d 100644 --- a/launcher/src/main.rs +++ b/launcher/src/main.rs @@ -1504,7 +1504,7 @@ fn main() -> Result<(), LauncherError> { std::env::set_var("USE_PREFIX_CACHING", "0"); } match config.model_type.as_deref() { - Some("gemma2") | Some("falcon") => { + Some("gemma2") | Some("falcon") | Some("deepseek_v2") => { // Required because gemma2 needs bfloat16 which is not supported by // flashinfer ? std::env::set_var("USE_PREFIX_CACHING", "0"); diff --git a/server/text_generation_server/models/globals.py b/server/text_generation_server/models/globals.py index 4132ca73..5dc8b685 100644 --- a/server/text_generation_server/models/globals.py +++ b/server/text_generation_server/models/globals.py @@ -7,7 +7,7 @@ from text_generation_server.utils.log import log_master PREFIX_CACHING = os.getenv("USE_PREFIX_CACHING", "1").lower() in {"1", "true"} log_master(logger.info, f"Using prefix caching = {PREFIX_CACHING}") -ATTENTION = os.getenv("ATTENTION", "flashinfer" if PREFIX_CACHING else "paged") +ATTENTION = os.getenv("ATTENTION", "flashinfer" if PREFIX_CACHING else "flashdecoding") _expected = {"paged", "flashdecoding", "flashinfer"} assert ( ATTENTION in _expected