diff --git a/integration-tests/models/__snapshots__/test_lora_mistral/test_lora_mistral.json b/integration-tests/models/__snapshots__/test_lora_mistral/test_lora_mistral.json new file mode 100644 index 00000000..c7ac0fff --- /dev/null +++ b/integration-tests/models/__snapshots__/test_lora_mistral/test_lora_mistral.json @@ -0,0 +1,104 @@ +{ + "details": { + "best_of_sequences": null, + "finish_reason": "length", + "generated_tokens": 10, + "prefill": [ + { + "id": 1, + "logprob": null, + "text": "" + }, + { + "id": 1824, + "logprob": -6.0625, + "text": "What" + }, + { + "id": 349, + "logprob": -1.4746094, + "text": "is" + }, + { + "id": 14972, + "logprob": -8.7578125, + "text": "Deep" + }, + { + "id": 17504, + "logprob": -2.125, + "text": "Learning" + }, + { + "id": 28804, + "logprob": -0.32128906, + "text": "?" + } + ], + "seed": null, + "tokens": [ + { + "id": 13, + "logprob": -0.39331055, + "special": false, + "text": "\n" + }, + { + "id": 13, + "logprob": -0.20532227, + "special": false, + "text": "\n" + }, + { + "id": 23229, + "logprob": -0.5996094, + "special": false, + "text": "Deep" + }, + { + "id": 5168, + "logprob": -0.67578125, + "special": false, + "text": " learning" + }, + { + "id": 349, + "logprob": -0.24951172, + "special": false, + "text": " is" + }, + { + "id": 264, + "logprob": -0.38110352, + "special": false, + "text": " a" + }, + { + "id": 19804, + "logprob": -1.3125, + "special": false, + "text": " subset" + }, + { + "id": 302, + "logprob": -0.009246826, + "special": false, + "text": " of" + }, + { + "id": 5599, + "logprob": -0.13928223, + "special": false, + "text": " machine" + }, + { + "id": 5168, + "logprob": -0.004638672, + "special": false, + "text": " learning" + } + ], + "top_tokens": null + }, + "generated_text": "\n\nDeep learning is a subset of machine learning" +} diff --git a/integration-tests/models/__snapshots__/test_lora_mistral/test_lora_mistral_with_customer_support_adapter.json 
b/integration-tests/models/__snapshots__/test_lora_mistral/test_lora_mistral_with_customer_support_adapter.json index dfdd2cc3..9c4cc84e 100644 --- a/integration-tests/models/__snapshots__/test_lora_mistral/test_lora_mistral_with_customer_support_adapter.json +++ b/integration-tests/models/__snapshots__/test_lora_mistral/test_lora_mistral_with_customer_support_adapter.json @@ -1,251 +1,35 @@ { "details": { - "finish_reason": "length", - "generated_tokens": 40, + "finish_reason": "eos_token", + "generated_tokens": 4, "prefill": [], "seed": null, "tokens": [ { - "id": 13, - "logprob": -0.27416992, - "special": false, - "text": "\n" + "id": 1, + "logprob": -0.0008506775, + "special": true, + "text": "" }, { - "id": 13, - "logprob": -0.17016602, + "id": 1877, + "logprob": -0.01953125, "special": false, - "text": "\n" + "text": " check" }, { - "id": 28737, - "logprob": -2.7109375, + "id": 7873, + "logprob": -5.9247017e-05, "special": false, - "text": "I" + "text": " balance" }, { - "id": 28809, - "logprob": -1.5, - "special": false, - "text": "’" - }, - { - "id": 28719, - "logprob": -0.34204102, - "special": false, - "text": "m" - }, - { - "id": 459, - "logprob": -1.6914062, - "special": false, - "text": " not" - }, - { - "id": 1864, - "logprob": -0.69140625, - "special": false, - "text": " sure" - }, - { - "id": 513, - "logprob": -1.6171875, - "special": false, - "text": " if" - }, - { - "id": 315, - "logprob": -1.3837891, - "special": false, - "text": " I" - }, - { - "id": 541, - "logprob": -1.2226562, - "special": false, - "text": " can" - }, - { - "id": 1567, - "logprob": -1.8652344, - "special": false, - "text": " come" - }, - { - "id": 582, - "logprob": -0.0070228577, - "special": false, - "text": " up" - }, - { - "id": 395, - "logprob": -0.0054092407, - "special": false, - "text": " with" - }, - { - "id": 28705, - "logprob": -0.62597656, - "special": false, - "text": " " - }, - { - "id": 28770, - "logprob": -0.0035572052, - "special": false, - "text": "3" - }, - 
{ - "id": 4842, - "logprob": -0.93603516, - "special": false, - "text": " unique" - }, - { - "id": 3085, - "logprob": -0.028411865, - "special": false, - "text": " words" - }, - { - "id": 369, - "logprob": -1.0400391, - "special": false, - "text": " that" - }, - { - "id": 6685, - "logprob": -0.09710693, - "special": false, - "text": " describe" - }, - { - "id": 528, - "logprob": -0.066467285, - "special": false, - "text": " me" - }, - { - "id": 28725, - "logprob": -1.0722656, - "special": false, - "text": "," - }, - { - "id": 562, - "logprob": -0.33422852, - "special": false, - "text": " but" - }, - { - "id": 315, - "logprob": -0.5136719, - "special": false, - "text": " I" - }, - { - "id": 28809, - "logprob": -0.8989258, - "special": false, - "text": "’" - }, - { - "id": 584, - "logprob": -0.2076416, - "special": false, - "text": "ll" - }, - { - "id": 1464, - "logprob": -0.8808594, - "special": false, - "text": " try" - }, - { - "id": 28723, - "logprob": -0.88427734, - "special": false, - "text": "." - }, - { - "id": 13, - "logprob": -0.91064453, - "special": false, - "text": "\n" - }, - { - "id": 13, - "logprob": -0.08105469, - "special": false, - "text": "\n" - }, - { - "id": 28740, - "logprob": -1.8486328, - "special": false, - "text": "1" - }, - { - "id": 28723, - "logprob": -0.111572266, - "special": false, - "text": "." - }, - { - "id": 23626, - "logprob": -3.15625, - "special": false, - "text": " Creative" - }, - { - "id": 13, - "logprob": -0.9194336, - "special": false, - "text": "\n" - }, - { - "id": 28750, - "logprob": -0.24841309, - "special": false, - "text": "2" - }, - { - "id": 28723, - "logprob": -9.393692e-05, - "special": false, - "text": "." 
- }, - { - "id": 6785, - "logprob": -3.1386719, - "special": false, - "text": " Fun" - }, - { - "id": 1780, - "logprob": -0.53564453, - "special": false, - "text": "ny" - }, - { - "id": 13, - "logprob": -0.09033203, - "special": false, - "text": "\n" - }, - { - "id": 28770, - "logprob": -0.00466156, - "special": false, - "text": "3" - }, - { - "id": 28723, - "logprob": -0.00016450882, - "special": false, - "text": "." + "id": 2, + "logprob": -6.592274e-05, + "special": true, + "text": "" } ] }, - "generated_text": "\n\nI’m not sure if I can come up with 3 unique words that describe me, but I’ll try.\n\n1. Creative\n2. Funny\n3." + "generated_text": " check balance" } diff --git a/integration-tests/models/__snapshots__/test_lora_mistral/test_lora_mistral_with_dbpedia_adapter.json b/integration-tests/models/__snapshots__/test_lora_mistral/test_lora_mistral_with_dbpedia_adapter.json index 91eb5edf..e4777137 100644 --- a/integration-tests/models/__snapshots__/test_lora_mistral/test_lora_mistral_with_dbpedia_adapter.json +++ b/integration-tests/models/__snapshots__/test_lora_mistral/test_lora_mistral_with_dbpedia_adapter.json @@ -13,31 +13,31 @@ }, { "id": 28705, - "logprob": -0.0016384125, + "logprob": -0.001625061, "special": false, "text": " " }, { "id": 1, - "logprob": -1.4931641, + "logprob": -1.4921875, "special": true, "text": "" }, { "id": 28705, - "logprob": -0.00075769424, + "logprob": -0.0007457733, "special": false, "text": " " }, { "id": 28740, - "logprob": -0.25024414, + "logprob": -0.25048828, "special": false, "text": "1" }, { "id": 28740, - "logprob": -0.2631836, + "logprob": -0.26367188, "special": false, "text": "1" }, diff --git a/integration-tests/models/__snapshots__/test_lora_mistral/test_lora_mistral_without_customer_support_adapter.json b/integration-tests/models/__snapshots__/test_lora_mistral/test_lora_mistral_without_customer_support_adapter.json deleted file mode 100644 index 8c00dee7..00000000 --- 
a/integration-tests/models/__snapshots__/test_lora_mistral/test_lora_mistral_without_customer_support_adapter.json +++ /dev/null @@ -1,251 +0,0 @@ -{ - "details": { - "finish_reason": "length", - "generated_tokens": 40, - "prefill": [], - "seed": null, - "tokens": [ - { - "id": 13, - "logprob": -0.31347656, - "special": false, - "text": "\n" - }, - { - "id": 13, - "logprob": -0.27441406, - "special": false, - "text": "\n" - }, - { - "id": 28737, - "logprob": -2.2285156, - "special": false, - "text": "I" - }, - { - "id": 28809, - "logprob": -1.4677734, - "special": false, - "text": "’" - }, - { - "id": 28719, - "logprob": -0.31762695, - "special": false, - "text": "m" - }, - { - "id": 264, - "logprob": -1.6865234, - "special": false, - "text": " a" - }, - { - "id": 1215, - "logprob": -3.2695312, - "special": false, - "text": " very" - }, - { - "id": 20640, - "logprob": -3.1230469, - "special": false, - "text": " passionate" - }, - { - "id": 1338, - "logprob": -0.48339844, - "special": false, - "text": " person" - }, - { - "id": 28723, - "logprob": -0.9970703, - "special": false, - "text": "." - }, - { - "id": 315, - "logprob": -0.5498047, - "special": false, - "text": " I" - }, - { - "id": 28809, - "logprob": -1.1923828, - "special": false, - "text": "’" - }, - { - "id": 28719, - "logprob": -0.080444336, - "special": false, - "text": "m" - }, - { - "id": 1215, - "logprob": -1.8271484, - "special": false, - "text": " very" - }, - { - "id": 12215, - "logprob": -2.8847656, - "special": false, - "text": " driven" - }, - { - "id": 28723, - "logprob": -1.0927734, - "special": false, - "text": "." 
- }, - { - "id": 315, - "logprob": -0.4584961, - "special": false, - "text": " I" - }, - { - "id": 28809, - "logprob": -0.5019531, - "special": false, - "text": "’" - }, - { - "id": 28719, - "logprob": -0.030715942, - "special": false, - "text": "m" - }, - { - "id": 1215, - "logprob": -0.96972656, - "special": false, - "text": " very" - }, - { - "id": 7798, - "logprob": -2.8847656, - "special": false, - "text": " determined" - }, - { - "id": 28723, - "logprob": -0.27319336, - "special": false, - "text": "." - }, - { - "id": 13, - "logprob": -0.56396484, - "special": false, - "text": "\n" - }, - { - "id": 13, - "logprob": -0.011016846, - "special": false, - "text": "\n" - }, - { - "id": 3195, - "logprob": -0.7163086, - "special": false, - "text": "What" - }, - { - "id": 349, - "logprob": -1.1611328, - "special": false, - "text": " is" - }, - { - "id": 574, - "logprob": -0.515625, - "special": false, - "text": " your" - }, - { - "id": 6656, - "logprob": -1.0253906, - "special": false, - "text": " favorite" - }, - { - "id": 1970, - "logprob": -2.1738281, - "special": false, - "text": " thing" - }, - { - "id": 684, - "logprob": -0.48364258, - "special": false, - "text": " about" - }, - { - "id": 1250, - "logprob": -1.8876953, - "special": false, - "text": " being" - }, - { - "id": 264, - "logprob": -0.41967773, - "special": false, - "text": " a" - }, - { - "id": 8626, - "logprob": -2.9160156, - "special": false, - "text": " teacher" - }, - { - "id": 28804, - "logprob": -0.11920166, - "special": false, - "text": "?" 
- }, - { - "id": 13, - "logprob": -0.023727417, - "special": false, - "text": "\n" - }, - { - "id": 13, - "logprob": -0.010848999, - "special": false, - "text": "\n" - }, - { - "id": 28737, - "logprob": -1.0566406, - "special": false, - "text": "I" - }, - { - "id": 2016, - "logprob": -0.7163086, - "special": false, - "text": " love" - }, - { - "id": 272, - "logprob": -1.9169922, - "special": false, - "text": " the" - }, - { - "id": 1639, - "logprob": -2.03125, - "special": false, - "text": " fact" - } - ] - }, - "generated_text": "\n\nI’m a very passionate person. I’m very driven. I’m very determined.\n\nWhat is your favorite thing about being a teacher?\n\nI love the fact" -} diff --git a/integration-tests/models/test_lora_mistral.py b/integration-tests/models/test_lora_mistral.py index ccdc1486..ea35fbcb 100644 --- a/integration-tests/models/test_lora_mistral.py +++ b/integration-tests/models/test_lora_mistral.py @@ -10,7 +10,6 @@ def lora_mistral_handle(launcher): "predibase/dbpedia", "predibase/customer_support", ], - cuda_graphs=[0], ) as handle: yield handle @@ -25,9 +24,13 @@ async def lora_mistral(lora_mistral_handle): @pytest.mark.private async def test_lora_mistral(lora_mistral, response_snapshot): response = await lora_mistral.generate( - "Test request", max_new_tokens=10, decoder_input_details=True + "What is Deep Learning?", max_new_tokens=10, decoder_input_details=True + ) + assert ( + response.generated_text == "\n\nDeep learning is a subset of machine learning" ) assert response.details.generated_tokens == 10 + assert response == response_snapshot classification_prompt = """You are given the title and the body of an article below. Please determine the type of the article.\n### Title: Great White Whale\n\n### Body: Great White Whale is the debut album by the Canadian rock band Secret and Whisper. The album was in the works for about a year and was released on February 12 2008. A music video was shot in Pittsburgh for the album's first single XOXOXO. 
The album reached number 17 on iTunes's top 100 albums in its first week on sale.\n\n### Article Type:""" @@ -38,7 +41,6 @@ classification_prompt = """You are given the title and the body of an article be async def test_lora_mistral_without_adapter(lora_mistral, response_snapshot): response = requests.post( f"{lora_mistral.base_url}/generate", - headers=lora_mistral.headers, json={ "inputs": classification_prompt, "parameters": { @@ -62,7 +64,6 @@ async def test_lora_mistral_without_adapter(lora_mistral, response_snapshot): async def test_lora_mistral_with_dbpedia_adapter(lora_mistral, response_snapshot): response = requests.post( f"{lora_mistral.base_url}/generate", - headers=lora_mistral.headers, json={ "inputs": classification_prompt, "parameters": { @@ -84,13 +85,11 @@ async def test_lora_mistral_with_dbpedia_adapter(lora_mistral, response_snapshot async def test_lora_mistral_with_customer_support_adapter( lora_mistral, response_snapshot ): - print(lora_mistral.base_url) - print(lora_mistral.headers) + prompt = """Consider the case of a customer contacting the support center.\nThe term "task type" refers to the reason for why the customer contacted support.\n\n### The possible task types are: ### \n- replace card\n- transfer money\n- check balance\n- order checks\n- pay bill\n- reset password\n- schedule appointment\n- get branch hours\n- none of the above\n\nSummarize the issue/question/reason that drove the customer to contact support:\n\n### Transcript: [noise] [noise] [noise] [noise] hello hello hi i'm sorry this this call uh hello this is harper valley national bank my name is dawn how can i help you today hi oh okay my name is jennifer brown and i need to check my account balance if i could [noise] [noise] [noise] [noise] what account would you like to check um [noise] uhm my savings account please [noise] [noise] oh but the way that you're doing one moment hello yeah one moment uh huh no problem [noise] your account balance is eighty two dollars is there 
anything else i can help you with no i don't think so thank you so much you were very helpful thank you have a good day bye bye [noise] you too \n\n### Task Type:\n\ntest_transcript = """ response = requests.post( f"{lora_mistral.base_url}/generate", - headers=lora_mistral.headers, json={ - "inputs": "What are 3 unique words that describe you?", + "inputs": prompt, "parameters": { "max_new_tokens": 40, "adapter_id": "predibase/customer_support", @@ -101,26 +100,20 @@ async def test_lora_mistral_with_customer_support_adapter( assert response.status_code == 200 data = response.json() - assert ( - data["generated_text"] - == "\n\nI’m not sure if I can come up with 3 unique words that describe me, but I’ll try.\n\n1. Creative\n2. Funny\n3." - ) + from text_generation.types import Response + + print(data) + response = Response(**data) + assert data["generated_text"] == " check balance" assert data == response_snapshot - -@pytest.mark.asyncio -@pytest.mark.private -async def test_lora_mistral_without_customer_support_adapter( - lora_mistral, response_snapshot -): response = requests.post( f"{lora_mistral.base_url}/generate", - headers=lora_mistral.headers, json={ - "inputs": "What are 3 unique words that describe you?", + "inputs": prompt, "parameters": { "max_new_tokens": 40, - "details": True, + # "adapter_id": "predibase/customer_support", }, }, ) @@ -129,6 +122,5 @@ async def test_lora_mistral_without_customer_support_adapter( data = response.json() assert ( data["generated_text"] - == "\n\nI’m a very passionate person. I’m very driven. 
I’m very determined.\n\nWhat is your favorite thing about being a teacher?\n\nI love the fact" + == "\n\n### Transcript: [noise] [noise] [noise] [noise] hello hello hi i'm sorry this this call uh hello this is" ) - assert data == response_snapshot diff --git a/launcher/src/main.rs b/launcher/src/main.rs index 2cdccfe0..a3da0b49 100644 --- a/launcher/src/main.rs +++ b/launcher/src/main.rs @@ -1686,8 +1686,8 @@ fn main() -> Result<(), LauncherError> { tracing::warn!("Bitsandbytes is deprecated, use `eetq` instead, which provides better latencies overall and is drop-in in most cases."); } let quantize = args.quantize.or(quantize); - let cuda_graphs = match (&args.cuda_graphs, &quantize) { - (Some(cuda_graphs), _) => cuda_graphs.iter().cloned().filter(|&c| c > 0).collect(), + let cuda_graphs = match (&args.cuda_graphs, &quantize, &args.lora_adapters) { + (Some(cuda_graphs), _, _) => cuda_graphs.iter().cloned().filter(|&c| c > 0).collect(), #[allow(deprecated)] ( None, @@ -1696,14 +1696,19 @@ fn main() -> Result<(), LauncherError> { | Quantization::BitsandbytesNf4 | Quantization::BitsandbytesFp4, ), + _, ) => { tracing::warn!("Bitsandbytes doesn't work with cuda graphs, deactivating them"); vec![] } - (None, Some(Quantization::Exl2)) => { + (None, Some(Quantization::Exl2), _) => { tracing::warn!("Exl2 doesn't work with cuda graphs, deactivating them"); vec![] } + (None, _, Some(_lora_adapter)) => { + tracing::warn!("Lora adapters do not work with cuda graphs, deactivating them"); + vec![] + } _ => { let cuda_graphs = vec![1, 2, 4, 8, 16, 32]; tracing::info!("Using default cuda graphs {cuda_graphs:?}");