From b4654a36dc9622cba12351118050b2d5cdb604c3 Mon Sep 17 00:00:00 2001 From: Nicolas Patry Date: Fri, 13 Sep 2024 21:48:12 +0200 Subject: [PATCH] Fixing up the tests ? --- .../test_lora_mistral/test_lora_mistral.json | 104 ++++++++ ...mistral_with_customer_support_adapter.json | 250 ++--------------- ...est_lora_mistral_with_dbpedia_adapter.json | 10 +- ...tral_without_customer_support_adapter.json | 251 ------------------ integration-tests/models/test_lora_mistral.py | 38 ++- launcher/src/main.rs | 11 +- 6 files changed, 149 insertions(+), 515 deletions(-) create mode 100644 integration-tests/models/__snapshots__/test_lora_mistral/test_lora_mistral.json delete mode 100644 integration-tests/models/__snapshots__/test_lora_mistral/test_lora_mistral_without_customer_support_adapter.json diff --git a/integration-tests/models/__snapshots__/test_lora_mistral/test_lora_mistral.json b/integration-tests/models/__snapshots__/test_lora_mistral/test_lora_mistral.json new file mode 100644 index 00000000..c7ac0fff --- /dev/null +++ b/integration-tests/models/__snapshots__/test_lora_mistral/test_lora_mistral.json @@ -0,0 +1,104 @@ +{ + "details": { + "best_of_sequences": null, + "finish_reason": "length", + "generated_tokens": 10, + "prefill": [ + { + "id": 1, + "logprob": null, + "text": "" + }, + { + "id": 1824, + "logprob": -6.0625, + "text": "What" + }, + { + "id": 349, + "logprob": -1.4746094, + "text": "is" + }, + { + "id": 14972, + "logprob": -8.7578125, + "text": "Deep" + }, + { + "id": 17504, + "logprob": -2.125, + "text": "Learning" + }, + { + "id": 28804, + "logprob": -0.32128906, + "text": "?" 
+ } + ], + "seed": null, + "tokens": [ + { + "id": 13, + "logprob": -0.39331055, + "special": false, + "text": "\n" + }, + { + "id": 13, + "logprob": -0.20532227, + "special": false, + "text": "\n" + }, + { + "id": 23229, + "logprob": -0.5996094, + "special": false, + "text": "Deep" + }, + { + "id": 5168, + "logprob": -0.67578125, + "special": false, + "text": " learning" + }, + { + "id": 349, + "logprob": -0.24951172, + "special": false, + "text": " is" + }, + { + "id": 264, + "logprob": -0.38110352, + "special": false, + "text": " a" + }, + { + "id": 19804, + "logprob": -1.3125, + "special": false, + "text": " subset" + }, + { + "id": 302, + "logprob": -0.009246826, + "special": false, + "text": " of" + }, + { + "id": 5599, + "logprob": -0.13928223, + "special": false, + "text": " machine" + }, + { + "id": 5168, + "logprob": -0.004638672, + "special": false, + "text": " learning" + } + ], + "top_tokens": null + }, + "generated_text": "\n\nDeep learning is a subset of machine learning" +} diff --git a/integration-tests/models/__snapshots__/test_lora_mistral/test_lora_mistral_with_customer_support_adapter.json b/integration-tests/models/__snapshots__/test_lora_mistral/test_lora_mistral_with_customer_support_adapter.json index dfdd2cc3..9c4cc84e 100644 --- a/integration-tests/models/__snapshots__/test_lora_mistral/test_lora_mistral_with_customer_support_adapter.json +++ b/integration-tests/models/__snapshots__/test_lora_mistral/test_lora_mistral_with_customer_support_adapter.json @@ -1,251 +1,35 @@ { "details": { - "finish_reason": "length", - "generated_tokens": 40, + "finish_reason": "eos_token", + "generated_tokens": 4, "prefill": [], "seed": null, "tokens": [ { - "id": 13, - "logprob": -0.27416992, - "special": false, - "text": "\n" + "id": 1, + "logprob": -0.0008506775, + "special": true, + "text": "" }, { - "id": 13, - "logprob": -0.17016602, + "id": 1877, + "logprob": -0.01953125, "special": false, - "text": "\n" + "text": " check" }, { - "id": 28737, - 
"logprob": -2.7109375, + "id": 7873, + "logprob": -5.9247017e-05, "special": false, - "text": "I" + "text": " balance" }, { - "id": 28809, - "logprob": -1.5, - "special": false, - "text": "’" - }, - { - "id": 28719, - "logprob": -0.34204102, - "special": false, - "text": "m" - }, - { - "id": 459, - "logprob": -1.6914062, - "special": false, - "text": " not" - }, - { - "id": 1864, - "logprob": -0.69140625, - "special": false, - "text": " sure" - }, - { - "id": 513, - "logprob": -1.6171875, - "special": false, - "text": " if" - }, - { - "id": 315, - "logprob": -1.3837891, - "special": false, - "text": " I" - }, - { - "id": 541, - "logprob": -1.2226562, - "special": false, - "text": " can" - }, - { - "id": 1567, - "logprob": -1.8652344, - "special": false, - "text": " come" - }, - { - "id": 582, - "logprob": -0.0070228577, - "special": false, - "text": " up" - }, - { - "id": 395, - "logprob": -0.0054092407, - "special": false, - "text": " with" - }, - { - "id": 28705, - "logprob": -0.62597656, - "special": false, - "text": " " - }, - { - "id": 28770, - "logprob": -0.0035572052, - "special": false, - "text": "3" - }, - { - "id": 4842, - "logprob": -0.93603516, - "special": false, - "text": " unique" - }, - { - "id": 3085, - "logprob": -0.028411865, - "special": false, - "text": " words" - }, - { - "id": 369, - "logprob": -1.0400391, - "special": false, - "text": " that" - }, - { - "id": 6685, - "logprob": -0.09710693, - "special": false, - "text": " describe" - }, - { - "id": 528, - "logprob": -0.066467285, - "special": false, - "text": " me" - }, - { - "id": 28725, - "logprob": -1.0722656, - "special": false, - "text": "," - }, - { - "id": 562, - "logprob": -0.33422852, - "special": false, - "text": " but" - }, - { - "id": 315, - "logprob": -0.5136719, - "special": false, - "text": " I" - }, - { - "id": 28809, - "logprob": -0.8989258, - "special": false, - "text": "’" - }, - { - "id": 584, - "logprob": -0.2076416, - "special": false, - "text": "ll" - }, - { - "id": 
1464, - "logprob": -0.8808594, - "special": false, - "text": " try" - }, - { - "id": 28723, - "logprob": -0.88427734, - "special": false, - "text": "." - }, - { - "id": 13, - "logprob": -0.91064453, - "special": false, - "text": "\n" - }, - { - "id": 13, - "logprob": -0.08105469, - "special": false, - "text": "\n" - }, - { - "id": 28740, - "logprob": -1.8486328, - "special": false, - "text": "1" - }, - { - "id": 28723, - "logprob": -0.111572266, - "special": false, - "text": "." - }, - { - "id": 23626, - "logprob": -3.15625, - "special": false, - "text": " Creative" - }, - { - "id": 13, - "logprob": -0.9194336, - "special": false, - "text": "\n" - }, - { - "id": 28750, - "logprob": -0.24841309, - "special": false, - "text": "2" - }, - { - "id": 28723, - "logprob": -9.393692e-05, - "special": false, - "text": "." - }, - { - "id": 6785, - "logprob": -3.1386719, - "special": false, - "text": " Fun" - }, - { - "id": 1780, - "logprob": -0.53564453, - "special": false, - "text": "ny" - }, - { - "id": 13, - "logprob": -0.09033203, - "special": false, - "text": "\n" - }, - { - "id": 28770, - "logprob": -0.00466156, - "special": false, - "text": "3" - }, - { - "id": 28723, - "logprob": -0.00016450882, - "special": false, - "text": "." + "id": 2, + "logprob": -6.592274e-05, + "special": true, + "text": "" } ] }, - "generated_text": "\n\nI’m not sure if I can come up with 3 unique words that describe me, but I’ll try.\n\n1. Creative\n2. Funny\n3." 
+ "generated_text": " check balance" } diff --git a/integration-tests/models/__snapshots__/test_lora_mistral/test_lora_mistral_with_dbpedia_adapter.json b/integration-tests/models/__snapshots__/test_lora_mistral/test_lora_mistral_with_dbpedia_adapter.json index 91eb5edf..e4777137 100644 --- a/integration-tests/models/__snapshots__/test_lora_mistral/test_lora_mistral_with_dbpedia_adapter.json +++ b/integration-tests/models/__snapshots__/test_lora_mistral/test_lora_mistral_with_dbpedia_adapter.json @@ -13,31 +13,31 @@ }, { "id": 28705, - "logprob": -0.0016384125, + "logprob": -0.001625061, "special": false, "text": " " }, { "id": 1, - "logprob": -1.4931641, + "logprob": -1.4921875, "special": true, "text": "" }, { "id": 28705, - "logprob": -0.00075769424, + "logprob": -0.0007457733, "special": false, "text": " " }, { "id": 28740, - "logprob": -0.25024414, + "logprob": -0.25048828, "special": false, "text": "1" }, { "id": 28740, - "logprob": -0.2631836, + "logprob": -0.26367188, "special": false, "text": "1" }, diff --git a/integration-tests/models/__snapshots__/test_lora_mistral/test_lora_mistral_without_customer_support_adapter.json b/integration-tests/models/__snapshots__/test_lora_mistral/test_lora_mistral_without_customer_support_adapter.json deleted file mode 100644 index 8c00dee7..00000000 --- a/integration-tests/models/__snapshots__/test_lora_mistral/test_lora_mistral_without_customer_support_adapter.json +++ /dev/null @@ -1,251 +0,0 @@ -{ - "details": { - "finish_reason": "length", - "generated_tokens": 40, - "prefill": [], - "seed": null, - "tokens": [ - { - "id": 13, - "logprob": -0.31347656, - "special": false, - "text": "\n" - }, - { - "id": 13, - "logprob": -0.27441406, - "special": false, - "text": "\n" - }, - { - "id": 28737, - "logprob": -2.2285156, - "special": false, - "text": "I" - }, - { - "id": 28809, - "logprob": -1.4677734, - "special": false, - "text": "’" - }, - { - "id": 28719, - "logprob": -0.31762695, - "special": false, - "text": "m" - }, 
- { - "id": 264, - "logprob": -1.6865234, - "special": false, - "text": " a" - }, - { - "id": 1215, - "logprob": -3.2695312, - "special": false, - "text": " very" - }, - { - "id": 20640, - "logprob": -3.1230469, - "special": false, - "text": " passionate" - }, - { - "id": 1338, - "logprob": -0.48339844, - "special": false, - "text": " person" - }, - { - "id": 28723, - "logprob": -0.9970703, - "special": false, - "text": "." - }, - { - "id": 315, - "logprob": -0.5498047, - "special": false, - "text": " I" - }, - { - "id": 28809, - "logprob": -1.1923828, - "special": false, - "text": "’" - }, - { - "id": 28719, - "logprob": -0.080444336, - "special": false, - "text": "m" - }, - { - "id": 1215, - "logprob": -1.8271484, - "special": false, - "text": " very" - }, - { - "id": 12215, - "logprob": -2.8847656, - "special": false, - "text": " driven" - }, - { - "id": 28723, - "logprob": -1.0927734, - "special": false, - "text": "." - }, - { - "id": 315, - "logprob": -0.4584961, - "special": false, - "text": " I" - }, - { - "id": 28809, - "logprob": -0.5019531, - "special": false, - "text": "’" - }, - { - "id": 28719, - "logprob": -0.030715942, - "special": false, - "text": "m" - }, - { - "id": 1215, - "logprob": -0.96972656, - "special": false, - "text": " very" - }, - { - "id": 7798, - "logprob": -2.8847656, - "special": false, - "text": " determined" - }, - { - "id": 28723, - "logprob": -0.27319336, - "special": false, - "text": "." 
- }, - { - "id": 13, - "logprob": -0.56396484, - "special": false, - "text": "\n" - }, - { - "id": 13, - "logprob": -0.011016846, - "special": false, - "text": "\n" - }, - { - "id": 3195, - "logprob": -0.7163086, - "special": false, - "text": "What" - }, - { - "id": 349, - "logprob": -1.1611328, - "special": false, - "text": " is" - }, - { - "id": 574, - "logprob": -0.515625, - "special": false, - "text": " your" - }, - { - "id": 6656, - "logprob": -1.0253906, - "special": false, - "text": " favorite" - }, - { - "id": 1970, - "logprob": -2.1738281, - "special": false, - "text": " thing" - }, - { - "id": 684, - "logprob": -0.48364258, - "special": false, - "text": " about" - }, - { - "id": 1250, - "logprob": -1.8876953, - "special": false, - "text": " being" - }, - { - "id": 264, - "logprob": -0.41967773, - "special": false, - "text": " a" - }, - { - "id": 8626, - "logprob": -2.9160156, - "special": false, - "text": " teacher" - }, - { - "id": 28804, - "logprob": -0.11920166, - "special": false, - "text": "?" - }, - { - "id": 13, - "logprob": -0.023727417, - "special": false, - "text": "\n" - }, - { - "id": 13, - "logprob": -0.010848999, - "special": false, - "text": "\n" - }, - { - "id": 28737, - "logprob": -1.0566406, - "special": false, - "text": "I" - }, - { - "id": 2016, - "logprob": -0.7163086, - "special": false, - "text": " love" - }, - { - "id": 272, - "logprob": -1.9169922, - "special": false, - "text": " the" - }, - { - "id": 1639, - "logprob": -2.03125, - "special": false, - "text": " fact" - } - ] - }, - "generated_text": "\n\nI’m a very passionate person. I’m very driven. 
I’m very determined.\n\nWhat is your favorite thing about being a teacher?\n\nI love the fact" -} diff --git a/integration-tests/models/test_lora_mistral.py b/integration-tests/models/test_lora_mistral.py index ccdc1486..ea35fbcb 100644 --- a/integration-tests/models/test_lora_mistral.py +++ b/integration-tests/models/test_lora_mistral.py @@ -10,7 +10,6 @@ def lora_mistral_handle(launcher): "predibase/dbpedia", "predibase/customer_support", ], - cuda_graphs=[0], ) as handle: yield handle @@ -25,9 +24,13 @@ async def lora_mistral(lora_mistral_handle): @pytest.mark.private async def test_lora_mistral(lora_mistral, response_snapshot): response = await lora_mistral.generate( - "Test request", max_new_tokens=10, decoder_input_details=True + "What is Deep Learning?", max_new_tokens=10, decoder_input_details=True + ) + assert ( + response.generated_text == "\n\nDeep learning is a subset of machine learning" ) assert response.details.generated_tokens == 10 + assert response == response_snapshot classification_prompt = """You are given the title and the body of an article below. Please determine the type of the article.\n### Title: Great White Whale\n\n### Body: Great White Whale is the debut album by the Canadian rock band Secret and Whisper. The album was in the works for about a year and was released on February 12 2008. A music video was shot in Pittsburgh for the album's first single XOXOXO. 
The album reached number 17 on iTunes's top 100 albums in its first week on sale.\n\n### Article Type:""" @@ -38,7 +41,6 @@ classification_prompt = """You are given the title and the body of an article be async def test_lora_mistral_without_adapter(lora_mistral, response_snapshot): response = requests.post( f"{lora_mistral.base_url}/generate", - headers=lora_mistral.headers, json={ "inputs": classification_prompt, "parameters": { @@ -62,7 +64,6 @@ async def test_lora_mistral_without_adapter(lora_mistral, response_snapshot): async def test_lora_mistral_with_dbpedia_adapter(lora_mistral, response_snapshot): response = requests.post( f"{lora_mistral.base_url}/generate", - headers=lora_mistral.headers, json={ "inputs": classification_prompt, "parameters": { @@ -84,13 +85,11 @@ async def test_lora_mistral_with_dbpedia_adapter(lora_mistral, response_snapshot async def test_lora_mistral_with_customer_support_adapter( lora_mistral, response_snapshot ): - print(lora_mistral.base_url) - print(lora_mistral.headers) + prompt = """Consider the case of a customer contacting the support center.\nThe term "task type" refers to the reason for why the customer contacted support.\n\n### The possible task types are: ### \n- replace card\n- transfer money\n- check balance\n- order checks\n- pay bill\n- reset password\n- schedule appointment\n- get branch hours\n- none of the above\n\nSummarize the issue/question/reason that drove the customer to contact support:\n\n### Transcript: [noise] [noise] [noise] [noise] hello hello hi i'm sorry this this call uh hello this is harper valley national bank my name is dawn how can i help you today hi oh okay my name is jennifer brown and i need to check my account balance if i could [noise] [noise] [noise] [noise] what account would you like to check um [noise] uhm my savings account please [noise] [noise] oh but the way that you're doing one moment hello yeah one moment uh huh no problem [noise] your account balance is eighty two dollars is there 
anything else i can help you with no i don't think so thank you so much you were very helpful thank you have a good day bye bye [noise] you too \n\n### Task Type:\n\ntest_transcript = """ response = requests.post( f"{lora_mistral.base_url}/generate", - headers=lora_mistral.headers, json={ - "inputs": "What are 3 unique words that describe you?", + "inputs": prompt, "parameters": { "max_new_tokens": 40, "adapter_id": "predibase/customer_support", @@ -101,26 +100,20 @@ async def test_lora_mistral_with_customer_support_adapter( assert response.status_code == 200 data = response.json() - assert ( - data["generated_text"] - == "\n\nI’m not sure if I can come up with 3 unique words that describe me, but I’ll try.\n\n1. Creative\n2. Funny\n3." - ) + from text_generation.types import Response + + print(data) + response = Response(**data) + assert data["generated_text"] == " check balance" assert data == response_snapshot - -@pytest.mark.asyncio -@pytest.mark.private -async def test_lora_mistral_without_customer_support_adapter( - lora_mistral, response_snapshot -): response = requests.post( f"{lora_mistral.base_url}/generate", - headers=lora_mistral.headers, json={ - "inputs": "What are 3 unique words that describe you?", + "inputs": prompt, "parameters": { "max_new_tokens": 40, - "details": True, + # "adapter_id": "predibase/customer_support", }, }, ) @@ -129,6 +122,5 @@ async def test_lora_mistral_without_customer_support_adapter( data = response.json() assert ( data["generated_text"] - == "\n\nI’m a very passionate person. I’m very driven. 
I’m very determined.\n\nWhat is your favorite thing about being a teacher?\n\nI love the fact"
+        == "\n\n### Transcript: [noise] [noise] [noise] [noise] hello hello hi i'm sorry this this call uh hello this is"
     )
-    assert data == response_snapshot
diff --git a/launcher/src/main.rs b/launcher/src/main.rs
index 2cdccfe0..a3da0b49 100644
--- a/launcher/src/main.rs
+++ b/launcher/src/main.rs
@@ -1686,8 +1686,8 @@ fn main() -> Result<(), LauncherError> {
         tracing::warn!("Bitsandbytes is deprecated, use `eetq` instead, which provides better latencies overall and is drop-in in most cases.");
     }
     let quantize = args.quantize.or(quantize);
-    let cuda_graphs = match (&args.cuda_graphs, &quantize) {
-        (Some(cuda_graphs), _) => cuda_graphs.iter().cloned().filter(|&c| c > 0).collect(),
+    let cuda_graphs = match (&args.cuda_graphs, &quantize, &args.lora_adapters) {
+        (Some(cuda_graphs), _, _) => cuda_graphs.iter().cloned().filter(|&c| c > 0).collect(),
         #[allow(deprecated)]
         (
             None,
@@ -1696,14 +1696,19 @@
                 | Quantization::BitsandbytesNf4
                 | Quantization::BitsandbytesFp4,
             ),
+            _,
         ) => {
             tracing::warn!("Bitsandbytes doesn't work with cuda graphs, deactivating them");
             vec![]
         }
-        (None, Some(Quantization::Exl2)) => {
+        (None, Some(Quantization::Exl2), _) => {
             tracing::warn!("Exl2 doesn't work with cuda graphs, deactivating them");
             vec![]
         }
+        (None, _, Some(_lora_adapter)) => {
+            tracing::warn!("Lora adapters do not work with cuda graphs, deactivating them");
+            vec![]
+        }
         _ => {
             let cuda_graphs = vec![1, 2, 4, 8, 16, 32];
             tracing::info!("Using default cuda graphs {cuda_graphs:?}");