diff --git a/integration-tests/conftest.py b/integration-tests/conftest.py index 8f59d75a..0b2fabf5 100644 --- a/integration-tests/conftest.py +++ b/integration-tests/conftest.py @@ -232,9 +232,9 @@ def launcher(event_loop): if num_shard is not None: args.extend(["--num-shard", str(num_shard)]) - if quantize: + if quantize is not None: args.append("--quantize") - args.append("bitsandbytes") + args.append(quantize) if trust_remote_code: args.append("--trust-remote-code") diff --git a/integration-tests/models/__snapshots__/test_flash_llama_gptq/test_flash_llama_gptq.json b/integration-tests/models/__snapshots__/test_flash_llama_gptq/test_flash_llama_gptq.json new file mode 100644 index 00000000..fe632aaf --- /dev/null +++ b/integration-tests/models/__snapshots__/test_flash_llama_gptq/test_flash_llama_gptq.json @@ -0,0 +1,103 @@ +{ + "details": { + "best_of_sequences": null, + "finish_reason": "length", + "generated_tokens": 10, + "prefill": [ + { + "id": 1, + "logprob": null, + "text": "" + }, + { + "id": 1724, + "logprob": -9.953125, + "text": "What" + }, + { + "id": 338, + "logprob": -1.4121094, + "text": "is" + }, + { + "id": 6483, + "logprob": -9.9765625, + "text": "deep" + }, + { + "id": 6509, + "logprob": -1.6767578, + "text": "learning" + }, + { + "id": 1577, + "logprob": -4.5976562, + "text": "?" + } + ], + "seed": null, + "tokens": [ + { + "id": 13, + "logprob": -0.21813965, + "special": false, + "text": "\n" + }, + { + "id": 2772, + "logprob": -1.4130859, + "special": false, + "text": "De" + }, + { + "id": 1022, + "logprob": -0.0028419495, + "special": false, + "text": "ep" + }, + { + "id": 6509, + "logprob": -0.3244629, + "special": false, + "text": " learning" + }, + { + "id": 338, + "logprob": -0.25439453, + "special": false, + "text": " is" + }, + { + "id": 263, + "logprob": -0.43774414, + "special": false, + "text": " a" + }, + { + "id": 4933, + "logprob": -1.8105469, + "special": false, + "text": " machine" + }, + { + "id": 6509, + "logprob": -0.07116699, + "special": false, + "text": " learning" + }, + { + "id": 11043, + "logprob": -0.87158203, + "special": false, + "text": " technique" + }, + { + "id": 393, + "logprob": -0.91015625, + "special": false, + "text": " that" + } + ] + }, + "generated_text": "\nDeep learning is a machine learning technique that" +} diff --git a/integration-tests/models/__snapshots__/test_flash_llama_gptq/test_flash_llama_gptq_all_params.json b/integration-tests/models/__snapshots__/test_flash_llama_gptq/test_flash_llama_gptq_all_params.json new file mode 100644 index 00000000..2b5d4257 --- /dev/null +++ b/integration-tests/models/__snapshots__/test_flash_llama_gptq/test_flash_llama_gptq_all_params.json @@ -0,0 +1,98 @@ +{ + "details": { + "best_of_sequences": null, + "finish_reason": "length", + "generated_tokens": 10, + "prefill": [ + { + "id": 1, + "logprob": null, + "text": "" + }, + { + "id": 338, + "logprob": -10.8046875, + "text": "is" + }, + { + "id": 6483, + "logprob": -12.6640625, + "text": "deep" + }, + { + "id": 6509, + "logprob": -3.3398438, + "text": "learning" + }, + { + "id": 1577, + "logprob": -8.3828125, + "text": "?" 
+ } + ], + "seed": 0, + "tokens": [ + { + "id": 13, + "logprob": 0.0, + "special": false, + "text": "\n" + }, + { + "id": 4013, + "logprob": -2.6992188, + "special": false, + "text": "This" + }, + { + "id": 1139, + "logprob": -0.35668945, + "special": false, + "text": " question" + }, + { + "id": 756, + "logprob": -0.08251953, + "special": false, + "text": " has" + }, + { + "id": 1063, + "logprob": -0.39697266, + "special": false, + "text": " been" + }, + { + "id": 4433, + "logprob": 0.0, + "special": false, + "text": " asked" + }, + { + "id": 1784, + "logprob": -0.9248047, + "special": false, + "text": " many" + }, + { + "id": 3064, + "logprob": 0.0, + "special": false, + "text": " times" + }, + { + "id": 1434, + "logprob": -0.90625, + "special": false, + "text": " before" + }, + { + "id": 29892, + "logprob": -0.19580078, + "special": false, + "text": "," + } + ] + }, + "generated_text": "What is deep learning ?\nThis question has been asked many times before," +} diff --git a/integration-tests/models/__snapshots__/test_flash_llama_gptq/test_flash_llama_gptq_load.json b/integration-tests/models/__snapshots__/test_flash_llama_gptq/test_flash_llama_gptq_load.json new file mode 100644 index 00000000..36c040fe --- /dev/null +++ b/integration-tests/models/__snapshots__/test_flash_llama_gptq/test_flash_llama_gptq_load.json @@ -0,0 +1,414 @@ +[ + { + "details": { + "best_of_sequences": null, + "finish_reason": "length", + "generated_tokens": 10, + "prefill": [ + { + "id": 1, + "logprob": null, + "text": "" + }, + { + "id": 1724, + "logprob": -9.953125, + "text": "What" + }, + { + "id": 338, + "logprob": -1.4121094, + "text": "is" + }, + { + "id": 6483, + "logprob": -9.9765625, + "text": "deep" + }, + { + "id": 6509, + "logprob": -1.6767578, + "text": "learning" + }, + { + "id": 1577, + "logprob": -4.5976562, + "text": "?" + } + ], + "seed": null, + "tokens": [ + { + "id": 13, + "logprob": -0.21813965, + "special": false, + "text": "\n" + }, + { + "id": 2772, + "logprob": -1.4189453, + "special": false, + "text": "De" + }, + { + "id": 1022, + "logprob": -0.0028419495, + "special": false, + "text": "ep" + }, + { + "id": 6509, + "logprob": -0.3244629, + "special": false, + "text": " learning" + }, + { + "id": 338, + "logprob": -0.25439453, + "special": false, + "text": " is" + }, + { + "id": 263, + "logprob": -0.4375, + "special": false, + "text": " a" + }, + { + "id": 4933, + "logprob": -1.8105469, + "special": false, + "text": " machine" + }, + { + "id": 6509, + "logprob": -0.07116699, + "special": false, + "text": " learning" + }, + { + "id": 11043, + "logprob": -0.87158203, + "special": false, + "text": " technique" + }, + { + "id": 393, + "logprob": -0.91015625, + "special": false, + "text": " that" + } + ] + }, + "generated_text": "\nDeep learning is a machine learning technique that" + }, + { + "details": { + "best_of_sequences": null, + "finish_reason": "length", + "generated_tokens": 10, + "prefill": [ + { + "id": 1, + "logprob": null, + "text": "" + }, + { + "id": 1724, + "logprob": -9.953125, + "text": "What" + }, + { + "id": 338, + "logprob": -1.4121094, + "text": "is" + }, + { + "id": 6483, + "logprob": -9.9765625, + "text": "deep" + }, + { + "id": 6509, + "logprob": -1.6767578, + "text": "learning" + }, + { + "id": 1577, + "logprob": -4.5976562, + "text": "?" 
+ } + ], + "seed": null, + "tokens": [ + { + "id": 13, + "logprob": -0.21813965, + "special": false, + "text": "\n" + }, + { + "id": 2772, + "logprob": -1.4189453, + "special": false, + "text": "De" + }, + { + "id": 1022, + "logprob": -0.0028419495, + "special": false, + "text": "ep" + }, + { + "id": 6509, + "logprob": -0.3244629, + "special": false, + "text": " learning" + }, + { + "id": 338, + "logprob": -0.25439453, + "special": false, + "text": " is" + }, + { + "id": 263, + "logprob": -0.4375, + "special": false, + "text": " a" + }, + { + "id": 4933, + "logprob": -1.8105469, + "special": false, + "text": " machine" + }, + { + "id": 6509, + "logprob": -0.07116699, + "special": false, + "text": " learning" + }, + { + "id": 11043, + "logprob": -0.87158203, + "special": false, + "text": " technique" + }, + { + "id": 393, + "logprob": -0.91015625, + "special": false, + "text": " that" + } + ] + }, + "generated_text": "\nDeep learning is a machine learning technique that" + }, + { + "details": { + "best_of_sequences": null, + "finish_reason": "length", + "generated_tokens": 10, + "prefill": [ + { + "id": 1, + "logprob": null, + "text": "" + }, + { + "id": 1724, + "logprob": -9.953125, + "text": "What" + }, + { + "id": 338, + "logprob": -1.4121094, + "text": "is" + }, + { + "id": 6483, + "logprob": -9.9765625, + "text": "deep" + }, + { + "id": 6509, + "logprob": -1.6767578, + "text": "learning" + }, + { + "id": 1577, + "logprob": -4.5976562, + "text": "?" + } + ], + "seed": null, + "tokens": [ + { + "id": 13, + "logprob": -0.21813965, + "special": false, + "text": "\n" + }, + { + "id": 2772, + "logprob": -1.4189453, + "special": false, + "text": "De" + }, + { + "id": 1022, + "logprob": -0.0028419495, + "special": false, + "text": "ep" + }, + { + "id": 6509, + "logprob": -0.3244629, + "special": false, + "text": " learning" + }, + { + "id": 338, + "logprob": -0.25439453, + "special": false, + "text": " is" + }, + { + "id": 263, + "logprob": -0.4375, + "special": false, + "text": " a" + }, + { + "id": 4933, + "logprob": -1.8105469, + "special": false, + "text": " machine" + }, + { + "id": 6509, + "logprob": -0.07116699, + "special": false, + "text": " learning" + }, + { + "id": 11043, + "logprob": -0.87158203, + "special": false, + "text": " technique" + }, + { + "id": 393, + "logprob": -0.91015625, + "special": false, + "text": " that" + } + ] + }, + "generated_text": "\nDeep learning is a machine learning technique that" + }, + { + "details": { + "best_of_sequences": null, + "finish_reason": "length", + "generated_tokens": 10, + "prefill": [ + { + "id": 1, + "logprob": null, + "text": "" + }, + { + "id": 1724, + "logprob": -9.953125, + "text": "What" + }, + { + "id": 338, + "logprob": -1.4121094, + "text": "is" + }, + { + "id": 6483, + "logprob": -9.9765625, + "text": "deep" + }, + { + "id": 6509, + "logprob": -1.6767578, + "text": "learning" + }, + { + "id": 1577, + "logprob": -4.5976562, + "text": "?" 
+ } + ], + "seed": null, + "tokens": [ + { + "id": 13, + "logprob": -0.21813965, + "special": false, + "text": "\n" + }, + { + "id": 2772, + "logprob": -1.4189453, + "special": false, + "text": "De" + }, + { + "id": 1022, + "logprob": -0.0028419495, + "special": false, + "text": "ep" + }, + { + "id": 6509, + "logprob": -0.3244629, + "special": false, + "text": " learning" + }, + { + "id": 338, + "logprob": -0.25439453, + "special": false, + "text": " is" + }, + { + "id": 263, + "logprob": -0.4375, + "special": false, + "text": " a" + }, + { + "id": 4933, + "logprob": -1.8105469, + "special": false, + "text": " machine" + }, + { + "id": 6509, + "logprob": -0.07116699, + "special": false, + "text": " learning" + }, + { + "id": 11043, + "logprob": -0.87158203, + "special": false, + "text": " technique" + }, + { + "id": 393, + "logprob": -0.91015625, + "special": false, + "text": " that" + } + ] + }, + "generated_text": "\nDeep learning is a machine learning technique that" + } +] diff --git a/integration-tests/models/test_flash_llama_gptq.py b/integration-tests/models/test_flash_llama_gptq.py new file mode 100644 index 00000000..577b94d9 --- /dev/null +++ b/integration-tests/models/test_flash_llama_gptq.py @@ -0,0 +1,58 @@ +import pytest + + +@pytest.fixture(scope="module") +def flash_llama_gptq_handle(launcher): + with launcher("huggingface/llama-7b-gptq", num_shard=2, quantize="gptq") as handle: + yield handle + + +@pytest.fixture(scope="module") +async def flash_llama_gptq(flash_llama_gptq_handle): + await flash_llama_gptq_handle.health(300) + return flash_llama_gptq_handle.client + + +@pytest.mark.asyncio +@pytest.mark.private +async def test_flash_llama_gptq(flash_llama_gptq, response_snapshot): + response = await flash_llama_gptq.generate( + "What is deep learning ?", max_new_tokens=10, decoder_input_details=True + ) + + assert response.details.generated_tokens == 10 + assert response == response_snapshot + + +@pytest.mark.asyncio +@pytest.mark.private +async def test_flash_llama_gptq_all_params(flash_llama_gptq, response_snapshot): + response = await flash_llama_gptq.generate( + "What is deep learning ?", + max_new_tokens=10, + repetition_penalty=1.2, + return_full_text=True, + stop_sequences=["test"], + temperature=0.5, + top_p=0.9, + top_k=10, + truncate=5, + typical_p=0.9, + watermark=True, + decoder_input_details=True, + seed=0, + ) + + assert response.details.generated_tokens == 10 + assert response == response_snapshot + + +@pytest.mark.asyncio +@pytest.mark.private +async def test_flash_llama_gptq_load(flash_llama_gptq, generate_load, response_snapshot): + responses = await generate_load(flash_llama_gptq, "What is deep learning ?", max_new_tokens=10, n=4) + + assert len(responses) == 4 + assert all([r.generated_text == responses[0].generated_text for r in responses]) + + assert responses == response_snapshot
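
For reference, a minimal sketch (not part of the diff) of the behavior changed by the conftest.py hunk above: the quantization method handed to the launcher fixture is now forwarded verbatim to --quantize instead of always falling back to "bitsandbytes", which is what lets the new test request quantize="gptq". The standalone helper name, the launcher binary name, and the __main__ usage below are illustrative assumptions, not code from the repository.

from typing import List, Optional


def build_launcher_args(
    model_id: str,
    num_shard: Optional[int] = None,
    quantize: Optional[str] = None,
    trust_remote_code: bool = False,
) -> List[str]:
    # Mirrors the argument-building logic touched in integration-tests/conftest.py.
    args = ["text-generation-launcher", "--model-id", model_id]
    if num_shard is not None:
        args.extend(["--num-shard", str(num_shard)])
    # The quantization method ("gptq", "bitsandbytes", ...) is passed through as-is.
    if quantize is not None:
        args.append("--quantize")
        args.append(quantize)
    if trust_remote_code:
        args.append("--trust-remote-code")
    return args


if __name__ == "__main__":
    # Roughly what the GPTQ fixture requests:
    #   launcher("huggingface/llama-7b-gptq", num_shard=2, quantize="gptq")
    print(build_launcher_args("huggingface/llama-7b-gptq", num_shard=2, quantize="gptq"))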