Auto max prefill (#2797)

* Attempt at automatic max batch prefill. * Taking into account number of shards. * Adding more cards. * Adding A100 + H100 * Adding a few more cards. * Logprobs cost too much. * h100 better name, and keep factor of 2 * Damn inflated sparse tflops. * Typo in h100. * Updated the flops calculation (checked with fvcore). * chunking by default. * Fix prefix caching for chat completion since we removed logprobs. * More tests. * Dropping all the prefill logprobs. * Add a flag that enables users to get logprobs back. * Repairing prompt token counting. * Fixing a few tests. * Remove some scaffolding. * Attempting to reduce the issues (workarounds for now).
2024-12-06 10:22:00 +05:30 · 2024-12-06 10:22:00 +05:30 · 5df8059037
parent 8c3669b287
commit 5df8059037
130 changed files with 756 additions and 74544 deletions
--- a/docs/source/reference/launcher.md
+++ b/docs/source/reference/launcher.md
@ -467,6 +467,16 @@ Options:
          [env: PAYLOAD_LIMIT=]
          [default: 2000000]

+```
+## ENABLE_PREFILL_LOGPROBS
+```shell
+      --enable-prefill-logprobs
+          Enables prefill logprobs
+          
+          Logprobs in the prompt are disabled by default because they consume a large amount of VRAM (especially for long prompts). Setting this flag lets users request them again.
+          
+          [env: ENABLE_PREFILL_LOGPROBS=]
+
 ```
 ## HELP
 ```shell
--- a/integration-tests/models/snapshots/test.py
+++ b/integration-tests/models/snapshots/test.py
@ -0,0 +1,22 @@
+import os
+import json
+
+# One-off helper: empty "details.prefill" in every JSON snapshot below ".".
+for root, _dirs, files in os.walk("."):
+    for filename in files:
+        if not filename.endswith(".json"):
+            continue
+        path = os.path.join(root, filename)
+        # Explicit UTF-8: ensure_ascii=False below writes non-ASCII text as-is.
+        with open(path, "r", encoding="utf-8") as f:
+            data = json.load(f)
+        print(path)
+        # "*_load.json" is treated as a list of responses, anything else as one.
+        responses = data if filename.endswith("_load.json") else [data]
+        try:
+            for response in responses:
+                response["details"]["prefill"] = []
+        except (KeyError, IndexError, TypeError) as err:
+            print(f"  prefill not cleared: {err!r}")  # best effort; still rewritten
+        with open(path, "w", encoding="utf-8") as f:
+            json.dump(data, f, indent=2, ensure_ascii=False)
--- a/integration-tests/models/snapshots/test_compressed_tensors_w8a8_int/test_compressed_tensors_w8a8_int.json
+++ b/integration-tests/models/snapshots/test_compressed_tensors_w8a8_int/test_compressed_tensors_w8a8_int.json
@ -3,38 +3,7 @@
    "best_of_sequences": null,
    "finish_reason": "length",
    "generated_tokens": 10,
-    "prefill": [
-      {
-        "id": 128000,
-        "logprob": null,
-        "text": "<|begin_of_text|>"
-      },
-      {
-        "id": 3923,
-        "logprob": -6.3867188,
-        "text": "What"
-      },
-      {
-        "id": 374,
-        "logprob": -1.1318359,
-        "text": " is"
-      },
-      {
-        "id": 5655,
-        "logprob": -9.6875,
-        "text": " deep"
-      },
-      {
-        "id": 6975,
-        "logprob": -1.3007812,
-        "text": " learning"
-      },
-      {
-        "id": 30,
-        "logprob": -2.4902344,
-        "text": "?"
-      }
-    ],
+    "prefill": [],
    "seed": null,
    "tokens": [
      {
--- a/integration-tests/models/snapshots/test_compressed_tensors_w8a8_int/test_compressed_tensors_w8a8_int_all_params.json
+++ b/integration-tests/models/snapshots/test_compressed_tensors_w8a8_int/test_compressed_tensors_w8a8_int_all_params.json
@ -3,33 +3,7 @@
    "best_of_sequences": null,
    "finish_reason": "length",
    "generated_tokens": 10,
-    "prefill": [
-      {
-        "id": 128000,
-        "logprob": null,
-        "text": "<|begin_of_text|>"
-      },
-      {
-        "id": 3923,
-        "logprob": -6.3867188,
-        "text": "What"
-      },
-      {
-        "id": 374,
-        "logprob": -1.1318359,
-        "text": " is"
-      },
-      {
-        "id": 5655,
-        "logprob": -9.6875,
-        "text": " deep"
-      },
-      {
-        "id": 6975,
-        "logprob": -1.3007812,
-        "text": " learning"
-      }
-    ],
+    "prefill": [],
    "seed": 0,
    "tokens": [
      {
--- a/integration-tests/models/snapshots/test_compressed_tensors_w8a8_int/test_compressed_tensors_w8a8_int_load.json
+++ b/integration-tests/models/snapshots/test_compressed_tensors_w8a8_int/test_compressed_tensors_w8a8_int_load.json
@ -4,38 +4,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 128000,
-          "logprob": null,
-          "text": "<|begin_of_text|>"
-        },
-        {
-          "id": 3923,
-          "logprob": -6.3867188,
-          "text": "What"
-        },
-        {
-          "id": 374,
-          "logprob": -1.1318359,
-          "text": " is"
-        },
-        {
-          "id": 5655,
-          "logprob": -9.6875,
-          "text": " deep"
-        },
-        {
-          "id": 6975,
-          "logprob": -1.3007812,
-          "text": " learning"
-        },
-        {
-          "id": 30,
-          "logprob": -2.4902344,
-          "text": "?"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
@ -108,38 +77,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 128000,
-          "logprob": null,
-          "text": "<|begin_of_text|>"
-        },
-        {
-          "id": 3923,
-          "logprob": -6.3867188,
-          "text": "What"
-        },
-        {
-          "id": 374,
-          "logprob": -1.1318359,
-          "text": " is"
-        },
-        {
-          "id": 5655,
-          "logprob": -9.6875,
-          "text": " deep"
-        },
-        {
-          "id": 6975,
-          "logprob": -1.3007812,
-          "text": " learning"
-        },
-        {
-          "id": 30,
-          "logprob": -2.4902344,
-          "text": "?"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
@ -212,38 +150,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 128000,
-          "logprob": null,
-          "text": "<|begin_of_text|>"
-        },
-        {
-          "id": 3923,
-          "logprob": -6.3867188,
-          "text": "What"
-        },
-        {
-          "id": 374,
-          "logprob": -1.1318359,
-          "text": " is"
-        },
-        {
-          "id": 5655,
-          "logprob": -9.6875,
-          "text": " deep"
-        },
-        {
-          "id": 6975,
-          "logprob": -1.3007812,
-          "text": " learning"
-        },
-        {
-          "id": 30,
-          "logprob": -2.4902344,
-          "text": "?"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
@ -316,38 +223,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 128000,
-          "logprob": null,
-          "text": "<|begin_of_text|>"
-        },
-        {
-          "id": 3923,
-          "logprob": -6.3867188,
-          "text": "What"
-        },
-        {
-          "id": 374,
-          "logprob": -1.1318359,
-          "text": " is"
-        },
-        {
-          "id": 5655,
-          "logprob": -9.6875,
-          "text": " deep"
-        },
-        {
-          "id": 6975,
-          "logprob": -1.3007812,
-          "text": " learning"
-        },
-        {
-          "id": 30,
-          "logprob": -2.4902344,
-          "text": "?"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
--- a/integration-tests/models/snapshots/test_compressed_tensors_w8a8_int_dynamic_weight/test_compressed_tensors_w8a8_int_dynamic_weight.json
+++ b/integration-tests/models/snapshots/test_compressed_tensors_w8a8_int_dynamic_weight/test_compressed_tensors_w8a8_int_dynamic_weight.json
@ -3,33 +3,7 @@
    "best_of_sequences": null,
    "finish_reason": "length",
    "generated_tokens": 10,
-    "prefill": [
-      {
-        "id": 3838,
-        "logprob": null,
-        "text": "What"
-      },
-      {
-        "id": 374,
-        "logprob": -8.59375,
-        "text": " is"
-      },
-      {
-        "id": 5538,
-        "logprob": -10.921875,
-        "text": " deep"
-      },
-      {
-        "id": 6832,
-        "logprob": -0.56347656,
-        "text": " learning"
-      },
-      {
-        "id": 30,
-        "logprob": -1.5,
-        "text": "?"
-      }
-    ],
+    "prefill": [],
    "seed": null,
    "tokens": [
      {
--- a/integration-tests/models/snapshots/test_compressed_tensors_w8a8_int_dynamic_weight/test_compressed_tensors_w8a8_int_dynamic_weight_all_params.json
+++ b/integration-tests/models/snapshots/test_compressed_tensors_w8a8_int_dynamic_weight/test_compressed_tensors_w8a8_int_dynamic_weight_all_params.json
@ -3,28 +3,7 @@
    "best_of_sequences": null,
    "finish_reason": "length",
    "generated_tokens": 10,
-    "prefill": [
-      {
-        "id": 3838,
-        "logprob": null,
-        "text": "What"
-      },
-      {
-        "id": 374,
-        "logprob": -8.59375,
-        "text": " is"
-      },
-      {
-        "id": 5538,
-        "logprob": -10.921875,
-        "text": " deep"
-      },
-      {
-        "id": 6832,
-        "logprob": -0.56347656,
-        "text": " learning"
-      }
-    ],
+    "prefill": [],
    "seed": 0,
    "tokens": [
      {
--- a/integration-tests/models/snapshots/test_compressed_tensors_w8a8_int_dynamic_weight/test_compressed_tensors_w8a8_int_dynamic_weight_load.json
+++ b/integration-tests/models/snapshots/test_compressed_tensors_w8a8_int_dynamic_weight/test_compressed_tensors_w8a8_int_dynamic_weight_load.json
@ -4,33 +4,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 3838,
-          "logprob": null,
-          "text": "What"
-        },
-        {
-          "id": 374,
-          "logprob": -8.59375,
-          "text": " is"
-        },
-        {
-          "id": 5538,
-          "logprob": -10.921875,
-          "text": " deep"
-        },
-        {
-          "id": 6832,
-          "logprob": -0.56347656,
-          "text": " learning"
-        },
-        {
-          "id": 30,
-          "logprob": -1.5,
-          "text": "?"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
@ -103,33 +77,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 3838,
-          "logprob": null,
-          "text": "What"
-        },
-        {
-          "id": 374,
-          "logprob": -8.59375,
-          "text": " is"
-        },
-        {
-          "id": 5538,
-          "logprob": -10.921875,
-          "text": " deep"
-        },
-        {
-          "id": 6832,
-          "logprob": -0.56347656,
-          "text": " learning"
-        },
-        {
-          "id": 30,
-          "logprob": -1.5,
-          "text": "?"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
@ -202,33 +150,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 3838,
-          "logprob": null,
-          "text": "What"
-        },
-        {
-          "id": 374,
-          "logprob": -8.59375,
-          "text": " is"
-        },
-        {
-          "id": 5538,
-          "logprob": -10.921875,
-          "text": " deep"
-        },
-        {
-          "id": 6832,
-          "logprob": -0.56347656,
-          "text": " learning"
-        },
-        {
-          "id": 30,
-          "logprob": -1.5,
-          "text": "?"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
@ -301,33 +223,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 3838,
-          "logprob": null,
-          "text": "What"
-        },
-        {
-          "id": 374,
-          "logprob": -8.59375,
-          "text": " is"
-        },
-        {
-          "id": 5538,
-          "logprob": -10.921875,
-          "text": " deep"
-        },
-        {
-          "id": 6832,
-          "logprob": -0.56347656,
-          "text": " learning"
-        },
-        {
-          "id": 30,
-          "logprob": -1.5,
-          "text": "?"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
--- a/integration-tests/models/snapshots/test_compressed_tensors_w8an_fp/test_compressed_tensors_w8an.json
+++ b/integration-tests/models/snapshots/test_compressed_tensors_w8an_fp/test_compressed_tensors_w8an.json
@ -3,38 +3,7 @@
    "best_of_sequences": null,
    "finish_reason": "length",
    "generated_tokens": 10,
-    "prefill": [
-      {
-        "id": 128000,
-        "logprob": null,
-        "text": "<|begin_of_text|>"
-      },
-      {
-        "id": 3923,
-        "logprob": -7.609375,
-        "text": "What"
-      },
-      {
-        "id": 374,
-        "logprob": -0.92529297,
-        "text": " is"
-      },
-      {
-        "id": 5655,
-        "logprob": -10.0,
-        "text": " deep"
-      },
-      {
-        "id": 6975,
-        "logprob": -0.94628906,
-        "text": " learning"
-      },
-      {
-        "id": 30,
-        "logprob": -2.9042969,
-        "text": "?"
-      }
-    ],
+    "prefill": [],
    "seed": null,
    "tokens": [
      {
--- a/integration-tests/models/snapshots/test_compressed_tensors_w8an_fp/test_compressed_tensors_w8an_all_params.json
+++ b/integration-tests/models/snapshots/test_compressed_tensors_w8an_fp/test_compressed_tensors_w8an_all_params.json
@ -3,33 +3,7 @@
    "best_of_sequences": null,
    "finish_reason": "length",
    "generated_tokens": 10,
-    "prefill": [
-      {
-        "id": 128000,
-        "logprob": null,
-        "text": "<|begin_of_text|>"
-      },
-      {
-        "id": 3923,
-        "logprob": -7.609375,
-        "text": "What"
-      },
-      {
-        "id": 374,
-        "logprob": -0.92529297,
-        "text": " is"
-      },
-      {
-        "id": 5655,
-        "logprob": -10.0,
-        "text": " deep"
-      },
-      {
-        "id": 6975,
-        "logprob": -0.94628906,
-        "text": " learning"
-      }
-    ],
+    "prefill": [],
    "seed": 0,
    "tokens": [
      {
--- a/integration-tests/models/snapshots/test_compressed_tensors_w8an_fp/test_compressed_tensors_w8an_load.json
+++ b/integration-tests/models/snapshots/test_compressed_tensors_w8an_fp/test_compressed_tensors_w8an_load.json
@ -4,38 +4,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 128000,
-          "logprob": null,
-          "text": "<|begin_of_text|>"
-        },
-        {
-          "id": 3923,
-          "logprob": -7.609375,
-          "text": "What"
-        },
-        {
-          "id": 374,
-          "logprob": -0.92529297,
-          "text": " is"
-        },
-        {
-          "id": 5655,
-          "logprob": -10.0,
-          "text": " deep"
-        },
-        {
-          "id": 6975,
-          "logprob": -0.94628906,
-          "text": " learning"
-        },
-        {
-          "id": 30,
-          "logprob": -2.9042969,
-          "text": "?"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
@ -108,38 +77,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 128000,
-          "logprob": null,
-          "text": "<|begin_of_text|>"
-        },
-        {
-          "id": 3923,
-          "logprob": -7.6054688,
-          "text": "What"
-        },
-        {
-          "id": 374,
-          "logprob": -0.92089844,
-          "text": " is"
-        },
-        {
-          "id": 5655,
-          "logprob": -10.0,
-          "text": " deep"
-        },
-        {
-          "id": 6975,
-          "logprob": -0.94433594,
-          "text": " learning"
-        },
-        {
-          "id": 30,
-          "logprob": -2.90625,
-          "text": "?"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
@ -212,38 +150,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 128000,
-          "logprob": null,
-          "text": "<|begin_of_text|>"
-        },
-        {
-          "id": 3923,
-          "logprob": -7.6054688,
-          "text": "What"
-        },
-        {
-          "id": 374,
-          "logprob": -0.92089844,
-          "text": " is"
-        },
-        {
-          "id": 5655,
-          "logprob": -10.0,
-          "text": " deep"
-        },
-        {
-          "id": 6975,
-          "logprob": -0.94433594,
-          "text": " learning"
-        },
-        {
-          "id": 30,
-          "logprob": -2.90625,
-          "text": "?"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
@ -316,38 +223,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 128000,
-          "logprob": null,
-          "text": "<|begin_of_text|>"
-        },
-        {
-          "id": 3923,
-          "logprob": -7.6054688,
-          "text": "What"
-        },
-        {
-          "id": 374,
-          "logprob": -0.92089844,
-          "text": " is"
-        },
-        {
-          "id": 5655,
-          "logprob": -10.0,
-          "text": " deep"
-        },
-        {
-          "id": 6975,
-          "logprob": -0.94433594,
-          "text": " learning"
-        },
-        {
-          "id": 30,
-          "logprob": -2.90625,
-          "text": "?"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
--- a/integration-tests/models/snapshots/test_compressed_tensors_wna16_int/test_compressed_tensors_wna16.json
+++ b/integration-tests/models/snapshots/test_compressed_tensors_wna16_int/test_compressed_tensors_wna16.json
@ -3,38 +3,7 @@
    "best_of_sequences": null,
    "finish_reason": "length",
    "generated_tokens": 10,
-    "prefill": [
-      {
-        "id": 2,
-        "logprob": null,
-        "text": "<bos>"
-      },
-      {
-        "id": 1841,
-        "logprob": -5.46875,
-        "text": "What"
-      },
-      {
-        "id": 603,
-        "logprob": -0.69140625,
-        "text": " is"
-      },
-      {
-        "id": 5271,
-        "logprob": -12.0,
-        "text": " deep"
-      },
-      {
-        "id": 6044,
-        "logprob": -0.32226562,
-        "text": " learning"
-      },
-      {
-        "id": 235336,
-        "logprob": -0.33203125,
-        "text": "?"
-      }
-    ],
+    "prefill": [],
    "seed": null,
    "tokens": [
      {
--- a/integration-tests/models/snapshots/test_compressed_tensors_wna16_int/test_compressed_tensors_wna16_all_params.json
+++ b/integration-tests/models/snapshots/test_compressed_tensors_wna16_int/test_compressed_tensors_wna16_all_params.json
@ -3,33 +3,7 @@
    "best_of_sequences": null,
    "finish_reason": "length",
    "generated_tokens": 10,
-    "prefill": [
-      {
-        "id": 2,
-        "logprob": null,
-        "text": "<bos>"
-      },
-      {
-        "id": 1841,
-        "logprob": -5.46875,
-        "text": "What"
-      },
-      {
-        "id": 603,
-        "logprob": -0.69140625,
-        "text": " is"
-      },
-      {
-        "id": 5271,
-        "logprob": -12.0,
-        "text": " deep"
-      },
-      {
-        "id": 6044,
-        "logprob": -0.32226562,
-        "text": " learning"
-      }
-    ],
+    "prefill": [],
    "seed": 0,
    "tokens": [
      {
--- a/integration-tests/models/snapshots/test_compressed_tensors_wna16_int/test_compressed_tensors_wna16_load.json
+++ b/integration-tests/models/snapshots/test_compressed_tensors_wna16_int/test_compressed_tensors_wna16_load.json
@ -4,38 +4,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 2,
-          "logprob": null,
-          "text": "<bos>"
-        },
-        {
-          "id": 1841,
-          "logprob": -5.46875,
-          "text": "What"
-        },
-        {
-          "id": 603,
-          "logprob": -0.69140625,
-          "text": " is"
-        },
-        {
-          "id": 5271,
-          "logprob": -12.0,
-          "text": " deep"
-        },
-        {
-          "id": 6044,
-          "logprob": -0.32226562,
-          "text": " learning"
-        },
-        {
-          "id": 235336,
-          "logprob": -0.33203125,
-          "text": "?"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
@ -108,38 +77,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 2,
-          "logprob": null,
-          "text": "<bos>"
-        },
-        {
-          "id": 1841,
-          "logprob": -5.46875,
-          "text": "What"
-        },
-        {
-          "id": 603,
-          "logprob": -0.71484375,
-          "text": " is"
-        },
-        {
-          "id": 5271,
-          "logprob": -12.0,
-          "text": " deep"
-        },
-        {
-          "id": 6044,
-          "logprob": -0.30859375,
-          "text": " learning"
-        },
-        {
-          "id": 235336,
-          "logprob": -0.3359375,
-          "text": "?"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
@ -212,38 +150,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 2,
-          "logprob": null,
-          "text": "<bos>"
-        },
-        {
-          "id": 1841,
-          "logprob": -5.46875,
-          "text": "What"
-        },
-        {
-          "id": 603,
-          "logprob": -0.71484375,
-          "text": " is"
-        },
-        {
-          "id": 5271,
-          "logprob": -12.0,
-          "text": " deep"
-        },
-        {
-          "id": 6044,
-          "logprob": -0.30859375,
-          "text": " learning"
-        },
-        {
-          "id": 235336,
-          "logprob": -0.3359375,
-          "text": "?"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
@ -316,38 +223,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 2,
-          "logprob": null,
-          "text": "<bos>"
-        },
-        {
-          "id": 1841,
-          "logprob": -5.46875,
-          "text": "What"
-        },
-        {
-          "id": 603,
-          "logprob": -0.71484375,
-          "text": " is"
-        },
-        {
-          "id": 5271,
-          "logprob": -12.0,
-          "text": " deep"
-        },
-        {
-          "id": 6044,
-          "logprob": -0.30859375,
-          "text": " learning"
-        },
-        {
-          "id": 235336,
-          "logprob": -0.3359375,
-          "text": "?"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
--- a/integration-tests/models/snapshots/test_compressed_tensors_wna16_int_24/test_compressed_tensors_wna16_int_24.json
+++ b/integration-tests/models/snapshots/test_compressed_tensors_wna16_int_24/test_compressed_tensors_wna16_int_24.json
@ -3,38 +3,7 @@
    "best_of_sequences": null,
    "finish_reason": "length",
    "generated_tokens": 10,
-    "prefill": [
-      {
-        "id": 128000,
-        "logprob": null,
-        "text": "<|begin_of_text|>"
-      },
-      {
-        "id": 3923,
-        "logprob": -7.5390625,
-        "text": "What"
-      },
-      {
-        "id": 374,
-        "logprob": -0.86035156,
-        "text": " is"
-      },
-      {
-        "id": 5655,
-        "logprob": -8.828125,
-        "text": " deep"
-      },
-      {
-        "id": 6975,
-        "logprob": -1.4912109,
-        "text": " learning"
-      },
-      {
-        "id": 30,
-        "logprob": -2.1152344,
-        "text": "?"
-      }
-    ],
+    "prefill": [],
    "seed": null,
    "tokens": [
      {
--- a/integration-tests/models/snapshots/test_compressed_tensors_wna16_int_24/test_compressed_tensors_wna16_int_24_all_params.json
+++ b/integration-tests/models/snapshots/test_compressed_tensors_wna16_int_24/test_compressed_tensors_wna16_int_24_all_params.json
@ -3,33 +3,7 @@
    "best_of_sequences": null,
    "finish_reason": "length",
    "generated_tokens": 10,
-    "prefill": [
-      {
-        "id": 128000,
-        "logprob": null,
-        "text": "<|begin_of_text|>"
-      },
-      {
-        "id": 3923,
-        "logprob": -7.5390625,
-        "text": "What"
-      },
-      {
-        "id": 374,
-        "logprob": -0.86035156,
-        "text": " is"
-      },
-      {
-        "id": 5655,
-        "logprob": -8.828125,
-        "text": " deep"
-      },
-      {
-        "id": 6975,
-        "logprob": -1.4912109,
-        "text": " learning"
-      }
-    ],
+    "prefill": [],
    "seed": 0,
    "tokens": [
      {
--- a/integration-tests/models/snapshots/test_compressed_tensors_wna16_int_24/test_compressed_tensors_wna16_int_24_load.json
+++ b/integration-tests/models/snapshots/test_compressed_tensors_wna16_int_24/test_compressed_tensors_wna16_int_24_load.json
@ -4,38 +4,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 128000,
-          "logprob": null,
-          "text": "<|begin_of_text|>"
-        },
-        {
-          "id": 3923,
-          "logprob": -7.5390625,
-          "text": "What"
-        },
-        {
-          "id": 374,
-          "logprob": -0.86035156,
-          "text": " is"
-        },
-        {
-          "id": 5655,
-          "logprob": -8.828125,
-          "text": " deep"
-        },
-        {
-          "id": 6975,
-          "logprob": -1.4912109,
-          "text": " learning"
-        },
-        {
-          "id": 30,
-          "logprob": -2.1152344,
-          "text": "?"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
@ -108,38 +77,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 128000,
-          "logprob": null,
-          "text": "<|begin_of_text|>"
-        },
-        {
-          "id": 3923,
-          "logprob": -7.5351562,
-          "text": "What"
-        },
-        {
-          "id": 374,
-          "logprob": -0.85791016,
-          "text": " is"
-        },
-        {
-          "id": 5655,
-          "logprob": -8.828125,
-          "text": " deep"
-        },
-        {
-          "id": 6975,
-          "logprob": -1.4882812,
-          "text": " learning"
-        },
-        {
-          "id": 30,
-          "logprob": -2.1210938,
-          "text": "?"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
@ -212,38 +150,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 128000,
-          "logprob": null,
-          "text": "<|begin_of_text|>"
-        },
-        {
-          "id": 3923,
-          "logprob": -7.5351562,
-          "text": "What"
-        },
-        {
-          "id": 374,
-          "logprob": -0.85791016,
-          "text": " is"
-        },
-        {
-          "id": 5655,
-          "logprob": -8.828125,
-          "text": " deep"
-        },
-        {
-          "id": 6975,
-          "logprob": -1.4882812,
-          "text": " learning"
-        },
-        {
-          "id": 30,
-          "logprob": -2.1210938,
-          "text": "?"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
@ -316,38 +223,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 128000,
-          "logprob": null,
-          "text": "<|begin_of_text|>"
-        },
-        {
-          "id": 3923,
-          "logprob": -7.5351562,
-          "text": "What"
-        },
-        {
-          "id": 374,
-          "logprob": -0.85791016,
-          "text": " is"
-        },
-        {
-          "id": 5655,
-          "logprob": -8.828125,
-          "text": " deep"
-        },
-        {
-          "id": 6975,
-          "logprob": -1.4882812,
-          "text": " learning"
-        },
-        {
-          "id": 30,
-          "logprob": -2.1210938,
-          "text": "?"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
--- a/integration-tests/models/snapshots/test_flash_awq/test_flash_llama_awq.json
+++ b/integration-tests/models/snapshots/test_flash_awq/test_flash_llama_awq.json
@ -3,38 +3,7 @@
    "best_of_sequences": null,
    "finish_reason": "length",
    "generated_tokens": 10,
-    "prefill": [
-      {
-        "id": 1,
-        "logprob": null,
-        "text": "<s>"
-      },
-      {
-        "id": 1724,
-        "logprob": -7.703125,
-        "text": "What"
-      },
-      {
-        "id": 338,
-        "logprob": -1.4765625,
-        "text": "is"
-      },
-      {
-        "id": 21784,
-        "logprob": -9.390625,
-        "text": "Deep"
-      },
-      {
-        "id": 29257,
-        "logprob": -1.8583984,
-        "text": "Learning"
-      },
-      {
-        "id": 29973,
-        "logprob": -0.7548828,
-        "text": "?"
-      }
-    ],
+    "prefill": [],
    "seed": null,
    "tokens": [
      {
--- a/integration-tests/models/snapshots/test_flash_awq/test_flash_llama_awq_all_params.json
+++ b/integration-tests/models/snapshots/test_flash_awq/test_flash_llama_awq_all_params.json
@ -3,33 +3,7 @@
    "best_of_sequences": null,
    "finish_reason": "length",
    "generated_tokens": 10,
-    "prefill": [
-      {
-        "id": 1,
-        "logprob": null,
-        "text": "<s>"
-      },
-      {
-        "id": 338,
-        "logprob": -9.0859375,
-        "text": "is"
-      },
-      {
-        "id": 21784,
-        "logprob": -10.90625,
-        "text": "Deep"
-      },
-      {
-        "id": 29257,
-        "logprob": -2.65625,
-        "text": "Learning"
-      },
-      {
-        "id": 29973,
-        "logprob": -4.8085938,
-        "text": "?"
-      }
-    ],
+    "prefill": [],
    "seed": 0,
    "tokens": [
      {
--- a/integration-tests/models/snapshots/test_flash_awq/test_flash_llama_awq_load.json
+++ b/integration-tests/models/snapshots/test_flash_awq/test_flash_llama_awq_load.json
@ -4,38 +4,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 1,
-          "logprob": null,
-          "text": "<s>"
-        },
-        {
-          "id": 1724,
-          "logprob": -7.703125,
-          "text": "What"
-        },
-        {
-          "id": 338,
-          "logprob": -1.4765625,
-          "text": "is"
-        },
-        {
-          "id": 21784,
-          "logprob": -9.390625,
-          "text": "Deep"
-        },
-        {
-          "id": 29257,
-          "logprob": -1.8652344,
-          "text": "Learning"
-        },
-        {
-          "id": 29973,
-          "logprob": -0.7548828,
-          "text": "?"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
@ -108,38 +77,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 1,
-          "logprob": null,
-          "text": "<s>"
-        },
-        {
-          "id": 1724,
-          "logprob": -7.703125,
-          "text": "What"
-        },
-        {
-          "id": 338,
-          "logprob": -1.4765625,
-          "text": "is"
-        },
-        {
-          "id": 21784,
-          "logprob": -9.390625,
-          "text": "Deep"
-        },
-        {
-          "id": 29257,
-          "logprob": -1.8583984,
-          "text": "Learning"
-        },
-        {
-          "id": 29973,
-          "logprob": -0.7548828,
-          "text": "?"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
@ -212,38 +150,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 1,
-          "logprob": null,
-          "text": "<s>"
-        },
-        {
-          "id": 1724,
-          "logprob": -7.703125,
-          "text": "What"
-        },
-        {
-          "id": 338,
-          "logprob": -1.4765625,
-          "text": "is"
-        },
-        {
-          "id": 21784,
-          "logprob": -9.390625,
-          "text": "Deep"
-        },
-        {
-          "id": 29257,
-          "logprob": -1.8652344,
-          "text": "Learning"
-        },
-        {
-          "id": 29973,
-          "logprob": -0.7548828,
-          "text": "?"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
@ -316,38 +223,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 1,
-          "logprob": null,
-          "text": "<s>"
-        },
-        {
-          "id": 1724,
-          "logprob": -7.703125,
-          "text": "What"
-        },
-        {
-          "id": 338,
-          "logprob": -1.4765625,
-          "text": "is"
-        },
-        {
-          "id": 21784,
-          "logprob": -9.390625,
-          "text": "Deep"
-        },
-        {
-          "id": 29257,
-          "logprob": -1.8652344,
-          "text": "Learning"
-        },
-        {
-          "id": 29973,
-          "logprob": -0.7548828,
-          "text": "?"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
--- a/integration-tests/models/snapshots/test_flash_awq_sharded/test_flash_llama_awq_load_sharded.json
+++ b/integration-tests/models/snapshots/test_flash_awq_sharded/test_flash_llama_awq_load_sharded.json
@ -4,38 +4,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 1,
-          "logprob": null,
-          "text": "<s>"
-        },
-        {
-          "id": 1724,
-          "logprob": -7.6914062,
-          "text": "What"
-        },
-        {
-          "id": 338,
-          "logprob": -1.4746094,
-          "text": "is"
-        },
-        {
-          "id": 21784,
-          "logprob": -9.390625,
-          "text": "Deep"
-        },
-        {
-          "id": 29257,
-          "logprob": -1.8623047,
-          "text": "Learning"
-        },
-        {
-          "id": 29973,
-          "logprob": -0.7558594,
-          "text": "?"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
@ -108,38 +77,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 1,
-          "logprob": null,
-          "text": "<s>"
-        },
-        {
-          "id": 1724,
-          "logprob": -7.6914062,
-          "text": "What"
-        },
-        {
-          "id": 338,
-          "logprob": -1.4746094,
-          "text": "is"
-        },
-        {
-          "id": 21784,
-          "logprob": -9.390625,
-          "text": "Deep"
-        },
-        {
-          "id": 29257,
-          "logprob": -1.8623047,
-          "text": "Learning"
-        },
-        {
-          "id": 29973,
-          "logprob": -0.7558594,
-          "text": "?"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
@ -212,38 +150,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 1,
-          "logprob": null,
-          "text": "<s>"
-        },
-        {
-          "id": 1724,
-          "logprob": -7.6914062,
-          "text": "What"
-        },
-        {
-          "id": 338,
-          "logprob": -1.4746094,
-          "text": "is"
-        },
-        {
-          "id": 21784,
-          "logprob": -9.390625,
-          "text": "Deep"
-        },
-        {
-          "id": 29257,
-          "logprob": -1.8623047,
-          "text": "Learning"
-        },
-        {
-          "id": 29973,
-          "logprob": -0.7558594,
-          "text": "?"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
@ -316,38 +223,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 1,
-          "logprob": null,
-          "text": "<s>"
-        },
-        {
-          "id": 1724,
-          "logprob": -7.6914062,
-          "text": "What"
-        },
-        {
-          "id": 338,
-          "logprob": -1.4746094,
-          "text": "is"
-        },
-        {
-          "id": 21784,
-          "logprob": -9.390625,
-          "text": "Deep"
-        },
-        {
-          "id": 29257,
-          "logprob": -1.8623047,
-          "text": "Learning"
-        },
-        {
-          "id": 29973,
-          "logprob": -0.7558594,
-          "text": "?"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
--- a/integration-tests/models/snapshots/test_flash_awq_sharded/test_flash_llama_awq_sharded.json
+++ b/integration-tests/models/snapshots/test_flash_awq_sharded/test_flash_llama_awq_sharded.json
@ -3,38 +3,7 @@
    "best_of_sequences": null,
    "finish_reason": "length",
    "generated_tokens": 10,
-    "prefill": [
-      {
-        "id": 1,
-        "logprob": null,
-        "text": "<s>"
-      },
-      {
-        "id": 1724,
-        "logprob": -7.6914062,
-        "text": "What"
-      },
-      {
-        "id": 338,
-        "logprob": -1.4746094,
-        "text": "is"
-      },
-      {
-        "id": 21784,
-        "logprob": -9.390625,
-        "text": "Deep"
-      },
-      {
-        "id": 29257,
-        "logprob": -1.8623047,
-        "text": "Learning"
-      },
-      {
-        "id": 29973,
-        "logprob": -0.7558594,
-        "text": "?"
-      }
-    ],
+    "prefill": [],
    "seed": null,
    "tokens": [
      {
--- a/integration-tests/models/snapshots/test_flash_deepseek_v2/test_flash_deepseek_v2.json
+++ b/integration-tests/models/snapshots/test_flash_deepseek_v2/test_flash_deepseek_v2.json
@ -3,23 +3,7 @@
    "best_of_sequences": null,
    "finish_reason": "length",
    "generated_tokens": 10,
-    "prefill": [
-      {
-        "id": 100000,
-        "logprob": null,
-        "text": "<｜begin▁of▁sentence｜>"
-      },
-      {
-        "id": 3533,
-        "logprob": -9.625,
-        "text": "Test"
-      },
-      {
-        "id": 3102,
-        "logprob": -11.25,
-        "text": " request"
-      }
-    ],
+    "prefill": [],
    "seed": null,
    "tokens": [
      {
--- a/integration-tests/models/snapshots/test_flash_deepseek_v2/test_flash_deepseek_v2_all_params.json
+++ b/integration-tests/models/snapshots/test_flash_deepseek_v2/test_flash_deepseek_v2_all_params.json
@ -3,23 +3,7 @@
    "best_of_sequences": null,
    "finish_reason": "eos_token",
    "generated_tokens": 4,
-    "prefill": [
-      {
-        "id": 100000,
-        "logprob": null,
-        "text": "<｜begin▁of▁sentence｜>"
-      },
-      {
-        "id": 3533,
-        "logprob": -9.625,
-        "text": "Test"
-      },
-      {
-        "id": 3102,
-        "logprob": -11.25,
-        "text": " request"
-      }
-    ],
+    "prefill": [],
    "seed": 0,
    "tokens": [
      {
--- a/integration-tests/models/snapshots/test_flash_deepseek_v2/test_flash_deepseek_v2_load.json
+++ b/integration-tests/models/snapshots/test_flash_deepseek_v2/test_flash_deepseek_v2_load.json
@ -4,23 +4,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 100000,
-          "logprob": null,
-          "text": "<｜begin▁of▁sentence｜>"
-        },
-        {
-          "id": 3533,
-          "logprob": -9.625,
-          "text": "Test"
-        },
-        {
-          "id": 3102,
-          "logprob": -11.25,
-          "text": " request"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
@ -93,23 +77,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 100000,
-          "logprob": null,
-          "text": "<｜begin▁of▁sentence｜>"
-        },
-        {
-          "id": 3533,
-          "logprob": -9.625,
-          "text": "Test"
-        },
-        {
-          "id": 3102,
-          "logprob": -11.25,
-          "text": " request"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
@ -182,23 +150,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 100000,
-          "logprob": null,
-          "text": "<｜begin▁of▁sentence｜>"
-        },
-        {
-          "id": 3533,
-          "logprob": -9.625,
-          "text": "Test"
-        },
-        {
-          "id": 3102,
-          "logprob": -11.25,
-          "text": " request"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
@ -271,23 +223,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 100000,
-          "logprob": null,
-          "text": "<｜begin▁of▁sentence｜>"
-        },
-        {
-          "id": 3533,
-          "logprob": -9.625,
-          "text": "Test"
-        },
-        {
-          "id": 3102,
-          "logprob": -11.25,
-          "text": " request"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
--- a/integration-tests/models/snapshots/test_flash_falcon/test_flash_falcon.json
+++ b/integration-tests/models/snapshots/test_flash_falcon/test_flash_falcon.json
@ -3,313 +3,7 @@
    "best_of_sequences": null,
    "finish_reason": "length",
    "generated_tokens": 10,
-    "prefill": [
-      {
-        "id": 50,
-        "logprob": null,
-        "text": "G"
-      },
-      {
-        "id": 330,
-        "logprob": -5.96875,
-        "text": "ir"
-      },
-      {
-        "id": 1622,
-        "logprob": -5.6132812,
-        "text": "af"
-      },
-      {
-        "id": 249,
-        "logprob": -6.5039062,
-        "text": "at"
-      },
-      {
-        "id": 1480,
-        "logprob": -8.078125,
-        "text": "ron"
-      },
-      {
-        "id": 304,
-        "logprob": -2.3261719,
-        "text": " is"
-      },
-      {
-        "id": 23866,
-        "logprob": -9.59375,
-        "text": " obsessed"
-      },
-      {
-        "id": 335,
-        "logprob": -0.048339844,
-        "text": " with"
-      },
-      {
-        "id": 26680,
-        "logprob": -4.0,
-        "text": " gir"
-      },
-      {
-        "id": 1903,
-        "logprob": -0.07556152,
-        "text": "aff"
-      },
-      {
-        "id": 255,
-        "logprob": -0.0067749023,
-        "text": "es"
-      },
-      {
-        "id": 23,
-        "logprob": -1.546875,
-        "text": ","
-      },
-      {
-        "id": 248,
-        "logprob": -4.3320312,
-        "text": " the"
-      },
-      {
-        "id": 758,
-        "logprob": -3.734375,
-        "text": " most"
-      },
-      {
-        "id": 21735,
-        "logprob": -5.109375,
-        "text": " glorious"
-      },
-      {
-        "id": 5985,
-        "logprob": -2.09375,
-        "text": " animal"
-      },
-      {
-        "id": 313,
-        "logprob": -1.1835938,
-        "text": " on"
-      },
-      {
-        "id": 248,
-        "logprob": -0.77685547,
-        "text": " the"
-      },
-      {
-        "id": 1936,
-        "logprob": -2.3828125,
-        "text": " face"
-      },
-      {
-        "id": 275,
-        "logprob": -0.004432678,
-        "text": " of"
-      },
-      {
-        "id": 414,
-        "logprob": -1.9677734,
-        "text": " this"
-      },
-      {
-        "id": 6490,
-        "logprob": -2.046875,
-        "text": " Earth"
-      },
-      {
-        "id": 25,
-        "logprob": -0.28198242,
-        "text": "."
-      },
-      {
-        "id": 401,
-        "logprob": -7.9179688,
-        "text": " G"
-      },
-      {
-        "id": 6013,
-        "logprob": -2.2753906,
-        "text": "ira"
-      },
-      {
-        "id": 694,
-        "logprob": -0.6230469,
-        "text": "ft"
-      },
-      {
-        "id": 1480,
-        "logprob": -0.20874023,
-        "text": "ron"
-      },
-      {
-        "id": 9369,
-        "logprob": -4.5507812,
-        "text": " believes"
-      },
-      {
-        "id": 455,
-        "logprob": -4.5664062,
-        "text": " all"
-      },
-      {
-        "id": 599,
-        "logprob": -2.7402344,
-        "text": " other"
-      },
-      {
-        "id": 5632,
-        "logprob": -0.21948242,
-        "text": " animals"
-      },
-      {
-        "id": 362,
-        "logprob": -0.7675781,
-        "text": " are"
-      },
-      {
-        "id": 23981,
-        "logprob": -5.0,
-        "text": " irrelevant"
-      },
-      {
-        "id": 635,
-        "logprob": -4.234375,
-        "text": " when"
-      },
-      {
-        "id": 4354,
-        "logprob": -0.5131836,
-        "text": " compared"
-      },
-      {
-        "id": 271,
-        "logprob": -0.103637695,
-        "text": " to"
-      },
-      {
-        "id": 248,
-        "logprob": -0.58447266,
-        "text": " the"
-      },
-      {
-        "id": 21735,
-        "logprob": -3.6835938,
-        "text": " glorious"
-      },
-      {
-        "id": 64398,
-        "logprob": -1.8173828,
-        "text": " majesty"
-      },
-      {
-        "id": 275,
-        "logprob": -0.23510742,
-        "text": " of"
-      },
-      {
-        "id": 248,
-        "logprob": -0.35473633,
-        "text": " the"
-      },
-      {
-        "id": 26680,
-        "logprob": -0.24633789,
-        "text": " gir"
-      },
-      {
-        "id": 23226,
-        "logprob": -0.02960205,
-        "text": "affe"
-      },
-      {
-        "id": 25,
-        "logprob": -0.17333984,
-        "text": "."
-      },
-      {
-        "id": 193,
-        "logprob": -1.3935547,
-        "text": "\n"
-      },
-      {
-        "id": 23626,
-        "logprob": -10.0625,
-        "text": "Daniel"
-      },
-      {
-        "id": 37,
-        "logprob": -4.59375,
-        "text": ":"
-      },
-      {
-        "id": 23090,
-        "logprob": -6.9375,
-        "text": " Hello"
-      },
-      {
-        "id": 23,
-        "logprob": -0.99365234,
-        "text": ","
-      },
-      {
-        "id": 29033,
-        "logprob": -2.2324219,
-        "text": " Gir"
-      },
-      {
-        "id": 1622,
-        "logprob": -0.10809326,
-        "text": "af"
-      },
-      {
-        "id": 249,
-        "logprob": -0.042663574,
-        "text": "at"
-      },
-      {
-        "id": 1480,
-        "logprob": -0.0024776459,
-        "text": "ron"
-      },
-      {
-        "id": 12,
-        "logprob": -1.4277344,
-        "text": "!"
-      },
-      {
-        "id": 193,
-        "logprob": -1.1015625,
-        "text": "\n"
-      },
-      {
-        "id": 50,
-        "logprob": -0.05709839,
-        "text": "G"
-      },
-      {
-        "id": 330,
-        "logprob": -0.13208008,
-        "text": "ir"
-      },
-      {
-        "id": 1622,
-        "logprob": -0.0071487427,
-        "text": "af"
-      },
-      {
-        "id": 249,
-        "logprob": -0.008468628,
-        "text": "at"
-      },
-      {
-        "id": 1480,
-        "logprob": -0.00068998337,
-        "text": "ron"
-      },
-      {
-        "id": 37,
-        "logprob": -0.0074691772,
-        "text": ":"
-      }
-    ],
+    "prefill": [],
    "seed": null,
    "tokens": [
      {
--- a/integration-tests/models/snapshots/test_flash_falcon/test_flash_falcon_all_params.json
+++ b/integration-tests/models/snapshots/test_flash_falcon/test_flash_falcon_all_params.json
@ -3,33 +3,7 @@
    "best_of_sequences": null,
    "finish_reason": "length",
    "generated_tokens": 10,
-    "prefill": [
-      {
-        "id": 330,
-        "logprob": null,
-        "text": "ir"
-      },
-      {
-        "id": 1622,
-        "logprob": -7.8125,
-        "text": "af"
-      },
-      {
-        "id": 249,
-        "logprob": -4.5,
-        "text": "at"
-      },
-      {
-        "id": 1480,
-        "logprob": -10.875,
-        "text": "ron"
-      },
-      {
-        "id": 37,
-        "logprob": -3.6875,
-        "text": ":"
-      }
-    ],
+    "prefill": [],
    "seed": 0,
    "tokens": [
      {
--- a/integration-tests/models/snapshots/test_flash_falcon/test_flash_falcon_load.json
+++ b/integration-tests/models/snapshots/test_flash_falcon/test_flash_falcon_load.json
--- a/integration-tests/models/snapshots/test_flash_gemma/test_flash_gemma_all_params.json
+++ b/integration-tests/models/snapshots/test_flash_gemma/test_flash_gemma_all_params.json
@ -3,23 +3,7 @@
    "best_of_sequences": null,
    "finish_reason": "length",
    "generated_tokens": 10,
-    "prefill": [
-      {
-        "id": 2,
-        "logprob": null,
-        "text": "<bos>"
-      },
-      {
-        "id": 2015,
-        "logprob": -10.0625,
-        "text": "Test"
-      },
-      {
-        "id": 3853,
-        "logprob": -11.0,
-        "text": " request"
-      }
-    ],
+    "prefill": [],
    "seed": 0,
    "tokens": [
      {
--- a/integration-tests/models/snapshots/test_flash_gemma/test_flash_gemma_load.json
+++ b/integration-tests/models/snapshots/test_flash_gemma/test_flash_gemma_load.json
@ -4,23 +4,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 2,
-          "logprob": null,
-          "text": "<bos>"
-        },
-        {
-          "id": 2015,
-          "logprob": -10.0,
-          "text": "Test"
-        },
-        {
-          "id": 3853,
-          "logprob": -10.875,
-          "text": " request"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
@ -93,23 +77,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 2,
-          "logprob": null,
-          "text": "<bos>"
-        },
-        {
-          "id": 2015,
-          "logprob": -10.0,
-          "text": "Test"
-        },
-        {
-          "id": 3853,
-          "logprob": -10.875,
-          "text": " request"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
@ -182,23 +150,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 2,
-          "logprob": null,
-          "text": "<bos>"
-        },
-        {
-          "id": 2015,
-          "logprob": -10.0,
-          "text": "Test"
-        },
-        {
-          "id": 3853,
-          "logprob": -10.875,
-          "text": " request"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
@ -271,23 +223,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 2,
-          "logprob": null,
-          "text": "<bos>"
-        },
-        {
-          "id": 2015,
-          "logprob": -10.0,
-          "text": "Test"
-        },
-        {
-          "id": 3853,
-          "logprob": -10.875,
-          "text": " request"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
--- a/integration-tests/models/snapshots/test_flash_gemma/test_flash_gemma_simple.json
+++ b/integration-tests/models/snapshots/test_flash_gemma/test_flash_gemma_simple.json
@ -3,23 +3,7 @@
    "best_of_sequences": null,
    "finish_reason": "length",
    "generated_tokens": 10,
-    "prefill": [
-      {
-        "id": 2,
-        "logprob": null,
-        "text": "<bos>"
-      },
-      {
-        "id": 2015,
-        "logprob": -10.0625,
-        "text": "Test"
-      },
-      {
-        "id": 3853,
-        "logprob": -11.0,
-        "text": " request"
-      }
-    ],
+    "prefill": [],
    "seed": null,
    "tokens": [
      {
--- a/integration-tests/models/snapshots/test_flash_gemma2/test_flash_gemma2.json
+++ b/integration-tests/models/snapshots/test_flash_gemma2/test_flash_gemma2.json
@ -3,188 +3,7 @@
    "best_of_sequences": null,
    "finish_reason": "length",
    "generated_tokens": 10,
-    "prefill": [
-      {
-        "id": 2,
-        "logprob": null,
-        "text": "<bos>"
-      },
-      {
-        "id": 106,
-        "logprob": -47.25,
-        "text": "<start_of_turn>"
-      },
-      {
-        "id": 1645,
-        "logprob": -18.875,
-        "text": "user"
-      },
-      {
-        "id": 235292,
-        "logprob": -7.15625,
-        "text": ":"
-      },
-      {
-        "id": 108,
-        "logprob": -4.78125,
-        "text": "\n"
-      },
-      {
-        "id": 5559,
-        "logprob": -10.0,
-        "text": "Write"
-      },
-      {
-        "id": 476,
-        "logprob": -0.1171875,
-        "text": " a"
-      },
-      {
-        "id": 19592,
-        "logprob": -2.46875,
-        "text": " poem"
-      },
-      {
-        "id": 577,
-        "logprob": -5.84375,
-        "text": " to"
-      },
-      {
-        "id": 1707,
-        "logprob": -6.375,
-        "text": " help"
-      },
-      {
-        "id": 682,
-        "logprob": -2.125,
-        "text": " me"
-      },
-      {
-        "id": 5434,
-        "logprob": -1.546875,
-        "text": " remember"
-      },
-      {
-        "id": 573,
-        "logprob": -0.62890625,
-        "text": " the"
-      },
-      {
-        "id": 1370,
-        "logprob": -6.65625,
-        "text": " first"
-      },
-      {
-        "id": 235248,
-        "logprob": -1.84375,
-        "text": " "
-      },
-      {
-        "id": 235274,
-        "logprob": -0.45117188,
-        "text": "1"
-      },
-      {
-        "id": 235276,
-        "logprob": -0.07421875,
-        "text": "0"
-      },
-      {
-        "id": 6635,
-        "logprob": -2.109375,
-        "text": " elements"
-      },
-      {
-        "id": 611,
-        "logprob": -0.4140625,
-        "text": " on"
-      },
-      {
-        "id": 573,
-        "logprob": -0.0009536743,
-        "text": " the"
-      },
-      {
-        "id": 26163,
-        "logprob": -0.033203125,
-        "text": " periodic"
-      },
-      {
-        "id": 3037,
-        "logprob": -0.0002670288,
-        "text": " table"
-      },
-      {
-        "id": 235269,
-        "logprob": -4.75,
-        "text": ","
-      },
-      {
-        "id": 7385,
-        "logprob": -11.625,
-        "text": " giving"
-      },
-      {
-        "id": 1853,
-        "logprob": -4.875,
-        "text": " each"
-      },
-      {
-        "id": 5356,
-        "logprob": -0.38867188,
-        "text": " element"
-      },
-      {
-        "id": 1277,
-        "logprob": -3.65625,
-        "text": " its"
-      },
-      {
-        "id": 1997,
-        "logprob": -4.4375,
-        "text": " own"
-      },
-      {
-        "id": 2017,
-        "logprob": -0.29882812,
-        "text": " line"
-      },
-      {
-        "id": 235265,
-        "logprob": -0.16699219,
-        "text": "."
-      },
-      {
-        "id": 107,
-        "logprob": -25.625,
-        "text": "<end_of_turn>"
-      },
-      {
-        "id": 108,
-        "logprob": -6.75,
-        "text": "\n"
-      },
-      {
-        "id": 106,
-        "logprob": -39.5,
-        "text": "<start_of_turn>"
-      },
-      {
-        "id": 2516,
-        "logprob": -32.5,
-        "text": "model"
-      },
-      {
-        "id": 235292,
-        "logprob": -10.125,
-        "text": ":"
-      },
-      {
-        "id": 108,
-        "logprob": -3.421875,
-        "text": "\n"
-      }
-    ],
+    "prefill": [],
    "seed": null,
    "tokens": [
      {
--- a/integration-tests/models/snapshots/test_flash_gemma2/test_flash_gemma2_load.json
+++ b/integration-tests/models/snapshots/test_flash_gemma2/test_flash_gemma2_load.json
@ -4,188 +4,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 2,
-          "logprob": null,
-          "text": "<bos>"
-        },
-        {
-          "id": 106,
-          "logprob": -47.25,
-          "text": "<start_of_turn>"
-        },
-        {
-          "id": 1645,
-          "logprob": -18.875,
-          "text": "user"
-        },
-        {
-          "id": 235292,
-          "logprob": -7.25,
-          "text": ":"
-        },
-        {
-          "id": 108,
-          "logprob": -4.78125,
-          "text": "\n"
-        },
-        {
-          "id": 5559,
-          "logprob": -10.0,
-          "text": "Write"
-        },
-        {
-          "id": 476,
-          "logprob": -0.111816406,
-          "text": " a"
-        },
-        {
-          "id": 19592,
-          "logprob": -2.46875,
-          "text": " poem"
-        },
-        {
-          "id": 577,
-          "logprob": -5.78125,
-          "text": " to"
-        },
-        {
-          "id": 1707,
-          "logprob": -6.375,
-          "text": " help"
-        },
-        {
-          "id": 682,
-          "logprob": -2.125,
-          "text": " me"
-        },
-        {
-          "id": 5434,
-          "logprob": -1.59375,
-          "text": " remember"
-        },
-        {
-          "id": 573,
-          "logprob": -0.62890625,
-          "text": " the"
-        },
-        {
-          "id": 1370,
-          "logprob": -6.625,
-          "text": " first"
-        },
-        {
-          "id": 235248,
-          "logprob": -1.7421875,
-          "text": " "
-        },
-        {
-          "id": 235274,
-          "logprob": -0.44921875,
-          "text": "1"
-        },
-        {
-          "id": 235276,
-          "logprob": -0.07128906,
-          "text": "0"
-        },
-        {
-          "id": 6635,
-          "logprob": -2.109375,
-          "text": " elements"
-        },
-        {
-          "id": 611,
-          "logprob": -0.40429688,
-          "text": " on"
-        },
-        {
-          "id": 573,
-          "logprob": -0.0009918213,
-          "text": " the"
-        },
-        {
-          "id": 26163,
-          "logprob": -0.03540039,
-          "text": " periodic"
-        },
-        {
-          "id": 3037,
-          "logprob": -0.00028800964,
-          "text": " table"
-        },
-        {
-          "id": 235269,
-          "logprob": -4.71875,
-          "text": ","
-        },
-        {
-          "id": 7385,
-          "logprob": -11.875,
-          "text": " giving"
-        },
-        {
-          "id": 1853,
-          "logprob": -4.875,
-          "text": " each"
-        },
-        {
-          "id": 5356,
-          "logprob": -0.38867188,
-          "text": " element"
-        },
-        {
-          "id": 1277,
-          "logprob": -3.65625,
-          "text": " its"
-        },
-        {
-          "id": 1997,
-          "logprob": -4.4375,
-          "text": " own"
-        },
-        {
-          "id": 2017,
-          "logprob": -0.3046875,
-          "text": " line"
-        },
-        {
-          "id": 235265,
-          "logprob": -0.16113281,
-          "text": "."
-        },
-        {
-          "id": 107,
-          "logprob": -25.625,
-          "text": "<end_of_turn>"
-        },
-        {
-          "id": 108,
-          "logprob": -6.75,
-          "text": "\n"
-        },
-        {
-          "id": 106,
-          "logprob": -39.25,
-          "text": "<start_of_turn>"
-        },
-        {
-          "id": 2516,
-          "logprob": -32.5,
-          "text": "model"
-        },
-        {
-          "id": 235292,
-          "logprob": -10.1875,
-          "text": ":"
-        },
-        {
-          "id": 108,
-          "logprob": -3.296875,
-          "text": "\n"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
@ -258,188 +77,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 2,
-          "logprob": null,
-          "text": "<bos>"
-        },
-        {
-          "id": 106,
-          "logprob": -47.25,
-          "text": "<start_of_turn>"
-        },
-        {
-          "id": 1645,
-          "logprob": -18.875,
-          "text": "user"
-        },
-        {
-          "id": 235292,
-          "logprob": -7.25,
-          "text": ":"
-        },
-        {
-          "id": 108,
-          "logprob": -4.78125,
-          "text": "\n"
-        },
-        {
-          "id": 5559,
-          "logprob": -10.0,
-          "text": "Write"
-        },
-        {
-          "id": 476,
-          "logprob": -0.111816406,
-          "text": " a"
-        },
-        {
-          "id": 19592,
-          "logprob": -2.46875,
-          "text": " poem"
-        },
-        {
-          "id": 577,
-          "logprob": -5.78125,
-          "text": " to"
-        },
-        {
-          "id": 1707,
-          "logprob": -6.375,
-          "text": " help"
-        },
-        {
-          "id": 682,
-          "logprob": -2.125,
-          "text": " me"
-        },
-        {
-          "id": 5434,
-          "logprob": -1.59375,
-          "text": " remember"
-        },
-        {
-          "id": 573,
-          "logprob": -0.62890625,
-          "text": " the"
-        },
-        {
-          "id": 1370,
-          "logprob": -6.625,
-          "text": " first"
-        },
-        {
-          "id": 235248,
-          "logprob": -1.7421875,
-          "text": " "
-        },
-        {
-          "id": 235274,
-          "logprob": -0.44921875,
-          "text": "1"
-        },
-        {
-          "id": 235276,
-          "logprob": -0.07128906,
-          "text": "0"
-        },
-        {
-          "id": 6635,
-          "logprob": -2.109375,
-          "text": " elements"
-        },
-        {
-          "id": 611,
-          "logprob": -0.40429688,
-          "text": " on"
-        },
-        {
-          "id": 573,
-          "logprob": -0.0009918213,
-          "text": " the"
-        },
-        {
-          "id": 26163,
-          "logprob": -0.03540039,
-          "text": " periodic"
-        },
-        {
-          "id": 3037,
-          "logprob": -0.00028800964,
-          "text": " table"
-        },
-        {
-          "id": 235269,
-          "logprob": -4.71875,
-          "text": ","
-        },
-        {
-          "id": 7385,
-          "logprob": -11.875,
-          "text": " giving"
-        },
-        {
-          "id": 1853,
-          "logprob": -4.875,
-          "text": " each"
-        },
-        {
-          "id": 5356,
-          "logprob": -0.38867188,
-          "text": " element"
-        },
-        {
-          "id": 1277,
-          "logprob": -3.65625,
-          "text": " its"
-        },
-        {
-          "id": 1997,
-          "logprob": -4.4375,
-          "text": " own"
-        },
-        {
-          "id": 2017,
-          "logprob": -0.3046875,
-          "text": " line"
-        },
-        {
-          "id": 235265,
-          "logprob": -0.16113281,
-          "text": "."
-        },
-        {
-          "id": 107,
-          "logprob": -25.625,
-          "text": "<end_of_turn>"
-        },
-        {
-          "id": 108,
-          "logprob": -6.75,
-          "text": "\n"
-        },
-        {
-          "id": 106,
-          "logprob": -39.25,
-          "text": "<start_of_turn>"
-        },
-        {
-          "id": 2516,
-          "logprob": -32.5,
-          "text": "model"
-        },
-        {
-          "id": 235292,
-          "logprob": -10.1875,
-          "text": ":"
-        },
-        {
-          "id": 108,
-          "logprob": -3.296875,
-          "text": "\n"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
@ -512,188 +150,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 2,
-          "logprob": null,
-          "text": "<bos>"
-        },
-        {
-          "id": 106,
-          "logprob": -47.25,
-          "text": "<start_of_turn>"
-        },
-        {
-          "id": 1645,
-          "logprob": -18.875,
-          "text": "user"
-        },
-        {
-          "id": 235292,
-          "logprob": -7.15625,
-          "text": ":"
-        },
-        {
-          "id": 108,
-          "logprob": -4.78125,
-          "text": "\n"
-        },
-        {
-          "id": 5559,
-          "logprob": -10.0,
-          "text": "Write"
-        },
-        {
-          "id": 476,
-          "logprob": -0.1171875,
-          "text": " a"
-        },
-        {
-          "id": 19592,
-          "logprob": -2.46875,
-          "text": " poem"
-        },
-        {
-          "id": 577,
-          "logprob": -5.84375,
-          "text": " to"
-        },
-        {
-          "id": 1707,
-          "logprob": -6.375,
-          "text": " help"
-        },
-        {
-          "id": 682,
-          "logprob": -2.125,
-          "text": " me"
-        },
-        {
-          "id": 5434,
-          "logprob": -1.546875,
-          "text": " remember"
-        },
-        {
-          "id": 573,
-          "logprob": -0.62890625,
-          "text": " the"
-        },
-        {
-          "id": 1370,
-          "logprob": -6.65625,
-          "text": " first"
-        },
-        {
-          "id": 235248,
-          "logprob": -1.84375,
-          "text": " "
-        },
-        {
-          "id": 235274,
-          "logprob": -0.45117188,
-          "text": "1"
-        },
-        {
-          "id": 235276,
-          "logprob": -0.07421875,
-          "text": "0"
-        },
-        {
-          "id": 6635,
-          "logprob": -2.109375,
-          "text": " elements"
-        },
-        {
-          "id": 611,
-          "logprob": -0.4140625,
-          "text": " on"
-        },
-        {
-          "id": 573,
-          "logprob": -0.0009536743,
-          "text": " the"
-        },
-        {
-          "id": 26163,
-          "logprob": -0.033203125,
-          "text": " periodic"
-        },
-        {
-          "id": 3037,
-          "logprob": -0.0002670288,
-          "text": " table"
-        },
-        {
-          "id": 235269,
-          "logprob": -4.75,
-          "text": ","
-        },
-        {
-          "id": 7385,
-          "logprob": -11.625,
-          "text": " giving"
-        },
-        {
-          "id": 1853,
-          "logprob": -4.875,
-          "text": " each"
-        },
-        {
-          "id": 5356,
-          "logprob": -0.38867188,
-          "text": " element"
-        },
-        {
-          "id": 1277,
-          "logprob": -3.65625,
-          "text": " its"
-        },
-        {
-          "id": 1997,
-          "logprob": -4.4375,
-          "text": " own"
-        },
-        {
-          "id": 2017,
-          "logprob": -0.29882812,
-          "text": " line"
-        },
-        {
-          "id": 235265,
-          "logprob": -0.16699219,
-          "text": "."
-        },
-        {
-          "id": 107,
-          "logprob": -25.625,
-          "text": "<end_of_turn>"
-        },
-        {
-          "id": 108,
-          "logprob": -6.75,
-          "text": "\n"
-        },
-        {
-          "id": 106,
-          "logprob": -39.5,
-          "text": "<start_of_turn>"
-        },
-        {
-          "id": 2516,
-          "logprob": -32.5,
-          "text": "model"
-        },
-        {
-          "id": 235292,
-          "logprob": -10.125,
-          "text": ":"
-        },
-        {
-          "id": 108,
-          "logprob": -3.421875,
-          "text": "\n"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
@ -766,188 +223,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 2,
-          "logprob": null,
-          "text": "<bos>"
-        },
-        {
-          "id": 106,
-          "logprob": -47.25,
-          "text": "<start_of_turn>"
-        },
-        {
-          "id": 1645,
-          "logprob": -18.875,
-          "text": "user"
-        },
-        {
-          "id": 235292,
-          "logprob": -7.25,
-          "text": ":"
-        },
-        {
-          "id": 108,
-          "logprob": -4.78125,
-          "text": "\n"
-        },
-        {
-          "id": 5559,
-          "logprob": -10.0,
-          "text": "Write"
-        },
-        {
-          "id": 476,
-          "logprob": -0.111816406,
-          "text": " a"
-        },
-        {
-          "id": 19592,
-          "logprob": -2.46875,
-          "text": " poem"
-        },
-        {
-          "id": 577,
-          "logprob": -5.78125,
-          "text": " to"
-        },
-        {
-          "id": 1707,
-          "logprob": -6.375,
-          "text": " help"
-        },
-        {
-          "id": 682,
-          "logprob": -2.125,
-          "text": " me"
-        },
-        {
-          "id": 5434,
-          "logprob": -1.59375,
-          "text": " remember"
-        },
-        {
-          "id": 573,
-          "logprob": -0.62890625,
-          "text": " the"
-        },
-        {
-          "id": 1370,
-          "logprob": -6.625,
-          "text": " first"
-        },
-        {
-          "id": 235248,
-          "logprob": -1.7421875,
-          "text": " "
-        },
-        {
-          "id": 235274,
-          "logprob": -0.44921875,
-          "text": "1"
-        },
-        {
-          "id": 235276,
-          "logprob": -0.07128906,
-          "text": "0"
-        },
-        {
-          "id": 6635,
-          "logprob": -2.109375,
-          "text": " elements"
-        },
-        {
-          "id": 611,
-          "logprob": -0.40429688,
-          "text": " on"
-        },
-        {
-          "id": 573,
-          "logprob": -0.0009918213,
-          "text": " the"
-        },
-        {
-          "id": 26163,
-          "logprob": -0.03540039,
-          "text": " periodic"
-        },
-        {
-          "id": 3037,
-          "logprob": -0.00028800964,
-          "text": " table"
-        },
-        {
-          "id": 235269,
-          "logprob": -4.71875,
-          "text": ","
-        },
-        {
-          "id": 7385,
-          "logprob": -11.875,
-          "text": " giving"
-        },
-        {
-          "id": 1853,
-          "logprob": -4.875,
-          "text": " each"
-        },
-        {
-          "id": 5356,
-          "logprob": -0.38867188,
-          "text": " element"
-        },
-        {
-          "id": 1277,
-          "logprob": -3.65625,
-          "text": " its"
-        },
-        {
-          "id": 1997,
-          "logprob": -4.4375,
-          "text": " own"
-        },
-        {
-          "id": 2017,
-          "logprob": -0.3046875,
-          "text": " line"
-        },
-        {
-          "id": 235265,
-          "logprob": -0.16113281,
-          "text": "."
-        },
-        {
-          "id": 107,
-          "logprob": -25.625,
-          "text": "<end_of_turn>"
-        },
-        {
-          "id": 108,
-          "logprob": -6.75,
-          "text": "\n"
-        },
-        {
-          "id": 106,
-          "logprob": -39.25,
-          "text": "<start_of_turn>"
-        },
-        {
-          "id": 2516,
-          "logprob": -32.5,
-          "text": "model"
-        },
-        {
-          "id": 235292,
-          "logprob": -10.1875,
-          "text": ":"
-        },
-        {
-          "id": 108,
-          "logprob": -3.296875,
-          "text": "\n"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
--- a/integration-tests/models/snapshots/test_flash_gemma_gptq/test_flash_gemma_gptq.json
+++ b/integration-tests/models/snapshots/test_flash_gemma_gptq/test_flash_gemma_gptq.json
@ -3,23 +3,7 @@
    "best_of_sequences": null,
    "finish_reason": "length",
    "generated_tokens": 10,
-    "prefill": [
-      {
-        "id": 2,
-        "logprob": null,
-        "text": "<bos>"
-      },
-      {
-        "id": 2015,
-        "logprob": -9.640625,
-        "text": "Test"
-      },
-      {
-        "id": 3853,
-        "logprob": -10.34375,
-        "text": " request"
-      }
-    ],
+    "prefill": [],
    "seed": null,
    "tokens": [
      {
--- a/integration-tests/models/snapshots/test_flash_gemma_gptq/test_flash_gemma_gptq_all_params.json
+++ b/integration-tests/models/snapshots/test_flash_gemma_gptq/test_flash_gemma_gptq_all_params.json
@ -3,23 +3,7 @@
    "best_of_sequences": null,
    "finish_reason": "length",
    "generated_tokens": 10,
-    "prefill": [
-      {
-        "id": 2,
-        "logprob": null,
-        "text": "<bos>"
-      },
-      {
-        "id": 2015,
-        "logprob": -9.6484375,
-        "text": "Test"
-      },
-      {
-        "id": 3853,
-        "logprob": -10.3671875,
-        "text": " request"
-      }
-    ],
+    "prefill": [],
    "seed": 0,
    "tokens": [
      {
--- a/integration-tests/models/snapshots/test_flash_gemma_gptq/test_flash_gemma_gptq_load.json
+++ b/integration-tests/models/snapshots/test_flash_gemma_gptq/test_flash_gemma_gptq_load.json
@ -4,23 +4,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 2,
-          "logprob": null,
-          "text": "<bos>"
-        },
-        {
-          "id": 2015,
-          "logprob": -9.6484375,
-          "text": "Test"
-        },
-        {
-          "id": 3853,
-          "logprob": -10.359375,
-          "text": " request"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
@ -93,23 +77,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 2,
-          "logprob": null,
-          "text": "<bos>"
-        },
-        {
-          "id": 2015,
-          "logprob": -9.6484375,
-          "text": "Test"
-        },
-        {
-          "id": 3853,
-          "logprob": -10.34375,
-          "text": " request"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
@ -182,23 +150,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 2,
-          "logprob": null,
-          "text": "<bos>"
-        },
-        {
-          "id": 2015,
-          "logprob": -9.640625,
-          "text": "Test"
-        },
-        {
-          "id": 3853,
-          "logprob": -10.3671875,
-          "text": " request"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
@ -271,23 +223,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 2,
-          "logprob": null,
-          "text": "<bos>"
-        },
-        {
-          "id": 2015,
-          "logprob": -9.6484375,
-          "text": "Test"
-        },
-        {
-          "id": 3853,
-          "logprob": -10.359375,
-          "text": " request"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
--- a/integration-tests/models/snapshots/test_flash_gpt2/test_flash_gpt2.json
+++ b/integration-tests/models/snapshots/test_flash_gpt2/test_flash_gpt2.json
@ -3,33 +3,7 @@
    "best_of_sequences": null,
    "finish_reason": "length",
    "generated_tokens": 10,
-    "prefill": [
-      {
-        "id": 2061,
-        "logprob": null,
-        "text": "What"
-      },
-      {
-        "id": 318,
-        "logprob": -3.1835938,
-        "text": " is"
-      },
-      {
-        "id": 2769,
-        "logprob": -9.171875,
-        "text": " deep"
-      },
-      {
-        "id": 4673,
-        "logprob": -1.6425781,
-        "text": " learning"
-      },
-      {
-        "id": 30,
-        "logprob": -0.7314453,
-        "text": "?"
-      }
-    ],
+    "prefill": [],
    "seed": null,
    "tokens": [
      {
--- a/integration-tests/models/snapshots/test_flash_gpt2/test_flash_gpt2_load.json
+++ b/integration-tests/models/snapshots/test_flash_gpt2/test_flash_gpt2_load.json
@ -4,33 +4,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 2061,
-          "logprob": null,
-          "text": "What"
-        },
-        {
-          "id": 318,
-          "logprob": -3.1835938,
-          "text": " is"
-        },
-        {
-          "id": 2769,
-          "logprob": -9.171875,
-          "text": " deep"
-        },
-        {
-          "id": 4673,
-          "logprob": -1.6425781,
-          "text": " learning"
-        },
-        {
-          "id": 30,
-          "logprob": -0.7314453,
-          "text": "?"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
@ -103,33 +77,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 2061,
-          "logprob": null,
-          "text": "What"
-        },
-        {
-          "id": 318,
-          "logprob": -3.1660156,
-          "text": " is"
-        },
-        {
-          "id": 2769,
-          "logprob": -9.1796875,
-          "text": " deep"
-        },
-        {
-          "id": 4673,
-          "logprob": -1.6376953,
-          "text": " learning"
-        },
-        {
-          "id": 30,
-          "logprob": -0.72216797,
-          "text": "?"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
@ -202,33 +150,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 2061,
-          "logprob": null,
-          "text": "What"
-        },
-        {
-          "id": 318,
-          "logprob": -3.1660156,
-          "text": " is"
-        },
-        {
-          "id": 2769,
-          "logprob": -9.1796875,
-          "text": " deep"
-        },
-        {
-          "id": 4673,
-          "logprob": -1.6376953,
-          "text": " learning"
-        },
-        {
-          "id": 30,
-          "logprob": -0.72216797,
-          "text": "?"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
@ -301,33 +223,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 2061,
-          "logprob": null,
-          "text": "What"
-        },
-        {
-          "id": 318,
-          "logprob": -3.1660156,
-          "text": " is"
-        },
-        {
-          "id": 2769,
-          "logprob": -9.1796875,
-          "text": " deep"
-        },
-        {
-          "id": 4673,
-          "logprob": -1.6376953,
-          "text": " learning"
-        },
-        {
-          "id": 30,
-          "logprob": -0.72216797,
-          "text": "?"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
--- a/integration-tests/models/snapshots/test_flash_grammar_llama/test_flash_llama_grammar.json
+++ b/integration-tests/models/snapshots/test_flash_grammar_llama/test_flash_llama_grammar.json
@ -3,23 +3,7 @@
    "best_of_sequences": null,
    "finish_reason": "length",
    "generated_tokens": 10,
-    "prefill": [
-      {
-        "id": 1,
-        "logprob": null,
-        "text": "<s>"
-      },
-      {
-        "id": 4321,
-        "logprob": -13.90625,
-        "text": "Test"
-      },
-      {
-        "id": 2009,
-        "logprob": -12.328125,
-        "text": "request"
-      }
-    ],
+    "prefill": [],
    "seed": null,
    "tokens": [
      {
--- a/integration-tests/models/snapshots/test_flash_grammar_llama/test_flash_llama_grammar_json.json
+++ b/integration-tests/models/snapshots/test_flash_grammar_llama/test_flash_llama_grammar_json.json
@ -3,88 +3,7 @@
    "best_of_sequences": null,
    "finish_reason": "eos_token",
    "generated_tokens": 30,
-    "prefill": [
-      {
-        "id": 1,
-        "logprob": null,
-        "text": "<s>"
-      },
-      {
-        "id": 5235,
-        "logprob": -10.0625,
-        "text": "info"
-      },
-      {
-        "id": 29901,
-        "logprob": -3.2324219,
-        "text": ":"
-      },
-      {
-        "id": 13260,
-        "logprob": -10.625,
-        "text": "dav"
-      },
-      {
-        "id": 333,
-        "logprob": -0.08276367,
-        "text": "id"
-      },
-      {
-        "id": 8753,
-        "logprob": -7.5273438,
-        "text": "hol"
-      },
-      {
-        "id": 17559,
-        "logprob": -3.8476562,
-        "text": "tz"
-      },
-      {
-        "id": 763,
-        "logprob": -10.140625,
-        "text": "like"
-      },
-      {
-        "id": 10697,
-        "logprob": -10.1953125,
-        "text": "trees"
-      },
-      {
-        "id": 322,
-        "logprob": -2.5742188,
-        "text": "and"
-      },
-      {
-        "id": 756,
-        "logprob": -7.4882812,
-        "text": "has"
-      },
-      {
-        "id": 1023,
-        "logprob": -5.0507812,
-        "text": "two"
-      },
-      {
-        "id": 274,
-        "logprob": -5.3164062,
-        "text": "c"
-      },
-      {
-        "id": 1446,
-        "logprob": -0.6694336,
-        "text": "ats"
-      },
-      {
-        "id": 29889,
-        "logprob": -0.9995117,
-        "text": "."
-      },
-      {
-        "id": 29871,
-        "logprob": -4.2421875,
-        "text": ""
-      }
-    ],
+    "prefill": [],
    "seed": null,
    "tokens": [
      {
--- a/integration-tests/models/snapshots/test_flash_grammar_llama/test_flash_llama_grammar_load.json
+++ b/integration-tests/models/snapshots/test_flash_grammar_llama/test_flash_llama_grammar_load.json
@ -4,53 +4,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 1,
-          "logprob": null,
-          "text": "<s>"
-        },
-        {
-          "id": 1024,
-          "logprob": -10.578125,
-          "text": "name"
-        },
-        {
-          "id": 29901,
-          "logprob": -3.0332031,
-          "text": ":"
-        },
-        {
-          "id": 13260,
-          "logprob": -9.171875,
-          "text": "dav"
-        },
-        {
-          "id": 333,
-          "logprob": -0.04257202,
-          "text": "id"
-        },
-        {
-          "id": 29889,
-          "logprob": -2.4785156,
-          "text": "."
-        },
-        {
-          "id": 4876,
-          "logprob": -10.7890625,
-          "text": "email"
-        },
-        {
-          "id": 29901,
-          "logprob": -0.32495117,
-          "text": ":"
-        },
-        {
-          "id": 259,
-          "logprob": -9.4921875,
-          "text": " "
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
@ -123,53 +77,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 1,
-          "logprob": null,
-          "text": "<s>"
-        },
-        {
-          "id": 1024,
-          "logprob": -10.578125,
-          "text": "name"
-        },
-        {
-          "id": 29901,
-          "logprob": -3.03125,
-          "text": ":"
-        },
-        {
-          "id": 13260,
-          "logprob": -9.171875,
-          "text": "dav"
-        },
-        {
-          "id": 333,
-          "logprob": -0.04244995,
-          "text": "id"
-        },
-        {
-          "id": 29889,
-          "logprob": -2.4863281,
-          "text": "."
-        },
-        {
-          "id": 4876,
-          "logprob": -10.7890625,
-          "text": "email"
-        },
-        {
-          "id": 29901,
-          "logprob": -0.32714844,
-          "text": ":"
-        },
-        {
-          "id": 259,
-          "logprob": -9.4921875,
-          "text": " "
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
@ -242,53 +150,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 1,
-          "logprob": null,
-          "text": "<s>"
-        },
-        {
-          "id": 1024,
-          "logprob": -10.578125,
-          "text": "name"
-        },
-        {
-          "id": 29901,
-          "logprob": -3.0332031,
-          "text": ":"
-        },
-        {
-          "id": 13260,
-          "logprob": -9.171875,
-          "text": "dav"
-        },
-        {
-          "id": 333,
-          "logprob": -0.04257202,
-          "text": "id"
-        },
-        {
-          "id": 29889,
-          "logprob": -2.4785156,
-          "text": "."
-        },
-        {
-          "id": 4876,
-          "logprob": -10.7890625,
-          "text": "email"
-        },
-        {
-          "id": 29901,
-          "logprob": -0.32495117,
-          "text": ":"
-        },
-        {
-          "id": 259,
-          "logprob": -9.4921875,
-          "text": " "
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
@ -361,53 +223,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 1,
-          "logprob": null,
-          "text": "<s>"
-        },
-        {
-          "id": 1024,
-          "logprob": -10.578125,
-          "text": "name"
-        },
-        {
-          "id": 29901,
-          "logprob": -3.0332031,
-          "text": ":"
-        },
-        {
-          "id": 13260,
-          "logprob": -9.171875,
-          "text": "dav"
-        },
-        {
-          "id": 333,
-          "logprob": -0.04257202,
-          "text": "id"
-        },
-        {
-          "id": 29889,
-          "logprob": -2.4785156,
-          "text": "."
-        },
-        {
-          "id": 4876,
-          "logprob": -10.7890625,
-          "text": "email"
-        },
-        {
-          "id": 29901,
-          "logprob": -0.32495117,
-          "text": ":"
-        },
-        {
-          "id": 259,
-          "logprob": -9.4921875,
-          "text": " "
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
--- a/integration-tests/models/snapshots/test_flash_grammar_llama/test_flash_llama_grammar_regex.json
+++ b/integration-tests/models/snapshots/test_flash_grammar_llama/test_flash_llama_grammar_regex.json
@ -3,43 +3,7 @@
    "best_of_sequences": null,
    "finish_reason": "length",
    "generated_tokens": 10,
-    "prefill": [
-      {
-        "id": 1,
-        "logprob": null,
-        "text": "<s>"
-      },
-      {
-        "id": 806,
-        "logprob": -11.890625,
-        "text": "Wh"
-      },
-      {
-        "id": 1446,
-        "logprob": -3.6699219,
-        "text": "ats"
-      },
-      {
-        "id": 2921,
-        "logprob": -7.8203125,
-        "text": "Go"
-      },
-      {
-        "id": 468,
-        "logprob": -8.0703125,
-        "text": "og"
-      },
-      {
-        "id": 793,
-        "logprob": -2.1875,
-        "text": "les"
-      },
-      {
-        "id": 16332,
-        "logprob": -9.7109375,
-        "text": "DNS"
-      }
-    ],
+    "prefill": [],
    "seed": null,
    "tokens": [
      {
--- a/integration-tests/models/snapshots/test_flash_llama/test_flash_llama_all_params.json
+++ b/integration-tests/models/snapshots/test_flash_llama/test_flash_llama_all_params.json
@ -3,23 +3,7 @@
    "best_of_sequences": null,
    "finish_reason": "stop_sequence",
    "generated_tokens": 5,
-    "prefill": [
-      {
-        "id": 1,
-        "logprob": null,
-        "text": "<s>"
-      },
-      {
-        "id": 4321,
-        "logprob": -8.6875,
-        "text": "Test"
-      },
-      {
-        "id": 2009,
-        "logprob": -11.546875,
-        "text": "request"
-      }
-    ],
+    "prefill": [],
    "seed": 0,
    "tokens": [
      {
--- a/integration-tests/models/snapshots/test_flash_llama/test_flash_llama_load.json
+++ b/integration-tests/models/snapshots/test_flash_llama/test_flash_llama_load.json
@ -4,23 +4,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 1,
-          "logprob": null,
-          "text": "<s>"
-        },
-        {
-          "id": 4321,
-          "logprob": -8.6875,
-          "text": "Test"
-        },
-        {
-          "id": 2009,
-          "logprob": -11.546875,
-          "text": "request"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
@ -93,23 +77,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 1,
-          "logprob": null,
-          "text": "<s>"
-        },
-        {
-          "id": 4321,
-          "logprob": -8.6875,
-          "text": "Test"
-        },
-        {
-          "id": 2009,
-          "logprob": -11.546875,
-          "text": "request"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
@ -182,23 +150,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 1,
-          "logprob": null,
-          "text": "<s>"
-        },
-        {
-          "id": 4321,
-          "logprob": -8.6875,
-          "text": "Test"
-        },
-        {
-          "id": 2009,
-          "logprob": -11.546875,
-          "text": "request"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
@ -271,23 +223,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 1,
-          "logprob": null,
-          "text": "<s>"
-        },
-        {
-          "id": 4321,
-          "logprob": -8.6875,
-          "text": "Test"
-        },
-        {
-          "id": 2009,
-          "logprob": -11.546875,
-          "text": "request"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
--- a/integration-tests/models/snapshots/test_flash_llama/test_flash_llama_simple.json
+++ b/integration-tests/models/snapshots/test_flash_llama/test_flash_llama_simple.json
@ -3,23 +3,7 @@
    "best_of_sequences": null,
    "finish_reason": "length",
    "generated_tokens": 10,
-    "prefill": [
-      {
-        "id": 1,
-        "logprob": null,
-        "text": "<s>"
-      },
-      {
-        "id": 4321,
-        "logprob": -8.6875,
-        "text": "Test"
-      },
-      {
-        "id": 2009,
-        "logprob": -11.546875,
-        "text": "request"
-      }
-    ],
+    "prefill": [],
    "seed": null,
    "tokens": [
      {
--- a/integration-tests/models/snapshots/test_flash_llama_exl2/test_flash_llama_exl2.json
+++ b/integration-tests/models/snapshots/test_flash_llama_exl2/test_flash_llama_exl2.json
@ -3,18 +3,7 @@
    "best_of_sequences": null,
    "finish_reason": "length",
    "generated_tokens": 10,
-    "prefill": [
-      {
-        "id": 2323,
-        "logprob": null,
-        "text": "Test"
-      },
-      {
-        "id": 1715,
-        "logprob": -11.4375,
-        "text": " request"
-      }
-    ],
+    "prefill": [],
    "seed": null,
    "tokens": [
      {
--- a/integration-tests/models/snapshots/test_flash_llama_exl2/test_flash_llama_exl2_all_params.json
+++ b/integration-tests/models/snapshots/test_flash_llama_exl2/test_flash_llama_exl2_all_params.json
@ -3,18 +3,7 @@
    "best_of_sequences": null,
    "finish_reason": "length",
    "generated_tokens": 10,
-    "prefill": [
-      {
-        "id": 2323,
-        "logprob": null,
-        "text": "Test"
-      },
-      {
-        "id": 1715,
-        "logprob": -11.453125,
-        "text": " request"
-      }
-    ],
+    "prefill": [],
    "seed": 0,
    "tokens": [
      {
--- a/integration-tests/models/snapshots/test_flash_llama_exl2/test_flash_llama_exl2_load.json
+++ b/integration-tests/models/snapshots/test_flash_llama_exl2/test_flash_llama_exl2_load.json
@ -4,18 +4,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 2323,
-          "logprob": null,
-          "text": "Test"
-        },
-        {
-          "id": 1715,
-          "logprob": -11.453125,
-          "text": " request"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
@ -88,18 +77,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 2323,
-          "logprob": null,
-          "text": "Test"
-        },
-        {
-          "id": 1715,
-          "logprob": -11.40625,
-          "text": " request"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
@ -172,18 +150,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 2323,
-          "logprob": null,
-          "text": "Test"
-        },
-        {
-          "id": 1715,
-          "logprob": -11.421875,
-          "text": " request"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
@ -256,18 +223,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 2323,
-          "logprob": null,
-          "text": "Test"
-        },
-        {
-          "id": 1715,
-          "logprob": -11.4140625,
-          "text": " request"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
--- a/integration-tests/models/snapshots/test_flash_llama_fp8/test_flash_llama_fp8.json
+++ b/integration-tests/models/snapshots/test_flash_llama_fp8/test_flash_llama_fp8.json
@ -3,23 +3,7 @@
    "best_of_sequences": null,
    "finish_reason": "length",
    "generated_tokens": 10,
-    "prefill": [
-      {
-        "id": 128000,
-        "logprob": null,
-        "text": "<|begin_of_text|>"
-      },
-      {
-        "id": 2323,
-        "logprob": -9.421875,
-        "text": "Test"
-      },
-      {
-        "id": 1715,
-        "logprob": -10.546875,
-        "text": " request"
-      }
-    ],
+    "prefill": [],
    "seed": null,
    "tokens": [
      {
--- a/integration-tests/models/snapshots/test_flash_llama_fp8/test_flash_llama_fp8_all_params.json
+++ b/integration-tests/models/snapshots/test_flash_llama_fp8/test_flash_llama_fp8_all_params.json
@ -3,23 +3,7 @@
    "best_of_sequences": null,
    "finish_reason": "length",
    "generated_tokens": 10,
-    "prefill": [
-      {
-        "id": 128000,
-        "logprob": null,
-        "text": "<|begin_of_text|>"
-      },
-      {
-        "id": 2323,
-        "logprob": -9.5234375,
-        "text": "Test"
-      },
-      {
-        "id": 1715,
-        "logprob": -10.421875,
-        "text": " request"
-      }
-    ],
+    "prefill": [],
    "seed": 0,
    "tokens": [
      {
--- a/integration-tests/models/snapshots/test_flash_llama_fp8/test_flash_llama_fp8_load.json
+++ b/integration-tests/models/snapshots/test_flash_llama_fp8/test_flash_llama_fp8_load.json
@ -4,23 +4,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 128000,
-          "logprob": null,
-          "text": "<|begin_of_text|>"
-        },
-        {
-          "id": 2323,
-          "logprob": -9.5625,
-          "text": "Test"
-        },
-        {
-          "id": 1715,
-          "logprob": -10.375,
-          "text": " request"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
@ -93,23 +77,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 128000,
-          "logprob": null,
-          "text": "<|begin_of_text|>"
-        },
-        {
-          "id": 2323,
-          "logprob": -9.5625,
-          "text": "Test"
-        },
-        {
-          "id": 1715,
-          "logprob": -10.375,
-          "text": " request"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
@ -182,23 +150,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 128000,
-          "logprob": null,
-          "text": "<|begin_of_text|>"
-        },
-        {
-          "id": 2323,
-          "logprob": -9.5625,
-          "text": "Test"
-        },
-        {
-          "id": 1715,
-          "logprob": -10.375,
-          "text": " request"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
@ -271,23 +223,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 128000,
-          "logprob": null,
-          "text": "<|begin_of_text|>"
-        },
-        {
-          "id": 2323,
-          "logprob": -9.5625,
-          "text": "Test"
-        },
-        {
-          "id": 1715,
-          "logprob": -10.375,
-          "text": " request"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
--- a/integration-tests/models/snapshots/test_flash_llama_fp8_kv_cache/test_flash_llama_fp8_kv_cache.json
+++ b/integration-tests/models/snapshots/test_flash_llama_fp8_kv_cache/test_flash_llama_fp8_kv_cache.json
@ -3,38 +3,7 @@
    "best_of_sequences": null,
    "finish_reason": "length",
    "generated_tokens": 10,
-    "prefill": [
-      {
-        "id": 128000,
-        "logprob": null,
-        "text": "<|begin_of_text|>"
-      },
-      {
-        "id": 3923,
-        "logprob": -6.1875,
-        "text": "What"
-      },
-      {
-        "id": 374,
-        "logprob": -0.93359375,
-        "text": " is"
-      },
-      {
-        "id": 5655,
-        "logprob": -9.875,
-        "text": " deep"
-      },
-      {
-        "id": 6975,
-        "logprob": -1.1796875,
-        "text": " learning"
-      },
-      {
-        "id": 30,
-        "logprob": -1.75,
-        "text": "?"
-      }
-    ],
+    "prefill": [],
    "seed": null,
    "tokens": [
      {
--- a/integration-tests/models/snapshots/test_flash_llama_fp8_kv_cache/test_flash_llama_fp8_kv_cache_all_params.json
+++ b/integration-tests/models/snapshots/test_flash_llama_fp8_kv_cache/test_flash_llama_fp8_kv_cache_all_params.json
@ -3,33 +3,7 @@
    "best_of_sequences": null,
    "finish_reason": "length",
    "generated_tokens": 10,
-    "prefill": [
-      {
-        "id": 128000,
-        "logprob": null,
-        "text": "<|begin_of_text|>"
-      },
-      {
-        "id": 374,
-        "logprob": -18.0,
-        "text": " is"
-      },
-      {
-        "id": 5655,
-        "logprob": -11.8359375,
-        "text": " deep"
-      },
-      {
-        "id": 6975,
-        "logprob": -2.0703125,
-        "text": " learning"
-      },
-      {
-        "id": 30,
-        "logprob": -5.9765625,
-        "text": "?"
-      }
-    ],
+    "prefill": [],
    "seed": 0,
    "tokens": [
      {
--- a/integration-tests/models/snapshots/test_flash_llama_fp8_kv_cache/test_flash_llama_fp8_kv_cache_load.json
+++ b/integration-tests/models/snapshots/test_flash_llama_fp8_kv_cache/test_flash_llama_fp8_kv_cache_load.json
@ -4,38 +4,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 128000,
-          "logprob": null,
-          "text": "<|begin_of_text|>"
-        },
-        {
-          "id": 3923,
-          "logprob": -6.1875,
-          "text": "What"
-        },
-        {
-          "id": 374,
-          "logprob": -0.93359375,
-          "text": " is"
-        },
-        {
-          "id": 5655,
-          "logprob": -9.875,
-          "text": " deep"
-        },
-        {
-          "id": 6975,
-          "logprob": -1.1796875,
-          "text": " learning"
-        },
-        {
-          "id": 30,
-          "logprob": -1.75,
-          "text": "?"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
@ -108,38 +77,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 128000,
-          "logprob": null,
-          "text": "<|begin_of_text|>"
-        },
-        {
-          "id": 3923,
-          "logprob": -6.21875,
-          "text": "What"
-        },
-        {
-          "id": 374,
-          "logprob": -0.95703125,
-          "text": " is"
-        },
-        {
-          "id": 5655,
-          "logprob": -9.9375,
-          "text": " deep"
-        },
-        {
-          "id": 6975,
-          "logprob": -1.1328125,
-          "text": " learning"
-        },
-        {
-          "id": 30,
-          "logprob": -1.75,
-          "text": "?"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
@ -212,38 +150,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 128000,
-          "logprob": null,
-          "text": "<|begin_of_text|>"
-        },
-        {
-          "id": 3923,
-          "logprob": -6.21875,
-          "text": "What"
-        },
-        {
-          "id": 374,
-          "logprob": -0.95703125,
-          "text": " is"
-        },
-        {
-          "id": 5655,
-          "logprob": -9.9375,
-          "text": " deep"
-        },
-        {
-          "id": 6975,
-          "logprob": -1.1328125,
-          "text": " learning"
-        },
-        {
-          "id": 30,
-          "logprob": -1.75,
-          "text": "?"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
@ -316,38 +223,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 128000,
-          "logprob": null,
-          "text": "<|begin_of_text|>"
-        },
-        {
-          "id": 3923,
-          "logprob": -6.21875,
-          "text": "What"
-        },
-        {
-          "id": 374,
-          "logprob": -0.95703125,
-          "text": " is"
-        },
-        {
-          "id": 5655,
-          "logprob": -9.9375,
-          "text": " deep"
-        },
-        {
-          "id": 6975,
-          "logprob": -1.1328125,
-          "text": " learning"
-        },
-        {
-          "id": 30,
-          "logprob": -1.75,
-          "text": "?"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
--- a/integration-tests/models/snapshots/test_flash_llama_gptq/test_flash_llama_gptq.json
+++ b/integration-tests/models/snapshots/test_flash_llama_gptq/test_flash_llama_gptq.json
@ -3,18 +3,7 @@
    "best_of_sequences": null,
    "finish_reason": "length",
    "generated_tokens": 10,
-    "prefill": [
-      {
-        "id": 2323,
-        "logprob": null,
-        "text": "Test"
-      },
-      {
-        "id": 1715,
-        "logprob": -11.34375,
-        "text": " request"
-      }
-    ],
+    "prefill": [],
    "seed": null,
    "tokens": [
      {
--- a/integration-tests/models/snapshots/test_flash_llama_gptq/test_flash_llama_gptq_all_params.json
+++ b/integration-tests/models/snapshots/test_flash_llama_gptq/test_flash_llama_gptq_all_params.json
@ -3,18 +3,7 @@
    "best_of_sequences": null,
    "finish_reason": "length",
    "generated_tokens": 10,
-    "prefill": [
-      {
-        "id": 2323,
-        "logprob": null,
-        "text": "Test"
-      },
-      {
-        "id": 1715,
-        "logprob": -11.34375,
-        "text": " request"
-      }
-    ],
+    "prefill": [],
    "seed": 0,
    "tokens": [
      {
--- a/integration-tests/models/snapshots/test_flash_llama_gptq/test_flash_llama_gptq_load.json
+++ b/integration-tests/models/snapshots/test_flash_llama_gptq/test_flash_llama_gptq_load.json
@ -4,18 +4,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 2323,
-          "logprob": null,
-          "text": "Test"
-        },
-        {
-          "id": 1715,
-          "logprob": -11.34375,
-          "text": " request"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
@ -88,18 +77,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 2323,
-          "logprob": null,
-          "text": "Test"
-        },
-        {
-          "id": 1715,
-          "logprob": -11.34375,
-          "text": " request"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
@ -172,18 +150,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 2323,
-          "logprob": null,
-          "text": "Test"
-        },
-        {
-          "id": 1715,
-          "logprob": -11.34375,
-          "text": " request"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
@ -256,18 +223,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 2323,
-          "logprob": null,
-          "text": "Test"
-        },
-        {
-          "id": 1715,
-          "logprob": -11.34375,
-          "text": " request"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
--- a/integration-tests/models/snapshots/test_flash_llama_marlin/test_flash_llama_marlin.json
+++ b/integration-tests/models/snapshots/test_flash_llama_marlin/test_flash_llama_marlin.json
@ -3,23 +3,7 @@
    "best_of_sequences": null,
    "finish_reason": "length",
    "generated_tokens": 10,
-    "prefill": [
-      {
-        "id": 1,
-        "logprob": null,
-        "text": "<s>"
-      },
-      {
-        "id": 4321,
-        "logprob": -12.390625,
-        "text": "Test"
-      },
-      {
-        "id": 2009,
-        "logprob": -11.0625,
-        "text": "request"
-      }
-    ],
+    "prefill": [],
    "seed": null,
    "tokens": [
      {
--- a/integration-tests/models/snapshots/test_flash_llama_marlin/test_flash_llama_marlin_all_params.json
+++ b/integration-tests/models/snapshots/test_flash_llama_marlin/test_flash_llama_marlin_all_params.json
@ -3,23 +3,7 @@
    "best_of_sequences": null,
    "finish_reason": "length",
    "generated_tokens": 10,
-    "prefill": [
-      {
-        "id": 1,
-        "logprob": null,
-        "text": "<s>"
-      },
-      {
-        "id": 4321,
-        "logprob": -12.390625,
-        "text": "Test"
-      },
-      {
-        "id": 2009,
-        "logprob": -11.0625,
-        "text": "request"
-      }
-    ],
+    "prefill": [],
    "seed": 0,
    "tokens": [
      {
--- a/integration-tests/models/snapshots/test_flash_llama_marlin/test_flash_llama_marlin_load.json
+++ b/integration-tests/models/snapshots/test_flash_llama_marlin/test_flash_llama_marlin_load.json
@ -4,23 +4,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 1,
-          "logprob": null,
-          "text": "<s>"
-        },
-        {
-          "id": 4321,
-          "logprob": -12.390625,
-          "text": "Test"
-        },
-        {
-          "id": 2009,
-          "logprob": -11.0625,
-          "text": "request"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
@ -93,23 +77,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 1,
-          "logprob": null,
-          "text": "<s>"
-        },
-        {
-          "id": 4321,
-          "logprob": -12.390625,
-          "text": "Test"
-        },
-        {
-          "id": 2009,
-          "logprob": -11.0625,
-          "text": "request"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
@ -182,23 +150,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 1,
-          "logprob": null,
-          "text": "<s>"
-        },
-        {
-          "id": 4321,
-          "logprob": -12.390625,
-          "text": "Test"
-        },
-        {
-          "id": 2009,
-          "logprob": -11.0625,
-          "text": "request"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
@ -271,23 +223,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 1,
-          "logprob": null,
-          "text": "<s>"
-        },
-        {
-          "id": 4321,
-          "logprob": -12.390625,
-          "text": "Test"
-        },
-        {
-          "id": 2009,
-          "logprob": -11.0625,
-          "text": "request"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
--- a/integration-tests/models/snapshots/test_flash_llama_marlin_24/test_flash_llama_marlin.json
+++ b/integration-tests/models/snapshots/test_flash_llama_marlin_24/test_flash_llama_marlin.json
@ -3,23 +3,7 @@
    "best_of_sequences": null,
    "finish_reason": "length",
    "generated_tokens": 10,
-    "prefill": [
-      {
-        "id": 1,
-        "logprob": null,
-        "text": "<s>"
-      },
-      {
-        "id": 4321,
-        "logprob": -9.0859375,
-        "text": "Test"
-      },
-      {
-        "id": 2009,
-        "logprob": -16.359375,
-        "text": "request"
-      }
-    ],
+    "prefill": [],
    "seed": null,
    "tokens": [
      {
--- a/integration-tests/models/snapshots/test_flash_llama_marlin_24/test_flash_llama_marlin24_all_params.json
+++ b/integration-tests/models/snapshots/test_flash_llama_marlin_24/test_flash_llama_marlin24_all_params.json
@ -3,23 +3,7 @@
    "best_of_sequences": null,
    "finish_reason": "length",
    "generated_tokens": 10,
-    "prefill": [
-      {
-        "id": 1,
-        "logprob": null,
-        "text": "<s>"
-      },
-      {
-        "id": 4321,
-        "logprob": -9.0859375,
-        "text": "Test"
-      },
-      {
-        "id": 2009,
-        "logprob": -16.359375,
-        "text": "request"
-      }
-    ],
+    "prefill": [],
    "seed": 0,
    "tokens": [
      {
--- a/integration-tests/models/snapshots/test_flash_llama_marlin_24/test_flash_llama_marlin24_load.json
+++ b/integration-tests/models/snapshots/test_flash_llama_marlin_24/test_flash_llama_marlin24_load.json
@ -4,23 +4,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 1,
-          "logprob": null,
-          "text": "<s>"
-        },
-        {
-          "id": 4321,
-          "logprob": -9.0859375,
-          "text": "Test"
-        },
-        {
-          "id": 2009,
-          "logprob": -16.359375,
-          "text": "request"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
@ -93,23 +77,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 1,
-          "logprob": null,
-          "text": "<s>"
-        },
-        {
-          "id": 4321,
-          "logprob": -9.0859375,
-          "text": "Test"
-        },
-        {
-          "id": 2009,
-          "logprob": -16.359375,
-          "text": "request"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
@ -182,23 +150,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 1,
-          "logprob": null,
-          "text": "<s>"
-        },
-        {
-          "id": 4321,
-          "logprob": -9.0859375,
-          "text": "Test"
-        },
-        {
-          "id": 2009,
-          "logprob": -16.359375,
-          "text": "request"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
@ -271,23 +223,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 1,
-          "logprob": null,
-          "text": "<s>"
-        },
-        {
-          "id": 4321,
-          "logprob": -9.0859375,
-          "text": "Test"
-        },
-        {
-          "id": 2009,
-          "logprob": -16.359375,
-          "text": "request"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
--- a/integration-tests/models/snapshots/test_flash_medusa/test_flash_medusa_all_params.json
+++ b/integration-tests/models/snapshots/test_flash_medusa/test_flash_medusa_all_params.json
@ -3,33 +3,7 @@
    "best_of_sequences": null,
    "finish_reason": "length",
    "generated_tokens": 10,
-    "prefill": [
-      {
-        "id": 1,
-        "logprob": null,
-        "text": "<s>"
-      },
-      {
-        "id": 338,
-        "logprob": -10.0078125,
-        "text": "is"
-      },
-      {
-        "id": 21784,
-        "logprob": -15.515625,
-        "text": "Deep"
-      },
-      {
-        "id": 29257,
-        "logprob": -2.8847656,
-        "text": "Learning"
-      },
-      {
-        "id": 29973,
-        "logprob": -4.140625,
-        "text": "?"
-      }
-    ],
+    "prefill": [],
    "seed": 0,
    "tokens": [
      {
--- a/integration-tests/models/snapshots/test_flash_medusa/test_flash_medusa_load.json
+++ b/integration-tests/models/snapshots/test_flash_medusa/test_flash_medusa_load.json
@ -4,38 +4,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 1,
-          "logprob": null,
-          "text": "<s>"
-        },
-        {
-          "id": 1724,
-          "logprob": -10.734375,
-          "text": "What"
-        },
-        {
-          "id": 338,
-          "logprob": -1.5488281,
-          "text": "is"
-        },
-        {
-          "id": 21784,
-          "logprob": -9.2890625,
-          "text": "Deep"
-        },
-        {
-          "id": 29257,
-          "logprob": -1.2753906,
-          "text": "Learning"
-        },
-        {
-          "id": 29973,
-          "logprob": -0.48046875,
-          "text": "?"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
@ -107,38 +76,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 1,
-          "logprob": null,
-          "text": "<s>"
-        },
-        {
-          "id": 1724,
-          "logprob": -10.734375,
-          "text": "What"
-        },
-        {
-          "id": 338,
-          "logprob": -1.5488281,
-          "text": "is"
-        },
-        {
-          "id": 21784,
-          "logprob": -9.2890625,
-          "text": "Deep"
-        },
-        {
-          "id": 29257,
-          "logprob": -1.2724609,
-          "text": "Learning"
-        },
-        {
-          "id": 29973,
-          "logprob": -0.47729492,
-          "text": "?"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
@ -210,38 +148,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 1,
-          "logprob": null,
-          "text": "<s>"
-        },
-        {
-          "id": 1724,
-          "logprob": -10.734375,
-          "text": "What"
-        },
-        {
-          "id": 338,
-          "logprob": -1.5488281,
-          "text": "is"
-        },
-        {
-          "id": 21784,
-          "logprob": -9.2890625,
-          "text": "Deep"
-        },
-        {
-          "id": 29257,
-          "logprob": -1.2724609,
-          "text": "Learning"
-        },
-        {
-          "id": 29973,
-          "logprob": -0.47729492,
-          "text": "?"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
@ -313,38 +220,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 1,
-          "logprob": null,
-          "text": "<s>"
-        },
-        {
-          "id": 1724,
-          "logprob": -10.734375,
-          "text": "What"
-        },
-        {
-          "id": 338,
-          "logprob": -1.5488281,
-          "text": "is"
-        },
-        {
-          "id": 21784,
-          "logprob": -9.2890625,
-          "text": "Deep"
-        },
-        {
-          "id": 29257,
-          "logprob": -1.2724609,
-          "text": "Learning"
-        },
-        {
-          "id": 29973,
-          "logprob": -0.47729492,
-          "text": "?"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
--- a/integration-tests/models/snapshots/test_flash_medusa/test_flash_medusa_simple.json
+++ b/integration-tests/models/snapshots/test_flash_medusa/test_flash_medusa_simple.json
@ -3,38 +3,7 @@
    "best_of_sequences": null,
    "finish_reason": "length",
    "generated_tokens": 10,
-    "prefill": [
-      {
-        "id": 1,
-        "logprob": null,
-        "text": "<s>"
-      },
-      {
-        "id": 1724,
-        "logprob": -10.734375,
-        "text": "What"
-      },
-      {
-        "id": 338,
-        "logprob": -1.5488281,
-        "text": "is"
-      },
-      {
-        "id": 21784,
-        "logprob": -9.2890625,
-        "text": "Deep"
-      },
-      {
-        "id": 29257,
-        "logprob": -1.2753906,
-        "text": "Learning"
-      },
-      {
-        "id": 29973,
-        "logprob": -0.48046875,
-        "text": "?"
-      }
-    ],
+    "prefill": [],
    "seed": null,
    "tokens": [
      {
--- a/integration-tests/models/snapshots/test_flash_mistral/test_flash_mistral.json
+++ b/integration-tests/models/snapshots/test_flash_mistral/test_flash_mistral.json
@ -3,23 +3,7 @@
    "best_of_sequences": null,
    "finish_reason": "length",
    "generated_tokens": 10,
-    "prefill": [
-      {
-        "id": 1,
-        "logprob": null,
-        "text": "<s>"
-      },
-      {
-        "id": 3735,
-        "logprob": -12.9140625,
-        "text": "Test"
-      },
-      {
-        "id": 2159,
-        "logprob": -10.7578125,
-        "text": "request"
-      }
-    ],
+    "prefill": [],
    "seed": null,
    "tokens": [
      {
--- a/integration-tests/models/snapshots/test_flash_mistral/test_flash_mistral_all_params.json
+++ b/integration-tests/models/snapshots/test_flash_mistral/test_flash_mistral_all_params.json
@ -3,23 +3,7 @@
    "best_of_sequences": null,
    "finish_reason": "length",
    "generated_tokens": 10,
-    "prefill": [
-      {
-        "id": 1,
-        "logprob": null,
-        "text": "<s>"
-      },
-      {
-        "id": 3735,
-        "logprob": -12.9140625,
-        "text": "Test"
-      },
-      {
-        "id": 2159,
-        "logprob": -10.7578125,
-        "text": "request"
-      }
-    ],
+    "prefill": [],
    "seed": 0,
    "tokens": [
      {
--- a/integration-tests/models/snapshots/test_flash_mistral/test_flash_mistral_load.json
+++ b/integration-tests/models/snapshots/test_flash_mistral/test_flash_mistral_load.json
@ -4,23 +4,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 1,
-          "logprob": null,
-          "text": "<s>"
-        },
-        {
-          "id": 3735,
-          "logprob": -12.9140625,
-          "text": "Test"
-        },
-        {
-          "id": 2159,
-          "logprob": -10.7578125,
-          "text": "request"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
@ -93,23 +77,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 1,
-          "logprob": null,
-          "text": "<s>"
-        },
-        {
-          "id": 3735,
-          "logprob": -12.9140625,
-          "text": "Test"
-        },
-        {
-          "id": 2159,
-          "logprob": -10.7578125,
-          "text": "request"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
@ -182,23 +150,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 1,
-          "logprob": null,
-          "text": "<s>"
-        },
-        {
-          "id": 3735,
-          "logprob": -12.9140625,
-          "text": "Test"
-        },
-        {
-          "id": 2159,
-          "logprob": -10.7578125,
-          "text": "request"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
@ -271,23 +223,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 1,
-          "logprob": null,
-          "text": "<s>"
-        },
-        {
-          "id": 3735,
-          "logprob": -12.9140625,
-          "text": "Test"
-        },
-        {
-          "id": 2159,
-          "logprob": -10.7578125,
-          "text": "request"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
--- a/integration-tests/models/snapshots/test_flash_mixtral/test_flash_mixtral.json
+++ b/integration-tests/models/snapshots/test_flash_mixtral/test_flash_mixtral.json
@ -3,48 +3,7 @@
    "best_of_sequences": null,
    "finish_reason": "length",
    "generated_tokens": 10,
-    "prefill": [
-      {
-        "id": 1,
-        "logprob": null,
-        "text": "<s>"
-      },
-      {
-        "id": 1824,
-        "logprob": -6.1445312,
-        "text": "What"
-      },
-      {
-        "id": 349,
-        "logprob": -1.4648438,
-        "text": "is"
-      },
-      {
-        "id": 21135,
-        "logprob": -13.6875,
-        "text": "gradient"
-      },
-      {
-        "id": 24871,
-        "logprob": -1.6005859,
-        "text": "descent"
-      },
-      {
-        "id": 28804,
-        "logprob": -0.39526367,
-        "text": "?"
-      },
-      {
-        "id": 13,
-        "logprob": -0.640625,
-        "text": "\n"
-      },
-      {
-        "id": 13,
-        "logprob": -0.18774414,
-        "text": "\n"
-      }
-    ],
+    "prefill": [],
    "seed": null,
    "tokens": [
      {
--- a/integration-tests/models/snapshots/test_flash_mixtral/test_flash_mixtral_all_params.json
+++ b/integration-tests/models/snapshots/test_flash_mixtral/test_flash_mixtral_all_params.json
@ -3,33 +3,7 @@
    "best_of_sequences": null,
    "finish_reason": "length",
    "generated_tokens": 10,
-    "prefill": [
-      {
-        "id": 1,
-        "logprob": null,
-        "text": "<s>"
-      },
-      {
-        "id": 24871,
-        "logprob": -17.234375,
-        "text": "descent"
-      },
-      {
-        "id": 28804,
-        "logprob": -7.4375,
-        "text": "?"
-      },
-      {
-        "id": 13,
-        "logprob": -0.8046875,
-        "text": "\n"
-      },
-      {
-        "id": 13,
-        "logprob": -0.33032227,
-        "text": "\n"
-      }
-    ],
+    "prefill": [],
    "seed": 0,
    "tokens": [
      {
--- a/integration-tests/models/snapshots/test_flash_mixtral/test_flash_mixtral_load.json
+++ b/integration-tests/models/snapshots/test_flash_mixtral/test_flash_mixtral_load.json
@ -4,48 +4,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 1,
-          "logprob": null,
-          "text": "<s>"
-        },
-        {
-          "id": 1824,
-          "logprob": -6.1445312,
-          "text": "What"
-        },
-        {
-          "id": 349,
-          "logprob": -1.4648438,
-          "text": "is"
-        },
-        {
-          "id": 21135,
-          "logprob": -13.6875,
-          "text": "gradient"
-        },
-        {
-          "id": 24871,
-          "logprob": -1.6005859,
-          "text": "descent"
-        },
-        {
-          "id": 28804,
-          "logprob": -0.39526367,
-          "text": "?"
-        },
-        {
-          "id": 13,
-          "logprob": -0.640625,
-          "text": "\n"
-        },
-        {
-          "id": 13,
-          "logprob": -0.18774414,
-          "text": "\n"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
@ -118,48 +77,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 1,
-          "logprob": null,
-          "text": "<s>"
-        },
-        {
-          "id": 1824,
-          "logprob": -6.1445312,
-          "text": "What"
-        },
-        {
-          "id": 349,
-          "logprob": -1.4677734,
-          "text": "is"
-        },
-        {
-          "id": 21135,
-          "logprob": -13.6875,
-          "text": "gradient"
-        },
-        {
-          "id": 24871,
-          "logprob": -1.6015625,
-          "text": "descent"
-        },
-        {
-          "id": 28804,
-          "logprob": -0.39453125,
-          "text": "?"
-        },
-        {
-          "id": 13,
-          "logprob": -0.6435547,
-          "text": "\n"
-        },
-        {
-          "id": 13,
-          "logprob": -0.18713379,
-          "text": "\n"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
@ -232,48 +150,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 1,
-          "logprob": null,
-          "text": "<s>"
-        },
-        {
-          "id": 1824,
-          "logprob": -6.140625,
-          "text": "What"
-        },
-        {
-          "id": 349,
-          "logprob": -1.4658203,
-          "text": "is"
-        },
-        {
-          "id": 21135,
-          "logprob": -13.6796875,
-          "text": "gradient"
-        },
-        {
-          "id": 24871,
-          "logprob": -1.5898438,
-          "text": "descent"
-        },
-        {
-          "id": 28804,
-          "logprob": -0.3955078,
-          "text": "?"
-        },
-        {
-          "id": 13,
-          "logprob": -0.64501953,
-          "text": "\n"
-        },
-        {
-          "id": 13,
-          "logprob": -0.18493652,
-          "text": "\n"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
@ -346,48 +223,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 1,
-          "logprob": null,
-          "text": "<s>"
-        },
-        {
-          "id": 1824,
-          "logprob": -6.1328125,
-          "text": "What"
-        },
-        {
-          "id": 349,
-          "logprob": -1.4658203,
-          "text": "is"
-        },
-        {
-          "id": 21135,
-          "logprob": -13.6796875,
-          "text": "gradient"
-        },
-        {
-          "id": 24871,
-          "logprob": -1.5947266,
-          "text": "descent"
-        },
-        {
-          "id": 28804,
-          "logprob": -0.39648438,
-          "text": "?"
-        },
-        {
-          "id": 13,
-          "logprob": -0.6464844,
-          "text": "\n"
-        },
-        {
-          "id": 13,
-          "logprob": -0.18688965,
-          "text": "\n"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
--- a/integration-tests/models/snapshots/test_flash_mixtral_awq/test_flash_mixtral_awq.json
+++ b/integration-tests/models/snapshots/test_flash_mixtral_awq/test_flash_mixtral_awq.json
@ -3,38 +3,7 @@
    "best_of_sequences": null,
    "finish_reason": "length",
    "generated_tokens": 10,
-    "prefill": [
-      {
-        "id": 1,
-        "logprob": null,
-        "text": "<s>"
-      },
-      {
-        "id": 1824,
-        "logprob": -12.296875,
-        "text": "What"
-      },
-      {
-        "id": 349,
-        "logprob": -0.97216797,
-        "text": "is"
-      },
-      {
-        "id": 3534,
-        "logprob": -10.1796875,
-        "text": "deep"
-      },
-      {
-        "id": 5168,
-        "logprob": -0.9658203,
-        "text": "learning"
-      },
-      {
-        "id": 28804,
-        "logprob": -0.44384766,
-        "text": "?"
-      }
-    ],
+    "prefill": [],
    "seed": null,
    "tokens": [
      {
--- a/integration-tests/models/snapshots/test_flash_mixtral_awq/test_flash_mixtral_awq_all_params.json
+++ b/integration-tests/models/snapshots/test_flash_mixtral_awq/test_flash_mixtral_awq_all_params.json
@ -3,33 +3,7 @@
    "best_of_sequences": null,
    "finish_reason": "length",
    "generated_tokens": 10,
-    "prefill": [
-      {
-        "id": 1,
-        "logprob": null,
-        "text": "<s>"
-      },
-      {
-        "id": 349,
-        "logprob": -13.921875,
-        "text": "is"
-      },
-      {
-        "id": 3534,
-        "logprob": -11.2265625,
-        "text": "deep"
-      },
-      {
-        "id": 5168,
-        "logprob": -2.3886719,
-        "text": "learning"
-      },
-      {
-        "id": 28804,
-        "logprob": -4.7109375,
-        "text": "?"
-      }
-    ],
+    "prefill": [],
    "seed": 0,
    "tokens": [
      {
--- a/integration-tests/models/snapshots/test_flash_mixtral_awq/test_flash_mixtral_awq_load.json
+++ b/integration-tests/models/snapshots/test_flash_mixtral_awq/test_flash_mixtral_awq_load.json
@ -4,38 +4,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 1,
-          "logprob": null,
-          "text": "<s>"
-        },
-        {
-          "id": 1824,
-          "logprob": -12.296875,
-          "text": "What"
-        },
-        {
-          "id": 349,
-          "logprob": -0.97216797,
-          "text": "is"
-        },
-        {
-          "id": 3534,
-          "logprob": -10.1796875,
-          "text": "deep"
-        },
-        {
-          "id": 5168,
-          "logprob": -0.9658203,
-          "text": "learning"
-        },
-        {
-          "id": 28804,
-          "logprob": -0.44384766,
-          "text": "?"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
@ -108,38 +77,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 1,
-          "logprob": null,
-          "text": "<s>"
-        },
-        {
-          "id": 1824,
-          "logprob": -12.34375,
-          "text": "What"
-        },
-        {
-          "id": 349,
-          "logprob": -0.96728516,
-          "text": "is"
-        },
-        {
-          "id": 3534,
-          "logprob": -10.1796875,
-          "text": "deep"
-        },
-        {
-          "id": 5168,
-          "logprob": -0.97265625,
-          "text": "learning"
-        },
-        {
-          "id": 28804,
-          "logprob": -0.44189453,
-          "text": "?"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
@ -212,38 +150,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 1,
-          "logprob": null,
-          "text": "<s>"
-        },
-        {
-          "id": 1824,
-          "logprob": -12.34375,
-          "text": "What"
-        },
-        {
-          "id": 349,
-          "logprob": -0.96728516,
-          "text": "is"
-        },
-        {
-          "id": 3534,
-          "logprob": -10.1796875,
-          "text": "deep"
-        },
-        {
-          "id": 5168,
-          "logprob": -0.97265625,
-          "text": "learning"
-        },
-        {
-          "id": 28804,
-          "logprob": -0.44189453,
-          "text": "?"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
@ -316,38 +223,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 1,
-          "logprob": null,
-          "text": "<s>"
-        },
-        {
-          "id": 1824,
-          "logprob": -12.34375,
-          "text": "What"
-        },
-        {
-          "id": 349,
-          "logprob": -0.96728516,
-          "text": "is"
-        },
-        {
-          "id": 3534,
-          "logprob": -10.1796875,
-          "text": "deep"
-        },
-        {
-          "id": 5168,
-          "logprob": -0.97265625,
-          "text": "learning"
-        },
-        {
-          "id": 28804,
-          "logprob": -0.44189453,
-          "text": "?"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
--- a/integration-tests/models/snapshots/test_flash_mixtral_gptq/test_flash_mixtral_gptq.json
+++ b/integration-tests/models/snapshots/test_flash_mixtral_gptq/test_flash_mixtral_gptq.json
@ -3,38 +3,7 @@
    "best_of_sequences": null,
    "finish_reason": "length",
    "generated_tokens": 10,
-    "prefill": [
-      {
-        "id": 1,
-        "logprob": null,
-        "text": "<s>"
-      },
-      {
-        "id": 1824,
-        "logprob": -9.2890625,
-        "text": "What"
-      },
-      {
-        "id": 349,
-        "logprob": -1.1503906,
-        "text": "is"
-      },
-      {
-        "id": 3534,
-        "logprob": -9.5859375,
-        "text": "deep"
-      },
-      {
-        "id": 5168,
-        "logprob": -1.3945312,
-        "text": "learning"
-      },
-      {
-        "id": 28804,
-        "logprob": -0.4555664,
-        "text": "?"
-      }
-    ],
+    "prefill": [],
    "seed": null,
    "tokens": [
      {
--- a/integration-tests/models/snapshots/test_flash_mixtral_gptq/test_flash_mixtral_gptq_all_params.json
+++ b/integration-tests/models/snapshots/test_flash_mixtral_gptq/test_flash_mixtral_gptq_all_params.json
@ -3,33 +3,7 @@
    "best_of_sequences": null,
    "finish_reason": "length",
    "generated_tokens": 10,
-    "prefill": [
-      {
-        "id": 1,
-        "logprob": null,
-        "text": "<s>"
-      },
-      {
-        "id": 349,
-        "logprob": -12.0546875,
-        "text": "is"
-      },
-      {
-        "id": 3534,
-        "logprob": -10.53125,
-        "text": "deep"
-      },
-      {
-        "id": 5168,
-        "logprob": -2.71875,
-        "text": "learning"
-      },
-      {
-        "id": 28804,
-        "logprob": -5.0078125,
-        "text": "?"
-      }
-    ],
+    "prefill": [],
    "seed": 0,
    "tokens": [
      {
--- a/integration-tests/models/snapshots/test_flash_mixtral_gptq/test_flash_mixtral_gptq_load.json
+++ b/integration-tests/models/snapshots/test_flash_mixtral_gptq/test_flash_mixtral_gptq_load.json
@ -4,38 +4,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 1,
-          "logprob": null,
-          "text": "<s>"
-        },
-        {
-          "id": 1824,
-          "logprob": -9.2890625,
-          "text": "What"
-        },
-        {
-          "id": 349,
-          "logprob": -1.1503906,
-          "text": "is"
-        },
-        {
-          "id": 3534,
-          "logprob": -9.5859375,
-          "text": "deep"
-        },
-        {
-          "id": 5168,
-          "logprob": -1.3945312,
-          "text": "learning"
-        },
-        {
-          "id": 28804,
-          "logprob": -0.4555664,
-          "text": "?"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
@ -108,38 +77,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 1,
-          "logprob": null,
-          "text": "<s>"
-        },
-        {
-          "id": 1824,
-          "logprob": -9.2890625,
-          "text": "What"
-        },
-        {
-          "id": 349,
-          "logprob": -1.1425781,
-          "text": "is"
-        },
-        {
-          "id": 3534,
-          "logprob": -9.59375,
-          "text": "deep"
-        },
-        {
-          "id": 5168,
-          "logprob": -1.390625,
-          "text": "learning"
-        },
-        {
-          "id": 28804,
-          "logprob": -0.45532227,
-          "text": "?"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
@ -212,38 +150,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 1,
-          "logprob": null,
-          "text": "<s>"
-        },
-        {
-          "id": 1824,
-          "logprob": -9.2890625,
-          "text": "What"
-        },
-        {
-          "id": 349,
-          "logprob": -1.1425781,
-          "text": "is"
-        },
-        {
-          "id": 3534,
-          "logprob": -9.59375,
-          "text": "deep"
-        },
-        {
-          "id": 5168,
-          "logprob": -1.390625,
-          "text": "learning"
-        },
-        {
-          "id": 28804,
-          "logprob": -0.45532227,
-          "text": "?"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
@ -316,38 +223,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 1,
-          "logprob": null,
-          "text": "<s>"
-        },
-        {
-          "id": 1824,
-          "logprob": -9.2890625,
-          "text": "What"
-        },
-        {
-          "id": 349,
-          "logprob": -1.1425781,
-          "text": "is"
-        },
-        {
-          "id": 3534,
-          "logprob": -9.59375,
-          "text": "deep"
-        },
-        {
-          "id": 5168,
-          "logprob": -1.390625,
-          "text": "learning"
-        },
-        {
-          "id": 28804,
-          "logprob": -0.45532227,
-          "text": "?"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
--- a/integration-tests/models/snapshots/test_flash_neox/test_flash_neox.json
+++ b/integration-tests/models/snapshots/test_flash_neox/test_flash_neox.json
@ -3,48 +3,7 @@
    "best_of_sequences": null,
    "finish_reason": "length",
    "generated_tokens": 10,
-    "prefill": [
-      {
-        "id": 50278,
-        "logprob": null,
-        "text": "<|USER|>"
-      },
-      {
-        "id": 1276,
-        "logprob": -4.5546875,
-        "text": "What"
-      },
-      {
-        "id": 434,
-        "logprob": -4.234375,
-        "text": "'s"
-      },
-      {
-        "id": 634,
-        "logprob": -5.1054688,
-        "text": " your"
-      },
-      {
-        "id": 12315,
-        "logprob": -9.953125,
-        "text": " mood"
-      },
-      {
-        "id": 3063,
-        "logprob": -4.0820312,
-        "text": " today"
-      },
-      {
-        "id": 32,
-        "logprob": -0.15148926,
-        "text": "?"
-      },
-      {
-        "id": 50279,
-        "logprob": -0.27026367,
-        "text": "<|ASSISTANT|>"
-      }
-    ],
+    "prefill": [],
    "seed": null,
    "tokens": [
      {
--- a/integration-tests/models/snapshots/test_flash_neox/test_flash_neox_load.json
+++ b/integration-tests/models/snapshots/test_flash_neox/test_flash_neox_load.json
@ -4,48 +4,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 50278,
-          "logprob": null,
-          "text": "<|USER|>"
-        },
-        {
-          "id": 1276,
-          "logprob": -4.5546875,
-          "text": "What"
-        },
-        {
-          "id": 434,
-          "logprob": -4.234375,
-          "text": "'s"
-        },
-        {
-          "id": 634,
-          "logprob": -5.21875,
-          "text": " your"
-        },
-        {
-          "id": 12315,
-          "logprob": -9.9375,
-          "text": " mood"
-        },
-        {
-          "id": 3063,
-          "logprob": -4.1015625,
-          "text": " today"
-        },
-        {
-          "id": 32,
-          "logprob": -0.15319824,
-          "text": "?"
-        },
-        {
-          "id": 50279,
-          "logprob": -0.2614746,
-          "text": "<|ASSISTANT|>"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
@ -117,48 +76,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 50278,
-          "logprob": null,
-          "text": "<|USER|>"
-        },
-        {
-          "id": 1276,
-          "logprob": -4.5546875,
-          "text": "What"
-        },
-        {
-          "id": 434,
-          "logprob": -4.234375,
-          "text": "'s"
-        },
-        {
-          "id": 634,
-          "logprob": -5.1054688,
-          "text": " your"
-        },
-        {
-          "id": 12315,
-          "logprob": -9.953125,
-          "text": " mood"
-        },
-        {
-          "id": 3063,
-          "logprob": -4.0820312,
-          "text": " today"
-        },
-        {
-          "id": 32,
-          "logprob": -0.15148926,
-          "text": "?"
-        },
-        {
-          "id": 50279,
-          "logprob": -0.27026367,
-          "text": "<|ASSISTANT|>"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
@ -230,48 +148,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 50278,
-          "logprob": null,
-          "text": "<|USER|>"
-        },
-        {
-          "id": 1276,
-          "logprob": -4.5546875,
-          "text": "What"
-        },
-        {
-          "id": 434,
-          "logprob": -4.234375,
-          "text": "'s"
-        },
-        {
-          "id": 634,
-          "logprob": -5.21875,
-          "text": " your"
-        },
-        {
-          "id": 12315,
-          "logprob": -9.9375,
-          "text": " mood"
-        },
-        {
-          "id": 3063,
-          "logprob": -4.1015625,
-          "text": " today"
-        },
-        {
-          "id": 32,
-          "logprob": -0.15319824,
-          "text": "?"
-        },
-        {
-          "id": 50279,
-          "logprob": -0.2614746,
-          "text": "<|ASSISTANT|>"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
@ -343,48 +220,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 50278,
-          "logprob": null,
-          "text": "<|USER|>"
-        },
-        {
-          "id": 1276,
-          "logprob": -4.5546875,
-          "text": "What"
-        },
-        {
-          "id": 434,
-          "logprob": -4.234375,
-          "text": "'s"
-        },
-        {
-          "id": 634,
-          "logprob": -5.21875,
-          "text": " your"
-        },
-        {
-          "id": 12315,
-          "logprob": -9.9375,
-          "text": " mood"
-        },
-        {
-          "id": 3063,
-          "logprob": -4.1015625,
-          "text": " today"
-        },
-        {
-          "id": 32,
-          "logprob": -0.15319824,
-          "text": "?"
-        },
-        {
-          "id": 50279,
-          "logprob": -0.2614746,
-          "text": "<|ASSISTANT|>"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
--- a/integration-tests/models/snapshots/test_flash_neox_sharded/test_flash_neox.json
+++ b/integration-tests/models/snapshots/test_flash_neox_sharded/test_flash_neox.json
@ -3,98 +3,7 @@
    "best_of_sequences": null,
    "finish_reason": "length",
    "generated_tokens": 10,
-    "prefill": [
-      {
-        "id": 50278,
-        "logprob": null,
-        "text": "<|prompter|>"
-      },
-      {
-        "id": 1276,
-        "logprob": -8.03125,
-        "text": "What"
-      },
-      {
-        "id": 310,
-        "logprob": -5.421875,
-        "text": " is"
-      },
-      {
-        "id": 247,
-        "logprob": -2.1601562,
-        "text": " a"
-      },
-      {
-        "id": 1167,
-        "logprob": -5.4609375,
-        "text": " mem"
-      },
-      {
-        "id": 70,
-        "logprob": -0.005657196,
-        "text": "e"
-      },
-      {
-        "id": 13,
-        "logprob": -7.28125,
-        "text": ","
-      },
-      {
-        "id": 285,
-        "logprob": -0.2980957,
-        "text": " and"
-      },
-      {
-        "id": 752,
-        "logprob": -2.1679688,
-        "text": " what"
-      },
-      {
-        "id": 434,
-        "logprob": -5.6210938,
-        "text": "'s"
-      },
-      {
-        "id": 253,
-        "logprob": -0.81103516,
-        "text": " the"
-      },
-      {
-        "id": 2892,
-        "logprob": -6.6640625,
-        "text": " history"
-      },
-      {
-        "id": 3212,
-        "logprob": -2.265625,
-        "text": " behind"
-      },
-      {
-        "id": 436,
-        "logprob": -11.5078125,
-        "text": " this"
-      },
-      {
-        "id": 3159,
-        "logprob": -2.1582031,
-        "text": " word"
-      },
-      {
-        "id": 32,
-        "logprob": -0.008720398,
-        "text": "?"
-      },
-      {
-        "id": 0,
-        "logprob": -2.4726562,
-        "text": "<|endoftext|>"
-      },
-      {
-        "id": 50281,
-        "logprob": -18.265625,
-        "text": "<|assistant|>"
-      }
-    ],
+    "prefill": [],
    "seed": null,
    "tokens": [
      {
--- a/integration-tests/models/snapshots/test_flash_neox_sharded/test_flash_neox_load.json
+++ b/integration-tests/models/snapshots/test_flash_neox_sharded/test_flash_neox_load.json
@ -4,98 +4,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 50278,
-          "logprob": null,
-          "text": "<|prompter|>"
-        },
-        {
-          "id": 1276,
-          "logprob": -8.03125,
-          "text": "What"
-        },
-        {
-          "id": 310,
-          "logprob": -5.421875,
-          "text": " is"
-        },
-        {
-          "id": 247,
-          "logprob": -2.1601562,
-          "text": " a"
-        },
-        {
-          "id": 1167,
-          "logprob": -5.4609375,
-          "text": " mem"
-        },
-        {
-          "id": 70,
-          "logprob": -0.005657196,
-          "text": "e"
-        },
-        {
-          "id": 13,
-          "logprob": -7.28125,
-          "text": ","
-        },
-        {
-          "id": 285,
-          "logprob": -0.2980957,
-          "text": " and"
-        },
-        {
-          "id": 752,
-          "logprob": -2.1679688,
-          "text": " what"
-        },
-        {
-          "id": 434,
-          "logprob": -5.6210938,
-          "text": "'s"
-        },
-        {
-          "id": 253,
-          "logprob": -0.81103516,
-          "text": " the"
-        },
-        {
-          "id": 2892,
-          "logprob": -6.6640625,
-          "text": " history"
-        },
-        {
-          "id": 3212,
-          "logprob": -2.265625,
-          "text": " behind"
-        },
-        {
-          "id": 436,
-          "logprob": -11.5078125,
-          "text": " this"
-        },
-        {
-          "id": 3159,
-          "logprob": -2.1582031,
-          "text": " word"
-        },
-        {
-          "id": 32,
-          "logprob": -0.008720398,
-          "text": "?"
-        },
-        {
-          "id": 0,
-          "logprob": -2.4726562,
-          "text": "<|endoftext|>"
-        },
-        {
-          "id": 50281,
-          "logprob": -18.265625,
-          "text": "<|assistant|>"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
@ -167,98 +76,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 50278,
-          "logprob": null,
-          "text": "<|prompter|>"
-        },
-        {
-          "id": 1276,
-          "logprob": -8.03125,
-          "text": "What"
-        },
-        {
-          "id": 310,
-          "logprob": -5.421875,
-          "text": " is"
-        },
-        {
-          "id": 247,
-          "logprob": -2.1601562,
-          "text": " a"
-        },
-        {
-          "id": 1167,
-          "logprob": -5.4609375,
-          "text": " mem"
-        },
-        {
-          "id": 70,
-          "logprob": -0.005657196,
-          "text": "e"
-        },
-        {
-          "id": 13,
-          "logprob": -7.28125,
-          "text": ","
-        },
-        {
-          "id": 285,
-          "logprob": -0.2980957,
-          "text": " and"
-        },
-        {
-          "id": 752,
-          "logprob": -2.1679688,
-          "text": " what"
-        },
-        {
-          "id": 434,
-          "logprob": -5.6210938,
-          "text": "'s"
-        },
-        {
-          "id": 253,
-          "logprob": -0.81103516,
-          "text": " the"
-        },
-        {
-          "id": 2892,
-          "logprob": -6.6640625,
-          "text": " history"
-        },
-        {
-          "id": 3212,
-          "logprob": -2.265625,
-          "text": " behind"
-        },
-        {
-          "id": 436,
-          "logprob": -11.5078125,
-          "text": " this"
-        },
-        {
-          "id": 3159,
-          "logprob": -2.1582031,
-          "text": " word"
-        },
-        {
-          "id": 32,
-          "logprob": -0.008720398,
-          "text": "?"
-        },
-        {
-          "id": 0,
-          "logprob": -2.4726562,
-          "text": "<|endoftext|>"
-        },
-        {
-          "id": 50281,
-          "logprob": -18.265625,
-          "text": "<|assistant|>"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
@ -330,98 +148,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 50278,
-          "logprob": null,
-          "text": "<|prompter|>"
-        },
-        {
-          "id": 1276,
-          "logprob": -8.03125,
-          "text": "What"
-        },
-        {
-          "id": 310,
-          "logprob": -5.421875,
-          "text": " is"
-        },
-        {
-          "id": 247,
-          "logprob": -2.1601562,
-          "text": " a"
-        },
-        {
-          "id": 1167,
-          "logprob": -5.4609375,
-          "text": " mem"
-        },
-        {
-          "id": 70,
-          "logprob": -0.005657196,
-          "text": "e"
-        },
-        {
-          "id": 13,
-          "logprob": -7.28125,
-          "text": ","
-        },
-        {
-          "id": 285,
-          "logprob": -0.2980957,
-          "text": " and"
-        },
-        {
-          "id": 752,
-          "logprob": -2.1679688,
-          "text": " what"
-        },
-        {
-          "id": 434,
-          "logprob": -5.6210938,
-          "text": "'s"
-        },
-        {
-          "id": 253,
-          "logprob": -0.81103516,
-          "text": " the"
-        },
-        {
-          "id": 2892,
-          "logprob": -6.6640625,
-          "text": " history"
-        },
-        {
-          "id": 3212,
-          "logprob": -2.265625,
-          "text": " behind"
-        },
-        {
-          "id": 436,
-          "logprob": -11.5078125,
-          "text": " this"
-        },
-        {
-          "id": 3159,
-          "logprob": -2.1582031,
-          "text": " word"
-        },
-        {
-          "id": 32,
-          "logprob": -0.008720398,
-          "text": "?"
-        },
-        {
-          "id": 0,
-          "logprob": -2.4726562,
-          "text": "<|endoftext|>"
-        },
-        {
-          "id": 50281,
-          "logprob": -18.265625,
-          "text": "<|assistant|>"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
@ -493,98 +220,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 50278,
-          "logprob": null,
-          "text": "<|prompter|>"
-        },
-        {
-          "id": 1276,
-          "logprob": -8.03125,
-          "text": "What"
-        },
-        {
-          "id": 310,
-          "logprob": -5.421875,
-          "text": " is"
-        },
-        {
-          "id": 247,
-          "logprob": -2.1601562,
-          "text": " a"
-        },
-        {
-          "id": 1167,
-          "logprob": -5.4609375,
-          "text": " mem"
-        },
-        {
-          "id": 70,
-          "logprob": -0.005657196,
-          "text": "e"
-        },
-        {
-          "id": 13,
-          "logprob": -7.28125,
-          "text": ","
-        },
-        {
-          "id": 285,
-          "logprob": -0.2980957,
-          "text": " and"
-        },
-        {
-          "id": 752,
-          "logprob": -2.1679688,
-          "text": " what"
-        },
-        {
-          "id": 434,
-          "logprob": -5.6210938,
-          "text": "'s"
-        },
-        {
-          "id": 253,
-          "logprob": -0.81103516,
-          "text": " the"
-        },
-        {
-          "id": 2892,
-          "logprob": -6.6640625,
-          "text": " history"
-        },
-        {
-          "id": 3212,
-          "logprob": -2.265625,
-          "text": " behind"
-        },
-        {
-          "id": 436,
-          "logprob": -11.5078125,
-          "text": " this"
-        },
-        {
-          "id": 3159,
-          "logprob": -2.1582031,
-          "text": " word"
-        },
-        {
-          "id": 32,
-          "logprob": -0.008720398,
-          "text": "?"
-        },
-        {
-          "id": 0,
-          "logprob": -2.4726562,
-          "text": "<|endoftext|>"
-        },
-        {
-          "id": 50281,
-          "logprob": -18.265625,
-          "text": "<|assistant|>"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
--- a/integration-tests/models/snapshots/test_flash_phi/test_flash_phi.json
+++ b/integration-tests/models/snapshots/test_flash_phi/test_flash_phi.json
@ -3,18 +3,7 @@
    "best_of_sequences": null,
    "finish_reason": "length",
    "generated_tokens": 10,
-    "prefill": [
-      {
-        "id": 14402,
-        "logprob": null,
-        "text": "Test"
-      },
-      {
-        "id": 2581,
-        "logprob": -11.6171875,
-        "text": " request"
-      }
-    ],
+    "prefill": [],
    "seed": null,
    "tokens": [
      {
--- a/integration-tests/models/snapshots/test_flash_phi/test_flash_phi_all_params.json
+++ b/integration-tests/models/snapshots/test_flash_phi/test_flash_phi_all_params.json
@ -3,18 +3,7 @@
    "best_of_sequences": null,
    "finish_reason": "stop_sequence",
    "generated_tokens": 6,
-    "prefill": [
-      {
-        "id": 14402,
-        "logprob": null,
-        "text": "Test"
-      },
-      {
-        "id": 2581,
-        "logprob": -11.6171875,
-        "text": " request"
-      }
-    ],
+    "prefill": [],
    "seed": 0,
    "tokens": [
      {
--- a/integration-tests/models/snapshots/test_flash_phi/test_flash_phi_load.json
+++ b/integration-tests/models/snapshots/test_flash_phi/test_flash_phi_load.json
@ -4,18 +4,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 14402,
-          "logprob": null,
-          "text": "Test"
-        },
-        {
-          "id": 2581,
-          "logprob": -11.6171875,
-          "text": " request"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
@ -88,18 +77,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 14402,
-          "logprob": null,
-          "text": "Test"
-        },
-        {
-          "id": 2581,
-          "logprob": -11.6171875,
-          "text": " request"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
@ -172,18 +150,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 14402,
-          "logprob": null,
-          "text": "Test"
-        },
-        {
-          "id": 2581,
-          "logprob": -11.6171875,
-          "text": " request"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
@ -256,18 +223,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 14402,
-          "logprob": null,
-          "text": "Test"
-        },
-        {
-          "id": 2581,
-          "logprob": -11.6171875,
-          "text": " request"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
--- a/integration-tests/models/snapshots/test_flash_phi35_moe/test_flash_phi35_moe.json
+++ b/integration-tests/models/snapshots/test_flash_phi35_moe/test_flash_phi35_moe.json
@ -3,43 +3,7 @@
    "best_of_sequences": null,
    "finish_reason": "length",
    "generated_tokens": 10,
-    "prefill": [
-      {
-        "id": 1724,
-        "logprob": null,
-        "text": "What"
-      },
-      {
-        "id": 338,
-        "logprob": -0.6201172,
-        "text": "is"
-      },
-      {
-        "id": 16030,
-        "logprob": -13.6484375,
-        "text": "gradient"
-      },
-      {
-        "id": 26815,
-        "logprob": -0.003894806,
-        "text": "descent"
-      },
-      {
-        "id": 29973,
-        "logprob": -2.6386719,
-        "text": "?"
-      },
-      {
-        "id": 13,
-        "logprob": -6.46875,
-        "text": "\n"
-      },
-      {
-        "id": 13,
-        "logprob": -6.6875,
-        "text": "\n"
-      }
-    ],
+    "prefill": [],
    "seed": null,
    "tokens": [
      {
--- a/integration-tests/models/snapshots/test_flash_phi35_moe/test_flash_phi35_moe_all_params.json
+++ b/integration-tests/models/snapshots/test_flash_phi35_moe/test_flash_phi35_moe_all_params.json
@ -3,33 +3,7 @@
    "best_of_sequences": null,
    "finish_reason": "length",
    "generated_tokens": 10,
-    "prefill": [
-      {
-        "id": 338,
-        "logprob": null,
-        "text": "is"
-      },
-      {
-        "id": 16030,
-        "logprob": -13.328125,
-        "text": "gradient"
-      },
-      {
-        "id": 26815,
-        "logprob": -0.24023438,
-        "text": "descent"
-      },
-      {
-        "id": 29973,
-        "logprob": -3.1386719,
-        "text": "?"
-      },
-      {
-        "id": 13,
-        "logprob": -3.0878906,
-        "text": "\n"
-      }
-    ],
+    "prefill": [],
    "seed": 0,
    "tokens": [
      {
--- a/integration-tests/models/snapshots/test_flash_phi35_moe/test_flash_phi35_moe_load.json
+++ b/integration-tests/models/snapshots/test_flash_phi35_moe/test_flash_phi35_moe_load.json
@ -4,43 +4,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 1724,
-          "logprob": null,
-          "text": "What"
-        },
-        {
-          "id": 338,
-          "logprob": -0.6201172,
-          "text": "is"
-        },
-        {
-          "id": 16030,
-          "logprob": -13.6484375,
-          "text": "gradient"
-        },
-        {
-          "id": 26815,
-          "logprob": -0.003894806,
-          "text": "descent"
-        },
-        {
-          "id": 29973,
-          "logprob": -2.6386719,
-          "text": "?"
-        },
-        {
-          "id": 13,
-          "logprob": -6.46875,
-          "text": "\n"
-        },
-        {
-          "id": 13,
-          "logprob": -6.6875,
-          "text": "\n"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
@ -113,43 +77,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 1724,
-          "logprob": null,
-          "text": "What"
-        },
-        {
-          "id": 338,
-          "logprob": -0.6113281,
-          "text": "is"
-        },
-        {
-          "id": 16030,
-          "logprob": -13.6640625,
-          "text": "gradient"
-        },
-        {
-          "id": 26815,
-          "logprob": -0.003929138,
-          "text": "descent"
-        },
-        {
-          "id": 29973,
-          "logprob": -2.625,
-          "text": "?"
-        },
-        {
-          "id": 13,
-          "logprob": -6.484375,
-          "text": "\n"
-        },
-        {
-          "id": 13,
-          "logprob": -6.6875,
-          "text": "\n"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
@ -222,43 +150,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 1724,
-          "logprob": null,
-          "text": "What"
-        },
-        {
-          "id": 338,
-          "logprob": -0.609375,
-          "text": "is"
-        },
-        {
-          "id": 16030,
-          "logprob": -13.671875,
-          "text": "gradient"
-        },
-        {
-          "id": 26815,
-          "logprob": -0.0040016174,
-          "text": "descent"
-        },
-        {
-          "id": 29973,
-          "logprob": -2.6230469,
-          "text": "?"
-        },
-        {
-          "id": 13,
-          "logprob": -6.453125,
-          "text": "\n"
-        },
-        {
-          "id": 13,
-          "logprob": -6.6875,
-          "text": "\n"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
@ -331,43 +223,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 1724,
-          "logprob": null,
-          "text": "What"
-        },
-        {
-          "id": 338,
-          "logprob": -0.609375,
-          "text": "is"
-        },
-        {
-          "id": 16030,
-          "logprob": -13.6640625,
-          "text": "gradient"
-        },
-        {
-          "id": 26815,
-          "logprob": -0.0038967133,
-          "text": "descent"
-        },
-        {
-          "id": 29973,
-          "logprob": -2.6347656,
-          "text": "?"
-        },
-        {
-          "id": 13,
-          "logprob": -6.453125,
-          "text": "\n"
-        },
-        {
-          "id": 13,
-          "logprob": -6.6875,
-          "text": "\n"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
--- a/integration-tests/models/snapshots/test_flash_qwen2/test_flash_qwen2.json
+++ b/integration-tests/models/snapshots/test_flash_qwen2/test_flash_qwen2.json
@ -3,18 +3,7 @@
    "best_of_sequences": null,
    "finish_reason": "length",
    "generated_tokens": 10,
-    "prefill": [
-      {
-        "id": 2271,
-        "logprob": null,
-        "text": "Test"
-      },
-      {
-        "id": 1681,
-        "logprob": -8.8515625,
-        "text": " request"
-      }
-    ],
+    "prefill": [],
    "seed": null,
    "tokens": [
      {
--- a/integration-tests/models/snapshots/test_flash_qwen2/test_flash_qwen2_all_params.json
+++ b/integration-tests/models/snapshots/test_flash_qwen2/test_flash_qwen2_all_params.json
@ -3,18 +3,7 @@
    "best_of_sequences": null,
    "finish_reason": "length",
    "generated_tokens": 10,
-    "prefill": [
-      {
-        "id": 2271,
-        "logprob": null,
-        "text": "Test"
-      },
-      {
-        "id": 1681,
-        "logprob": -8.8515625,
-        "text": " request"
-      }
-    ],
+    "prefill": [],
    "seed": 0,
    "tokens": [
      {
--- a/integration-tests/models/snapshots/test_flash_qwen2/test_flash_qwen2_load.json
+++ b/integration-tests/models/snapshots/test_flash_qwen2/test_flash_qwen2_load.json
@ -4,18 +4,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 2271,
-          "logprob": null,
-          "text": "Test"
-        },
-        {
-          "id": 1681,
-          "logprob": -8.8515625,
-          "text": " request"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
@ -88,18 +77,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 2271,
-          "logprob": null,
-          "text": "Test"
-        },
-        {
-          "id": 1681,
-          "logprob": -8.8515625,
-          "text": " request"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
@ -172,18 +150,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 2271,
-          "logprob": null,
-          "text": "Test"
-        },
-        {
-          "id": 1681,
-          "logprob": -8.8515625,
-          "text": " request"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
@ -256,18 +223,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 2271,
-          "logprob": null,
-          "text": "Test"
-        },
-        {
-          "id": 1681,
-          "logprob": -8.8515625,
-          "text": " request"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
--- a/integration-tests/models/snapshots/test_flash_santacoder/test_flash_santacoder.json
+++ b/integration-tests/models/snapshots/test_flash_santacoder/test_flash_santacoder.json
@ -3,28 +3,7 @@
    "best_of_sequences": null,
    "finish_reason": "length",
    "generated_tokens": 10,
-    "prefill": [
-      {
-        "id": 563,
-        "logprob": null,
-        "text": "def"
-      },
-      {
-        "id": 942,
-        "logprob": -5.1367188,
-        "text": " print"
-      },
-      {
-        "id": 62,
-        "logprob": -0.24450684,
-        "text": "_"
-      },
-      {
-        "id": 7196,
-        "logprob": -6.9609375,
-        "text": "hello"
-      }
-    ],
+    "prefill": [],
    "seed": null,
    "tokens": [
      {
--- a/integration-tests/models/snapshots/test_flash_santacoder/test_flash_santacoder_load.json
+++ b/integration-tests/models/snapshots/test_flash_santacoder/test_flash_santacoder_load.json
@ -4,28 +4,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 563,
-          "logprob": null,
-          "text": "def"
-        },
-        {
-          "id": 942,
-          "logprob": -5.1367188,
-          "text": " print"
-        },
-        {
-          "id": 62,
-          "logprob": -0.24450684,
-          "text": "_"
-        },
-        {
-          "id": 7196,
-          "logprob": -6.9609375,
-          "text": "hello"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
@ -97,28 +76,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 563,
-          "logprob": null,
-          "text": "def"
-        },
-        {
-          "id": 942,
-          "logprob": -5.1367188,
-          "text": " print"
-        },
-        {
-          "id": 62,
-          "logprob": -0.24450684,
-          "text": "_"
-        },
-        {
-          "id": 7196,
-          "logprob": -6.9609375,
-          "text": "hello"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
@ -190,28 +148,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 563,
-          "logprob": null,
-          "text": "def"
-        },
-        {
-          "id": 942,
-          "logprob": -5.1367188,
-          "text": " print"
-        },
-        {
-          "id": 62,
-          "logprob": -0.24450684,
-          "text": "_"
-        },
-        {
-          "id": 7196,
-          "logprob": -6.9609375,
-          "text": "hello"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
@ -283,28 +220,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 563,
-          "logprob": null,
-          "text": "def"
-        },
-        {
-          "id": 942,
-          "logprob": -5.1367188,
-          "text": " print"
-        },
-        {
-          "id": 62,
-          "logprob": -0.24450684,
-          "text": "_"
-        },
-        {
-          "id": 7196,
-          "logprob": -6.9609375,
-          "text": "hello"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
--- a/integration-tests/models/snapshots/test_flash_starcoder/test_flash_starcoder.json
+++ b/integration-tests/models/snapshots/test_flash_starcoder/test_flash_starcoder.json
@ -3,28 +3,7 @@
    "best_of_sequences": null,
    "finish_reason": "length",
    "generated_tokens": 10,
-    "prefill": [
-      {
-        "id": 589,
-        "logprob": null,
-        "text": "def"
-      },
-      {
-        "id": 1459,
-        "logprob": -5.6289062,
-        "text": " print"
-      },
-      {
-        "id": 81,
-        "logprob": -1.6005859,
-        "text": "_"
-      },
-      {
-        "id": 7656,
-        "logprob": -5.9921875,
-        "text": "hello"
-      }
-    ],
+    "prefill": [],
    "seed": null,
    "tokens": [
      {
--- a/integration-tests/models/snapshots/test_flash_starcoder/test_flash_starcoder_default_params.json
+++ b/integration-tests/models/snapshots/test_flash_starcoder/test_flash_starcoder_default_params.json
@ -3,28 +3,7 @@
    "best_of_sequences": null,
    "finish_reason": "length",
    "generated_tokens": 60,
-    "prefill": [
-      {
-        "id": 589,
-        "logprob": null,
-        "text": "def"
-      },
-      {
-        "id": 1459,
-        "logprob": -5.625,
-        "text": " print"
-      },
-      {
-        "id": 81,
-        "logprob": -1.6064453,
-        "text": "_"
-      },
-      {
-        "id": 7656,
-        "logprob": -5.9921875,
-        "text": "hello"
-      }
-    ],
+    "prefill": [],
    "seed": 0,
    "tokens": [
      {
--- a/integration-tests/models/snapshots/test_flash_starcoder/test_flash_starcoder_load.json
+++ b/integration-tests/models/snapshots/test_flash_starcoder/test_flash_starcoder_load.json
@ -4,28 +4,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 589,
-          "logprob": null,
-          "text": "def"
-        },
-        {
-          "id": 1459,
-          "logprob": -5.6289062,
-          "text": " print"
-        },
-        {
-          "id": 81,
-          "logprob": -1.6005859,
-          "text": "_"
-        },
-        {
-          "id": 7656,
-          "logprob": -5.9921875,
-          "text": "hello"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
@ -97,28 +76,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 589,
-          "logprob": null,
-          "text": "def"
-        },
-        {
-          "id": 1459,
-          "logprob": -5.6289062,
-          "text": " print"
-        },
-        {
-          "id": 81,
-          "logprob": -1.6005859,
-          "text": "_"
-        },
-        {
-          "id": 7656,
-          "logprob": -5.9921875,
-          "text": "hello"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
@ -190,28 +148,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 589,
-          "logprob": null,
-          "text": "def"
-        },
-        {
-          "id": 1459,
-          "logprob": -5.6289062,
-          "text": " print"
-        },
-        {
-          "id": 81,
-          "logprob": -1.6005859,
-          "text": "_"
-        },
-        {
-          "id": 7656,
-          "logprob": -5.9921875,
-          "text": "hello"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
@ -283,28 +220,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 589,
-          "logprob": null,
-          "text": "def"
-        },
-        {
-          "id": 1459,
-          "logprob": -5.6289062,
-          "text": " print"
-        },
-        {
-          "id": 81,
-          "logprob": -1.6005859,
-          "text": "_"
-        },
-        {
-          "id": 7656,
-          "logprob": -5.9921875,
-          "text": "hello"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
--- a/integration-tests/models/snapshots/test_flash_starcoder2/test_flash_starcoder2.json
+++ b/integration-tests/models/snapshots/test_flash_starcoder2/test_flash_starcoder2.json
@ -3,28 +3,7 @@
    "best_of_sequences": null,
    "finish_reason": "length",
    "generated_tokens": 10,
-    "prefill": [
-      {
-        "id": 610,
-        "logprob": null,
-        "text": "def"
-      },
-      {
-        "id": 1489,
-        "logprob": -5.2617188,
-        "text": " print"
-      },
-      {
-        "id": 100,
-        "logprob": -0.38476562,
-        "text": "_"
-      },
-      {
-        "id": 7670,
-        "logprob": -7.640625,
-        "text": "hello"
-      }
-    ],
+    "prefill": [],
    "seed": null,
    "tokens": [
      {
--- a/integration-tests/models/snapshots/test_flash_starcoder2/test_flash_starcoder2_default_params.json
+++ b/integration-tests/models/snapshots/test_flash_starcoder2/test_flash_starcoder2_default_params.json
@ -3,28 +3,7 @@
    "best_of_sequences": null,
    "finish_reason": "length",
    "generated_tokens": 60,
-    "prefill": [
-      {
-        "id": 610,
-        "logprob": null,
-        "text": "def"
-      },
-      {
-        "id": 1489,
-        "logprob": -5.265625,
-        "text": " print"
-      },
-      {
-        "id": 100,
-        "logprob": -0.38305664,
-        "text": "_"
-      },
-      {
-        "id": 7670,
-        "logprob": -7.640625,
-        "text": "hello"
-      }
-    ],
+    "prefill": [],
    "seed": 0,
    "tokens": [
      {
--- a/integration-tests/models/snapshots/test_flash_starcoder2/test_flash_starcoder2_load.json
+++ b/integration-tests/models/snapshots/test_flash_starcoder2/test_flash_starcoder2_load.json
@ -4,28 +4,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 610,
-          "logprob": null,
-          "text": "def"
-        },
-        {
-          "id": 1489,
-          "logprob": -5.2617188,
-          "text": " print"
-        },
-        {
-          "id": 100,
-          "logprob": -0.38476562,
-          "text": "_"
-        },
-        {
-          "id": 7670,
-          "logprob": -7.640625,
-          "text": "hello"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
@ -98,28 +77,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 610,
-          "logprob": null,
-          "text": "def"
-        },
-        {
-          "id": 1489,
-          "logprob": -5.2617188,
-          "text": " print"
-        },
-        {
-          "id": 100,
-          "logprob": -0.38476562,
-          "text": "_"
-        },
-        {
-          "id": 7670,
-          "logprob": -7.640625,
-          "text": "hello"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
@ -192,28 +150,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 610,
-          "logprob": null,
-          "text": "def"
-        },
-        {
-          "id": 1489,
-          "logprob": -5.2617188,
-          "text": " print"
-        },
-        {
-          "id": 100,
-          "logprob": -0.38476562,
-          "text": "_"
-        },
-        {
-          "id": 7670,
-          "logprob": -7.640625,
-          "text": "hello"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
@ -286,28 +223,7 @@
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 610,
-          "logprob": null,
-          "text": "def"
-        },
-        {
-          "id": 1489,
-          "logprob": -5.2617188,
-          "text": " print"
-        },
-        {
-          "id": 100,
-          "logprob": -0.38476562,
-          "text": "_"
-        },
-        {
-          "id": 7670,
-          "logprob": -7.640625,
-          "text": "hello"
-        }
-      ],
+      "prefill": [],
      "seed": null,
      "tokens": [
        {
--- a/integration-tests/models/snapshots/test_flash_starcoder_gptq/test_flash_starcoder_gptq.json
+++ b/integration-tests/models/snapshots/test_flash_starcoder_gptq/test_flash_starcoder_gptq.json
@ -3,68 +3,7 @@
    "best_of_sequences": null,
    "finish_reason": "eos_token",
    "generated_tokens": 2,
-    "prefill": [
-      {
-        "id": 589,
-        "logprob": null,
-        "text": "def"
-      },
-      {
-        "id": 3226,
-        "logprob": -9.0234375,
-        "text": " ge"
-      },
-      {
-        "id": 21017,
-        "logprob": -9.0859375,
-        "text": "ometric"
-      },
-      {
-        "id": 81,
-        "logprob": -0.25585938,
-        "text": "_"
-      },
-      {
-        "id": 6009,
-        "logprob": -2.1972656,
-        "text": "mean"
-      },
-      {
-        "id": 26,
-        "logprob": -0.2998047,
-        "text": "("
-      },
-      {
-        "id": 62,
-        "logprob": -5.6445312,
-        "text": "L"
-      },
-      {
-        "id": 44,
-        "logprob": -3.0839844,
-        "text": ":"
-      },
-      {
-        "id": 1682,
-        "logprob": -0.6748047,
-        "text": " List"
-      },
-      {
-        "id": 77,
-        "logprob": -0.3864746,
-        "text": "["
-      },
-      {
-        "id": 1808,
-        "logprob": -0.9355469,
-        "text": "float"
-      },
-      {
-        "id": 10794,
-        "logprob": -2.5371094,
-        "text": "]):"
-      }
-    ],
+    "prefill": [],
    "seed": null,
    "tokens": [
      {
--- a/Show More
+++ b/Show More