Remove the stripping of the prefix space (and any other mangling that tokenizers might do). (#1065)

Supersedes #1024 # What does this PR do?   Fixes # (issue) ## Before submitting - [ ] This PR fixes a typo or improves the docs (you can dismiss the other checks if that's the case). - [ ] Did you read the [contributor guideline](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md#start-contributing-pull-requests), Pull Request section? - [ ] Was this discussed/approved via a GitHub issue or the [forum](https://discuss.huggingface.co/)? Please add a link to it if that's the case. - [ ] Did you make sure to update the documentation with your changes? Here are the [documentation guidelines](https://github.com/huggingface/transformers/tree/main/docs), and [here are tips on formatting docstrings](https://github.com/huggingface/transformers/tree/main/docs#writing-source-documentation). - [ ] Did you write any new necessary tests? ## Who can review? Anyone in the community is free to review the PR once the tests have passed. Feel free to tag members/contributors who may be interested in your PR.  --------- Co-authored-by: bangoz <ch_xie@pku.edu.cn>
2023-09-27 12:13:45 +02:00 · 2023-09-27 12:13:45 +02:00 · b32e9ce9d5
parent 95a4bb696a
commit b32e9ce9d5
10 changed files with 350 additions and 341 deletions
--- a/integration-tests/models/snapshots/test_flash_llama/test_flash_llama.json
+++ b/integration-tests/models/snapshots/test_flash_llama/test_flash_llama.json
@ -16,7 +16,7 @@
      },
      {
        "id": 2009,
-        "logprob": -11.5546875,
+        "logprob": -11.546875,
        "text": "request"
      }
    ],
@ -24,65 +24,66 @@
    "tokens": [
      {
        "id": 363,
-        "logprob": -1.5380859,
+        "logprob": -1.5351562,
        "special": false,
        "text": " for"
      },
      {
        "id": 847,
-        "logprob": -2.5917969,
+        "logprob": -2.5722656,
        "special": false,
        "text": " /"
      },
      {
        "id": 2754,
-        "logprob": -2.2773438,
+        "logprob": -2.2714844,
        "special": false,
        "text": "api"
      },
      {
        "id": 29914,
-        "logprob": -0.034362793,
+        "logprob": -0.03414917,
        "special": false,
        "text": "/"
      },
      {
        "id": 29894,
-        "logprob": -0.96533203,
+        "logprob": -0.95996094,
        "special": false,
        "text": "v"
      },
      {
        "id": 29896,
-        "logprob": -0.36669922,
+        "logprob": -0.3635254,
        "special": false,
        "text": "1"
      },
      {
        "id": 29914,
-        "logprob": -0.013122559,
+        "logprob": -0.013031006,
        "special": false,
        "text": "/"
      },
      {
        "id": 16418,
-        "logprob": -3.1503906,
+        "logprob": -3.1523438,
        "special": false,
        "text": "projects"
      },
      {
        "id": 29914,
-        "logprob": -0.43652344,
+        "logprob": -0.43701172,
        "special": false,
        "text": "/"
      },
      {
        "id": 29896,
-        "logprob": -1.9404297,
+        "logprob": -1.9394531,
        "special": false,
        "text": "1"
      }
-    ]
+    ],
+    "top_tokens": null
  },
  "generated_text": " for /api/v1/projects/1"
 }
--- a/integration-tests/models/snapshots/test_flash_llama/test_flash_llama_all_params.json
+++ b/integration-tests/models/snapshots/test_flash_llama/test_flash_llama_all_params.json
@ -16,7 +16,7 @@
      },
      {
        "id": 2009,
-        "logprob": -11.5546875,
+        "logprob": -11.546875,
        "text": "request"
      }
    ],
@ -24,19 +24,19 @@
    "tokens": [
      {
        "id": 5229,
-        "logprob": -2.5683594,
+        "logprob": -2.5839844,
        "special": false,
        "text": " failed"
      },
      {
        "id": 29901,
-        "logprob": -0.45336914,
+        "logprob": -0.44970703,
        "special": false,
        "text": ":"
      },
      {
        "id": 4829,
-        "logprob": -1.8408203,
+        "logprob": -1.8339844,
        "special": false,
        "text": " Error"
      },
@ -52,7 +52,8 @@
        "special": false,
        "text": " test"
      }
-    ]
+    ],
+    "top_tokens": null
  },
  "generated_text": "Test request failed: Error in test"
 }
--- a/integration-tests/models/snapshots/test_flash_llama/test_flash_llama_load.json
+++ b/integration-tests/models/snapshots/test_flash_llama/test_flash_llama_load.json
@ -17,7 +17,7 @@
        },
        {
          "id": 2009,
-          "logprob": -11.5546875,
+          "logprob": -11.546875,
          "text": "request"
        }
      ],
@ -25,25 +25,25 @@
      "tokens": [
        {
          "id": 363,
-          "logprob": -1.5380859,
+          "logprob": -1.5351562,
          "special": false,
          "text": " for"
        },
        {
          "id": 847,
-          "logprob": -2.5859375,
+          "logprob": -2.5566406,
          "special": false,
          "text": " /"
        },
        {
          "id": 2754,
-          "logprob": -2.2695312,
+          "logprob": -2.2519531,
          "special": false,
          "text": "api"
        },
        {
          "id": 29914,
-          "logprob": -0.03439331,
+          "logprob": -0.03414917,
          "special": false,
          "text": "/"
        },
@ -55,13 +55,13 @@
        },
        {
          "id": 29896,
-          "logprob": -0.36694336,
+          "logprob": -0.3647461,
          "special": false,
          "text": "1"
        },
        {
          "id": 29914,
-          "logprob": -0.013114929,
+          "logprob": -0.012901306,
          "special": false,
          "text": "/"
        },
@ -73,17 +73,18 @@
        },
        {
          "id": 29914,
-          "logprob": -0.43847656,
+          "logprob": -0.4362793,
          "special": false,
          "text": "/"
        },
        {
          "id": 29896,
-          "logprob": -1.9433594,
+          "logprob": -1.9394531,
          "special": false,
          "text": "1"
        }
-      ]
+      ],
+      "top_tokens": null
    },
    "generated_text": " for /api/v1/projects/1"
  },
@ -105,7 +106,7 @@
        },
        {
          "id": 2009,
-          "logprob": -11.5546875,
+          "logprob": -11.546875,
          "text": "request"
        }
      ],
@ -113,43 +114,43 @@
      "tokens": [
        {
          "id": 363,
-          "logprob": -1.5322266,
+          "logprob": -1.5332031,
          "special": false,
          "text": " for"
        },
        {
          "id": 847,
-          "logprob": -2.5585938,
+          "logprob": -2.5625,
          "special": false,
          "text": " /"
        },
        {
          "id": 2754,
-          "logprob": -2.265625,
+          "logprob": -2.2617188,
          "special": false,
          "text": "api"
        },
        {
          "id": 29914,
-          "logprob": -0.034088135,
+          "logprob": -0.033996582,
          "special": false,
          "text": "/"
        },
        {
          "id": 29894,
-          "logprob": -0.96240234,
+          "logprob": -0.9609375,
          "special": false,
          "text": "v"
        },
        {
          "id": 29896,
-          "logprob": -0.36816406,
+          "logprob": -0.36572266,
          "special": false,
          "text": "1"
        },
        {
          "id": 29914,
-          "logprob": -0.013191223,
+          "logprob": -0.0129776,
          "special": false,
          "text": "/"
        },
@ -161,17 +162,18 @@
        },
        {
          "id": 29914,
-          "logprob": -0.43774414,
+          "logprob": -0.4362793,
          "special": false,
          "text": "/"
        },
        {
          "id": 29896,
-          "logprob": -1.9443359,
+          "logprob": -1.9394531,
          "special": false,
          "text": "1"
        }
-      ]
+      ],
+      "top_tokens": null
    },
    "generated_text": " for /api/v1/projects/1"
  },
@ -193,7 +195,7 @@
        },
        {
          "id": 2009,
-          "logprob": -11.5546875,
+          "logprob": -11.546875,
          "text": "request"
        }
      ],
@ -201,43 +203,43 @@
      "tokens": [
        {
          "id": 363,
-          "logprob": -1.5322266,
+          "logprob": -1.5332031,
          "special": false,
          "text": " for"
        },
        {
          "id": 847,
-          "logprob": -2.5585938,
+          "logprob": -2.5625,
          "special": false,
          "text": " /"
        },
        {
          "id": 2754,
-          "logprob": -2.265625,
+          "logprob": -2.2617188,
          "special": false,
          "text": "api"
        },
        {
          "id": 29914,
-          "logprob": -0.034088135,
+          "logprob": -0.033996582,
          "special": false,
          "text": "/"
        },
        {
          "id": 29894,
-          "logprob": -0.96240234,
+          "logprob": -0.9609375,
          "special": false,
          "text": "v"
        },
        {
          "id": 29896,
-          "logprob": -0.36816406,
+          "logprob": -0.36572266,
          "special": false,
          "text": "1"
        },
        {
          "id": 29914,
-          "logprob": -0.013191223,
+          "logprob": -0.0129776,
          "special": false,
          "text": "/"
        },
@ -249,17 +251,18 @@
        },
        {
          "id": 29914,
-          "logprob": -0.43774414,
+          "logprob": -0.4362793,
          "special": false,
          "text": "/"
        },
        {
          "id": 29896,
-          "logprob": -1.9443359,
+          "logprob": -1.9394531,
          "special": false,
          "text": "1"
        }
-      ]
+      ],
+      "top_tokens": null
    },
    "generated_text": " for /api/v1/projects/1"
  },
@ -281,7 +284,7 @@
        },
        {
          "id": 2009,
-          "logprob": -11.5546875,
+          "logprob": -11.546875,
          "text": "request"
        }
      ],
@ -289,43 +292,43 @@
      "tokens": [
        {
          "id": 363,
-          "logprob": -1.5322266,
+          "logprob": -1.5332031,
          "special": false,
          "text": " for"
        },
        {
          "id": 847,
-          "logprob": -2.5585938,
+          "logprob": -2.5625,
          "special": false,
          "text": " /"
        },
        {
          "id": 2754,
-          "logprob": -2.265625,
+          "logprob": -2.2617188,
          "special": false,
          "text": "api"
        },
        {
          "id": 29914,
-          "logprob": -0.034088135,
+          "logprob": -0.033996582,
          "special": false,
          "text": "/"
        },
        {
          "id": 29894,
-          "logprob": -0.96240234,
+          "logprob": -0.9609375,
          "special": false,
          "text": "v"
        },
        {
          "id": 29896,
-          "logprob": -0.36816406,
+          "logprob": -0.36572266,
          "special": false,
          "text": "1"
        },
        {
          "id": 29914,
-          "logprob": -0.013191223,
+          "logprob": -0.0129776,
          "special": false,
          "text": "/"
        },
@ -337,17 +340,18 @@
        },
        {
          "id": 29914,
-          "logprob": -0.43774414,
+          "logprob": -0.4362793,
          "special": false,
          "text": "/"
        },
        {
          "id": 29896,
-          "logprob": -1.9443359,
+          "logprob": -1.9394531,
          "special": false,
          "text": "1"
        }
-      ]
+      ],
+      "top_tokens": null
    },
    "generated_text": " for /api/v1/projects/1"
  }
--- a/integration-tests/models/snapshots/test_idefics/test_idefics.json
+++ b/integration-tests/models/snapshots/test_idefics/test_idefics.json
@ -11,22 +11,22 @@
      },
      {
        "id": 4911,
-        "logprob": -5.7773438,
+        "logprob": -5.7851562,
        "text": "User"
      },
      {
        "id": 29901,
-        "logprob": -0.0069999695,
+        "logprob": -0.006996155,
        "text": ":"
      },
      {
        "id": 32000,
-        "logprob": -0.8125,
+        "logprob": -0.81347656,
        "text": "<fake_token_around_image>"
      },
      {
        "id": 32001,
-        "logprob": -6.651878e-05,
+        "logprob": -6.687641e-05,
        "text": "<image>"
      },
      {
@ -36,67 +36,67 @@
      },
      {
        "id": 1815,
-        "logprob": -4.2265625,
+        "logprob": -4.2148438,
        "text": "Can"
      },
      {
        "id": 366,
-        "logprob": -0.013977051,
+        "logprob": -0.014137268,
        "text": "you"
      },
      {
        "id": 2649,
-        "logprob": -4.4375,
+        "logprob": -4.4335938,
        "text": "tell"
      },
      {
        "id": 592,
-        "logprob": -0.29077148,
+        "logprob": -0.2919922,
        "text": "me"
      },
      {
        "id": 263,
-        "logprob": -4.2109375,
+        "logprob": -4.2070312,
        "text": "a"
      },
      {
        "id": 1407,
-        "logprob": -9.4296875,
+        "logprob": -9.421875,
        "text": "very"
      },
      {
        "id": 3273,
-        "logprob": -1.8671875,
+        "logprob": -1.8720703,
        "text": "short"
      },
      {
        "id": 5828,
-        "logprob": -0.26586914,
+        "logprob": -0.26489258,
        "text": "story"
      },
      {
        "id": 2729,
-        "logprob": -3.7460938,
+        "logprob": -3.7441406,
        "text": "based"
      },
      {
        "id": 373,
-        "logprob": -0.0005350113,
+        "logprob": -0.0005393028,
        "text": "on"
      },
      {
        "id": 278,
-        "logprob": -0.13867188,
+        "logprob": -0.140625,
        "text": "the"
      },
      {
        "id": 1967,
-        "logprob": -0.06842041,
+        "logprob": -0.06756592,
        "text": "image"
      },
      {
        "id": 29973,
-        "logprob": -0.15319824,
+        "logprob": -0.15454102,
        "text": "?"
      }
    ],
@ -104,7 +104,7 @@
    "tokens": [
      {
        "id": 32002,
-        "logprob": -0.0019445419,
+        "logprob": -0.0019140244,
        "special": true,
        "text": "<end_of_utterance>"
      },
@ -116,13 +116,13 @@
      },
      {
        "id": 13,
-        "logprob": -1.7881393e-05,
+        "logprob": -1.7642975e-05,
        "special": false,
        "text": "\n"
      },
      {
        "id": 7900,
-        "logprob": -3.0994415e-06,
+        "logprob": -2.9802322e-06,
        "special": false,
        "text": "Ass"
      },
@ -140,25 +140,25 @@
      },
      {
        "id": 319,
-        "logprob": -0.9057617,
+        "logprob": -0.91064453,
        "special": false,
        "text": " A"
      },
      {
        "id": 696,
-        "logprob": -1.2314453,
+        "logprob": -1.2412109,
        "special": false,
        "text": " ro"
      },
      {
        "id": 15664,
-        "logprob": -0.00024914742,
+        "logprob": -0.0002439022,
        "special": false,
        "text": "oster"
      },
      {
        "id": 15028,
-        "logprob": -1.1621094,
+        "logprob": -1.1630859,
        "special": false,
        "text": " stands"
      }
--- a/integration-tests/models/snapshots/test_idefics/test_idefics_load.json
+++ b/integration-tests/models/snapshots/test_idefics/test_idefics_load.json
@ -1,4 +1,173 @@
 [
+  {
+    "details": {
+      "best_of_sequences": null,
+      "finish_reason": "length",
+      "generated_tokens": 10,
+      "prefill": [
+        {
+          "id": 1,
+          "logprob": null,
+          "text": "<s>"
+        },
+        {
+          "id": 4911,
+          "logprob": -5.7851562,
+          "text": "User"
+        },
+        {
+          "id": 29901,
+          "logprob": -0.006996155,
+          "text": ":"
+        },
+        {
+          "id": 32000,
+          "logprob": -0.81347656,
+          "text": "<fake_token_around_image>"
+        },
+        {
+          "id": 32001,
+          "logprob": -6.687641e-05,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -3.5762787e-07,
+          "text": "<fake_token_around_image>"
+        },
+        {
+          "id": 1815,
+          "logprob": -4.2148438,
+          "text": "Can"
+        },
+        {
+          "id": 366,
+          "logprob": -0.014137268,
+          "text": "you"
+        },
+        {
+          "id": 2649,
+          "logprob": -4.4335938,
+          "text": "tell"
+        },
+        {
+          "id": 592,
+          "logprob": -0.2919922,
+          "text": "me"
+        },
+        {
+          "id": 263,
+          "logprob": -4.2070312,
+          "text": "a"
+        },
+        {
+          "id": 1407,
+          "logprob": -9.421875,
+          "text": "very"
+        },
+        {
+          "id": 3273,
+          "logprob": -1.8720703,
+          "text": "short"
+        },
+        {
+          "id": 5828,
+          "logprob": -0.26489258,
+          "text": "story"
+        },
+        {
+          "id": 2729,
+          "logprob": -3.7441406,
+          "text": "based"
+        },
+        {
+          "id": 373,
+          "logprob": -0.0005393028,
+          "text": "on"
+        },
+        {
+          "id": 278,
+          "logprob": -0.140625,
+          "text": "the"
+        },
+        {
+          "id": 1967,
+          "logprob": -0.06756592,
+          "text": "image"
+        },
+        {
+          "id": 29973,
+          "logprob": -0.15454102,
+          "text": "?"
+        }
+      ],
+      "seed": null,
+      "tokens": [
+        {
+          "id": 32002,
+          "logprob": -0.0019140244,
+          "special": true,
+          "text": "<end_of_utterance>"
+        },
+        {
+          "id": 29871,
+          "logprob": -8.392334e-05,
+          "special": false,
+          "text": " "
+        },
+        {
+          "id": 13,
+          "logprob": -1.7881393e-05,
+          "special": false,
+          "text": "\n"
+        },
+        {
+          "id": 7900,
+          "logprob": -2.9802322e-06,
+          "special": false,
+          "text": "Ass"
+        },
+        {
+          "id": 22137,
+          "logprob": 0.0,
+          "special": false,
+          "text": "istant"
+        },
+        {
+          "id": 29901,
+          "logprob": -3.0994415e-06,
+          "special": false,
+          "text": ":"
+        },
+        {
+          "id": 319,
+          "logprob": -0.9057617,
+          "special": false,
+          "text": " A"
+        },
+        {
+          "id": 696,
+          "logprob": -1.2294922,
+          "special": false,
+          "text": " ro"
+        },
+        {
+          "id": 15664,
+          "logprob": -0.00024533272,
+          "special": false,
+          "text": "oster"
+        },
+        {
+          "id": 15028,
+          "logprob": -1.1640625,
+          "special": false,
+          "text": " stands"
+        }
+      ],
+      "top_tokens": null
+    },
+    "generated_text": " \nAssistant: A rooster stands"
+  },
  {
    "details": {
      "best_of_sequences": null,
@ -17,17 +186,17 @@
        },
        {
          "id": 29901,
-          "logprob": -0.0069999695,
+          "logprob": -0.0070114136,
          "text": ":"
        },
        {
          "id": 32000,
-          "logprob": -0.8125,
+          "logprob": -0.8208008,
          "text": "<fake_token_around_image>"
        },
        {
          "id": 32001,
-          "logprob": -6.651878e-05,
+          "logprob": -6.699562e-05,
          "text": "<image>"
        },
        {
@ -42,17 +211,17 @@
        },
        {
          "id": 366,
-          "logprob": -0.013977051,
+          "logprob": -0.014175415,
          "text": "you"
        },
        {
          "id": 2649,
-          "logprob": -4.4375,
+          "logprob": -4.4296875,
          "text": "tell"
        },
        {
          "id": 592,
-          "logprob": -0.29077148,
+          "logprob": -0.29516602,
          "text": "me"
        },
        {
@ -67,37 +236,37 @@
        },
        {
          "id": 3273,
-          "logprob": -1.8671875,
+          "logprob": -1.8720703,
          "text": "short"
        },
        {
          "id": 5828,
-          "logprob": -0.26586914,
+          "logprob": -0.26879883,
          "text": "story"
        },
        {
          "id": 2729,
-          "logprob": -3.7460938,
+          "logprob": -3.7675781,
          "text": "based"
        },
        {
          "id": 373,
-          "logprob": -0.0005350113,
+          "logprob": -0.0005354881,
          "text": "on"
        },
        {
          "id": 278,
-          "logprob": -0.13867188,
+          "logprob": -0.13671875,
          "text": "the"
        },
        {
          "id": 1967,
-          "logprob": -0.06842041,
+          "logprob": -0.06719971,
          "text": "image"
        },
        {
          "id": 29973,
-          "logprob": -0.15319824,
+          "logprob": -0.15551758,
          "text": "?"
        }
      ],
@ -105,13 +274,13 @@
      "tokens": [
        {
          "id": 32002,
-          "logprob": -0.0019445419,
+          "logprob": -0.0019130707,
          "special": true,
          "text": "<end_of_utterance>"
        },
        {
          "id": 29871,
-          "logprob": -8.416176e-05,
+          "logprob": -8.392334e-05,
          "special": false,
          "text": " "
        },
@ -135,25 +304,25 @@
        },
        {
          "id": 29901,
-          "logprob": -3.2186508e-06,
+          "logprob": -3.0994415e-06,
          "special": false,
          "text": ":"
        },
        {
          "id": 319,
-          "logprob": -0.89941406,
+          "logprob": -0.9013672,
          "special": false,
          "text": " A"
        },
        {
          "id": 696,
-          "logprob": -1.234375,
+          "logprob": -1.2324219,
          "special": false,
          "text": " ro"
        },
        {
          "id": 15664,
-          "logprob": -0.0002465248,
+          "logprob": -0.0002477169,
          "special": false,
          "text": "oster"
        },
@ -181,22 +350,22 @@
        },
        {
          "id": 4911,
-          "logprob": -5.7890625,
+          "logprob": -5.7773438,
          "text": "User"
        },
        {
          "id": 29901,
-          "logprob": -0.0070152283,
+          "logprob": -0.0070114136,
          "text": ":"
        },
        {
          "id": 32000,
-          "logprob": -0.8125,
+          "logprob": -0.8208008,
          "text": "<fake_token_around_image>"
        },
        {
          "id": 32001,
-          "logprob": -6.651878e-05,
+          "logprob": -6.699562e-05,
          "text": "<image>"
        },
        {
@ -211,17 +380,17 @@
        },
        {
          "id": 366,
-          "logprob": -0.014190674,
+          "logprob": -0.014175415,
          "text": "you"
        },
        {
          "id": 2649,
-          "logprob": -4.4140625,
+          "logprob": -4.4296875,
          "text": "tell"
        },
        {
          "id": 592,
-          "logprob": -0.2919922,
+          "logprob": -0.29516602,
          "text": "me"
        },
        {
@ -231,7 +400,7 @@
        },
        {
          "id": 1407,
-          "logprob": -9.4375,
+          "logprob": -9.4296875,
          "text": "very"
        },
        {
@ -241,7 +410,7 @@
        },
        {
          "id": 5828,
-          "logprob": -0.26904297,
+          "logprob": -0.26879883,
          "text": "story"
        },
        {
@ -251,22 +420,22 @@
        },
        {
          "id": 373,
-          "logprob": -0.0005402565,
+          "logprob": -0.0005354881,
          "text": "on"
        },
        {
          "id": 278,
-          "logprob": -0.13867188,
+          "logprob": -0.13671875,
          "text": "the"
        },
        {
          "id": 1967,
-          "logprob": -0.068359375,
+          "logprob": -0.06719971,
          "text": "image"
        },
        {
          "id": 29973,
-          "logprob": -0.15539551,
+          "logprob": -0.15551758,
          "text": "?"
        }
      ],
@ -274,7 +443,7 @@
      "tokens": [
        {
          "id": 32002,
-          "logprob": -0.0019168854,
+          "logprob": -0.001912117,
          "special": true,
          "text": "<end_of_utterance>"
        },
@ -286,7 +455,7 @@
        },
        {
          "id": 13,
-          "logprob": -1.7642975e-05,
+          "logprob": -1.7762184e-05,
          "special": false,
          "text": "\n"
        },
@ -310,25 +479,25 @@
        },
        {
          "id": 319,
-          "logprob": -0.90722656,
+          "logprob": -0.9013672,
          "special": false,
          "text": " A"
        },
        {
          "id": 696,
-          "logprob": -1.2373047,
+          "logprob": -1.2324219,
          "special": false,
          "text": " ro"
        },
        {
          "id": 15664,
-          "logprob": -0.00024938583,
+          "logprob": -0.0002477169,
          "special": false,
          "text": "oster"
        },
        {
          "id": 15028,
-          "logprob": -1.1708984,
+          "logprob": -1.1660156,
          "special": false,
          "text": " stands"
        }
@ -350,22 +519,22 @@
        },
        {
          "id": 4911,
-          "logprob": -5.7890625,
+          "logprob": -5.7773438,
          "text": "User"
        },
        {
          "id": 29901,
-          "logprob": -0.0070152283,
+          "logprob": -0.0070114136,
          "text": ":"
        },
        {
          "id": 32000,
-          "logprob": -0.8125,
+          "logprob": -0.8208008,
          "text": "<fake_token_around_image>"
        },
        {
          "id": 32001,
-          "logprob": -6.663799e-05,
+          "logprob": -6.699562e-05,
          "text": "<image>"
        },
        {
@ -380,17 +549,17 @@
        },
        {
          "id": 366,
-          "logprob": -0.014190674,
+          "logprob": -0.014175415,
          "text": "you"
        },
        {
          "id": 2649,
-          "logprob": -4.4140625,
+          "logprob": -4.4296875,
          "text": "tell"
        },
        {
          "id": 592,
-          "logprob": -0.2919922,
+          "logprob": -0.29516602,
          "text": "me"
        },
        {
@ -400,7 +569,7 @@
        },
        {
          "id": 1407,
-          "logprob": -9.4375,
+          "logprob": -9.4296875,
          "text": "very"
        },
        {
@ -410,7 +579,7 @@
        },
        {
          "id": 5828,
-          "logprob": -0.26904297,
+          "logprob": -0.26879883,
          "text": "story"
        },
        {
@ -420,22 +589,22 @@
        },
        {
          "id": 373,
-          "logprob": -0.0005402565,
+          "logprob": -0.0005354881,
          "text": "on"
        },
        {
          "id": 278,
-          "logprob": -0.13867188,
+          "logprob": -0.13671875,
          "text": "the"
        },
        {
          "id": 1967,
-          "logprob": -0.068359375,
+          "logprob": -0.06719971,
          "text": "image"
        },
        {
          "id": 29973,
-          "logprob": -0.15539551,
+          "logprob": -0.15551758,
          "text": "?"
        }
      ],
@ -443,19 +612,19 @@
      "tokens": [
        {
          "id": 32002,
-          "logprob": -0.0019168854,
+          "logprob": -0.001912117,
          "special": true,
          "text": "<end_of_utterance>"
        },
        {
          "id": 29871,
-          "logprob": -8.404255e-05,
+          "logprob": -8.392334e-05,
          "special": false,
          "text": " "
        },
        {
          "id": 13,
-          "logprob": -1.7642975e-05,
+          "logprob": -1.7762184e-05,
          "special": false,
          "text": "\n"
        },
@ -479,194 +648,25 @@
        },
        {
          "id": 319,
-          "logprob": -0.90722656,
+          "logprob": -0.9013672,
          "special": false,
          "text": " A"
        },
        {
          "id": 696,
-          "logprob": -1.2373047,
+          "logprob": -1.2324219,
          "special": false,
          "text": " ro"
        },
        {
          "id": 15664,
-          "logprob": -0.00024938583,
+          "logprob": -0.0002477169,
          "special": false,
          "text": "oster"
        },
        {
          "id": 15028,
-          "logprob": -1.1708984,
-          "special": false,
-          "text": " stands"
-        }
-      ],
-      "top_tokens": null
-    },
-    "generated_text": "\nAssistant: A rooster stands"
-  },
-  {
-    "details": {
-      "best_of_sequences": null,
-      "finish_reason": "length",
-      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 1,
-          "logprob": null,
-          "text": "<s>"
-        },
-        {
-          "id": 4911,
-          "logprob": -5.7890625,
-          "text": "User"
-        },
-        {
-          "id": 29901,
-          "logprob": -0.0070152283,
-          "text": ":"
-        },
-        {
-          "id": 32000,
-          "logprob": -0.8125,
-          "text": "<fake_token_around_image>"
-        },
-        {
-          "id": 32001,
-          "logprob": -6.663799e-05,
-          "text": "<image>"
-        },
-        {
-          "id": 32000,
-          "logprob": -3.5762787e-07,
-          "text": "<fake_token_around_image>"
-        },
-        {
-          "id": 1815,
-          "logprob": -4.2265625,
-          "text": "Can"
-        },
-        {
-          "id": 366,
-          "logprob": -0.014190674,
-          "text": "you"
-        },
-        {
-          "id": 2649,
-          "logprob": -4.4140625,
-          "text": "tell"
-        },
-        {
-          "id": 592,
-          "logprob": -0.2919922,
-          "text": "me"
-        },
-        {
-          "id": 263,
-          "logprob": -4.2109375,
-          "text": "a"
-        },
-        {
-          "id": 1407,
-          "logprob": -9.4375,
-          "text": "very"
-        },
-        {
-          "id": 3273,
-          "logprob": -1.8720703,
-          "text": "short"
-        },
-        {
-          "id": 5828,
-          "logprob": -0.26904297,
-          "text": "story"
-        },
-        {
-          "id": 2729,
-          "logprob": -3.7675781,
-          "text": "based"
-        },
-        {
-          "id": 373,
-          "logprob": -0.0005402565,
-          "text": "on"
-        },
-        {
-          "id": 278,
-          "logprob": -0.13867188,
-          "text": "the"
-        },
-        {
-          "id": 1967,
-          "logprob": -0.068359375,
-          "text": "image"
-        },
-        {
-          "id": 29973,
-          "logprob": -0.15539551,
-          "text": "?"
-        }
-      ],
-      "seed": null,
-      "tokens": [
-        {
-          "id": 32002,
-          "logprob": -0.0019159317,
-          "special": true,
-          "text": "<end_of_utterance>"
-        },
-        {
-          "id": 29871,
-          "logprob": -8.404255e-05,
-          "special": false,
-          "text": " "
-        },
-        {
-          "id": 13,
-          "logprob": -1.7642975e-05,
-          "special": false,
-          "text": "\n"
-        },
-        {
-          "id": 7900,
-          "logprob": -3.0994415e-06,
-          "special": false,
-          "text": "Ass"
-        },
-        {
-          "id": 22137,
-          "logprob": 0.0,
-          "special": false,
-          "text": "istant"
-        },
-        {
-          "id": 29901,
-          "logprob": -3.0994415e-06,
-          "special": false,
-          "text": ":"
-        },
-        {
-          "id": 319,
-          "logprob": -0.90722656,
-          "special": false,
-          "text": " A"
-        },
-        {
-          "id": 696,
-          "logprob": -1.2373047,
-          "special": false,
-          "text": " ro"
-        },
-        {
-          "id": 15664,
-          "logprob": -0.00024938583,
-          "special": false,
-          "text": "oster"
-        },
-        {
-          "id": 15028,
-          "logprob": -1.1708984,
+          "logprob": -1.1660156,
          "special": false,
          "text": " stands"
        }
--- a/server/text_generation_server/models/causal_lm.py
+++ b/server/text_generation_server/models/causal_lm.py
@ -641,8 +641,11 @@ class CausalLM(Model):
            if i % self.world_size == self.rank:
                if stop:
                    # Decode generated tokens
-                    output_text = self.decode(
-                        all_input_ids[-stopping_criteria.current_tokens :, 0]
+                    output_text, _, _ = self.decode_token(
+                        all_input_ids[:, 0],
+                        prefix_offset=len(all_input_ids) - stopping_criteria.current_tokens - 1,
+                        read_offset=len(all_input_ids) - stopping_criteria.current_tokens,
+                        skip_special_tokens=True
                    )
                    # Get seed
                    if isinstance(next_token_chooser.choice, Sampling):
--- a/server/text_generation_server/models/flash_causal_lm.py
+++ b/server/text_generation_server/models/flash_causal_lm.py
@ -793,11 +793,6 @@ class FlashCausalLM(Model):

        return int(num_blocks * BLOCK_SIZE)

-    def decode(self, generated_ids: Union[torch.Tensor, List[int]]) -> str:
-        return self.tokenizer.decode(
-            generated_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False
-        )
-
    def forward(
        self,
        input_ids: torch.Tensor,
@ -1008,8 +1003,11 @@ class FlashCausalLM(Model):
            if i % self.world_size == self.rank:
                if stop:
                    # Decode generated tokens
-                    output_text = self.decode(
-                        all_input_ids[-stopping_criteria.current_tokens :]
+                    output_text, _, _ = self.decode_token(
+                        all_input_ids,
+                        prefix_offset=len(all_input_ids) - stopping_criteria.current_tokens - 1,
+                        read_offset=len(all_input_ids) - stopping_criteria.current_tokens,
+                        skip_special_tokens=True
                    )
                    generated_text = GeneratedText(
                        output_text,
--- a/server/text_generation_server/models/idefics_causal_lm.py
+++ b/server/text_generation_server/models/idefics_causal_lm.py
@ -611,11 +611,6 @@ class IdeficsCausalLM(Model):
    def batch_type(self) -> Type[IdeficsCausalLMBatch]:
        return IdeficsCausalLMBatch

-    def decode(self, generated_ids: List[int]) -> str:
-        return self.tokenizer.decode(
-            generated_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False
-        )
-
    def forward(
        self,
        input_ids,
@ -728,8 +723,11 @@ class IdeficsCausalLM(Model):
            if i % self.world_size == self.rank:
                if stop:
                    # Decode generated tokens
-                    output_text = self.decode(
-                        all_input_ids[-stopping_criteria.current_tokens :, 0]
+                    output_text, _, _ = self.decode_token(
+                        all_input_ids[:, 0],
+                        prefix_offset=len(all_input_ids) - stopping_criteria.current_tokens - 1,
+                        read_offset=len(all_input_ids) - stopping_criteria.current_tokens,
+                        skip_special_tokens=True
                    )
                    # Get seed
                    if isinstance(next_token_chooser.choice, Sampling):
--- a/server/text_generation_server/models/model.py
+++ b/server/text_generation_server/models/model.py
@ -64,16 +64,17 @@ class Model(ABC):
        all_input_ids: List[int],
        prefix_offset: int = 0,
        read_offset: int = 0,
+        skip_special_tokens: bool = False,
    ) -> Tuple[str, int, int]:
        """Hack to hopefully support generate_stream for the maximum number of tokenizers"""

        # The prefix text is necessary only to defeat cleanup algorithms in the decode
        # which decide to add a space or not depending on the surrounding ids.
        prefix_text = self.tokenizer.decode(
-            all_input_ids[prefix_offset:read_offset], skip_special_tokens=False
+            all_input_ids[prefix_offset:read_offset], skip_special_tokens=skip_special_tokens
        )
        new_text = self.tokenizer.decode(
-            all_input_ids[prefix_offset:], skip_special_tokens=False
+            all_input_ids[prefix_offset:], skip_special_tokens=skip_special_tokens
        )

        if len(new_text) > len(prefix_text) and not new_text.endswith("<EFBFBD>"):
--- a/server/text_generation_server/models/seq2seq_lm.py
+++ b/server/text_generation_server/models/seq2seq_lm.py
@ -710,8 +710,11 @@ class Seq2SeqLM(Model):
                if stop:
                    # Slice with decoder_input_length to remove padding
                    # Decode all tokens
-                    output_text = self.decode(
-                        all_decoder_input_ids[-decoder_input_length:]
+                    output_text, _, _ = self.decode_token(
+                        all_decoder_input_ids,
+                        prefix_offset=len(all_decoder_input_ids) - decoder_input_length - 1,
+                        read_offset=len(all_decoder_input_ids) - decoder_input_length,
+                        skip_special_tokens=True
                    )

                    # Get seed