Remove the stripping of the prefix space (and any other mangling that tokenizers might do). (#1065)
Supersedes #1024 # What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflects the extent of your awesome contribution. Then, please replace this with a description of the change and which issue is fixed (if applicable). Please also include relevant motivation and context. List any dependencies (if any) that are required for this change. Once you're done, someone will review your PR shortly (see the section "Who can review?" below to tag some potential reviewers). They may suggest changes to make the code even better. If no one reviewed your PR after a week has passed, don't hesitate to post a new comment @-mentioning the same persons---sometimes notifications get lost. --> <!-- Remove if not applicable --> Fixes # (issue) ## Before submitting - [ ] This PR fixes a typo or improves the docs (you can dismiss the other checks if that's the case). - [ ] Did you read the [contributor guideline](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md#start-contributing-pull-requests), Pull Request section? - [ ] Was this discussed/approved via a Github issue or the [forum](https://discuss.huggingface.co/)? Please add a link to it if that's the case. - [ ] Did you make sure to update the documentation with your changes? Here are the [documentation guidelines](https://github.com/huggingface/transformers/tree/main/docs), and [here are tips on formatting docstrings](https://github.com/huggingface/transformers/tree/main/docs#writing-source-documentation). - [ ] Did you write any new necessary tests? ## Who can review? Anyone in the community is free to review the PR once the tests have passed. Feel free to tag members/contributors who may be interested in your PR. 
<!-- Your PR will be replied to more quickly if you can figure out the right person to tag with @ @OlivierDehaene OR @Narsil --> --------- Co-authored-by: bangoz <ch_xie@pku.edu.cn>
This commit is contained in:
parent
95a4bb696a
commit
b32e9ce9d5
|
@ -16,7 +16,7 @@
|
|||
},
|
||||
{
|
||||
"id": 2009,
|
||||
"logprob": -11.5546875,
|
||||
"logprob": -11.546875,
|
||||
"text": "request"
|
||||
}
|
||||
],
|
||||
|
@ -24,65 +24,66 @@
|
|||
"tokens": [
|
||||
{
|
||||
"id": 363,
|
||||
"logprob": -1.5380859,
|
||||
"logprob": -1.5351562,
|
||||
"special": false,
|
||||
"text": " for"
|
||||
},
|
||||
{
|
||||
"id": 847,
|
||||
"logprob": -2.5917969,
|
||||
"logprob": -2.5722656,
|
||||
"special": false,
|
||||
"text": " /"
|
||||
},
|
||||
{
|
||||
"id": 2754,
|
||||
"logprob": -2.2773438,
|
||||
"logprob": -2.2714844,
|
||||
"special": false,
|
||||
"text": "api"
|
||||
},
|
||||
{
|
||||
"id": 29914,
|
||||
"logprob": -0.034362793,
|
||||
"logprob": -0.03414917,
|
||||
"special": false,
|
||||
"text": "/"
|
||||
},
|
||||
{
|
||||
"id": 29894,
|
||||
"logprob": -0.96533203,
|
||||
"logprob": -0.95996094,
|
||||
"special": false,
|
||||
"text": "v"
|
||||
},
|
||||
{
|
||||
"id": 29896,
|
||||
"logprob": -0.36669922,
|
||||
"logprob": -0.3635254,
|
||||
"special": false,
|
||||
"text": "1"
|
||||
},
|
||||
{
|
||||
"id": 29914,
|
||||
"logprob": -0.013122559,
|
||||
"logprob": -0.013031006,
|
||||
"special": false,
|
||||
"text": "/"
|
||||
},
|
||||
{
|
||||
"id": 16418,
|
||||
"logprob": -3.1503906,
|
||||
"logprob": -3.1523438,
|
||||
"special": false,
|
||||
"text": "projects"
|
||||
},
|
||||
{
|
||||
"id": 29914,
|
||||
"logprob": -0.43652344,
|
||||
"logprob": -0.43701172,
|
||||
"special": false,
|
||||
"text": "/"
|
||||
},
|
||||
{
|
||||
"id": 29896,
|
||||
"logprob": -1.9404297,
|
||||
"logprob": -1.9394531,
|
||||
"special": false,
|
||||
"text": "1"
|
||||
}
|
||||
]
|
||||
],
|
||||
"top_tokens": null
|
||||
},
|
||||
"generated_text": " for /api/v1/projects/1"
|
||||
}
|
||||
|
|
|
@ -16,7 +16,7 @@
|
|||
},
|
||||
{
|
||||
"id": 2009,
|
||||
"logprob": -11.5546875,
|
||||
"logprob": -11.546875,
|
||||
"text": "request"
|
||||
}
|
||||
],
|
||||
|
@ -24,19 +24,19 @@
|
|||
"tokens": [
|
||||
{
|
||||
"id": 5229,
|
||||
"logprob": -2.5683594,
|
||||
"logprob": -2.5839844,
|
||||
"special": false,
|
||||
"text": " failed"
|
||||
},
|
||||
{
|
||||
"id": 29901,
|
||||
"logprob": -0.45336914,
|
||||
"logprob": -0.44970703,
|
||||
"special": false,
|
||||
"text": ":"
|
||||
},
|
||||
{
|
||||
"id": 4829,
|
||||
"logprob": -1.8408203,
|
||||
"logprob": -1.8339844,
|
||||
"special": false,
|
||||
"text": " Error"
|
||||
},
|
||||
|
@ -52,7 +52,8 @@
|
|||
"special": false,
|
||||
"text": " test"
|
||||
}
|
||||
]
|
||||
],
|
||||
"top_tokens": null
|
||||
},
|
||||
"generated_text": "Test request failed: Error in test"
|
||||
}
|
||||
|
|
|
@ -17,7 +17,7 @@
|
|||
},
|
||||
{
|
||||
"id": 2009,
|
||||
"logprob": -11.5546875,
|
||||
"logprob": -11.546875,
|
||||
"text": "request"
|
||||
}
|
||||
],
|
||||
|
@ -25,25 +25,25 @@
|
|||
"tokens": [
|
||||
{
|
||||
"id": 363,
|
||||
"logprob": -1.5380859,
|
||||
"logprob": -1.5351562,
|
||||
"special": false,
|
||||
"text": " for"
|
||||
},
|
||||
{
|
||||
"id": 847,
|
||||
"logprob": -2.5859375,
|
||||
"logprob": -2.5566406,
|
||||
"special": false,
|
||||
"text": " /"
|
||||
},
|
||||
{
|
||||
"id": 2754,
|
||||
"logprob": -2.2695312,
|
||||
"logprob": -2.2519531,
|
||||
"special": false,
|
||||
"text": "api"
|
||||
},
|
||||
{
|
||||
"id": 29914,
|
||||
"logprob": -0.03439331,
|
||||
"logprob": -0.03414917,
|
||||
"special": false,
|
||||
"text": "/"
|
||||
},
|
||||
|
@ -55,13 +55,13 @@
|
|||
},
|
||||
{
|
||||
"id": 29896,
|
||||
"logprob": -0.36694336,
|
||||
"logprob": -0.3647461,
|
||||
"special": false,
|
||||
"text": "1"
|
||||
},
|
||||
{
|
||||
"id": 29914,
|
||||
"logprob": -0.013114929,
|
||||
"logprob": -0.012901306,
|
||||
"special": false,
|
||||
"text": "/"
|
||||
},
|
||||
|
@ -73,17 +73,18 @@
|
|||
},
|
||||
{
|
||||
"id": 29914,
|
||||
"logprob": -0.43847656,
|
||||
"logprob": -0.4362793,
|
||||
"special": false,
|
||||
"text": "/"
|
||||
},
|
||||
{
|
||||
"id": 29896,
|
||||
"logprob": -1.9433594,
|
||||
"logprob": -1.9394531,
|
||||
"special": false,
|
||||
"text": "1"
|
||||
}
|
||||
]
|
||||
],
|
||||
"top_tokens": null
|
||||
},
|
||||
"generated_text": " for /api/v1/projects/1"
|
||||
},
|
||||
|
@ -105,7 +106,7 @@
|
|||
},
|
||||
{
|
||||
"id": 2009,
|
||||
"logprob": -11.5546875,
|
||||
"logprob": -11.546875,
|
||||
"text": "request"
|
||||
}
|
||||
],
|
||||
|
@ -113,43 +114,43 @@
|
|||
"tokens": [
|
||||
{
|
||||
"id": 363,
|
||||
"logprob": -1.5322266,
|
||||
"logprob": -1.5332031,
|
||||
"special": false,
|
||||
"text": " for"
|
||||
},
|
||||
{
|
||||
"id": 847,
|
||||
"logprob": -2.5585938,
|
||||
"logprob": -2.5625,
|
||||
"special": false,
|
||||
"text": " /"
|
||||
},
|
||||
{
|
||||
"id": 2754,
|
||||
"logprob": -2.265625,
|
||||
"logprob": -2.2617188,
|
||||
"special": false,
|
||||
"text": "api"
|
||||
},
|
||||
{
|
||||
"id": 29914,
|
||||
"logprob": -0.034088135,
|
||||
"logprob": -0.033996582,
|
||||
"special": false,
|
||||
"text": "/"
|
||||
},
|
||||
{
|
||||
"id": 29894,
|
||||
"logprob": -0.96240234,
|
||||
"logprob": -0.9609375,
|
||||
"special": false,
|
||||
"text": "v"
|
||||
},
|
||||
{
|
||||
"id": 29896,
|
||||
"logprob": -0.36816406,
|
||||
"logprob": -0.36572266,
|
||||
"special": false,
|
||||
"text": "1"
|
||||
},
|
||||
{
|
||||
"id": 29914,
|
||||
"logprob": -0.013191223,
|
||||
"logprob": -0.0129776,
|
||||
"special": false,
|
||||
"text": "/"
|
||||
},
|
||||
|
@ -161,17 +162,18 @@
|
|||
},
|
||||
{
|
||||
"id": 29914,
|
||||
"logprob": -0.43774414,
|
||||
"logprob": -0.4362793,
|
||||
"special": false,
|
||||
"text": "/"
|
||||
},
|
||||
{
|
||||
"id": 29896,
|
||||
"logprob": -1.9443359,
|
||||
"logprob": -1.9394531,
|
||||
"special": false,
|
||||
"text": "1"
|
||||
}
|
||||
]
|
||||
],
|
||||
"top_tokens": null
|
||||
},
|
||||
"generated_text": " for /api/v1/projects/1"
|
||||
},
|
||||
|
@ -193,7 +195,7 @@
|
|||
},
|
||||
{
|
||||
"id": 2009,
|
||||
"logprob": -11.5546875,
|
||||
"logprob": -11.546875,
|
||||
"text": "request"
|
||||
}
|
||||
],
|
||||
|
@ -201,43 +203,43 @@
|
|||
"tokens": [
|
||||
{
|
||||
"id": 363,
|
||||
"logprob": -1.5322266,
|
||||
"logprob": -1.5332031,
|
||||
"special": false,
|
||||
"text": " for"
|
||||
},
|
||||
{
|
||||
"id": 847,
|
||||
"logprob": -2.5585938,
|
||||
"logprob": -2.5625,
|
||||
"special": false,
|
||||
"text": " /"
|
||||
},
|
||||
{
|
||||
"id": 2754,
|
||||
"logprob": -2.265625,
|
||||
"logprob": -2.2617188,
|
||||
"special": false,
|
||||
"text": "api"
|
||||
},
|
||||
{
|
||||
"id": 29914,
|
||||
"logprob": -0.034088135,
|
||||
"logprob": -0.033996582,
|
||||
"special": false,
|
||||
"text": "/"
|
||||
},
|
||||
{
|
||||
"id": 29894,
|
||||
"logprob": -0.96240234,
|
||||
"logprob": -0.9609375,
|
||||
"special": false,
|
||||
"text": "v"
|
||||
},
|
||||
{
|
||||
"id": 29896,
|
||||
"logprob": -0.36816406,
|
||||
"logprob": -0.36572266,
|
||||
"special": false,
|
||||
"text": "1"
|
||||
},
|
||||
{
|
||||
"id": 29914,
|
||||
"logprob": -0.013191223,
|
||||
"logprob": -0.0129776,
|
||||
"special": false,
|
||||
"text": "/"
|
||||
},
|
||||
|
@ -249,17 +251,18 @@
|
|||
},
|
||||
{
|
||||
"id": 29914,
|
||||
"logprob": -0.43774414,
|
||||
"logprob": -0.4362793,
|
||||
"special": false,
|
||||
"text": "/"
|
||||
},
|
||||
{
|
||||
"id": 29896,
|
||||
"logprob": -1.9443359,
|
||||
"logprob": -1.9394531,
|
||||
"special": false,
|
||||
"text": "1"
|
||||
}
|
||||
]
|
||||
],
|
||||
"top_tokens": null
|
||||
},
|
||||
"generated_text": " for /api/v1/projects/1"
|
||||
},
|
||||
|
@ -281,7 +284,7 @@
|
|||
},
|
||||
{
|
||||
"id": 2009,
|
||||
"logprob": -11.5546875,
|
||||
"logprob": -11.546875,
|
||||
"text": "request"
|
||||
}
|
||||
],
|
||||
|
@ -289,43 +292,43 @@
|
|||
"tokens": [
|
||||
{
|
||||
"id": 363,
|
||||
"logprob": -1.5322266,
|
||||
"logprob": -1.5332031,
|
||||
"special": false,
|
||||
"text": " for"
|
||||
},
|
||||
{
|
||||
"id": 847,
|
||||
"logprob": -2.5585938,
|
||||
"logprob": -2.5625,
|
||||
"special": false,
|
||||
"text": " /"
|
||||
},
|
||||
{
|
||||
"id": 2754,
|
||||
"logprob": -2.265625,
|
||||
"logprob": -2.2617188,
|
||||
"special": false,
|
||||
"text": "api"
|
||||
},
|
||||
{
|
||||
"id": 29914,
|
||||
"logprob": -0.034088135,
|
||||
"logprob": -0.033996582,
|
||||
"special": false,
|
||||
"text": "/"
|
||||
},
|
||||
{
|
||||
"id": 29894,
|
||||
"logprob": -0.96240234,
|
||||
"logprob": -0.9609375,
|
||||
"special": false,
|
||||
"text": "v"
|
||||
},
|
||||
{
|
||||
"id": 29896,
|
||||
"logprob": -0.36816406,
|
||||
"logprob": -0.36572266,
|
||||
"special": false,
|
||||
"text": "1"
|
||||
},
|
||||
{
|
||||
"id": 29914,
|
||||
"logprob": -0.013191223,
|
||||
"logprob": -0.0129776,
|
||||
"special": false,
|
||||
"text": "/"
|
||||
},
|
||||
|
@ -337,17 +340,18 @@
|
|||
},
|
||||
{
|
||||
"id": 29914,
|
||||
"logprob": -0.43774414,
|
||||
"logprob": -0.4362793,
|
||||
"special": false,
|
||||
"text": "/"
|
||||
},
|
||||
{
|
||||
"id": 29896,
|
||||
"logprob": -1.9443359,
|
||||
"logprob": -1.9394531,
|
||||
"special": false,
|
||||
"text": "1"
|
||||
}
|
||||
]
|
||||
],
|
||||
"top_tokens": null
|
||||
},
|
||||
"generated_text": " for /api/v1/projects/1"
|
||||
}
|
||||
|
|
|
@ -11,22 +11,22 @@
|
|||
},
|
||||
{
|
||||
"id": 4911,
|
||||
"logprob": -5.7773438,
|
||||
"logprob": -5.7851562,
|
||||
"text": "User"
|
||||
},
|
||||
{
|
||||
"id": 29901,
|
||||
"logprob": -0.0069999695,
|
||||
"logprob": -0.006996155,
|
||||
"text": ":"
|
||||
},
|
||||
{
|
||||
"id": 32000,
|
||||
"logprob": -0.8125,
|
||||
"logprob": -0.81347656,
|
||||
"text": "<fake_token_around_image>"
|
||||
},
|
||||
{
|
||||
"id": 32001,
|
||||
"logprob": -6.651878e-05,
|
||||
"logprob": -6.687641e-05,
|
||||
"text": "<image>"
|
||||
},
|
||||
{
|
||||
|
@ -36,67 +36,67 @@
|
|||
},
|
||||
{
|
||||
"id": 1815,
|
||||
"logprob": -4.2265625,
|
||||
"logprob": -4.2148438,
|
||||
"text": "Can"
|
||||
},
|
||||
{
|
||||
"id": 366,
|
||||
"logprob": -0.013977051,
|
||||
"logprob": -0.014137268,
|
||||
"text": "you"
|
||||
},
|
||||
{
|
||||
"id": 2649,
|
||||
"logprob": -4.4375,
|
||||
"logprob": -4.4335938,
|
||||
"text": "tell"
|
||||
},
|
||||
{
|
||||
"id": 592,
|
||||
"logprob": -0.29077148,
|
||||
"logprob": -0.2919922,
|
||||
"text": "me"
|
||||
},
|
||||
{
|
||||
"id": 263,
|
||||
"logprob": -4.2109375,
|
||||
"logprob": -4.2070312,
|
||||
"text": "a"
|
||||
},
|
||||
{
|
||||
"id": 1407,
|
||||
"logprob": -9.4296875,
|
||||
"logprob": -9.421875,
|
||||
"text": "very"
|
||||
},
|
||||
{
|
||||
"id": 3273,
|
||||
"logprob": -1.8671875,
|
||||
"logprob": -1.8720703,
|
||||
"text": "short"
|
||||
},
|
||||
{
|
||||
"id": 5828,
|
||||
"logprob": -0.26586914,
|
||||
"logprob": -0.26489258,
|
||||
"text": "story"
|
||||
},
|
||||
{
|
||||
"id": 2729,
|
||||
"logprob": -3.7460938,
|
||||
"logprob": -3.7441406,
|
||||
"text": "based"
|
||||
},
|
||||
{
|
||||
"id": 373,
|
||||
"logprob": -0.0005350113,
|
||||
"logprob": -0.0005393028,
|
||||
"text": "on"
|
||||
},
|
||||
{
|
||||
"id": 278,
|
||||
"logprob": -0.13867188,
|
||||
"logprob": -0.140625,
|
||||
"text": "the"
|
||||
},
|
||||
{
|
||||
"id": 1967,
|
||||
"logprob": -0.06842041,
|
||||
"logprob": -0.06756592,
|
||||
"text": "image"
|
||||
},
|
||||
{
|
||||
"id": 29973,
|
||||
"logprob": -0.15319824,
|
||||
"logprob": -0.15454102,
|
||||
"text": "?"
|
||||
}
|
||||
],
|
||||
|
@ -104,7 +104,7 @@
|
|||
"tokens": [
|
||||
{
|
||||
"id": 32002,
|
||||
"logprob": -0.0019445419,
|
||||
"logprob": -0.0019140244,
|
||||
"special": true,
|
||||
"text": "<end_of_utterance>"
|
||||
},
|
||||
|
@ -116,13 +116,13 @@
|
|||
},
|
||||
{
|
||||
"id": 13,
|
||||
"logprob": -1.7881393e-05,
|
||||
"logprob": -1.7642975e-05,
|
||||
"special": false,
|
||||
"text": "\n"
|
||||
},
|
||||
{
|
||||
"id": 7900,
|
||||
"logprob": -3.0994415e-06,
|
||||
"logprob": -2.9802322e-06,
|
||||
"special": false,
|
||||
"text": "Ass"
|
||||
},
|
||||
|
@ -140,25 +140,25 @@
|
|||
},
|
||||
{
|
||||
"id": 319,
|
||||
"logprob": -0.9057617,
|
||||
"logprob": -0.91064453,
|
||||
"special": false,
|
||||
"text": " A"
|
||||
},
|
||||
{
|
||||
"id": 696,
|
||||
"logprob": -1.2314453,
|
||||
"logprob": -1.2412109,
|
||||
"special": false,
|
||||
"text": " ro"
|
||||
},
|
||||
{
|
||||
"id": 15664,
|
||||
"logprob": -0.00024914742,
|
||||
"logprob": -0.0002439022,
|
||||
"special": false,
|
||||
"text": "oster"
|
||||
},
|
||||
{
|
||||
"id": 15028,
|
||||
"logprob": -1.1621094,
|
||||
"logprob": -1.1630859,
|
||||
"special": false,
|
||||
"text": " stands"
|
||||
}
|
||||
|
|
|
@ -1,4 +1,173 @@
|
|||
[
|
||||
{
|
||||
"details": {
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 1,
|
||||
"logprob": null,
|
||||
"text": "<s>"
|
||||
},
|
||||
{
|
||||
"id": 4911,
|
||||
"logprob": -5.7851562,
|
||||
"text": "User"
|
||||
},
|
||||
{
|
||||
"id": 29901,
|
||||
"logprob": -0.006996155,
|
||||
"text": ":"
|
||||
},
|
||||
{
|
||||
"id": 32000,
|
||||
"logprob": -0.81347656,
|
||||
"text": "<fake_token_around_image>"
|
||||
},
|
||||
{
|
||||
"id": 32001,
|
||||
"logprob": -6.687641e-05,
|
||||
"text": "<image>"
|
||||
},
|
||||
{
|
||||
"id": 32000,
|
||||
"logprob": -3.5762787e-07,
|
||||
"text": "<fake_token_around_image>"
|
||||
},
|
||||
{
|
||||
"id": 1815,
|
||||
"logprob": -4.2148438,
|
||||
"text": "Can"
|
||||
},
|
||||
{
|
||||
"id": 366,
|
||||
"logprob": -0.014137268,
|
||||
"text": "you"
|
||||
},
|
||||
{
|
||||
"id": 2649,
|
||||
"logprob": -4.4335938,
|
||||
"text": "tell"
|
||||
},
|
||||
{
|
||||
"id": 592,
|
||||
"logprob": -0.2919922,
|
||||
"text": "me"
|
||||
},
|
||||
{
|
||||
"id": 263,
|
||||
"logprob": -4.2070312,
|
||||
"text": "a"
|
||||
},
|
||||
{
|
||||
"id": 1407,
|
||||
"logprob": -9.421875,
|
||||
"text": "very"
|
||||
},
|
||||
{
|
||||
"id": 3273,
|
||||
"logprob": -1.8720703,
|
||||
"text": "short"
|
||||
},
|
||||
{
|
||||
"id": 5828,
|
||||
"logprob": -0.26489258,
|
||||
"text": "story"
|
||||
},
|
||||
{
|
||||
"id": 2729,
|
||||
"logprob": -3.7441406,
|
||||
"text": "based"
|
||||
},
|
||||
{
|
||||
"id": 373,
|
||||
"logprob": -0.0005393028,
|
||||
"text": "on"
|
||||
},
|
||||
{
|
||||
"id": 278,
|
||||
"logprob": -0.140625,
|
||||
"text": "the"
|
||||
},
|
||||
{
|
||||
"id": 1967,
|
||||
"logprob": -0.06756592,
|
||||
"text": "image"
|
||||
},
|
||||
{
|
||||
"id": 29973,
|
||||
"logprob": -0.15454102,
|
||||
"text": "?"
|
||||
}
|
||||
],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
"id": 32002,
|
||||
"logprob": -0.0019140244,
|
||||
"special": true,
|
||||
"text": "<end_of_utterance>"
|
||||
},
|
||||
{
|
||||
"id": 29871,
|
||||
"logprob": -8.392334e-05,
|
||||
"special": false,
|
||||
"text": " "
|
||||
},
|
||||
{
|
||||
"id": 13,
|
||||
"logprob": -1.7881393e-05,
|
||||
"special": false,
|
||||
"text": "\n"
|
||||
},
|
||||
{
|
||||
"id": 7900,
|
||||
"logprob": -2.9802322e-06,
|
||||
"special": false,
|
||||
"text": "Ass"
|
||||
},
|
||||
{
|
||||
"id": 22137,
|
||||
"logprob": 0.0,
|
||||
"special": false,
|
||||
"text": "istant"
|
||||
},
|
||||
{
|
||||
"id": 29901,
|
||||
"logprob": -3.0994415e-06,
|
||||
"special": false,
|
||||
"text": ":"
|
||||
},
|
||||
{
|
||||
"id": 319,
|
||||
"logprob": -0.9057617,
|
||||
"special": false,
|
||||
"text": " A"
|
||||
},
|
||||
{
|
||||
"id": 696,
|
||||
"logprob": -1.2294922,
|
||||
"special": false,
|
||||
"text": " ro"
|
||||
},
|
||||
{
|
||||
"id": 15664,
|
||||
"logprob": -0.00024533272,
|
||||
"special": false,
|
||||
"text": "oster"
|
||||
},
|
||||
{
|
||||
"id": 15028,
|
||||
"logprob": -1.1640625,
|
||||
"special": false,
|
||||
"text": " stands"
|
||||
}
|
||||
],
|
||||
"top_tokens": null
|
||||
},
|
||||
"generated_text": " \nAssistant: A rooster stands"
|
||||
},
|
||||
{
|
||||
"details": {
|
||||
"best_of_sequences": null,
|
||||
|
@ -17,17 +186,17 @@
|
|||
},
|
||||
{
|
||||
"id": 29901,
|
||||
"logprob": -0.0069999695,
|
||||
"logprob": -0.0070114136,
|
||||
"text": ":"
|
||||
},
|
||||
{
|
||||
"id": 32000,
|
||||
"logprob": -0.8125,
|
||||
"logprob": -0.8208008,
|
||||
"text": "<fake_token_around_image>"
|
||||
},
|
||||
{
|
||||
"id": 32001,
|
||||
"logprob": -6.651878e-05,
|
||||
"logprob": -6.699562e-05,
|
||||
"text": "<image>"
|
||||
},
|
||||
{
|
||||
|
@ -42,17 +211,17 @@
|
|||
},
|
||||
{
|
||||
"id": 366,
|
||||
"logprob": -0.013977051,
|
||||
"logprob": -0.014175415,
|
||||
"text": "you"
|
||||
},
|
||||
{
|
||||
"id": 2649,
|
||||
"logprob": -4.4375,
|
||||
"logprob": -4.4296875,
|
||||
"text": "tell"
|
||||
},
|
||||
{
|
||||
"id": 592,
|
||||
"logprob": -0.29077148,
|
||||
"logprob": -0.29516602,
|
||||
"text": "me"
|
||||
},
|
||||
{
|
||||
|
@ -67,37 +236,37 @@
|
|||
},
|
||||
{
|
||||
"id": 3273,
|
||||
"logprob": -1.8671875,
|
||||
"logprob": -1.8720703,
|
||||
"text": "short"
|
||||
},
|
||||
{
|
||||
"id": 5828,
|
||||
"logprob": -0.26586914,
|
||||
"logprob": -0.26879883,
|
||||
"text": "story"
|
||||
},
|
||||
{
|
||||
"id": 2729,
|
||||
"logprob": -3.7460938,
|
||||
"logprob": -3.7675781,
|
||||
"text": "based"
|
||||
},
|
||||
{
|
||||
"id": 373,
|
||||
"logprob": -0.0005350113,
|
||||
"logprob": -0.0005354881,
|
||||
"text": "on"
|
||||
},
|
||||
{
|
||||
"id": 278,
|
||||
"logprob": -0.13867188,
|
||||
"logprob": -0.13671875,
|
||||
"text": "the"
|
||||
},
|
||||
{
|
||||
"id": 1967,
|
||||
"logprob": -0.06842041,
|
||||
"logprob": -0.06719971,
|
||||
"text": "image"
|
||||
},
|
||||
{
|
||||
"id": 29973,
|
||||
"logprob": -0.15319824,
|
||||
"logprob": -0.15551758,
|
||||
"text": "?"
|
||||
}
|
||||
],
|
||||
|
@ -105,13 +274,13 @@
|
|||
"tokens": [
|
||||
{
|
||||
"id": 32002,
|
||||
"logprob": -0.0019445419,
|
||||
"logprob": -0.0019130707,
|
||||
"special": true,
|
||||
"text": "<end_of_utterance>"
|
||||
},
|
||||
{
|
||||
"id": 29871,
|
||||
"logprob": -8.416176e-05,
|
||||
"logprob": -8.392334e-05,
|
||||
"special": false,
|
||||
"text": " "
|
||||
},
|
||||
|
@ -135,25 +304,25 @@
|
|||
},
|
||||
{
|
||||
"id": 29901,
|
||||
"logprob": -3.2186508e-06,
|
||||
"logprob": -3.0994415e-06,
|
||||
"special": false,
|
||||
"text": ":"
|
||||
},
|
||||
{
|
||||
"id": 319,
|
||||
"logprob": -0.89941406,
|
||||
"logprob": -0.9013672,
|
||||
"special": false,
|
||||
"text": " A"
|
||||
},
|
||||
{
|
||||
"id": 696,
|
||||
"logprob": -1.234375,
|
||||
"logprob": -1.2324219,
|
||||
"special": false,
|
||||
"text": " ro"
|
||||
},
|
||||
{
|
||||
"id": 15664,
|
||||
"logprob": -0.0002465248,
|
||||
"logprob": -0.0002477169,
|
||||
"special": false,
|
||||
"text": "oster"
|
||||
},
|
||||
|
@ -181,22 +350,22 @@
|
|||
},
|
||||
{
|
||||
"id": 4911,
|
||||
"logprob": -5.7890625,
|
||||
"logprob": -5.7773438,
|
||||
"text": "User"
|
||||
},
|
||||
{
|
||||
"id": 29901,
|
||||
"logprob": -0.0070152283,
|
||||
"logprob": -0.0070114136,
|
||||
"text": ":"
|
||||
},
|
||||
{
|
||||
"id": 32000,
|
||||
"logprob": -0.8125,
|
||||
"logprob": -0.8208008,
|
||||
"text": "<fake_token_around_image>"
|
||||
},
|
||||
{
|
||||
"id": 32001,
|
||||
"logprob": -6.651878e-05,
|
||||
"logprob": -6.699562e-05,
|
||||
"text": "<image>"
|
||||
},
|
||||
{
|
||||
|
@ -211,17 +380,17 @@
|
|||
},
|
||||
{
|
||||
"id": 366,
|
||||
"logprob": -0.014190674,
|
||||
"logprob": -0.014175415,
|
||||
"text": "you"
|
||||
},
|
||||
{
|
||||
"id": 2649,
|
||||
"logprob": -4.4140625,
|
||||
"logprob": -4.4296875,
|
||||
"text": "tell"
|
||||
},
|
||||
{
|
||||
"id": 592,
|
||||
"logprob": -0.2919922,
|
||||
"logprob": -0.29516602,
|
||||
"text": "me"
|
||||
},
|
||||
{
|
||||
|
@ -231,7 +400,7 @@
|
|||
},
|
||||
{
|
||||
"id": 1407,
|
||||
"logprob": -9.4375,
|
||||
"logprob": -9.4296875,
|
||||
"text": "very"
|
||||
},
|
||||
{
|
||||
|
@ -241,7 +410,7 @@
|
|||
},
|
||||
{
|
||||
"id": 5828,
|
||||
"logprob": -0.26904297,
|
||||
"logprob": -0.26879883,
|
||||
"text": "story"
|
||||
},
|
||||
{
|
||||
|
@ -251,22 +420,22 @@
|
|||
},
|
||||
{
|
||||
"id": 373,
|
||||
"logprob": -0.0005402565,
|
||||
"logprob": -0.0005354881,
|
||||
"text": "on"
|
||||
},
|
||||
{
|
||||
"id": 278,
|
||||
"logprob": -0.13867188,
|
||||
"logprob": -0.13671875,
|
||||
"text": "the"
|
||||
},
|
||||
{
|
||||
"id": 1967,
|
||||
"logprob": -0.068359375,
|
||||
"logprob": -0.06719971,
|
||||
"text": "image"
|
||||
},
|
||||
{
|
||||
"id": 29973,
|
||||
"logprob": -0.15539551,
|
||||
"logprob": -0.15551758,
|
||||
"text": "?"
|
||||
}
|
||||
],
|
||||
|
@ -274,7 +443,7 @@
|
|||
"tokens": [
|
||||
{
|
||||
"id": 32002,
|
||||
"logprob": -0.0019168854,
|
||||
"logprob": -0.001912117,
|
||||
"special": true,
|
||||
"text": "<end_of_utterance>"
|
||||
},
|
||||
|
@ -286,7 +455,7 @@
|
|||
},
|
||||
{
|
||||
"id": 13,
|
||||
"logprob": -1.7642975e-05,
|
||||
"logprob": -1.7762184e-05,
|
||||
"special": false,
|
||||
"text": "\n"
|
||||
},
|
||||
|
@ -310,25 +479,25 @@
|
|||
},
|
||||
{
|
||||
"id": 319,
|
||||
"logprob": -0.90722656,
|
||||
"logprob": -0.9013672,
|
||||
"special": false,
|
||||
"text": " A"
|
||||
},
|
||||
{
|
||||
"id": 696,
|
||||
"logprob": -1.2373047,
|
||||
"logprob": -1.2324219,
|
||||
"special": false,
|
||||
"text": " ro"
|
||||
},
|
||||
{
|
||||
"id": 15664,
|
||||
"logprob": -0.00024938583,
|
||||
"logprob": -0.0002477169,
|
||||
"special": false,
|
||||
"text": "oster"
|
||||
},
|
||||
{
|
||||
"id": 15028,
|
||||
"logprob": -1.1708984,
|
||||
"logprob": -1.1660156,
|
||||
"special": false,
|
||||
"text": " stands"
|
||||
}
|
||||
|
@ -350,22 +519,22 @@
|
|||
},
|
||||
{
|
||||
"id": 4911,
|
||||
"logprob": -5.7890625,
|
||||
"logprob": -5.7773438,
|
||||
"text": "User"
|
||||
},
|
||||
{
|
||||
"id": 29901,
|
||||
"logprob": -0.0070152283,
|
||||
"logprob": -0.0070114136,
|
||||
"text": ":"
|
||||
},
|
||||
{
|
||||
"id": 32000,
|
||||
"logprob": -0.8125,
|
||||
"logprob": -0.8208008,
|
||||
"text": "<fake_token_around_image>"
|
||||
},
|
||||
{
|
||||
"id": 32001,
|
||||
"logprob": -6.663799e-05,
|
||||
"logprob": -6.699562e-05,
|
||||
"text": "<image>"
|
||||
},
|
||||
{
|
||||
|
@ -380,17 +549,17 @@
|
|||
},
|
||||
{
|
||||
"id": 366,
|
||||
"logprob": -0.014190674,
|
||||
"logprob": -0.014175415,
|
||||
"text": "you"
|
||||
},
|
||||
{
|
||||
"id": 2649,
|
||||
"logprob": -4.4140625,
|
||||
"logprob": -4.4296875,
|
||||
"text": "tell"
|
||||
},
|
||||
{
|
||||
"id": 592,
|
||||
"logprob": -0.2919922,
|
||||
"logprob": -0.29516602,
|
||||
"text": "me"
|
||||
},
|
||||
{
|
||||
|
@ -400,7 +569,7 @@
|
|||
},
|
||||
{
|
||||
"id": 1407,
|
||||
"logprob": -9.4375,
|
||||
"logprob": -9.4296875,
|
||||
"text": "very"
|
||||
},
|
||||
{
|
||||
|
@ -410,7 +579,7 @@
|
|||
},
|
||||
{
|
||||
"id": 5828,
|
||||
"logprob": -0.26904297,
|
||||
"logprob": -0.26879883,
|
||||
"text": "story"
|
||||
},
|
||||
{
|
||||
|
@ -420,22 +589,22 @@
|
|||
},
|
||||
{
|
||||
"id": 373,
|
||||
"logprob": -0.0005402565,
|
||||
"logprob": -0.0005354881,
|
||||
"text": "on"
|
||||
},
|
||||
{
|
||||
"id": 278,
|
||||
"logprob": -0.13867188,
|
||||
"logprob": -0.13671875,
|
||||
"text": "the"
|
||||
},
|
||||
{
|
||||
"id": 1967,
|
||||
"logprob": -0.068359375,
|
||||
"logprob": -0.06719971,
|
||||
"text": "image"
|
||||
},
|
||||
{
|
||||
"id": 29973,
|
||||
"logprob": -0.15539551,
|
||||
"logprob": -0.15551758,
|
||||
"text": "?"
|
||||
}
|
||||
],
|
||||
|
@ -443,19 +612,19 @@
|
|||
"tokens": [
|
||||
{
|
||||
"id": 32002,
|
||||
"logprob": -0.0019168854,
|
||||
"logprob": -0.001912117,
|
||||
"special": true,
|
||||
"text": "<end_of_utterance>"
|
||||
},
|
||||
{
|
||||
"id": 29871,
|
||||
"logprob": -8.404255e-05,
|
||||
"logprob": -8.392334e-05,
|
||||
"special": false,
|
||||
"text": " "
|
||||
},
|
||||
{
|
||||
"id": 13,
|
||||
"logprob": -1.7642975e-05,
|
||||
"logprob": -1.7762184e-05,
|
||||
"special": false,
|
||||
"text": "\n"
|
||||
},
|
||||
|
@ -479,194 +648,25 @@
|
|||
},
|
||||
{
|
||||
"id": 319,
|
||||
"logprob": -0.90722656,
|
||||
"logprob": -0.9013672,
|
||||
"special": false,
|
||||
"text": " A"
|
||||
},
|
||||
{
|
||||
"id": 696,
|
||||
"logprob": -1.2373047,
|
||||
"logprob": -1.2324219,
|
||||
"special": false,
|
||||
"text": " ro"
|
||||
},
|
||||
{
|
||||
"id": 15664,
|
||||
"logprob": -0.00024938583,
|
||||
"logprob": -0.0002477169,
|
||||
"special": false,
|
||||
"text": "oster"
|
||||
},
|
||||
{
|
||||
"id": 15028,
|
||||
"logprob": -1.1708984,
|
||||
"special": false,
|
||||
"text": " stands"
|
||||
}
|
||||
],
|
||||
"top_tokens": null
|
||||
},
|
||||
"generated_text": "\nAssistant: A rooster stands"
|
||||
},
|
||||
{
|
||||
"details": {
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 1,
|
||||
"logprob": null,
|
||||
"text": "<s>"
|
||||
},
|
||||
{
|
||||
"id": 4911,
|
||||
"logprob": -5.7890625,
|
||||
"text": "User"
|
||||
},
|
||||
{
|
||||
"id": 29901,
|
||||
"logprob": -0.0070152283,
|
||||
"text": ":"
|
||||
},
|
||||
{
|
||||
"id": 32000,
|
||||
"logprob": -0.8125,
|
||||
"text": "<fake_token_around_image>"
|
||||
},
|
||||
{
|
||||
"id": 32001,
|
||||
"logprob": -6.663799e-05,
|
||||
"text": "<image>"
|
||||
},
|
||||
{
|
||||
"id": 32000,
|
||||
"logprob": -3.5762787e-07,
|
||||
"text": "<fake_token_around_image>"
|
||||
},
|
||||
{
|
||||
"id": 1815,
|
||||
"logprob": -4.2265625,
|
||||
"text": "Can"
|
||||
},
|
||||
{
|
||||
"id": 366,
|
||||
"logprob": -0.014190674,
|
||||
"text": "you"
|
||||
},
|
||||
{
|
||||
"id": 2649,
|
||||
"logprob": -4.4140625,
|
||||
"text": "tell"
|
||||
},
|
||||
{
|
||||
"id": 592,
|
||||
"logprob": -0.2919922,
|
||||
"text": "me"
|
||||
},
|
||||
{
|
||||
"id": 263,
|
||||
"logprob": -4.2109375,
|
||||
"text": "a"
|
||||
},
|
||||
{
|
||||
"id": 1407,
|
||||
"logprob": -9.4375,
|
||||
"text": "very"
|
||||
},
|
||||
{
|
||||
"id": 3273,
|
||||
"logprob": -1.8720703,
|
||||
"text": "short"
|
||||
},
|
||||
{
|
||||
"id": 5828,
|
||||
"logprob": -0.26904297,
|
||||
"text": "story"
|
||||
},
|
||||
{
|
||||
"id": 2729,
|
||||
"logprob": -3.7675781,
|
||||
"text": "based"
|
||||
},
|
||||
{
|
||||
"id": 373,
|
||||
"logprob": -0.0005402565,
|
||||
"text": "on"
|
||||
},
|
||||
{
|
||||
"id": 278,
|
||||
"logprob": -0.13867188,
|
||||
"text": "the"
|
||||
},
|
||||
{
|
||||
"id": 1967,
|
||||
"logprob": -0.068359375,
|
||||
"text": "image"
|
||||
},
|
||||
{
|
||||
"id": 29973,
|
||||
"logprob": -0.15539551,
|
||||
"text": "?"
|
||||
}
|
||||
],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
"id": 32002,
|
||||
"logprob": -0.0019159317,
|
||||
"special": true,
|
||||
"text": "<end_of_utterance>"
|
||||
},
|
||||
{
|
||||
"id": 29871,
|
||||
"logprob": -8.404255e-05,
|
||||
"special": false,
|
||||
"text": " "
|
||||
},
|
||||
{
|
||||
"id": 13,
|
||||
"logprob": -1.7642975e-05,
|
||||
"special": false,
|
||||
"text": "\n"
|
||||
},
|
||||
{
|
||||
"id": 7900,
|
||||
"logprob": -3.0994415e-06,
|
||||
"special": false,
|
||||
"text": "Ass"
|
||||
},
|
||||
{
|
||||
"id": 22137,
|
||||
"logprob": 0.0,
|
||||
"special": false,
|
||||
"text": "istant"
|
||||
},
|
||||
{
|
||||
"id": 29901,
|
||||
"logprob": -3.0994415e-06,
|
||||
"special": false,
|
||||
"text": ":"
|
||||
},
|
||||
{
|
||||
"id": 319,
|
||||
"logprob": -0.90722656,
|
||||
"special": false,
|
||||
"text": " A"
|
||||
},
|
||||
{
|
||||
"id": 696,
|
||||
"logprob": -1.2373047,
|
||||
"special": false,
|
||||
"text": " ro"
|
||||
},
|
||||
{
|
||||
"id": 15664,
|
||||
"logprob": -0.00024938583,
|
||||
"special": false,
|
||||
"text": "oster"
|
||||
},
|
||||
{
|
||||
"id": 15028,
|
||||
"logprob": -1.1708984,
|
||||
"logprob": -1.1660156,
|
||||
"special": false,
|
||||
"text": " stands"
|
||||
}
|
||||
|
|
|
@ -641,8 +641,11 @@ class CausalLM(Model):
|
|||
if i % self.world_size == self.rank:
|
||||
if stop:
|
||||
# Decode generated tokens
|
||||
output_text = self.decode(
|
||||
all_input_ids[-stopping_criteria.current_tokens :, 0]
|
||||
output_text, _, _ = self.decode_token(
|
||||
all_input_ids[:, 0],
|
||||
prefix_offset=len(all_input_ids) - stopping_criteria.current_tokens - 1,
|
||||
read_offset=len(all_input_ids) - stopping_criteria.current_tokens,
|
||||
skip_special_tokens=True
|
||||
)
|
||||
# Get seed
|
||||
if isinstance(next_token_chooser.choice, Sampling):
|
||||
|
|
|
@ -793,11 +793,6 @@ class FlashCausalLM(Model):
|
|||
|
||||
return int(num_blocks * BLOCK_SIZE)
|
||||
|
||||
def decode(self, generated_ids: Union[torch.Tensor, List[int]]) -> str:
|
||||
return self.tokenizer.decode(
|
||||
generated_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False
|
||||
)
|
||||
|
||||
def forward(
|
||||
self,
|
||||
input_ids: torch.Tensor,
|
||||
|
@ -1008,8 +1003,11 @@ class FlashCausalLM(Model):
|
|||
if i % self.world_size == self.rank:
|
||||
if stop:
|
||||
# Decode generated tokens
|
||||
output_text = self.decode(
|
||||
all_input_ids[-stopping_criteria.current_tokens :]
|
||||
output_text, _, _ = self.decode_token(
|
||||
all_input_ids,
|
||||
prefix_offset=len(all_input_ids) - stopping_criteria.current_tokens - 1,
|
||||
read_offset=len(all_input_ids) - stopping_criteria.current_tokens,
|
||||
skip_special_tokens=True
|
||||
)
|
||||
generated_text = GeneratedText(
|
||||
output_text,
|
||||
|
|
|
@ -611,11 +611,6 @@ class IdeficsCausalLM(Model):
|
|||
def batch_type(self) -> Type[IdeficsCausalLMBatch]:
|
||||
return IdeficsCausalLMBatch
|
||||
|
||||
def decode(self, generated_ids: List[int]) -> str:
|
||||
return self.tokenizer.decode(
|
||||
generated_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False
|
||||
)
|
||||
|
||||
def forward(
|
||||
self,
|
||||
input_ids,
|
||||
|
@ -728,8 +723,11 @@ class IdeficsCausalLM(Model):
|
|||
if i % self.world_size == self.rank:
|
||||
if stop:
|
||||
# Decode generated tokens
|
||||
output_text = self.decode(
|
||||
all_input_ids[-stopping_criteria.current_tokens :, 0]
|
||||
output_text, _, _ = self.decode_token(
|
||||
all_input_ids[:, 0],
|
||||
prefix_offset=len(all_input_ids) - stopping_criteria.current_tokens - 1,
|
||||
read_offset=len(all_input_ids) - stopping_criteria.current_tokens,
|
||||
skip_special_tokens=True
|
||||
)
|
||||
# Get seed
|
||||
if isinstance(next_token_chooser.choice, Sampling):
|
||||
|
|
|
@ -64,16 +64,17 @@ class Model(ABC):
|
|||
all_input_ids: List[int],
|
||||
prefix_offset: int = 0,
|
||||
read_offset: int = 0,
|
||||
skip_special_tokens: bool = False,
|
||||
) -> Tuple[str, int, int]:
|
||||
"""Hack to hopefully support generate_stream for the maximum number of tokenizers"""
|
||||
|
||||
# The prefix text is necessary only to defeat cleanup algorithms in the decode
|
||||
# which decide to add a space or not depending on the surrounding ids.
|
||||
prefix_text = self.tokenizer.decode(
|
||||
all_input_ids[prefix_offset:read_offset], skip_special_tokens=False
|
||||
all_input_ids[prefix_offset:read_offset], skip_special_tokens=skip_special_tokens
|
||||
)
|
||||
new_text = self.tokenizer.decode(
|
||||
all_input_ids[prefix_offset:], skip_special_tokens=False
|
||||
all_input_ids[prefix_offset:], skip_special_tokens=skip_special_tokens
|
||||
)
|
||||
|
||||
if len(new_text) > len(prefix_text) and not new_text.endswith("<EFBFBD>"):
|
||||
|
|
|
@ -710,8 +710,11 @@ class Seq2SeqLM(Model):
|
|||
if stop:
|
||||
# Slice with decoder_input_length to remove padding
|
||||
# Decode all tokens
|
||||
output_text = self.decode(
|
||||
all_decoder_input_ids[-decoder_input_length:]
|
||||
output_text, _, _ = self.decode_token(
|
||||
all_decoder_input_ids,
|
||||
prefix_offset=len(all_decoder_input_ids) - decoder_input_length - 1,
|
||||
read_offset=len(all_decoder_input_ids) - decoder_input_length,
|
||||
skip_special_tokens=True
|
||||
)
|
||||
|
||||
# Get seed
|
||||
|
|
Loading…
Reference in New Issue