Fix Phi 3.5 MoE tests (#2684)

PR #2682 also fixed an issue in Phi MoE, but it changes the test
outputs a bit. This commit updates the tests to match.
Daniël de Kok 2024-10-24 15:21:50 +02:00 committed by GitHub
parent 1b914f37e7
commit 14a0df3a38
4 changed files with 214 additions and 214 deletions
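
For context: these integration tests run a fixed prompt through the model and compare the full response, including per-token logprobs, against a checked-in JSON snapshot, so even a numerics-only fix like PR #2682 forces the snapshots to be regenerated. A minimal sketch of the pattern, assuming the repository's `flash_phi35_moe` client fixture and `response_snapshot` snapshot fixture; the exact request parameters are assumptions inferred from the snapshots' prefill sections:

```python
import pytest


@pytest.mark.asyncio
async def test_flash_phi35_moe(flash_phi35_moe, response_snapshot):
    # Greedy decoding with a fixed token budget keeps the output
    # deterministic, so it can be compared against the stored snapshot.
    # (decoder_input_details=True is an assumption; it is what makes the
    # "prefill" logprobs appear in the snapshot JSON.)
    response = await flash_phi35_moe.generate(
        "What is gradient descent?\n\n",
        max_new_tokens=10,
        decoder_input_details=True,
    )

    assert response.details.generated_tokens == 10
    assert (
        response.generated_text
        == "Gradient descent is an optimization algorithm commonly used in"
    )
    # The snapshot fixture compares the whole response object (token ids,
    # texts, and logprobs), which is why the snapshot JSON files below
    # change even where only the numerics moved.
    assert response == response_snapshot
```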


@@ -11,32 +11,32 @@
       },
       {
         "id": 338,
-        "logprob": -0.7133789,
+        "logprob": -0.6201172,
         "text": "is"
       },
       {
         "id": 16030,
-        "logprob": -13.9296875,
+        "logprob": -13.6484375,
         "text": "gradient"
       },
       {
         "id": 26815,
-        "logprob": -0.048919678,
+        "logprob": -0.003894806,
         "text": "descent"
       },
       {
         "id": 29973,
-        "logprob": -3.0078125,
+        "logprob": -2.6386719,
         "text": "?"
       },
       {
         "id": 13,
-        "logprob": -2.8105469,
+        "logprob": -6.46875,
         "text": "\n"
       },
       {
         "id": 13,
-        "logprob": -0.84521484,
+        "logprob": -6.6875,
         "text": "\n"
       }
     ],
@@ -44,66 +44,66 @@
     "tokens": [
       {
         "id": 25584,
-        "logprob": -0.017028809,
+        "logprob": -0.008979797,
         "special": false,
         "text": "Grad"
       },
       {
         "id": 993,
-        "logprob": -0.0027313232,
+        "logprob": -8.34465e-07,
         "special": false,
         "text": "ient"
       },
       {
         "id": 26815,
-        "logprob": -0.023254395,
+        "logprob": -0.0009407997,
         "special": false,
         "text": " descent"
       },
       {
         "id": 338,
-        "logprob": -2.0623207e-05,
+        "logprob": -0.0003838539,
         "special": false,
         "text": " is"
       },
       {
-        "id": 263,
-        "logprob": -0.5361328,
+        "id": 385,
+        "logprob": -0.24499512,
         "special": false,
-        "text": " a"
-      },
-      {
-        "id": 937,
-        "logprob": -0.17578125,
-        "special": false,
-        "text": " first"
-      },
-      {
-        "id": 29899,
-        "logprob": 0.0,
-        "special": false,
-        "text": "-"
-      },
-      {
-        "id": 2098,
-        "logprob": -0.00011539459,
-        "special": false,
-        "text": "order"
+        "text": " an"
       },
       {
         "id": 13883,
-        "logprob": -0.47436523,
+        "logprob": -0.010406494,
         "special": false,
         "text": " optimization"
       },
       {
         "id": 5687,
-        "logprob": -0.00027680397,
+        "logprob": -0.00024354458,
         "special": false,
         "text": " algorithm"
+      },
+      {
+        "id": 15574,
+        "logprob": -0.6582031,
+        "special": false,
+        "text": " commonly"
+      },
+      {
+        "id": 1304,
+        "logprob": -0.00092840195,
+        "special": false,
+        "text": " used"
+      },
+      {
+        "id": 297,
+        "logprob": -0.19470215,
+        "special": false,
+        "text": " in"
       }
     ],
     "top_tokens": null
   },
-  "generated_text": "Gradient descent is a first-order optimization algorithm"
+  "generated_text": "Gradient descent is an optimization algorithm commonly used in"
 }


@@ -5,95 +5,95 @@
     "generated_tokens": 10,
     "prefill": [
       {
-        "id": 16030,
+        "id": 338,
         "logprob": null,
+        "text": "is"
+      },
+      {
+        "id": 16030,
+        "logprob": -13.328125,
         "text": "gradient"
       },
       {
         "id": 26815,
-        "logprob": -6.4960938,
+        "logprob": -0.24023438,
         "text": "descent"
       },
       {
         "id": 29973,
-        "logprob": -5.1484375,
+        "logprob": -3.1386719,
         "text": "?"
       },
       {
         "id": 13,
-        "logprob": -4.0351562,
-        "text": "\n"
-      },
-      {
-        "id": 13,
-        "logprob": -5.2265625,
+        "logprob": -3.0878906,
         "text": "\n"
       }
     ],
     "seed": 0,
     "tokens": [
       {
-        "id": 10994,
-        "logprob": -1.1542969,
-        "special": false,
-        "text": "Hello"
-      },
-      {
-        "id": 29991,
+        "id": 25584,
         "logprob": 0.0,
         "special": false,
-        "text": "!"
+        "text": "Grad"
       },
       {
-        "id": 739,
+        "id": 993,
         "logprob": 0.0,
         "special": false,
-        "text": " It"
+        "text": "ient"
       },
-      {
-        "id": 2444,
-        "logprob": -0.42260742,
-        "special": false,
-        "text": " seems"
-      },
       {
-        "id": 366,
+        "id": 2726,
         "logprob": 0.0,
         "special": false,
-        "text": " you"
+        "text": " Des"
       },
       {
-        "id": 29915,
+        "id": 1760,
         "logprob": 0.0,
         "special": false,
-        "text": "'"
+        "text": "cent"
       },
       {
-        "id": 276,
-        "logprob": -0.9838867,
+        "id": 313,
+        "logprob": -0.12322998,
         "special": false,
-        "text": "re"
+        "text": " ("
       },
       {
-        "id": 3211,
+        "id": 29954,
         "logprob": 0.0,
         "special": false,
-        "text": " address"
+        "text": "G"
       },
       {
-        "id": 292,
+        "id": 29928,
         "logprob": 0.0,
         "special": false,
-        "text": "ing"
+        "text": "D"
       },
       {
-        "id": 263,
-        "logprob": -0.15124512,
+        "id": 29897,
+        "logprob": 0.0,
         "special": false,
-        "text": " a"
+        "text": ")"
+      },
+      {
+        "id": 338,
+        "logprob": -0.6040039,
+        "special": false,
+        "text": " is"
+      },
+      {
+        "id": 385,
+        "logprob": -0.1796875,
+        "special": false,
+        "text": " an"
       }
     ],
     "top_tokens": null
   },
-  "generated_text": "What is gradient descent?\n\nHello! It seems you're addressing a"
+  "generated_text": "What is gradient descent?\nGradient Descent (GD) is an"
 }


@@ -12,32 +12,32 @@
         },
         {
           "id": 338,
-          "logprob": -0.7133789,
+          "logprob": -0.6201172,
           "text": "is"
         },
         {
           "id": 16030,
-          "logprob": -13.9296875,
+          "logprob": -13.6484375,
           "text": "gradient"
         },
         {
           "id": 26815,
-          "logprob": -0.048919678,
+          "logprob": -0.003894806,
           "text": "descent"
         },
         {
           "id": 29973,
-          "logprob": -3.0078125,
+          "logprob": -2.6386719,
           "text": "?"
         },
         {
           "id": 13,
-          "logprob": -2.8105469,
+          "logprob": -6.46875,
           "text": "\n"
         },
         {
           "id": 13,
-          "logprob": -0.84521484,
+          "logprob": -6.6875,
           "text": "\n"
         }
       ],
@@ -45,68 +45,68 @@
       "tokens": [
         {
           "id": 25584,
-          "logprob": -0.017028809,
+          "logprob": -0.008979797,
           "special": false,
           "text": "Grad"
         },
         {
           "id": 993,
-          "logprob": -0.0028476715,
+          "logprob": -8.34465e-07,
           "special": false,
           "text": "ient"
         },
         {
           "id": 26815,
-          "logprob": -0.023971558,
+          "logprob": -0.00097084045,
           "special": false,
           "text": " descent"
         },
         {
           "id": 338,
-          "logprob": -2.0384789e-05,
+          "logprob": -0.0003838539,
           "special": false,
           "text": " is"
         },
         {
-          "id": 263,
-          "logprob": -0.5229492,
+          "id": 385,
+          "logprob": -0.23840332,
           "special": false,
-          "text": " a"
-        },
-        {
-          "id": 937,
-          "logprob": -0.17602539,
-          "special": false,
-          "text": " first"
-        },
-        {
-          "id": 29899,
-          "logprob": 0.0,
-          "special": false,
-          "text": "-"
-        },
-        {
-          "id": 2098,
-          "logprob": -0.000116467476,
-          "special": false,
-          "text": "order"
+          "text": " an"
         },
         {
           "id": 13883,
-          "logprob": -0.47436523,
+          "logprob": -0.010406494,
           "special": false,
           "text": " optimization"
         },
         {
           "id": 5687,
-          "logprob": -0.00027871132,
+          "logprob": -0.0002501011,
           "special": false,
           "text": " algorithm"
+        },
+        {
+          "id": 15574,
+          "logprob": -0.6582031,
+          "special": false,
+          "text": " commonly"
+        },
+        {
+          "id": 1304,
+          "logprob": -0.00092840195,
+          "special": false,
+          "text": " used"
+        },
+        {
+          "id": 297,
+          "logprob": -0.18933105,
+          "special": false,
+          "text": " in"
        }
       ],
       "top_tokens": null
     },
-    "generated_text": "Gradient descent is a first-order optimization algorithm"
+    "generated_text": "Gradient descent is an optimization algorithm commonly used in"
   },
   {
     "details": {
@@ -121,32 +121,32 @@
         },
         {
           "id": 338,
-          "logprob": -0.7128906,
+          "logprob": -0.6113281,
           "text": "is"
         },
        {
           "id": 16030,
-          "logprob": -13.9375,
+          "logprob": -13.6640625,
           "text": "gradient"
         },
         {
           "id": 26815,
-          "logprob": -0.05053711,
+          "logprob": -0.003929138,
           "text": "descent"
         },
         {
           "id": 29973,
-          "logprob": -3.0058594,
+          "logprob": -2.625,
           "text": "?"
         },
         {
           "id": 13,
-          "logprob": -2.8242188,
+          "logprob": -6.484375,
           "text": "\n"
         },
         {
           "id": 13,
-          "logprob": -0.84521484,
+          "logprob": -6.6875,
           "text": "\n"
         }
       ],
@@ -154,68 +154,68 @@
       "tokens": [
         {
           "id": 25584,
-          "logprob": -0.018859863,
+          "logprob": -0.009017944,
           "special": false,
           "text": "Grad"
         },
         {
           "id": 993,
-          "logprob": -0.002822876,
+          "logprob": -9.536743e-07,
           "special": false,
           "text": "ient"
         },
         {
           "id": 26815,
-          "logprob": -0.023254395,
+          "logprob": -0.00097084045,
           "special": false,
           "text": " descent"
         },
         {
           "id": 338,
-          "logprob": -2.0384789e-05,
+          "logprob": -0.0003838539,
           "special": false,
           "text": " is"
         },
         {
-          "id": 263,
-          "logprob": -0.5229492,
+          "id": 385,
+          "logprob": -0.24499512,
           "special": false,
-          "text": " a"
-        },
-        {
-          "id": 937,
-          "logprob": -0.17126465,
-          "special": false,
-          "text": " first"
-        },
-        {
-          "id": 29899,
-          "logprob": 0.0,
-          "special": false,
-          "text": "-"
-        },
-        {
-          "id": 2098,
-          "logprob": -0.0001155138,
-          "special": false,
-          "text": "order"
+          "text": " an"
         },
         {
           "id": 13883,
-          "logprob": -0.47436523,
+          "logprob": -0.010406494,
           "special": false,
           "text": " optimization"
         },
         {
           "id": 5687,
-          "logprob": -0.00027036667,
+          "logprob": -0.0002501011,
           "special": false,
           "text": " algorithm"
+        },
+        {
+          "id": 15574,
+          "logprob": -0.6435547,
+          "special": false,
+          "text": " commonly"
+        },
+        {
+          "id": 1304,
+          "logprob": -0.0009279251,
+          "special": false,
+          "text": " used"
+        },
+        {
+          "id": 297,
+          "logprob": -0.18933105,
+          "special": false,
+          "text": " in"
        }
       ],
       "top_tokens": null
     },
-    "generated_text": "Gradient descent is a first-order optimization algorithm"
+    "generated_text": "Gradient descent is an optimization algorithm commonly used in"
   },
   {
     "details": {
@@ -230,32 +230,32 @@
         },
         {
           "id": 338,
-          "logprob": -0.71484375,
+          "logprob": -0.609375,
           "text": "is"
         },
         {
           "id": 16030,
-          "logprob": -13.9375,
+          "logprob": -13.671875,
           "text": "gradient"
         },
         {
           "id": 26815,
-          "logprob": -0.049346924,
+          "logprob": -0.0040016174,
           "text": "descent"
         },
         {
           "id": 29973,
-          "logprob": -3.0078125,
+          "logprob": -2.6230469,
           "text": "?"
         },
         {
           "id": 13,
-          "logprob": -2.8242188,
+          "logprob": -6.453125,
           "text": "\n"
         },
         {
           "id": 13,
-          "logprob": -0.86328125,
+          "logprob": -6.6875,
           "text": "\n"
         }
       ],
@@ -263,68 +263,68 @@
       "tokens": [
         {
           "id": 25584,
-          "logprob": -0.017196655,
+          "logprob": -0.008956909,
           "special": false,
           "text": "Grad"
         },
         {
           "id": 993,
-          "logprob": -0.0028438568,
+          "logprob": -8.34465e-07,
           "special": false,
           "text": "ient"
         },
         {
           "id": 26815,
-          "logprob": -0.023254395,
+          "logprob": -0.0009407997,
           "special": false,
           "text": " descent"
         },
         {
           "id": 338,
-          "logprob": -2.026558e-05,
+          "logprob": -0.0003721714,
           "special": false,
           "text": " is"
         },
         {
-          "id": 263,
-          "logprob": -0.5229492,
+          "id": 385,
+          "logprob": -0.24499512,
           "special": false,
-          "text": " a"
-        },
-        {
-          "id": 937,
-          "logprob": -0.17602539,
-          "special": false,
-          "text": " first"
-        },
-        {
-          "id": 29899,
-          "logprob": 0.0,
-          "special": false,
-          "text": "-"
-        },
-        {
-          "id": 2098,
-          "logprob": -0.00011622906,
-          "special": false,
-          "text": "order"
+          "text": " an"
         },
         {
           "id": 13883,
-          "logprob": -0.48608398,
+          "logprob": -0.010406494,
           "special": false,
           "text": " optimization"
         },
         {
           "id": 5687,
-          "logprob": -0.00027894974,
+          "logprob": -0.0002501011,
           "special": false,
           "text": " algorithm"
+        },
+        {
+          "id": 15574,
+          "logprob": -0.6435547,
+          "special": false,
+          "text": " commonly"
+        },
+        {
+          "id": 1304,
+          "logprob": -0.00092601776,
+          "special": false,
+          "text": " used"
+        },
+        {
+          "id": 297,
+          "logprob": -0.19177246,
+          "special": false,
+          "text": " in"
        }
       ],
       "top_tokens": null
     },
-    "generated_text": "Gradient descent is a first-order optimization algorithm"
+    "generated_text": "Gradient descent is an optimization algorithm commonly used in"
   },
   {
     "details": {
@@ -339,32 +339,32 @@
         },
         {
           "id": 338,
-          "logprob": -0.7192383,
+          "logprob": -0.609375,
           "text": "is"
         },
         {
           "id": 16030,
-          "logprob": -13.9375,
+          "logprob": -13.6640625,
           "text": "gradient"
         },
         {
           "id": 26815,
-          "logprob": -0.050445557,
+          "logprob": -0.0038967133,
           "text": "descent"
         },
         {
           "id": 29973,
-          "logprob": -3.0078125,
+          "logprob": -2.6347656,
           "text": "?"
         },
         {
           "id": 13,
-          "logprob": -2.8242188,
+          "logprob": -6.453125,
           "text": "\n"
         },
         {
           "id": 13,
-          "logprob": -0.8276367,
+          "logprob": -6.6875,
           "text": "\n"
         }
       ],
@@ -372,67 +372,67 @@
       "tokens": [
        {
           "id": 25584,
-          "logprob": -0.01727295,
+          "logprob": -0.008979797,
           "special": false,
           "text": "Grad"
         },
         {
           "id": 993,
-          "logprob": -0.0027542114,
+          "logprob": -9.536743e-07,
           "special": false,
           "text": "ient"
         },
         {
           "id": 26815,
-          "logprob": -0.023254395,
+          "logprob": -0.0009407997,
           "special": false,
           "text": " descent"
         },
         {
           "id": 338,
-          "logprob": -2.0384789e-05,
+          "logprob": -0.00038409233,
           "special": false,
           "text": " is"
         },
         {
-          "id": 263,
-          "logprob": -0.5229492,
+          "id": 385,
+          "logprob": -0.24499512,
           "special": false,
-          "text": " a"
-        },
-        {
-          "id": 937,
-          "logprob": -0.17126465,
-          "special": false,
-          "text": " first"
-        },
-        {
-          "id": 29899,
-          "logprob": 0.0,
-          "special": false,
-          "text": "-"
-        },
-        {
-          "id": 2098,
-          "logprob": -0.00011301041,
-          "special": false,
-          "text": "order"
+          "text": " an"
         },
         {
           "id": 13883,
-          "logprob": -0.48608398,
+          "logprob": -0.010414124,
           "special": false,
           "text": " optimization"
         },
         {
           "id": 5687,
-          "logprob": -0.00027894974,
+          "logprob": -0.00024354458,
           "special": false,
           "text": " algorithm"
+        },
+        {
+          "id": 15574,
+          "logprob": -0.6435547,
+          "special": false,
+          "text": " commonly"
+        },
+        {
+          "id": 1304,
+          "logprob": -0.0009279251,
+          "special": false,
+          "text": " used"
+        },
+        {
+          "id": 297,
+          "logprob": -0.19470215,
+          "special": false,
+          "text": " in"
        }
       ],
       "top_tokens": null
     },
-    "generated_text": "Gradient descent is a first-order optimization algorithm"
+    "generated_text": "Gradient descent is an optimization algorithm commonly used in"
   }
 ]


@@ -25,7 +25,7 @@ async def test_flash_phi35_moe(flash_phi35_moe, response_snapshot):
     assert response.details.generated_tokens == 10
     assert (
         response.generated_text
-        == "Gradient descent is a first-order optimization algorithm"
+        == "Gradient descent is an optimization algorithm commonly used in"
     )
     assert response == response_snapshot
@@ -33,7 +33,7 @@ async def test_flash_phi35_moe(flash_phi35_moe, response_snapshot):
 @pytest.mark.asyncio
 async def test_flash_phi35_moe_all_params(flash_phi35_moe, response_snapshot):
     response = await flash_phi35_moe.generate(
-        "What is gradient descent?\n\n",
+        "What is gradient descent?\n",
         max_new_tokens=10,
         repetition_penalty=1.2,
         return_full_text=True,
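
This hunk ends at the diff's context window, so the remaining keyword arguments of the call are not shown here. Judging from the updated snapshot above (its `"seed": 0` and five-token prefill), the call plausibly continues along these lines; this is a hedged reconstruction, not the file's verbatim contents:

```python
@pytest.mark.asyncio
async def test_flash_phi35_moe_all_params(flash_phi35_moe, response_snapshot):
    # Hypothetical full argument list: everything after return_full_text is
    # inferred from the snapshot ("seed": 0, and a five-token prefill that
    # suggests truncate=5), not taken from this diff.
    response = await flash_phi35_moe.generate(
        "What is gradient descent?\n",
        max_new_tokens=10,
        repetition_penalty=1.2,
        return_full_text=True,
        stop_sequences=["test"],
        temperature=0.5,
        top_p=0.9,
        top_k=10,
        truncate=5,
        typical_p=0.9,
        watermark=True,
        decoder_input_details=True,
        seed=0,
    )
```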
@@ -51,7 +51,7 @@ async def test_flash_phi35_moe_all_params(flash_phi35_moe, response_snapshot):
     assert response.details.generated_tokens == 10
     assert (
         response.generated_text
-        == "What is gradient descent?\n\nHello! It seems you're addressing a"
+        == "What is gradient descent?\nGradient Descent (GD) is an"
     )
     assert response == response_snapshot
@@ -66,7 +66,7 @@ async def test_flash_phi35_moe_load(flash_phi35_moe, generate_load, response_snapshot):
     assert responses[0].details.generated_tokens == 10
     assert (
         responses[0].generated_text
-        == "Gradient descent is a first-order optimization algorithm"
+        == "Gradient descent is an optimization algorithm commonly used in"
     )
     assert all(
         [r.generated_text == responses[0].generated_text for r in responses]
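
The last hunk is likewise cut off by the context window. For completeness, a sketch of the whole load test under the same assumptions: `generate_load` is the harness fixture that issues n identical concurrent requests, and the request count of 4 matches the four response objects in the load snapshot above.

```python
import pytest


@pytest.mark.asyncio
async def test_flash_phi35_moe_load(flash_phi35_moe, generate_load, response_snapshot):
    # Fire several identical greedy requests; a correctly sharded MoE setup
    # must return identical generations for all of them.
    responses = await generate_load(
        flash_phi35_moe,
        "What is gradient descent?\n\n",
        max_new_tokens=10,
        n=4,
    )

    assert len(responses) == 4
    assert responses[0].details.generated_tokens == 10
    assert (
        responses[0].generated_text
        == "Gradient descent is an optimization algorithm commonly used in"
    )
    assert all(
        [r.generated_text == responses[0].generated_text for r in responses]
    )
    # Presumed final check, following the pattern of the other tests: the
    # whole batch is compared against the stored snapshot.
    assert responses == response_snapshot
```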