Fix Phi 3.5 MoE tests (#2684)

PR #2682 also fixed an issue in Phi MoE, but it changed the test
outputs a bit. This commit updates the tests accordingly.
Daniël de Kok 2024-10-24 15:21:50 +02:00 committed by GitHub
parent 1b914f37e7
commit 14a0df3a38
4 changed files with 214 additions and 214 deletions

View File

@@ -11,32 +11,32 @@
       },
       {
         "id": 338,
-        "logprob": -0.7133789,
+        "logprob": -0.6201172,
         "text": "is"
       },
       {
         "id": 16030,
-        "logprob": -13.9296875,
+        "logprob": -13.6484375,
         "text": "gradient"
       },
       {
         "id": 26815,
-        "logprob": -0.048919678,
+        "logprob": -0.003894806,
        "text": "descent"
       },
       {
         "id": 29973,
-        "logprob": -3.0078125,
+        "logprob": -2.6386719,
         "text": "?"
       },
       {
         "id": 13,
-        "logprob": -2.8105469,
+        "logprob": -6.46875,
         "text": "\n"
       },
       {
         "id": 13,
-        "logprob": -0.84521484,
+        "logprob": -6.6875,
         "text": "\n"
       }
     ],
@@ -44,66 +44,66 @@
     "tokens": [
       {
         "id": 25584,
-        "logprob": -0.017028809,
+        "logprob": -0.008979797,
         "special": false,
         "text": "Grad"
       },
       {
         "id": 993,
-        "logprob": -0.0027313232,
+        "logprob": -8.34465e-07,
         "special": false,
         "text": "ient"
       },
       {
         "id": 26815,
-        "logprob": -0.023254395,
+        "logprob": -0.0009407997,
         "special": false,
         "text": " descent"
       },
       {
         "id": 338,
-        "logprob": -2.0623207e-05,
+        "logprob": -0.0003838539,
         "special": false,
         "text": " is"
       },
       {
-        "id": 263,
-        "logprob": -0.5361328,
+        "id": 385,
+        "logprob": -0.24499512,
         "special": false,
-        "text": " a"
-      },
-      {
-        "id": 937,
-        "logprob": -0.17578125,
-        "special": false,
-        "text": " first"
-      },
-      {
-        "id": 29899,
-        "logprob": 0.0,
-        "special": false,
-        "text": "-"
-      },
-      {
-        "id": 2098,
-        "logprob": -0.00011539459,
-        "special": false,
-        "text": "order"
+        "text": " an"
       },
       {
         "id": 13883,
-        "logprob": -0.47436523,
+        "logprob": -0.010406494,
         "special": false,
         "text": " optimization"
       },
       {
         "id": 5687,
-        "logprob": -0.00027680397,
+        "logprob": -0.00024354458,
         "special": false,
         "text": " algorithm"
+      },
+      {
+        "id": 15574,
+        "logprob": -0.6582031,
+        "special": false,
+        "text": " commonly"
+      },
+      {
+        "id": 1304,
+        "logprob": -0.00092840195,
+        "special": false,
+        "text": " used"
+      },
+      {
+        "id": 297,
+        "logprob": -0.19470215,
+        "special": false,
+        "text": " in"
       }
     ],
     "top_tokens": null
   },
-  "generated_text": "Gradient descent is a first-order optimization algorithm"
+  "generated_text": "Gradient descent is an optimization algorithm commonly used in"
 }

View File

@@ -5,95 +5,95 @@
     "generated_tokens": 10,
     "prefill": [
       {
-        "id": 16030,
+        "id": 338,
         "logprob": null,
+        "text": "is"
+      },
+      {
+        "id": 16030,
+        "logprob": -13.328125,
         "text": "gradient"
       },
       {
         "id": 26815,
-        "logprob": -6.4960938,
+        "logprob": -0.24023438,
         "text": "descent"
       },
       {
         "id": 29973,
-        "logprob": -5.1484375,
+        "logprob": -3.1386719,
         "text": "?"
       },
       {
         "id": 13,
-        "logprob": -4.0351562,
-        "text": "\n"
-      },
-      {
-        "id": 13,
-        "logprob": -5.2265625,
+        "logprob": -3.0878906,
         "text": "\n"
       }
     ],
     "seed": 0,
     "tokens": [
       {
-        "id": 10994,
-        "logprob": -1.1542969,
-        "special": false,
-        "text": "Hello"
-      },
-      {
-        "id": 29991,
+        "id": 25584,
         "logprob": 0.0,
         "special": false,
-        "text": "!"
+        "text": "Grad"
       },
       {
-        "id": 739,
+        "id": 993,
         "logprob": 0.0,
         "special": false,
-        "text": " It"
+        "text": "ient"
       },
       {
-        "id": 2444,
-        "logprob": -0.42260742,
-        "special": false,
-        "text": " seems"
-      },
-      {
-        "id": 366,
+        "id": 2726,
         "logprob": 0.0,
         "special": false,
-        "text": " you"
+        "text": " Des"
       },
       {
-        "id": 29915,
+        "id": 1760,
         "logprob": 0.0,
         "special": false,
-        "text": "'"
+        "text": "cent"
       },
       {
-        "id": 276,
-        "logprob": -0.9838867,
+        "id": 313,
+        "logprob": -0.12322998,
         "special": false,
-        "text": "re"
+        "text": " ("
       },
       {
-        "id": 3211,
+        "id": 29954,
         "logprob": 0.0,
         "special": false,
-        "text": " address"
+        "text": "G"
       },
       {
-        "id": 292,
+        "id": 29928,
         "logprob": 0.0,
         "special": false,
-        "text": "ing"
+        "text": "D"
       },
       {
-        "id": 263,
-        "logprob": -0.15124512,
+        "id": 29897,
+        "logprob": 0.0,
         "special": false,
-        "text": " a"
+        "text": ")"
+      },
+      {
+        "id": 338,
+        "logprob": -0.6040039,
+        "special": false,
+        "text": " is"
+      },
+      {
+        "id": 385,
+        "logprob": -0.1796875,
+        "special": false,
+        "text": " an"
       }
     ],
     "top_tokens": null
   },
-  "generated_text": "What is gradient descent?\n\nHello! It seems you're addressing a"
+  "generated_text": "What is gradient descent?\nGradient Descent (GD) is an"
 }

View File

@@ -12,32 +12,32 @@
         },
         {
           "id": 338,
-          "logprob": -0.7133789,
+          "logprob": -0.6201172,
           "text": "is"
         },
         {
           "id": 16030,
-          "logprob": -13.9296875,
+          "logprob": -13.6484375,
           "text": "gradient"
         },
         {
           "id": 26815,
-          "logprob": -0.048919678,
+          "logprob": -0.003894806,
           "text": "descent"
         },
         {
           "id": 29973,
-          "logprob": -3.0078125,
+          "logprob": -2.6386719,
           "text": "?"
         },
         {
           "id": 13,
-          "logprob": -2.8105469,
+          "logprob": -6.46875,
           "text": "\n"
         },
         {
           "id": 13,
-          "logprob": -0.84521484,
+          "logprob": -6.6875,
           "text": "\n"
         }
       ],
@@ -45,68 +45,68 @@
       "tokens": [
         {
           "id": 25584,
-          "logprob": -0.017028809,
+          "logprob": -0.008979797,
           "special": false,
           "text": "Grad"
         },
         {
           "id": 993,
-          "logprob": -0.0028476715,
+          "logprob": -8.34465e-07,
           "special": false,
           "text": "ient"
         },
         {
           "id": 26815,
-          "logprob": -0.023971558,
+          "logprob": -0.00097084045,
           "special": false,
           "text": " descent"
         },
         {
           "id": 338,
-          "logprob": -2.0384789e-05,
+          "logprob": -0.0003838539,
           "special": false,
           "text": " is"
         },
         {
-          "id": 263,
-          "logprob": -0.5229492,
+          "id": 385,
+          "logprob": -0.23840332,
           "special": false,
-          "text": " a"
-        },
-        {
-          "id": 937,
-          "logprob": -0.17602539,
-          "special": false,
-          "text": " first"
-        },
-        {
-          "id": 29899,
-          "logprob": 0.0,
-          "special": false,
-          "text": "-"
-        },
-        {
-          "id": 2098,
-          "logprob": -0.000116467476,
-          "special": false,
-          "text": "order"
+          "text": " an"
        },
         {
           "id": 13883,
-          "logprob": -0.47436523,
+          "logprob": -0.010406494,
           "special": false,
           "text": " optimization"
         },
         {
           "id": 5687,
-          "logprob": -0.00027871132,
+          "logprob": -0.0002501011,
           "special": false,
           "text": " algorithm"
+        },
+        {
+          "id": 15574,
+          "logprob": -0.6582031,
+          "special": false,
+          "text": " commonly"
+        },
+        {
+          "id": 1304,
+          "logprob": -0.00092840195,
+          "special": false,
+          "text": " used"
+        },
+        {
+          "id": 297,
+          "logprob": -0.18933105,
+          "special": false,
+          "text": " in"
        }
       ],
       "top_tokens": null
     },
-    "generated_text": "Gradient descent is a first-order optimization algorithm"
+    "generated_text": "Gradient descent is an optimization algorithm commonly used in"
   },
   {
     "details": {
@@ -121,32 +121,32 @@
         },
         {
           "id": 338,
-          "logprob": -0.7128906,
+          "logprob": -0.6113281,
           "text": "is"
         },
         {
           "id": 16030,
-          "logprob": -13.9375,
+          "logprob": -13.6640625,
           "text": "gradient"
         },
         {
           "id": 26815,
-          "logprob": -0.05053711,
+          "logprob": -0.003929138,
           "text": "descent"
         },
         {
           "id": 29973,
-          "logprob": -3.0058594,
+          "logprob": -2.625,
           "text": "?"
         },
         {
           "id": 13,
-          "logprob": -2.8242188,
+          "logprob": -6.484375,
           "text": "\n"
         },
         {
           "id": 13,
-          "logprob": -0.84521484,
+          "logprob": -6.6875,
           "text": "\n"
         }
       ],
@@ -154,68 +154,68 @@
       "tokens": [
         {
           "id": 25584,
-          "logprob": -0.018859863,
+          "logprob": -0.009017944,
           "special": false,
           "text": "Grad"
         },
         {
           "id": 993,
-          "logprob": -0.002822876,
+          "logprob": -9.536743e-07,
           "special": false,
           "text": "ient"
         },
         {
           "id": 26815,
-          "logprob": -0.023254395,
+          "logprob": -0.00097084045,
           "special": false,
           "text": " descent"
         },
         {
           "id": 338,
-          "logprob": -2.0384789e-05,
+          "logprob": -0.0003838539,
           "special": false,
           "text": " is"
         },
         {
-          "id": 263,
-          "logprob": -0.5229492,
+          "id": 385,
+          "logprob": -0.24499512,
           "special": false,
-          "text": " a"
-        },
-        {
-          "id": 937,
-          "logprob": -0.17126465,
-          "special": false,
-          "text": " first"
-        },
-        {
-          "id": 29899,
-          "logprob": 0.0,
-          "special": false,
-          "text": "-"
-        },
-        {
-          "id": 2098,
-          "logprob": -0.0001155138,
-          "special": false,
-          "text": "order"
+          "text": " an"
         },
         {
           "id": 13883,
-          "logprob": -0.47436523,
+          "logprob": -0.010406494,
           "special": false,
           "text": " optimization"
         },
         {
           "id": 5687,
-          "logprob": -0.00027036667,
+          "logprob": -0.0002501011,
           "special": false,
           "text": " algorithm"
+        },
+        {
+          "id": 15574,
+          "logprob": -0.6435547,
+          "special": false,
+          "text": " commonly"
+        },
+        {
+          "id": 1304,
+          "logprob": -0.0009279251,
+          "special": false,
+          "text": " used"
+        },
+        {
+          "id": 297,
+          "logprob": -0.18933105,
+          "special": false,
+          "text": " in"
        }
       ],
       "top_tokens": null
     },
-    "generated_text": "Gradient descent is a first-order optimization algorithm"
+    "generated_text": "Gradient descent is an optimization algorithm commonly used in"
   },
   {
     "details": {
@@ -230,32 +230,32 @@
         },
         {
           "id": 338,
-          "logprob": -0.71484375,
+          "logprob": -0.609375,
           "text": "is"
         },
         {
           "id": 16030,
-          "logprob": -13.9375,
+          "logprob": -13.671875,
           "text": "gradient"
         },
         {
           "id": 26815,
-          "logprob": -0.049346924,
+          "logprob": -0.0040016174,
           "text": "descent"
         },
         {
           "id": 29973,
-          "logprob": -3.0078125,
+          "logprob": -2.6230469,
           "text": "?"
         },
         {
           "id": 13,
-          "logprob": -2.8242188,
+          "logprob": -6.453125,
           "text": "\n"
         },
         {
           "id": 13,
-          "logprob": -0.86328125,
+          "logprob": -6.6875,
           "text": "\n"
         }
       ],
@@ -263,68 +263,68 @@
      "tokens": [
         {
           "id": 25584,
-          "logprob": -0.017196655,
+          "logprob": -0.008956909,
           "special": false,
           "text": "Grad"
         },
         {
           "id": 993,
-          "logprob": -0.0028438568,
+          "logprob": -8.34465e-07,
           "special": false,
           "text": "ient"
         },
         {
           "id": 26815,
-          "logprob": -0.023254395,
+          "logprob": -0.0009407997,
           "special": false,
           "text": " descent"
         },
         {
           "id": 338,
-          "logprob": -2.026558e-05,
+          "logprob": -0.0003721714,
           "special": false,
           "text": " is"
         },
         {
-          "id": 263,
-          "logprob": -0.5229492,
+          "id": 385,
+          "logprob": -0.24499512,
           "special": false,
-          "text": " a"
-        },
-        {
-          "id": 937,
-          "logprob": -0.17602539,
-          "special": false,
-          "text": " first"
-        },
-        {
-          "id": 29899,
-          "logprob": 0.0,
-          "special": false,
-          "text": "-"
-        },
-        {
-          "id": 2098,
-          "logprob": -0.00011622906,
-          "special": false,
-          "text": "order"
+          "text": " an"
         },
         {
           "id": 13883,
-          "logprob": -0.48608398,
+          "logprob": -0.010406494,
           "special": false,
           "text": " optimization"
         },
         {
           "id": 5687,
-          "logprob": -0.00027894974,
+          "logprob": -0.0002501011,
           "special": false,
           "text": " algorithm"
+        },
+        {
+          "id": 15574,
+          "logprob": -0.6435547,
+          "special": false,
+          "text": " commonly"
+        },
+        {
+          "id": 1304,
+          "logprob": -0.00092601776,
+          "special": false,
+          "text": " used"
+        },
+        {
+          "id": 297,
+          "logprob": -0.19177246,
+          "special": false,
+          "text": " in"
        }
       ],
       "top_tokens": null
     },
-    "generated_text": "Gradient descent is a first-order optimization algorithm"
+    "generated_text": "Gradient descent is an optimization algorithm commonly used in"
   },
   {
     "details": {
@@ -339,32 +339,32 @@
         },
         {
           "id": 338,
-          "logprob": -0.7192383,
+          "logprob": -0.609375,
           "text": "is"
         },
         {
           "id": 16030,
-          "logprob": -13.9375,
+          "logprob": -13.6640625,
           "text": "gradient"
         },
         {
           "id": 26815,
-          "logprob": -0.050445557,
+          "logprob": -0.0038967133,
           "text": "descent"
         },
         {
           "id": 29973,
-          "logprob": -3.0078125,
+          "logprob": -2.6347656,
           "text": "?"
         },
         {
           "id": 13,
-          "logprob": -2.8242188,
+          "logprob": -6.453125,
           "text": "\n"
         },
         {
           "id": 13,
-          "logprob": -0.8276367,
+          "logprob": -6.6875,
           "text": "\n"
         }
       ],
@@ -372,67 +372,67 @@
       "tokens": [
         {
           "id": 25584,
-          "logprob": -0.01727295,
+          "logprob": -0.008979797,
           "special": false,
           "text": "Grad"
         },
         {
           "id": 993,
-          "logprob": -0.0027542114,
+          "logprob": -9.536743e-07,
           "special": false,
           "text": "ient"
         },
         {
           "id": 26815,
-          "logprob": -0.023254395,
+          "logprob": -0.0009407997,
           "special": false,
           "text": " descent"
         },
         {
           "id": 338,
-          "logprob": -2.0384789e-05,
+          "logprob": -0.00038409233,
           "special": false,
           "text": " is"
         },
         {
-          "id": 263,
-          "logprob": -0.5229492,
+          "id": 385,
+          "logprob": -0.24499512,
           "special": false,
-          "text": " a"
-        },
-        {
-          "id": 937,
-          "logprob": -0.17126465,
-          "special": false,
-          "text": " first"
-        },
-        {
-          "id": 29899,
-          "logprob": 0.0,
-          "special": false,
-          "text": "-"
-        },
-        {
-          "id": 2098,
-          "logprob": -0.00011301041,
-          "special": false,
-          "text": "order"
+          "text": " an"
         },
         {
           "id": 13883,
-          "logprob": -0.48608398,
+          "logprob": -0.010414124,
           "special": false,
           "text": " optimization"
         },
         {
           "id": 5687,
-          "logprob": -0.00027894974,
+          "logprob": -0.00024354458,
           "special": false,
           "text": " algorithm"
+        },
+        {
+          "id": 15574,
+          "logprob": -0.6435547,
+          "special": false,
+          "text": " commonly"
+        },
+        {
+          "id": 1304,
+          "logprob": -0.0009279251,
+          "special": false,
+          "text": " used"
+        },
+        {
+          "id": 297,
+          "logprob": -0.19470215,
+          "special": false,
+          "text": " in"
        }
       ],
       "top_tokens": null
     },
-    "generated_text": "Gradient descent is a first-order optimization algorithm"
+    "generated_text": "Gradient descent is an optimization algorithm commonly used in"
   }
 ]

View File

@@ -25,7 +25,7 @@ async def test_flash_phi35_moe(flash_phi35_moe, response_snapshot):
     assert response.details.generated_tokens == 10
     assert (
         response.generated_text
-        == "Gradient descent is a first-order optimization algorithm"
+        == "Gradient descent is an optimization algorithm commonly used in"
     )
     assert response == response_snapshot
@@ -33,7 +33,7 @@ async def test_flash_phi35_moe(flash_phi35_moe, response_snapshot):
 @pytest.mark.asyncio
 async def test_flash_phi35_moe_all_params(flash_phi35_moe, response_snapshot):
     response = await flash_phi35_moe.generate(
-        "What is gradient descent?\n\n",
+        "What is gradient descent?\n",
         max_new_tokens=10,
         repetition_penalty=1.2,
         return_full_text=True,
@@ -51,7 +51,7 @@ async def test_flash_phi35_moe_all_params(flash_phi35_moe, response_snapshot):
     assert response.details.generated_tokens == 10
     assert (
         response.generated_text
-        == "What is gradient descent?\n\nHello! It seems you're addressing a"
+        == "What is gradient descent?\nGradient Descent (GD) is an"
     )
     assert response == response_snapshot
@@ -66,7 +66,7 @@ async def test_flash_phi35_moe_load(flash_phi35_moe, generate_load, response_sna
     assert responses[0].details.generated_tokens == 10
     assert (
         responses[0].generated_text
-        == "Gradient descent is a first-order optimization algorithm"
+        == "Gradient descent is an optimization algorithm commonly used in"
    )
    assert all(
        [r.generated_text == responses[0].generated_text for r in responses]
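
Almost every change above is a small numerical shift in per-token logprobs, which is why the snapshots had to be regenerated wholesale. For illustration only, one way such churn can be contained is to compare token ids and text exactly while allowing tolerance on logprobs; the sketch below shows that idea under stated assumptions (the tokens_match helper and its tolerance values are hypothetical, not TGI's actual snapshot machinery):

    import math

    def tokens_match(expected, actual, rel_tol=0.05, abs_tol=1e-3):
        """Illustrative comparison: exact ids/text, tolerant logprobs.

        `expected` and `actual` are lists of dicts shaped like the
        snapshot entries above: {"id": int, "logprob": float | None,
        "text": str, ...}.
        """
        if len(expected) != len(actual):
            return False
        for e, a in zip(expected, actual):
            # Token identity must match exactly.
            if e["id"] != a["id"] or e["text"] != a["text"]:
                return False
            # The first prefill token has a null logprob; both sides
            # must agree on that.
            if (e["logprob"] is None) != (a["logprob"] is None):
                return False
            # Tolerate small numerical drift: -0.00027894974 vs.
            # -0.00024354458 passes, but -0.7133789 vs. -0.6201172
            # (a real change in model behavior) fails.
            if e["logprob"] is not None and not math.isclose(
                e["logprob"], a["logprob"], rel_tol=rel_tol, abs_tol=abs_tol
            ):
                return False
        return True

A tolerance check like this would still have flagged this commit's changes, since the fix in PR #2682 altered the generated token sequence itself, not just the numerics.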