diff --git a/integration-tests/models/__snapshots__/test_flash_phi35_moe/test_flash_phi35_moe.json b/integration-tests/models/__snapshots__/test_flash_phi35_moe/test_flash_phi35_moe.json
index 0d6dca31..cfabe3c6 100644
--- a/integration-tests/models/__snapshots__/test_flash_phi35_moe/test_flash_phi35_moe.json
+++ b/integration-tests/models/__snapshots__/test_flash_phi35_moe/test_flash_phi35_moe.json
@@ -11,32 +11,32 @@
     },
     {
       "id": 338,
-      "logprob": -0.7133789,
+      "logprob": -0.6201172,
       "text": "is"
     },
     {
       "id": 16030,
-      "logprob": -13.9296875,
+      "logprob": -13.6484375,
       "text": "gradient"
     },
     {
       "id": 26815,
-      "logprob": -0.048919678,
+      "logprob": -0.003894806,
       "text": "descent"
     },
     {
       "id": 29973,
-      "logprob": -3.0078125,
+      "logprob": -2.6386719,
       "text": "?"
     },
     {
       "id": 13,
-      "logprob": -2.8105469,
+      "logprob": -6.46875,
       "text": "\n"
     },
     {
       "id": 13,
-      "logprob": -0.84521484,
+      "logprob": -6.6875,
       "text": "\n"
     }
   ],
@@ -44,66 +44,66 @@
   "tokens": [
     {
       "id": 25584,
-      "logprob": -0.017028809,
+      "logprob": -0.008979797,
       "special": false,
       "text": "Grad"
     },
     {
       "id": 993,
-      "logprob": -0.0027313232,
+      "logprob": -8.34465e-07,
       "special": false,
       "text": "ient"
     },
     {
       "id": 26815,
-      "logprob": -0.023254395,
+      "logprob": -0.0009407997,
       "special": false,
       "text": " descent"
     },
     {
       "id": 338,
-      "logprob": -2.0623207e-05,
+      "logprob": -0.0003838539,
       "special": false,
       "text": " is"
     },
     {
-      "id": 263,
-      "logprob": -0.5361328,
+      "id": 385,
+      "logprob": -0.24499512,
       "special": false,
-      "text": " a"
-    },
-    {
-      "id": 937,
-      "logprob": -0.17578125,
-      "special": false,
-      "text": " first"
-    },
-    {
-      "id": 29899,
-      "logprob": 0.0,
-      "special": false,
-      "text": "-"
-    },
-    {
-      "id": 2098,
-      "logprob": -0.00011539459,
-      "special": false,
-      "text": "order"
+      "text": " an"
     },
     {
       "id": 13883,
-      "logprob": -0.47436523,
+      "logprob": -0.010406494,
       "special": false,
       "text": " optimization"
     },
     {
       "id": 5687,
-      "logprob": -0.00027680397,
+      "logprob": -0.00024354458,
       "special": false,
       "text": " algorithm"
+    },
+    {
+      "id": 15574,
+      "logprob": -0.6582031,
+      "special": false,
+      "text": " commonly"
+    },
+    {
+      "id": 1304,
+      "logprob": -0.00092840195,
+      "special": false,
+      "text": " used"
+    },
+    {
+      "id": 297,
+      "logprob": -0.19470215,
+      "special": false,
+      "text": " in"
     }
   ],
   "top_tokens": null
 },
-  "generated_text": "Gradient descent is a first-order optimization algorithm"
+  "generated_text": "Gradient descent is an optimization algorithm commonly used in"
 }
diff --git a/integration-tests/models/__snapshots__/test_flash_phi35_moe/test_flash_phi35_moe_all_params.json b/integration-tests/models/__snapshots__/test_flash_phi35_moe/test_flash_phi35_moe_all_params.json
index 38b80335..b524859f 100644
--- a/integration-tests/models/__snapshots__/test_flash_phi35_moe/test_flash_phi35_moe_all_params.json
+++ b/integration-tests/models/__snapshots__/test_flash_phi35_moe/test_flash_phi35_moe_all_params.json
@@ -5,95 +5,95 @@
   "generated_tokens": 10,
   "prefill": [
     {
-      "id": 16030,
+      "id": 338,
       "logprob": null,
+      "text": "is"
+    },
+    {
+      "id": 16030,
+      "logprob": -13.328125,
       "text": "gradient"
     },
     {
       "id": 26815,
-      "logprob": -6.4960938,
+      "logprob": -0.24023438,
       "text": "descent"
     },
     {
       "id": 29973,
-      "logprob": -5.1484375,
+      "logprob": -3.1386719,
       "text": "?"
     },
     {
       "id": 13,
-      "logprob": -4.0351562,
-      "text": "\n"
-    },
-    {
-      "id": 13,
-      "logprob": -5.2265625,
+      "logprob": -3.0878906,
       "text": "\n"
     }
   ],
   "seed": 0,
   "tokens": [
     {
-      "id": 10994,
-      "logprob": -1.1542969,
-      "special": false,
-      "text": "Hello"
-    },
-    {
-      "id": 29991,
+      "id": 25584,
       "logprob": 0.0,
       "special": false,
-      "text": "!"
+      "text": "Grad"
     },
     {
-      "id": 739,
+      "id": 993,
       "logprob": 0.0,
       "special": false,
-      "text": " It"
+      "text": "ient"
     },
     {
-      "id": 2444,
-      "logprob": -0.42260742,
-      "special": false,
-      "text": " seems"
-    },
-    {
-      "id": 366,
+      "id": 2726,
       "logprob": 0.0,
       "special": false,
-      "text": " you"
+      "text": " Des"
     },
     {
-      "id": 29915,
+      "id": 1760,
       "logprob": 0.0,
       "special": false,
-      "text": "'"
+      "text": "cent"
    },
     {
-      "id": 276,
-      "logprob": -0.9838867,
+      "id": 313,
+      "logprob": -0.12322998,
       "special": false,
-      "text": "re"
+      "text": " ("
     },
     {
-      "id": 3211,
+      "id": 29954,
       "logprob": 0.0,
       "special": false,
-      "text": " address"
+      "text": "G"
     },
     {
-      "id": 292,
+      "id": 29928,
       "logprob": 0.0,
       "special": false,
-      "text": "ing"
+      "text": "D"
     },
     {
-      "id": 263,
-      "logprob": -0.15124512,
+      "id": 29897,
+      "logprob": 0.0,
       "special": false,
-      "text": " a"
+      "text": ")"
+    },
+    {
+      "id": 338,
+      "logprob": -0.6040039,
+      "special": false,
+      "text": " is"
+    },
+    {
+      "id": 385,
+      "logprob": -0.1796875,
+      "special": false,
+      "text": " an"
     }
   ],
   "top_tokens": null
 },
-  "generated_text": "What is gradient descent?\n\nHello! It seems you're addressing a"
+  "generated_text": "What is gradient descent?\nGradient Descent (GD) is an"
 }
diff --git a/integration-tests/models/__snapshots__/test_flash_phi35_moe/test_flash_phi35_moe_load.json b/integration-tests/models/__snapshots__/test_flash_phi35_moe/test_flash_phi35_moe_load.json
index f1f81152..2c977d8b 100644
--- a/integration-tests/models/__snapshots__/test_flash_phi35_moe/test_flash_phi35_moe_load.json
+++ b/integration-tests/models/__snapshots__/test_flash_phi35_moe/test_flash_phi35_moe_load.json
@@ -12,32 +12,32 @@
       },
       {
         "id": 338,
-        "logprob": -0.7133789,
+        "logprob": -0.6201172,
         "text": "is"
       },
       {
         "id": 16030,
-        "logprob": -13.9296875,
+        "logprob": -13.6484375,
         "text": "gradient"
       },
       {
         "id": 26815,
-        "logprob": -0.048919678,
+        "logprob": -0.003894806,
        "text": "descent"
       },
       {
         "id": 29973,
-        "logprob": -3.0078125,
+        "logprob": -2.6386719,
        "text": "?"
       },
       {
         "id": 13,
-        "logprob": -2.8105469,
+        "logprob": -6.46875,
         "text": "\n"
       },
       {
         "id": 13,
-        "logprob": -0.84521484,
+        "logprob": -6.6875,
         "text": "\n"
       }
     ],
@@ -45,68 +45,68 @@
     "tokens": [
       {
         "id": 25584,
-        "logprob": -0.017028809,
+        "logprob": -0.008979797,
         "special": false,
         "text": "Grad"
       },
       {
         "id": 993,
-        "logprob": -0.0028476715,
+        "logprob": -8.34465e-07,
         "special": false,
         "text": "ient"
       },
       {
         "id": 26815,
-        "logprob": -0.023971558,
+        "logprob": -0.00097084045,
         "special": false,
         "text": " descent"
       },
       {
         "id": 338,
-        "logprob": -2.0384789e-05,
+        "logprob": -0.0003838539,
         "special": false,
         "text": " is"
       },
       {
-        "id": 263,
-        "logprob": -0.5229492,
+        "id": 385,
+        "logprob": -0.23840332,
         "special": false,
-        "text": " a"
-      },
-      {
-        "id": 937,
-        "logprob": -0.17602539,
-        "special": false,
-        "text": " first"
-      },
-      {
-        "id": 29899,
-        "logprob": 0.0,
-        "special": false,
-        "text": "-"
-      },
-      {
-        "id": 2098,
-        "logprob": -0.000116467476,
-        "special": false,
-        "text": "order"
+        "text": " an"
       },
       {
         "id": 13883,
-        "logprob": -0.47436523,
+        "logprob": -0.010406494,
         "special": false,
         "text": " optimization"
       },
       {
         "id": 5687,
-        "logprob": -0.00027871132,
+        "logprob": -0.0002501011,
         "special": false,
         "text": " algorithm"
+      },
+      {
+        "id": 15574,
+        "logprob": -0.6582031,
+        "special": false,
+        "text": " commonly"
+      },
+      {
+        "id": 1304,
+        "logprob": -0.00092840195,
+        "special": false,
+        "text": " used"
+      },
+      {
+        "id": 297,
+        "logprob": -0.18933105,
+        "special": false,
+        "text": " in"
       }
     ],
     "top_tokens": null
   },
-    "generated_text": "Gradient descent is a first-order optimization algorithm"
+    "generated_text": "Gradient descent is an optimization algorithm commonly used in"
   },
   {
     "details": {
@@ -121,32 +121,32 @@
       },
       {
         "id": 338,
-        "logprob": -0.7128906,
+        "logprob": -0.6113281,
         "text": "is"
       },
       {
         "id": 16030,
-        "logprob": -13.9375,
+        "logprob": -13.6640625,
         "text": "gradient"
       },
       {
         "id": 26815,
-        "logprob": -0.05053711,
+        "logprob": -0.003929138,
         "text": "descent"
       },
       {
         "id": 29973,
-        "logprob": -3.0058594,
+        "logprob": -2.625,
         "text": "?"
       },
       {
         "id": 13,
-        "logprob": -2.8242188,
+        "logprob": -6.484375,
         "text": "\n"
       },
       {
         "id": 13,
-        "logprob": -0.84521484,
+        "logprob": -6.6875,
         "text": "\n"
       }
     ],
@@ -154,68 +154,68 @@
     "tokens": [
       {
         "id": 25584,
-        "logprob": -0.018859863,
+        "logprob": -0.009017944,
         "special": false,
         "text": "Grad"
       },
       {
         "id": 993,
-        "logprob": -0.002822876,
+        "logprob": -9.536743e-07,
         "special": false,
         "text": "ient"
       },
       {
         "id": 26815,
-        "logprob": -0.023254395,
+        "logprob": -0.00097084045,
         "special": false,
         "text": " descent"
       },
       {
         "id": 338,
-        "logprob": -2.0384789e-05,
+        "logprob": -0.0003838539,
         "special": false,
         "text": " is"
       },
       {
-        "id": 263,
-        "logprob": -0.5229492,
+        "id": 385,
+        "logprob": -0.24499512,
         "special": false,
-        "text": " a"
-      },
-      {
-        "id": 937,
-        "logprob": -0.17126465,
-        "special": false,
-        "text": " first"
-      },
-      {
-        "id": 29899,
-        "logprob": 0.0,
-        "special": false,
-        "text": "-"
-      },
-      {
-        "id": 2098,
-        "logprob": -0.0001155138,
-        "special": false,
-        "text": "order"
+        "text": " an"
       },
       {
         "id": 13883,
-        "logprob": -0.47436523,
+        "logprob": -0.010406494,
         "special": false,
         "text": " optimization"
       },
       {
         "id": 5687,
-        "logprob": -0.00027036667,
+        "logprob": -0.0002501011,
         "special": false,
         "text": " algorithm"
+      },
+      {
+        "id": 15574,
+        "logprob": -0.6435547,
+        "special": false,
+        "text": " commonly"
+      },
+      {
+        "id": 1304,
+        "logprob": -0.0009279251,
+        "special": false,
+        "text": " used"
+      },
+      {
+        "id": 297,
+        "logprob": -0.18933105,
+        "special": false,
+        "text": " in"
      }
     ],
     "top_tokens": null
   },
-    "generated_text": "Gradient descent is a first-order optimization algorithm"
+    "generated_text": "Gradient descent is an optimization algorithm commonly used in"
   },
   {
     "details": {
@@ -230,32 +230,32 @@
       },
       {
         "id": 338,
-        "logprob": -0.71484375,
+        "logprob": -0.609375,
         "text": "is"
       },
       {
         "id": 16030,
-        "logprob": -13.9375,
+        "logprob": -13.671875,
         "text": "gradient"
       },
       {
         "id": 26815,
-        "logprob": -0.049346924,
+        "logprob": -0.0040016174,
         "text": "descent"
       },
       {
         "id": 29973,
-        "logprob": -3.0078125,
+        "logprob": -2.6230469,
         "text": "?"
       },
       {
         "id": 13,
-        "logprob": -2.8242188,
+        "logprob": -6.453125,
         "text": "\n"
       },
       {
         "id": 13,
-        "logprob": -0.86328125,
+        "logprob": -6.6875,
         "text": "\n"
       }
     ],
@@ -263,68 +263,68 @@
     "tokens": [
       {
         "id": 25584,
-        "logprob": -0.017196655,
+        "logprob": -0.008956909,
         "special": false,
         "text": "Grad"
       },
       {
         "id": 993,
-        "logprob": -0.0028438568,
+        "logprob": -8.34465e-07,
         "special": false,
         "text": "ient"
       },
       {
         "id": 26815,
-        "logprob": -0.023254395,
+        "logprob": -0.0009407997,
         "special": false,
         "text": " descent"
       },
       {
         "id": 338,
-        "logprob": -2.026558e-05,
+        "logprob": -0.0003721714,
         "special": false,
         "text": " is"
       },
       {
-        "id": 263,
-        "logprob": -0.5229492,
+        "id": 385,
+        "logprob": -0.24499512,
         "special": false,
-        "text": " a"
-      },
-      {
-        "id": 937,
-        "logprob": -0.17602539,
-        "special": false,
-        "text": " first"
-      },
-      {
-        "id": 29899,
-        "logprob": 0.0,
-        "special": false,
-        "text": "-"
-      },
-      {
-        "id": 2098,
-        "logprob": -0.00011622906,
-        "special": false,
-        "text": "order"
+        "text": " an"
       },
       {
         "id": 13883,
-        "logprob": -0.48608398,
+        "logprob": -0.010406494,
         "special": false,
         "text": " optimization"
       },
       {
         "id": 5687,
-        "logprob": -0.00027894974,
+        "logprob": -0.0002501011,
         "special": false,
         "text": " algorithm"
+      },
+      {
+        "id": 15574,
+        "logprob": -0.6435547,
+        "special": false,
+        "text": " commonly"
+      },
+      {
+        "id": 1304,
+        "logprob": -0.00092601776,
+        "special": false,
+        "text": " used"
+      },
+      {
+        "id": 297,
+        "logprob": -0.19177246,
+        "special": false,
+        "text": " in"
       }
     ],
     "top_tokens": null
   },
-    "generated_text": "Gradient descent is a first-order optimization algorithm"
+    "generated_text": "Gradient descent is an optimization algorithm commonly used in"
   },
   {
     "details": {
@@ -339,32 +339,32 @@
       },
       {
         "id": 338,
-        "logprob": -0.7192383,
+        "logprob": -0.609375,
         "text": "is"
       },
       {
         "id": 16030,
-        "logprob": -13.9375,
+        "logprob": -13.6640625,
         "text": "gradient"
       },
       {
         "id": 26815,
-        "logprob": -0.050445557,
+        "logprob": -0.0038967133,
         "text": "descent"
       },
       {
         "id": 29973,
-        "logprob": -3.0078125,
+        "logprob": -2.6347656,
         "text": "?"
       },
       {
         "id": 13,
-        "logprob": -2.8242188,
+        "logprob": -6.453125,
         "text": "\n"
       },
       {
         "id": 13,
-        "logprob": -0.8276367,
+        "logprob": -6.6875,
         "text": "\n"
       }
     ],
@@ -372,67 +372,67 @@
     "tokens": [
       {
         "id": 25584,
-        "logprob": -0.01727295,
+        "logprob": -0.008979797,
         "special": false,
         "text": "Grad"
       },
       {
         "id": 993,
-        "logprob": -0.0027542114,
+        "logprob": -9.536743e-07,
         "special": false,
         "text": "ient"
       },
       {
         "id": 26815,
-        "logprob": -0.023254395,
+        "logprob": -0.0009407997,
         "special": false,
         "text": " descent"
       },
       {
         "id": 338,
-        "logprob": -2.0384789e-05,
+        "logprob": -0.00038409233,
         "special": false,
         "text": " is"
       },
       {
-        "id": 263,
-        "logprob": -0.5229492,
+        "id": 385,
+        "logprob": -0.24499512,
         "special": false,
-        "text": " a"
-      },
-      {
-        "id": 937,
-        "logprob": -0.17126465,
-        "special": false,
-        "text": " first"
-      },
-      {
-        "id": 29899,
-        "logprob": 0.0,
-        "special": false,
-        "text": "-"
-      },
-      {
-        "id": 2098,
-        "logprob": -0.00011301041,
-        "special": false,
-        "text": "order"
+        "text": " an"
       },
       {
         "id": 13883,
-        "logprob": -0.48608398,
+        "logprob": -0.010414124,
         "special": false,
         "text": " optimization"
       },
       {
         "id": 5687,
-        "logprob": -0.00027894974,
+        "logprob": -0.00024354458,
         "special": false,
         "text": " algorithm"
+      },
+      {
+        "id": 15574,
+        "logprob": -0.6435547,
+        "special": false,
+        "text": " commonly"
+      },
+      {
+        "id": 1304,
+        "logprob": -0.0009279251,
+        "special": false,
+        "text": " used"
+      },
+      {
+        "id": 297,
+        "logprob": -0.19470215,
+        "special": false,
+        "text": " in"
       }
     ],
     "top_tokens": null
   },
-    "generated_text": "Gradient descent is a first-order optimization algorithm"
+    "generated_text": "Gradient descent is an optimization algorithm commonly used in"
   }
 ]
diff --git a/integration-tests/models/test_flash_phi35_moe.py b/integration-tests/models/test_flash_phi35_moe.py
index 2173740a..d3043b02 100644
--- a/integration-tests/models/test_flash_phi35_moe.py
+++ b/integration-tests/models/test_flash_phi35_moe.py
@@ -25,7 +25,7 @@ async def test_flash_phi35_moe(flash_phi35_moe, response_snapshot):
     assert response.details.generated_tokens == 10
     assert (
         response.generated_text
-        == "Gradient descent is a first-order optimization algorithm"
+        == "Gradient descent is an optimization algorithm commonly used in"
     )
     assert response == response_snapshot
 
@@ -33,7 +33,7 @@
 @pytest.mark.asyncio
 async def test_flash_phi35_moe_all_params(flash_phi35_moe, response_snapshot):
     response = await flash_phi35_moe.generate(
-        "What is gradient descent?\n\n",
+        "What is gradient descent?\n",
         max_new_tokens=10,
         repetition_penalty=1.2,
         return_full_text=True,
@@ -51,7 +51,7 @@ async def test_flash_phi35_moe_all_params(flash_phi35_moe, response_snapshot):
     assert response.details.generated_tokens == 10
     assert (
         response.generated_text
-        == "What is gradient descent?\n\nHello! It seems you're addressing a"
+        == "What is gradient descent?\nGradient Descent (GD) is an"
     )
     assert response == response_snapshot
 
@@ -66,7 +66,7 @@ async def test_flash_phi35_moe_load(flash_phi35_moe, generate_load, response_sna
     assert responses[0].details.generated_tokens == 10
     assert (
         responses[0].generated_text
-        == "Gradient descent is a first-order optimization algorithm"
+        == "Gradient descent is an optimization algorithm commonly used in"
     )
     assert all(
         [r.generated_text == responses[0].generated_text for r in responses]