Test Marlin MoE with `desc_act=true` (#2622)
Update the Mixtral GPTQ test to use a model with `desc_act=true` and `group_size!=-1` to ensure that we are checking activation sorting/non-full K (with tensor parallelism). The `desc_act=false` case is already checked by the Mixtral AWQ test.
This commit is contained in:
parent
5e0fb46821
commit
7f54b7336a
|
@ -10,80 +10,95 @@
|
|||
"text": "<s>"
|
||||
},
|
||||
{
|
||||
"id": 3735,
|
||||
"logprob": -11.0078125,
|
||||
"text": "Test"
|
||||
"id": 1824,
|
||||
"logprob": -9.2890625,
|
||||
"text": "What"
|
||||
},
|
||||
{
|
||||
"id": 2159,
|
||||
"logprob": -13.59375,
|
||||
"text": "request"
|
||||
"id": 349,
|
||||
"logprob": -1.1503906,
|
||||
"text": "is"
|
||||
},
|
||||
{
|
||||
"id": 3534,
|
||||
"logprob": -9.5859375,
|
||||
"text": "deep"
|
||||
},
|
||||
{
|
||||
"id": 5168,
|
||||
"logprob": -1.3945312,
|
||||
"text": "learning"
|
||||
},
|
||||
{
|
||||
"id": 28804,
|
||||
"logprob": -0.4555664,
|
||||
"text": "?"
|
||||
}
|
||||
],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
"id": 13,
|
||||
"logprob": -1.7089844,
|
||||
"logprob": -0.6953125,
|
||||
"special": false,
|
||||
"text": "\n"
|
||||
},
|
||||
{
|
||||
"id": 13,
|
||||
"logprob": -0.68847656,
|
||||
"logprob": -0.4777832,
|
||||
"special": false,
|
||||
"text": "\n"
|
||||
},
|
||||
{
|
||||
"id": 28771,
|
||||
"logprob": -1.9394531,
|
||||
"id": 23229,
|
||||
"logprob": -0.13256836,
|
||||
"special": false,
|
||||
"text": "#"
|
||||
"text": "Deep"
|
||||
},
|
||||
{
|
||||
"id": 3735,
|
||||
"logprob": -2.8808594,
|
||||
"id": 5168,
|
||||
"logprob": -0.023849487,
|
||||
"special": false,
|
||||
"text": " Test"
|
||||
"text": " learning"
|
||||
},
|
||||
{
|
||||
"id": 2159,
|
||||
"logprob": -0.37280273,
|
||||
"id": 349,
|
||||
"logprob": -0.13977051,
|
||||
"special": false,
|
||||
"text": " request"
|
||||
"text": " is"
|
||||
},
|
||||
{
|
||||
"id": 13,
|
||||
"logprob": -0.26098633,
|
||||
"id": 264,
|
||||
"logprob": -0.14489746,
|
||||
"special": false,
|
||||
"text": "\n"
|
||||
"text": " a"
|
||||
},
|
||||
{
|
||||
"id": 13,
|
||||
"logprob": -0.0017137527,
|
||||
"id": 19804,
|
||||
"logprob": -0.63183594,
|
||||
"special": false,
|
||||
"text": "\n"
|
||||
"text": " subset"
|
||||
},
|
||||
{
|
||||
"id": 1064,
|
||||
"logprob": -2.2695312,
|
||||
"id": 302,
|
||||
"logprob": -0.010314941,
|
||||
"special": false,
|
||||
"text": "##"
|
||||
"text": " of"
|
||||
},
|
||||
{
|
||||
"id": 3735,
|
||||
"logprob": -1.9238281,
|
||||
"id": 5599,
|
||||
"logprob": -0.0635376,
|
||||
"special": false,
|
||||
"text": " Test"
|
||||
"text": " machine"
|
||||
},
|
||||
{
|
||||
"id": 2159,
|
||||
"logprob": -0.48828125,
|
||||
"id": 5168,
|
||||
"logprob": -0.0028572083,
|
||||
"special": false,
|
||||
"text": " request"
|
||||
"text": " learning"
|
||||
}
|
||||
],
|
||||
"top_tokens": null
|
||||
},
|
||||
"generated_text": "\n\n# Test request\n\n## Test request"
|
||||
"generated_text": "\n\nDeep learning is a subset of machine learning"
|
||||
}
|
||||
|
|
|
@ -10,42 +10,28 @@
|
|||
"text": "<s>"
|
||||
},
|
||||
{
|
||||
"id": 3735,
|
||||
"logprob": -11.0078125,
|
||||
"text": "Test"
|
||||
"id": 349,
|
||||
"logprob": -12.0546875,
|
||||
"text": "is"
|
||||
},
|
||||
{
|
||||
"id": 2159,
|
||||
"logprob": -13.59375,
|
||||
"text": "request"
|
||||
"id": 3534,
|
||||
"logprob": -10.53125,
|
||||
"text": "deep"
|
||||
},
|
||||
{
|
||||
"id": 5168,
|
||||
"logprob": -2.71875,
|
||||
"text": "learning"
|
||||
},
|
||||
{
|
||||
"id": 28804,
|
||||
"logprob": -5.0078125,
|
||||
"text": "?"
|
||||
}
|
||||
],
|
||||
"seed": 0,
|
||||
"tokens": [
|
||||
{
|
||||
"id": 13,
|
||||
"logprob": -0.34838867,
|
||||
"special": false,
|
||||
"text": "\n"
|
||||
},
|
||||
{
|
||||
"id": 13940,
|
||||
"logprob": -0.38916016,
|
||||
"special": false,
|
||||
"text": "``"
|
||||
},
|
||||
{
|
||||
"id": 28832,
|
||||
"logprob": 0.0,
|
||||
"special": false,
|
||||
"text": "`"
|
||||
},
|
||||
{
|
||||
"id": 3371,
|
||||
"logprob": -1.2529297,
|
||||
"special": false,
|
||||
"text": "json"
|
||||
},
|
||||
{
|
||||
"id": 13,
|
||||
"logprob": 0.0,
|
||||
|
@ -53,37 +39,61 @@
|
|||
"text": "\n"
|
||||
},
|
||||
{
|
||||
"id": 28751,
|
||||
"logprob": 0.0,
|
||||
"id": 23229,
|
||||
"logprob": -0.18237305,
|
||||
"special": false,
|
||||
"text": "{"
|
||||
"text": "Deep"
|
||||
},
|
||||
{
|
||||
"id": 13,
|
||||
"id": 17504,
|
||||
"logprob": 0.0,
|
||||
"special": false,
|
||||
"text": "\n"
|
||||
"text": " Learning"
|
||||
},
|
||||
{
|
||||
"id": 2287,
|
||||
"id": 349,
|
||||
"logprob": 0.0,
|
||||
"special": false,
|
||||
"text": " "
|
||||
"text": " is"
|
||||
},
|
||||
{
|
||||
"id": 345,
|
||||
"id": 264,
|
||||
"logprob": 0.0,
|
||||
"special": false,
|
||||
"text": " \""
|
||||
"text": " a"
|
||||
},
|
||||
{
|
||||
"id": 3134,
|
||||
"logprob": -0.640625,
|
||||
"id": 19804,
|
||||
"logprob": 0.0,
|
||||
"special": false,
|
||||
"text": "request"
|
||||
"text": " subset"
|
||||
},
|
||||
{
|
||||
"id": 302,
|
||||
"logprob": 0.0,
|
||||
"special": false,
|
||||
"text": " of"
|
||||
},
|
||||
{
|
||||
"id": 13253,
|
||||
"logprob": -0.6040039,
|
||||
"special": false,
|
||||
"text": " Machine"
|
||||
},
|
||||
{
|
||||
"id": 17504,
|
||||
"logprob": 0.0,
|
||||
"special": false,
|
||||
"text": " Learning"
|
||||
},
|
||||
{
|
||||
"id": 28725,
|
||||
"logprob": -0.11621094,
|
||||
"special": false,
|
||||
"text": ","
|
||||
}
|
||||
],
|
||||
"top_tokens": null
|
||||
},
|
||||
"generated_text": "Test request\n```json\n{\n \"request"
|
||||
"generated_text": "What is deep learning?\nDeep Learning is a subset of Machine Learning,"
|
||||
}
|
||||
|
|
|
@ -11,82 +11,97 @@
|
|||
"text": "<s>"
|
||||
},
|
||||
{
|
||||
"id": 3735,
|
||||
"logprob": -11.0078125,
|
||||
"text": "Test"
|
||||
"id": 1824,
|
||||
"logprob": -9.2890625,
|
||||
"text": "What"
|
||||
},
|
||||
{
|
||||
"id": 2159,
|
||||
"logprob": -13.59375,
|
||||
"text": "request"
|
||||
"id": 349,
|
||||
"logprob": -1.1503906,
|
||||
"text": "is"
|
||||
},
|
||||
{
|
||||
"id": 3534,
|
||||
"logprob": -9.5859375,
|
||||
"text": "deep"
|
||||
},
|
||||
{
|
||||
"id": 5168,
|
||||
"logprob": -1.3945312,
|
||||
"text": "learning"
|
||||
},
|
||||
{
|
||||
"id": 28804,
|
||||
"logprob": -0.4555664,
|
||||
"text": "?"
|
||||
}
|
||||
],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
"id": 13,
|
||||
"logprob": -1.7089844,
|
||||
"logprob": -0.6953125,
|
||||
"special": false,
|
||||
"text": "\n"
|
||||
},
|
||||
{
|
||||
"id": 13,
|
||||
"logprob": -0.68847656,
|
||||
"logprob": -0.4777832,
|
||||
"special": false,
|
||||
"text": "\n"
|
||||
},
|
||||
{
|
||||
"id": 28771,
|
||||
"logprob": -1.9394531,
|
||||
"id": 23229,
|
||||
"logprob": -0.13232422,
|
||||
"special": false,
|
||||
"text": "#"
|
||||
"text": "Deep"
|
||||
},
|
||||
{
|
||||
"id": 3735,
|
||||
"logprob": -2.8828125,
|
||||
"id": 5168,
|
||||
"logprob": -0.023834229,
|
||||
"special": false,
|
||||
"text": " Test"
|
||||
"text": " learning"
|
||||
},
|
||||
{
|
||||
"id": 2159,
|
||||
"logprob": -0.37329102,
|
||||
"id": 349,
|
||||
"logprob": -0.13977051,
|
||||
"special": false,
|
||||
"text": " request"
|
||||
"text": " is"
|
||||
},
|
||||
{
|
||||
"id": 13,
|
||||
"logprob": -0.2602539,
|
||||
"id": 264,
|
||||
"logprob": -0.14416504,
|
||||
"special": false,
|
||||
"text": "\n"
|
||||
"text": " a"
|
||||
},
|
||||
{
|
||||
"id": 13,
|
||||
"logprob": -0.0017185211,
|
||||
"id": 19804,
|
||||
"logprob": -0.63183594,
|
||||
"special": false,
|
||||
"text": "\n"
|
||||
"text": " subset"
|
||||
},
|
||||
{
|
||||
"id": 1064,
|
||||
"logprob": -2.2753906,
|
||||
"id": 302,
|
||||
"logprob": -0.010223389,
|
||||
"special": false,
|
||||
"text": "##"
|
||||
"text": " of"
|
||||
},
|
||||
{
|
||||
"id": 3735,
|
||||
"logprob": -1.9316406,
|
||||
"id": 5599,
|
||||
"logprob": -0.064208984,
|
||||
"special": false,
|
||||
"text": " Test"
|
||||
"text": " machine"
|
||||
},
|
||||
{
|
||||
"id": 2159,
|
||||
"logprob": -0.48217773,
|
||||
"id": 5168,
|
||||
"logprob": -0.0028266907,
|
||||
"special": false,
|
||||
"text": " request"
|
||||
"text": " learning"
|
||||
}
|
||||
],
|
||||
"top_tokens": null
|
||||
},
|
||||
"generated_text": "\n\n# Test request\n\n## Test request"
|
||||
"generated_text": "\n\nDeep learning is a subset of machine learning"
|
||||
},
|
||||
{
|
||||
"details": {
|
||||
|
@ -100,82 +115,97 @@
|
|||
"text": "<s>"
|
||||
},
|
||||
{
|
||||
"id": 3735,
|
||||
"logprob": -11.0078125,
|
||||
"text": "Test"
|
||||
"id": 1824,
|
||||
"logprob": -9.2890625,
|
||||
"text": "What"
|
||||
},
|
||||
{
|
||||
"id": 2159,
|
||||
"logprob": -13.59375,
|
||||
"text": "request"
|
||||
"id": 349,
|
||||
"logprob": -1.1425781,
|
||||
"text": "is"
|
||||
},
|
||||
{
|
||||
"id": 3534,
|
||||
"logprob": -9.59375,
|
||||
"text": "deep"
|
||||
},
|
||||
{
|
||||
"id": 5168,
|
||||
"logprob": -1.390625,
|
||||
"text": "learning"
|
||||
},
|
||||
{
|
||||
"id": 28804,
|
||||
"logprob": -0.45532227,
|
||||
"text": "?"
|
||||
}
|
||||
],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
"id": 13,
|
||||
"logprob": -1.7089844,
|
||||
"logprob": -0.6953125,
|
||||
"special": false,
|
||||
"text": "\n"
|
||||
},
|
||||
{
|
||||
"id": 13,
|
||||
"logprob": -0.68847656,
|
||||
"logprob": -0.48339844,
|
||||
"special": false,
|
||||
"text": "\n"
|
||||
},
|
||||
{
|
||||
"id": 28771,
|
||||
"logprob": -1.9394531,
|
||||
"id": 23229,
|
||||
"logprob": -0.13256836,
|
||||
"special": false,
|
||||
"text": "#"
|
||||
"text": "Deep"
|
||||
},
|
||||
{
|
||||
"id": 3735,
|
||||
"logprob": -2.8828125,
|
||||
"id": 5168,
|
||||
"logprob": -0.02420044,
|
||||
"special": false,
|
||||
"text": " Test"
|
||||
"text": " learning"
|
||||
},
|
||||
{
|
||||
"id": 2159,
|
||||
"logprob": -0.37329102,
|
||||
"id": 349,
|
||||
"logprob": -0.13977051,
|
||||
"special": false,
|
||||
"text": " request"
|
||||
"text": " is"
|
||||
},
|
||||
{
|
||||
"id": 13,
|
||||
"logprob": -0.2602539,
|
||||
"id": 264,
|
||||
"logprob": -0.14501953,
|
||||
"special": false,
|
||||
"text": "\n"
|
||||
"text": " a"
|
||||
},
|
||||
{
|
||||
"id": 13,
|
||||
"logprob": -0.0017185211,
|
||||
"id": 19804,
|
||||
"logprob": -0.63134766,
|
||||
"special": false,
|
||||
"text": "\n"
|
||||
"text": " subset"
|
||||
},
|
||||
{
|
||||
"id": 1064,
|
||||
"logprob": -2.2753906,
|
||||
"id": 302,
|
||||
"logprob": -0.010223389,
|
||||
"special": false,
|
||||
"text": "##"
|
||||
"text": " of"
|
||||
},
|
||||
{
|
||||
"id": 3735,
|
||||
"logprob": -1.9316406,
|
||||
"id": 5599,
|
||||
"logprob": -0.06427002,
|
||||
"special": false,
|
||||
"text": " Test"
|
||||
"text": " machine"
|
||||
},
|
||||
{
|
||||
"id": 2159,
|
||||
"logprob": -0.48217773,
|
||||
"id": 5168,
|
||||
"logprob": -0.002817154,
|
||||
"special": false,
|
||||
"text": " request"
|
||||
"text": " learning"
|
||||
}
|
||||
],
|
||||
"top_tokens": null
|
||||
},
|
||||
"generated_text": "\n\n# Test request\n\n## Test request"
|
||||
"generated_text": "\n\nDeep learning is a subset of machine learning"
|
||||
},
|
||||
{
|
||||
"details": {
|
||||
|
@ -189,82 +219,97 @@
|
|||
"text": "<s>"
|
||||
},
|
||||
{
|
||||
"id": 3735,
|
||||
"logprob": -11.0078125,
|
||||
"text": "Test"
|
||||
"id": 1824,
|
||||
"logprob": -9.2890625,
|
||||
"text": "What"
|
||||
},
|
||||
{
|
||||
"id": 2159,
|
||||
"logprob": -13.59375,
|
||||
"text": "request"
|
||||
"id": 349,
|
||||
"logprob": -1.1425781,
|
||||
"text": "is"
|
||||
},
|
||||
{
|
||||
"id": 3534,
|
||||
"logprob": -9.59375,
|
||||
"text": "deep"
|
||||
},
|
||||
{
|
||||
"id": 5168,
|
||||
"logprob": -1.390625,
|
||||
"text": "learning"
|
||||
},
|
||||
{
|
||||
"id": 28804,
|
||||
"logprob": -0.45532227,
|
||||
"text": "?"
|
||||
}
|
||||
],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
"id": 13,
|
||||
"logprob": -1.7089844,
|
||||
"logprob": -0.6953125,
|
||||
"special": false,
|
||||
"text": "\n"
|
||||
},
|
||||
{
|
||||
"id": 13,
|
||||
"logprob": -0.68847656,
|
||||
"logprob": -0.48339844,
|
||||
"special": false,
|
||||
"text": "\n"
|
||||
},
|
||||
{
|
||||
"id": 28771,
|
||||
"logprob": -1.9394531,
|
||||
"id": 23229,
|
||||
"logprob": -0.13256836,
|
||||
"special": false,
|
||||
"text": "#"
|
||||
"text": "Deep"
|
||||
},
|
||||
{
|
||||
"id": 3735,
|
||||
"logprob": -2.8828125,
|
||||
"id": 5168,
|
||||
"logprob": -0.02420044,
|
||||
"special": false,
|
||||
"text": " Test"
|
||||
"text": " learning"
|
||||
},
|
||||
{
|
||||
"id": 2159,
|
||||
"logprob": -0.37329102,
|
||||
"id": 349,
|
||||
"logprob": -0.13977051,
|
||||
"special": false,
|
||||
"text": " request"
|
||||
"text": " is"
|
||||
},
|
||||
{
|
||||
"id": 13,
|
||||
"logprob": -0.2602539,
|
||||
"id": 264,
|
||||
"logprob": -0.14501953,
|
||||
"special": false,
|
||||
"text": "\n"
|
||||
"text": " a"
|
||||
},
|
||||
{
|
||||
"id": 13,
|
||||
"logprob": -0.0017185211,
|
||||
"id": 19804,
|
||||
"logprob": -0.63134766,
|
||||
"special": false,
|
||||
"text": "\n"
|
||||
"text": " subset"
|
||||
},
|
||||
{
|
||||
"id": 1064,
|
||||
"logprob": -2.2753906,
|
||||
"id": 302,
|
||||
"logprob": -0.010223389,
|
||||
"special": false,
|
||||
"text": "##"
|
||||
"text": " of"
|
||||
},
|
||||
{
|
||||
"id": 3735,
|
||||
"logprob": -1.9316406,
|
||||
"id": 5599,
|
||||
"logprob": -0.06427002,
|
||||
"special": false,
|
||||
"text": " Test"
|
||||
"text": " machine"
|
||||
},
|
||||
{
|
||||
"id": 2159,
|
||||
"logprob": -0.48217773,
|
||||
"id": 5168,
|
||||
"logprob": -0.002817154,
|
||||
"special": false,
|
||||
"text": " request"
|
||||
"text": " learning"
|
||||
}
|
||||
],
|
||||
"top_tokens": null
|
||||
},
|
||||
"generated_text": "\n\n# Test request\n\n## Test request"
|
||||
"generated_text": "\n\nDeep learning is a subset of machine learning"
|
||||
},
|
||||
{
|
||||
"details": {
|
||||
|
@ -278,81 +323,96 @@
|
|||
"text": "<s>"
|
||||
},
|
||||
{
|
||||
"id": 3735,
|
||||
"logprob": -11.0078125,
|
||||
"text": "Test"
|
||||
"id": 1824,
|
||||
"logprob": -9.2890625,
|
||||
"text": "What"
|
||||
},
|
||||
{
|
||||
"id": 2159,
|
||||
"logprob": -13.59375,
|
||||
"text": "request"
|
||||
"id": 349,
|
||||
"logprob": -1.1425781,
|
||||
"text": "is"
|
||||
},
|
||||
{
|
||||
"id": 3534,
|
||||
"logprob": -9.59375,
|
||||
"text": "deep"
|
||||
},
|
||||
{
|
||||
"id": 5168,
|
||||
"logprob": -1.390625,
|
||||
"text": "learning"
|
||||
},
|
||||
{
|
||||
"id": 28804,
|
||||
"logprob": -0.45532227,
|
||||
"text": "?"
|
||||
}
|
||||
],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
"id": 13,
|
||||
"logprob": -1.7089844,
|
||||
"logprob": -0.6953125,
|
||||
"special": false,
|
||||
"text": "\n"
|
||||
},
|
||||
{
|
||||
"id": 13,
|
||||
"logprob": -0.68847656,
|
||||
"logprob": -0.48339844,
|
||||
"special": false,
|
||||
"text": "\n"
|
||||
},
|
||||
{
|
||||
"id": 28771,
|
||||
"logprob": -1.9394531,
|
||||
"id": 23229,
|
||||
"logprob": -0.13256836,
|
||||
"special": false,
|
||||
"text": "#"
|
||||
"text": "Deep"
|
||||
},
|
||||
{
|
||||
"id": 3735,
|
||||
"logprob": -2.8828125,
|
||||
"id": 5168,
|
||||
"logprob": -0.02420044,
|
||||
"special": false,
|
||||
"text": " Test"
|
||||
"text": " learning"
|
||||
},
|
||||
{
|
||||
"id": 2159,
|
||||
"logprob": -0.37329102,
|
||||
"id": 349,
|
||||
"logprob": -0.13977051,
|
||||
"special": false,
|
||||
"text": " request"
|
||||
"text": " is"
|
||||
},
|
||||
{
|
||||
"id": 13,
|
||||
"logprob": -0.2602539,
|
||||
"id": 264,
|
||||
"logprob": -0.14501953,
|
||||
"special": false,
|
||||
"text": "\n"
|
||||
"text": " a"
|
||||
},
|
||||
{
|
||||
"id": 13,
|
||||
"logprob": -0.0017185211,
|
||||
"id": 19804,
|
||||
"logprob": -0.63134766,
|
||||
"special": false,
|
||||
"text": "\n"
|
||||
"text": " subset"
|
||||
},
|
||||
{
|
||||
"id": 1064,
|
||||
"logprob": -2.2753906,
|
||||
"id": 302,
|
||||
"logprob": -0.010223389,
|
||||
"special": false,
|
||||
"text": "##"
|
||||
"text": " of"
|
||||
},
|
||||
{
|
||||
"id": 3735,
|
||||
"logprob": -1.9316406,
|
||||
"id": 5599,
|
||||
"logprob": -0.06427002,
|
||||
"special": false,
|
||||
"text": " Test"
|
||||
"text": " machine"
|
||||
},
|
||||
{
|
||||
"id": 2159,
|
||||
"logprob": -0.48217773,
|
||||
"id": 5168,
|
||||
"logprob": -0.002817154,
|
||||
"special": false,
|
||||
"text": " request"
|
||||
"text": " learning"
|
||||
}
|
||||
],
|
||||
"top_tokens": null
|
||||
},
|
||||
"generated_text": "\n\n# Test request\n\n## Test request"
|
||||
"generated_text": "\n\nDeep learning is a subset of machine learning"
|
||||
}
|
||||
]
|
||||
|
|
|
@ -3,7 +3,11 @@ import pytest
|
|||
|
||||
@pytest.fixture(scope="module")
|
||||
def flash_mixtral_gptq_handle(launcher):
|
||||
with launcher("TheBloke/Mixtral-8x7B-Instruct-v0.1-GPTQ", num_shard=2) as handle:
|
||||
with launcher(
|
||||
"TheBloke/Mixtral-8x7B-Instruct-v0.1-GPTQ",
|
||||
revision="gptq-4bit-128g-actorder_True",
|
||||
num_shard=2,
|
||||
) as handle:
|
||||
yield handle
|
||||
|
||||
|
||||
|
@ -16,7 +20,12 @@ async def flash_mixtral_gptq(flash_mixtral_gptq_handle):
|
|||
@pytest.mark.asyncio
|
||||
async def test_flash_mixtral_gptq(flash_mixtral_gptq, response_snapshot):
|
||||
response = await flash_mixtral_gptq.generate(
|
||||
"Test request", max_new_tokens=10, decoder_input_details=True
|
||||
"What is deep learning?", max_new_tokens=10, decoder_input_details=True
|
||||
)
|
||||
|
||||
assert response.details.generated_tokens == 10
|
||||
assert (
|
||||
response.generated_text == "\n\nDeep learning is a subset of machine learning"
|
||||
)
|
||||
|
||||
assert response == response_snapshot
|
||||
|
@ -25,7 +34,7 @@ async def test_flash_mixtral_gptq(flash_mixtral_gptq, response_snapshot):
|
|||
@pytest.mark.asyncio
|
||||
async def test_flash_mixtral_gptq_all_params(flash_mixtral_gptq, response_snapshot):
|
||||
response = await flash_mixtral_gptq.generate(
|
||||
"Test request",
|
||||
"What is deep learning?",
|
||||
max_new_tokens=10,
|
||||
repetition_penalty=1.2,
|
||||
return_full_text=True,
|
||||
|
@ -41,6 +50,10 @@ async def test_flash_mixtral_gptq_all_params(flash_mixtral_gptq, response_snapsh
|
|||
)
|
||||
|
||||
assert response.details.generated_tokens == 10
|
||||
assert (
|
||||
response.generated_text
|
||||
== "What is deep learning?\nDeep Learning is a subset of Machine Learning,"
|
||||
)
|
||||
assert response == response_snapshot
|
||||
|
||||
|
||||
|
@ -49,10 +62,14 @@ async def test_flash_mixtral_gptq_load(
|
|||
flash_mixtral_gptq, generate_load, response_snapshot
|
||||
):
|
||||
responses = await generate_load(
|
||||
flash_mixtral_gptq, "Test request", max_new_tokens=10, n=4
|
||||
flash_mixtral_gptq, "What is deep learning?", max_new_tokens=10, n=4
|
||||
)
|
||||
|
||||
assert len(responses) == 4
|
||||
assert (
|
||||
responses[0].generated_text
|
||||
== "\n\nDeep learning is a subset of machine learning"
|
||||
)
|
||||
assert all(
|
||||
[r.generated_text == responses[0].generated_text for r in responses]
|
||||
), f"{[r.generated_text for r in responses]}"
|
||||
|
|
Loading…
Reference in New Issue