fix(server): fix init for flash causal lm (#352)

Fixes #347
2023-05-22 15:05:32 +02:00 · 2023-05-22 15:05:32 +02:00 · 91d9beec90
parent e649bf9a55
commit 91d9beec90
12 changed files with 1110 additions and 495 deletions
--- a/integration-tests/models/snapshots/test_flash_neox/test_flash_neox.json
+++ b/integration-tests/models/snapshots/test_flash_neox/test_flash_neox.json
@ -7,157 +7,107 @@
      {
        "id": 50278,
        "logprob": null,
-        "text": "<|prompter|>"
+        "text": "<|USER|>"
      },
      {
        "id": 1276,
-        "logprob": -8.03125,
+        "logprob": -4.5546875,
        "text": "What"
      },
      {
        "id": 310,
        "logprob": -5.421875,
        "text": " is"
      },
      {
        "id": 247,
        "logprob": -2.1601562,
        "text": " a"
      },
      {
        "id": 1167,
        "logprob": -5.4609375,
        "text": " mem"
      },
      {
        "id": 70,
        "logprob": -0.005657196,
        "text": "e"
      },
      {
        "id": 13,
        "logprob": -7.28125,
        "text": ","
      },
      {
        "id": 285,
        "logprob": -0.2980957,
        "text": " and"
      },
      {
        "id": 752,
        "logprob": -2.1679688,
        "text": " what"
      },
      {
        "id": 434,
-        "logprob": -5.6210938,
+        "logprob": -4.234375,
        "text": "'s"
      },
      {
-        "id": 253,
+        "id": 634,
-        "logprob": -0.81103516,
+        "logprob": -5.1054688,
-        "text": " the"
+        "text": " your"
      },
      {
-        "id": 2892,
+        "id": 12315,
-        "logprob": -6.6640625,
+        "logprob": -9.953125,
-        "text": " history"
+        "text": " mood"
      },
      {
-        "id": 3212,
+        "id": 3063,
-        "logprob": -2.265625,
+        "logprob": -4.0820312,
-        "text": " behind"
+        "text": " today"
      },
      {
        "id": 436,
        "logprob": -11.5078125,
        "text": " this"
      },
      {
        "id": 3159,
        "logprob": -2.1582031,
        "text": " word"
      },
      {
        "id": 32,
-        "logprob": -0.008720398,
+        "logprob": -0.15148926,
        "text": "?"
      },
      {
-        "id": 0,
+        "id": 50279,
-        "logprob": -2.4726562,
+        "logprob": -0.27026367,
-        "text": "<|endoftext|>"
+        "text": "<|ASSISTANT|>"
      },
      {
        "id": 50281,
        "logprob": -18.265625,
        "text": "<|assistant|>"
      }
    ],
    "seed": null,
    "tokens": [
      {
-        "id": 510,
+        "id": 42,
-        "logprob": -0.63183594,
+        "logprob": -0.88378906,
        "special": false,
-        "text": "The"
+        "text": "I"
      },
      {
-        "id": 3159,
+        "id": 1353,
-        "logprob": -0.5390625,
+        "logprob": -0.94921875,
        "special": false,
-        "text": " word"
+        "text": "'m"
      },
      {
-        "id": 346,
+        "id": 417,
-        "logprob": -0.045684814,
+        "logprob": -2.2402344,
        "special": false,
-        "text": " \""
+        "text": " not"
      },
      {
-        "id": 6441,
+        "id": 2119,
-        "logprob": -0.002090454,
+        "logprob": -0.3725586,
        "special": false,
-        "text": "mem"
+        "text": " sure"
      },
      {
-        "id": 70,
+        "id": 13,
-        "logprob": -1.3589859e-05,
+        "logprob": -1.078125,
        "special": false,
-        "text": "e"
+        "text": ","
      },
      {
-        "id": 3,
+        "id": 534,
-        "logprob": -0.0009455681,
+        "logprob": -0.67822266,
        "special": false,
-        "text": "\""
+        "text": " which"
      },
      {
-        "id": 369,
+        "id": 310,
-        "logprob": -0.088012695,
+        "logprob": -1.3837891,
        "special": false,
-        "text": " was"
+        "text": " is"
      },
      {
-        "id": 806,
+        "id": 253,
-        "logprob": -0.12585449,
+        "logprob": -1.7050781,
        "special": false,
-        "text": " first"
+        "text": " the"
      },
      {
-        "id": 908,
+        "id": 1682,
-        "logprob": -0.017196655,
+        "logprob": -0.052001953,
        "special": false,
-        "text": " used"
+        "text": " best"
      },
      {
-        "id": 275,
+        "id": 1039,
-        "logprob": -0.49731445,
+        "logprob": -2.0390625,
        "special": false,
-        "text": " in"
+        "text": " way"
      }
    ]
  },
-  "generated_text": "The word \"meme\" was first used in"
+  "generated_text": "I'm not sure, which is the best way"
 }
--- a/integration-tests/models/snapshots/test_flash_neox/test_flash_neox_load.json
+++ b/integration-tests/models/snapshots/test_flash_neox/test_flash_neox_load.json
@ -8,159 +8,109 @@
        {
          "id": 50278,
          "logprob": null,
-          "text": "<|prompter|>"
+          "text": "<|USER|>"
        },
        {
          "id": 1276,
-          "logprob": -8.03125,
+          "logprob": -4.5546875,
          "text": "What"
        },
        {
          "id": 310,
          "logprob": -5.421875,
          "text": " is"
        },
        {
          "id": 247,
          "logprob": -2.1601562,
          "text": " a"
        },
        {
          "id": 1167,
          "logprob": -5.4609375,
          "text": " mem"
        },
        {
          "id": 70,
          "logprob": -0.005657196,
          "text": "e"
        },
        {
          "id": 13,
          "logprob": -7.28125,
          "text": ","
        },
        {
          "id": 285,
          "logprob": -0.2980957,
          "text": " and"
        },
        {
          "id": 752,
          "logprob": -2.1679688,
          "text": " what"
        },
        {
          "id": 434,
-          "logprob": -5.6210938,
+          "logprob": -4.234375,
          "text": "'s"
        },
        {
-          "id": 253,
+          "id": 634,
-          "logprob": -0.81103516,
+          "logprob": -5.21875,
-          "text": " the"
+          "text": " your"
        },
        {
-          "id": 2892,
+          "id": 12315,
-          "logprob": -6.6640625,
+          "logprob": -9.9375,
-          "text": " history"
+          "text": " mood"
        },
        {
-          "id": 3212,
+          "id": 3063,
-          "logprob": -2.265625,
+          "logprob": -4.1015625,
-          "text": " behind"
+          "text": " today"
        },
        {
          "id": 436,
          "logprob": -11.5078125,
          "text": " this"
        },
        {
          "id": 3159,
          "logprob": -2.1582031,
          "text": " word"
        },
        {
          "id": 32,
-          "logprob": -0.008720398,
+          "logprob": -0.15319824,
          "text": "?"
        },
        {
-          "id": 0,
+          "id": 50279,
-          "logprob": -2.4726562,
+          "logprob": -0.2614746,
-          "text": "<|endoftext|>"
+          "text": "<|ASSISTANT|>"
        },
        {
          "id": 50281,
          "logprob": -18.265625,
          "text": "<|assistant|>"
        }
      ],
      "seed": null,
      "tokens": [
        {
-          "id": 510,
+          "id": 42,
-          "logprob": -0.63183594,
+          "logprob": -0.8886719,
          "special": false,
-          "text": "The"
+          "text": "I"
        },
        {
-          "id": 3159,
+          "id": 1353,
-          "logprob": -0.5488281,
+          "logprob": -0.98046875,
          "special": false,
-          "text": " word"
+          "text": "'m"
        },
        {
-          "id": 346,
+          "id": 417,
-          "logprob": -0.045684814,
+          "logprob": -2.2265625,
          "special": false,
-          "text": " \""
+          "text": " not"
        },
        {
-          "id": 6441,
+          "id": 2119,
-          "logprob": -0.00207901,
+          "logprob": -0.3479004,
          "special": false,
-          "text": "mem"
+          "text": " sure"
        },
        {
-          "id": 70,
+          "id": 13,
-          "logprob": -1.335144e-05,
+          "logprob": -1.0117188,
          "special": false,
-          "text": "e"
+          "text": ","
        },
        {
-          "id": 3,
+          "id": 534,
-          "logprob": -0.00097227097,
+          "logprob": -0.67871094,
          "special": false,
-          "text": "\""
+          "text": " which"
        },
        {
-          "id": 369,
+          "id": 310,
-          "logprob": -0.0892334,
+          "logprob": -1.421875,
          "special": false,
-          "text": " was"
+          "text": " is"
        },
        {
-          "id": 806,
+          "id": 253,
-          "logprob": -0.12463379,
+          "logprob": -1.7382812,
          "special": false,
-          "text": " first"
+          "text": " the"
        },
        {
-          "id": 908,
+          "id": 1682,
-          "logprob": -0.01737976,
+          "logprob": -0.051330566,
          "special": false,
-          "text": " used"
+          "text": " best"
        },
        {
-          "id": 275,
+          "id": 1039,
-          "logprob": -0.50341797,
+          "logprob": -2.0390625,
          "special": false,
-          "text": " in"
+          "text": " way"
        }
      ]
    },
-    "generated_text": "The word \"meme\" was first used in"
+    "generated_text": "I'm not sure, which is the best way"
  },
  {
    "details": {
@ -171,159 +121,109 @@
        {
          "id": 50278,
          "logprob": null,
-          "text": "<|prompter|>"
+          "text": "<|USER|>"
        },
        {
          "id": 1276,
-          "logprob": -8.03125,
+          "logprob": -4.5546875,
          "text": "What"
        },
        {
          "id": 310,
          "logprob": -5.421875,
          "text": " is"
        },
        {
          "id": 247,
          "logprob": -2.1601562,
          "text": " a"
        },
        {
          "id": 1167,
          "logprob": -5.4609375,
          "text": " mem"
        },
        {
          "id": 70,
          "logprob": -0.005657196,
          "text": "e"
        },
        {
          "id": 13,
          "logprob": -7.28125,
          "text": ","
        },
        {
          "id": 285,
          "logprob": -0.2980957,
          "text": " and"
        },
        {
          "id": 752,
          "logprob": -2.1679688,
          "text": " what"
        },
        {
          "id": 434,
-          "logprob": -5.6210938,
+          "logprob": -4.234375,
          "text": "'s"
        },
        {
-          "id": 253,
+          "id": 634,
-          "logprob": -0.81103516,
+          "logprob": -5.1054688,
-          "text": " the"
+          "text": " your"
        },
        {
-          "id": 2892,
+          "id": 12315,
-          "logprob": -6.6640625,
+          "logprob": -9.953125,
-          "text": " history"
+          "text": " mood"
        },
        {
-          "id": 3212,
+          "id": 3063,
-          "logprob": -2.265625,
+          "logprob": -4.0820312,
-          "text": " behind"
+          "text": " today"
        },
        {
          "id": 436,
          "logprob": -11.5078125,
          "text": " this"
        },
        {
          "id": 3159,
          "logprob": -2.1582031,
          "text": " word"
        },
        {
          "id": 32,
-          "logprob": -0.008720398,
+          "logprob": -0.15148926,
          "text": "?"
        },
        {
-          "id": 0,
+          "id": 50279,
-          "logprob": -2.4726562,
+          "logprob": -0.27026367,
-          "text": "<|endoftext|>"
+          "text": "<|ASSISTANT|>"
        },
        {
          "id": 50281,
          "logprob": -18.265625,
          "text": "<|assistant|>"
        }
      ],
      "seed": null,
      "tokens": [
        {
-          "id": 510,
+          "id": 42,
-          "logprob": -0.63183594,
+          "logprob": -0.88378906,
          "special": false,
-          "text": "The"
+          "text": "I"
        },
        {
-          "id": 3159,
+          "id": 1353,
-          "logprob": -0.5488281,
+          "logprob": -0.9819336,
          "special": false,
-          "text": " word"
+          "text": "'m"
        },
        {
-          "id": 346,
+          "id": 417,
-          "logprob": -0.045684814,
+          "logprob": -2.2421875,
          "special": false,
-          "text": " \""
+          "text": " not"
        },
        {
-          "id": 6441,
+          "id": 2119,
-          "logprob": -0.00207901,
+          "logprob": -0.3474121,
          "special": false,
-          "text": "mem"
+          "text": " sure"
        },
        {
-          "id": 70,
+          "id": 13,
-          "logprob": -1.335144e-05,
+          "logprob": -1.078125,
          "special": false,
-          "text": "e"
+          "text": ","
        },
        {
-          "id": 3,
+          "id": 534,
-          "logprob": -0.00097227097,
+          "logprob": -0.69140625,
          "special": false,
-          "text": "\""
+          "text": " which"
        },
        {
-          "id": 369,
+          "id": 310,
-          "logprob": -0.0892334,
+          "logprob": -1.4072266,
          "special": false,
-          "text": " was"
+          "text": " is"
        },
        {
-          "id": 806,
+          "id": 253,
-          "logprob": -0.12463379,
+          "logprob": -1.7041016,
          "special": false,
-          "text": " first"
+          "text": " the"
        },
        {
-          "id": 908,
+          "id": 1682,
-          "logprob": -0.01737976,
+          "logprob": -0.053375244,
          "special": false,
-          "text": " used"
+          "text": " best"
        },
        {
-          "id": 275,
+          "id": 1039,
-          "logprob": -0.50341797,
+          "logprob": -2.0351562,
          "special": false,
-          "text": " in"
+          "text": " way"
        }
      ]
    },
-    "generated_text": "The word \"meme\" was first used in"
+    "generated_text": "I'm not sure, which is the best way"
  },
  {
    "details": {
@ -334,159 +234,109 @@
        {
          "id": 50278,
          "logprob": null,
-          "text": "<|prompter|>"
+          "text": "<|USER|>"
        },
        {
          "id": 1276,
-          "logprob": -8.03125,
+          "logprob": -4.5546875,
          "text": "What"
        },
        {
          "id": 310,
          "logprob": -5.421875,
          "text": " is"
        },
        {
          "id": 247,
          "logprob": -2.1601562,
          "text": " a"
        },
        {
          "id": 1167,
          "logprob": -5.4609375,
          "text": " mem"
        },
        {
          "id": 70,
          "logprob": -0.005657196,
          "text": "e"
        },
        {
          "id": 13,
          "logprob": -7.28125,
          "text": ","
        },
        {
          "id": 285,
          "logprob": -0.2980957,
          "text": " and"
        },
        {
          "id": 752,
          "logprob": -2.1679688,
          "text": " what"
        },
        {
          "id": 434,
-          "logprob": -5.6210938,
+          "logprob": -4.234375,
          "text": "'s"
        },
        {
-          "id": 253,
+          "id": 634,
-          "logprob": -0.81103516,
+          "logprob": -5.21875,
-          "text": " the"
+          "text": " your"
        },
        {
-          "id": 2892,
+          "id": 12315,
-          "logprob": -6.6640625,
+          "logprob": -9.9375,
-          "text": " history"
+          "text": " mood"
        },
        {
-          "id": 3212,
+          "id": 3063,
-          "logprob": -2.265625,
+          "logprob": -4.1015625,
-          "text": " behind"
+          "text": " today"
        },
        {
          "id": 436,
          "logprob": -11.5078125,
          "text": " this"
        },
        {
          "id": 3159,
          "logprob": -2.1582031,
          "text": " word"
        },
        {
          "id": 32,
-          "logprob": -0.008720398,
+          "logprob": -0.15319824,
          "text": "?"
        },
        {
-          "id": 0,
+          "id": 50279,
-          "logprob": -2.4726562,
+          "logprob": -0.2614746,
-          "text": "<|endoftext|>"
+          "text": "<|ASSISTANT|>"
        },
        {
          "id": 50281,
          "logprob": -18.265625,
          "text": "<|assistant|>"
        }
      ],
      "seed": null,
      "tokens": [
        {
-          "id": 510,
+          "id": 42,
-          "logprob": -0.63183594,
+          "logprob": -0.8886719,
          "special": false,
-          "text": "The"
+          "text": "I"
        },
        {
-          "id": 3159,
+          "id": 1353,
-          "logprob": -0.5488281,
+          "logprob": -0.98046875,
          "special": false,
-          "text": " word"
+          "text": "'m"
        },
        {
-          "id": 346,
+          "id": 417,
-          "logprob": -0.045684814,
+          "logprob": -2.2265625,
          "special": false,
-          "text": " \""
+          "text": " not"
        },
        {
-          "id": 6441,
+          "id": 2119,
-          "logprob": -0.00207901,
+          "logprob": -0.3479004,
          "special": false,
-          "text": "mem"
+          "text": " sure"
        },
        {
-          "id": 70,
+          "id": 13,
-          "logprob": -1.335144e-05,
+          "logprob": -1.0117188,
          "special": false,
-          "text": "e"
+          "text": ","
        },
        {
-          "id": 3,
+          "id": 534,
-          "logprob": -0.00097227097,
+          "logprob": -0.67871094,
          "special": false,
-          "text": "\""
+          "text": " which"
        },
        {
-          "id": 369,
+          "id": 310,
-          "logprob": -0.0892334,
+          "logprob": -1.421875,
          "special": false,
-          "text": " was"
+          "text": " is"
        },
        {
-          "id": 806,
+          "id": 253,
-          "logprob": -0.12463379,
+          "logprob": -1.7382812,
          "special": false,
-          "text": " first"
+          "text": " the"
        },
        {
-          "id": 908,
+          "id": 1682,
-          "logprob": -0.01737976,
+          "logprob": -0.051330566,
          "special": false,
-          "text": " used"
+          "text": " best"
        },
        {
-          "id": 275,
+          "id": 1039,
-          "logprob": -0.50341797,
+          "logprob": -2.0390625,
          "special": false,
-          "text": " in"
+          "text": " way"
        }
      ]
    },
-    "generated_text": "The word \"meme\" was first used in"
+    "generated_text": "I'm not sure, which is the best way"
  },
  {
    "details": {
@ -497,158 +347,108 @@
        {
          "id": 50278,
          "logprob": null,
-          "text": "<|prompter|>"
+          "text": "<|USER|>"
        },
        {
          "id": 1276,
-          "logprob": -8.03125,
+          "logprob": -4.5546875,
          "text": "What"
        },
        {
          "id": 310,
          "logprob": -5.421875,
          "text": " is"
        },
        {
          "id": 247,
          "logprob": -2.1601562,
          "text": " a"
        },
        {
          "id": 1167,
          "logprob": -5.4609375,
          "text": " mem"
        },
        {
          "id": 70,
          "logprob": -0.005657196,
          "text": "e"
        },
        {
          "id": 13,
          "logprob": -7.28125,
          "text": ","
        },
        {
          "id": 285,
          "logprob": -0.2980957,
          "text": " and"
        },
        {
          "id": 752,
          "logprob": -2.1679688,
          "text": " what"
        },
        {
          "id": 434,
-          "logprob": -5.6210938,
+          "logprob": -4.234375,
          "text": "'s"
        },
        {
-          "id": 253,
+          "id": 634,
-          "logprob": -0.81103516,
+          "logprob": -5.21875,
-          "text": " the"
+          "text": " your"
        },
        {
-          "id": 2892,
+          "id": 12315,
-          "logprob": -6.6640625,
+          "logprob": -9.9375,
-          "text": " history"
+          "text": " mood"
        },
        {
-          "id": 3212,
+          "id": 3063,
-          "logprob": -2.265625,
+          "logprob": -4.1015625,
-          "text": " behind"
+          "text": " today"
        },
        {
          "id": 436,
          "logprob": -11.5078125,
          "text": " this"
        },
        {
          "id": 3159,
          "logprob": -2.1582031,
          "text": " word"
        },
        {
          "id": 32,
-          "logprob": -0.008720398,
+          "logprob": -0.15319824,
          "text": "?"
        },
        {
-          "id": 0,
+          "id": 50279,
-          "logprob": -2.4726562,
+          "logprob": -0.2614746,
-          "text": "<|endoftext|>"
+          "text": "<|ASSISTANT|>"
        },
        {
          "id": 50281,
          "logprob": -18.265625,
          "text": "<|assistant|>"
        }
      ],
      "seed": null,
      "tokens": [
        {
-          "id": 510,
+          "id": 42,
-          "logprob": -0.63183594,
+          "logprob": -0.8886719,
          "special": false,
-          "text": "The"
+          "text": "I"
        },
        {
-          "id": 3159,
+          "id": 1353,
-          "logprob": -0.5488281,
+          "logprob": -0.98046875,
          "special": false,
-          "text": " word"
+          "text": "'m"
        },
        {
-          "id": 346,
+          "id": 417,
-          "logprob": -0.045684814,
+          "logprob": -2.2265625,
          "special": false,
-          "text": " \""
+          "text": " not"
        },
        {
-          "id": 6441,
+          "id": 2119,
-          "logprob": -0.00207901,
+          "logprob": -0.3479004,
          "special": false,
-          "text": "mem"
+          "text": " sure"
        },
        {
-          "id": 70,
+          "id": 13,
-          "logprob": -1.335144e-05,
+          "logprob": -1.0117188,
          "special": false,
-          "text": "e"
+          "text": ","
        },
        {
-          "id": 3,
+          "id": 534,
-          "logprob": -0.00097227097,
+          "logprob": -0.67871094,
          "special": false,
-          "text": "\""
+          "text": " which"
        },
        {
-          "id": 369,
+          "id": 310,
-          "logprob": -0.0892334,
+          "logprob": -1.421875,
          "special": false,
-          "text": " was"
+          "text": " is"
        },
        {
-          "id": 806,
+          "id": 253,
-          "logprob": -0.12463379,
+          "logprob": -1.7382812,
          "special": false,
-          "text": " first"
+          "text": " the"
        },
        {
-          "id": 908,
+          "id": 1682,
-          "logprob": -0.01737976,
+          "logprob": -0.051330566,
          "special": false,
-          "text": " used"
+          "text": " best"
        },
        {
-          "id": 275,
+          "id": 1039,
-          "logprob": -0.50341797,
+          "logprob": -2.0390625,
          "special": false,
-          "text": " in"
+          "text": " way"
        }
      ]
    },
-    "generated_text": "The word \"meme\" was first used in"
+    "generated_text": "I'm not sure, which is the best way"
  }
 ]
--- a/integration-tests/models/snapshots/test_flash_neox_sharded/test_flash_neox.json
+++ b/integration-tests/models/snapshots/test_flash_neox_sharded/test_flash_neox.json
@ -0,0 +1,163 @@
 {
  "details": {
    "best_of_sequences": null,
    "finish_reason": "length",
    "generated_tokens": 10,
    "prefill": [
      {
        "id": 50278,
        "logprob": null,
        "text": "<|prompter|>"
      },
      {
        "id": 1276,
        "logprob": -8.03125,
        "text": "What"
      },
      {
        "id": 310,
        "logprob": -5.421875,
        "text": " is"
      },
      {
        "id": 247,
        "logprob": -2.1601562,
        "text": " a"
      },
      {
        "id": 1167,
        "logprob": -5.4609375,
        "text": " mem"
      },
      {
        "id": 70,
        "logprob": -0.005657196,
        "text": "e"
      },
      {
        "id": 13,
        "logprob": -7.28125,
        "text": ","
      },
      {
        "id": 285,
        "logprob": -0.2980957,
        "text": " and"
      },
      {
        "id": 752,
        "logprob": -2.1679688,
        "text": " what"
      },
      {
        "id": 434,
        "logprob": -5.6210938,
        "text": "'s"
      },
      {
        "id": 253,
        "logprob": -0.81103516,
        "text": " the"
      },
      {
        "id": 2892,
        "logprob": -6.6640625,
        "text": " history"
      },
      {
        "id": 3212,
        "logprob": -2.265625,
        "text": " behind"
      },
      {
        "id": 436,
        "logprob": -11.5078125,
        "text": " this"
      },
      {
        "id": 3159,
        "logprob": -2.1582031,
        "text": " word"
      },
      {
        "id": 32,
        "logprob": -0.008720398,
        "text": "?"
      },
      {
        "id": 0,
        "logprob": -2.4726562,
        "text": "<|endoftext|>"
      },
      {
        "id": 50281,
        "logprob": -18.265625,
        "text": "<|assistant|>"
      }
    ],
    "seed": null,
    "tokens": [
      {
        "id": 510,
        "logprob": -0.63183594,
        "special": false,
        "text": "The"
      },
      {
        "id": 3159,
        "logprob": -0.5390625,
        "special": false,
        "text": " word"
      },
      {
        "id": 346,
        "logprob": -0.045684814,
        "special": false,
        "text": " \""
      },
      {
        "id": 6441,
        "logprob": -0.002090454,
        "special": false,
        "text": "mem"
      },
      {
        "id": 70,
        "logprob": -1.3589859e-05,
        "special": false,
        "text": "e"
      },
      {
        "id": 3,
        "logprob": -0.0009455681,
        "special": false,
        "text": "\""
      },
      {
        "id": 369,
        "logprob": -0.088012695,
        "special": false,
        "text": " was"
      },
      {
        "id": 806,
        "logprob": -0.12585449,
        "special": false,
        "text": " first"
      },
      {
        "id": 908,
        "logprob": -0.017196655,
        "special": false,
        "text": " used"
      },
      {
        "id": 275,
        "logprob": -0.49731445,
        "special": false,
        "text": " in"
      }
    ]
  },
  "generated_text": "The word \"meme\" was first used in"
 }
--- a/integration-tests/models/snapshots/test_flash_neox_sharded/test_flash_neox_load.json
+++ b/integration-tests/models/snapshots/test_flash_neox_sharded/test_flash_neox_load.json
@ -0,0 +1,654 @@
 [
  {
    "details": {
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
      "prefill": [
        {
          "id": 50278,
          "logprob": null,
          "text": "<|prompter|>"
        },
        {
          "id": 1276,
          "logprob": -8.03125,
          "text": "What"
        },
        {
          "id": 310,
          "logprob": -5.421875,
          "text": " is"
        },
        {
          "id": 247,
          "logprob": -2.1601562,
          "text": " a"
        },
        {
          "id": 1167,
          "logprob": -5.4609375,
          "text": " mem"
        },
        {
          "id": 70,
          "logprob": -0.005657196,
          "text": "e"
        },
        {
          "id": 13,
          "logprob": -7.28125,
          "text": ","
        },
        {
          "id": 285,
          "logprob": -0.2980957,
          "text": " and"
        },
        {
          "id": 752,
          "logprob": -2.1679688,
          "text": " what"
        },
        {
          "id": 434,
          "logprob": -5.6210938,
          "text": "'s"
        },
        {
          "id": 253,
          "logprob": -0.81103516,
          "text": " the"
        },
        {
          "id": 2892,
          "logprob": -6.6640625,
          "text": " history"
        },
        {
          "id": 3212,
          "logprob": -2.265625,
          "text": " behind"
        },
        {
          "id": 436,
          "logprob": -11.5078125,
          "text": " this"
        },
        {
          "id": 3159,
          "logprob": -2.1582031,
          "text": " word"
        },
        {
          "id": 32,
          "logprob": -0.008720398,
          "text": "?"
        },
        {
          "id": 0,
          "logprob": -2.4726562,
          "text": "<|endoftext|>"
        },
        {
          "id": 50281,
          "logprob": -18.265625,
          "text": "<|assistant|>"
        }
      ],
      "seed": null,
      "tokens": [
        {
          "id": 510,
          "logprob": -0.63183594,
          "special": false,
          "text": "The"
        },
        {
          "id": 3159,
          "logprob": -0.5488281,
          "special": false,
          "text": " word"
        },
        {
          "id": 346,
          "logprob": -0.045684814,
          "special": false,
          "text": " \""
        },
        {
          "id": 6441,
          "logprob": -0.00207901,
          "special": false,
          "text": "mem"
        },
        {
          "id": 70,
          "logprob": -1.335144e-05,
          "special": false,
          "text": "e"
        },
        {
          "id": 3,
          "logprob": -0.00097227097,
          "special": false,
          "text": "\""
        },
        {
          "id": 369,
          "logprob": -0.0892334,
          "special": false,
          "text": " was"
        },
        {
          "id": 806,
          "logprob": -0.12463379,
          "special": false,
          "text": " first"
        },
        {
          "id": 908,
          "logprob": -0.01737976,
          "special": false,
          "text": " used"
        },
        {
          "id": 275,
          "logprob": -0.50341797,
          "special": false,
          "text": " in"
        }
      ]
    },
    "generated_text": "The word \"meme\" was first used in"
  },
  {
    "details": {
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
      "prefill": [
        {
          "id": 50278,
          "logprob": null,
          "text": "<|prompter|>"
        },
        {
          "id": 1276,
          "logprob": -8.03125,
          "text": "What"
        },
        {
          "id": 310,
          "logprob": -5.421875,
          "text": " is"
        },
        {
          "id": 247,
          "logprob": -2.1601562,
          "text": " a"
        },
        {
          "id": 1167,
          "logprob": -5.4609375,
          "text": " mem"
        },
        {
          "id": 70,
          "logprob": -0.005657196,
          "text": "e"
        },
        {
          "id": 13,
          "logprob": -7.28125,
          "text": ","
        },
        {
          "id": 285,
          "logprob": -0.2980957,
          "text": " and"
        },
        {
          "id": 752,
          "logprob": -2.1679688,
          "text": " what"
        },
        {
          "id": 434,
          "logprob": -5.6210938,
          "text": "'s"
        },
        {
          "id": 253,
          "logprob": -0.81103516,
          "text": " the"
        },
        {
          "id": 2892,
          "logprob": -6.6640625,
          "text": " history"
        },
        {
          "id": 3212,
          "logprob": -2.265625,
          "text": " behind"
        },
        {
          "id": 436,
          "logprob": -11.5078125,
          "text": " this"
        },
        {
          "id": 3159,
          "logprob": -2.1582031,
          "text": " word"
        },
        {
          "id": 32,
          "logprob": -0.008720398,
          "text": "?"
        },
        {
          "id": 0,
          "logprob": -2.4726562,
          "text": "<|endoftext|>"
        },
        {
          "id": 50281,
          "logprob": -18.265625,
          "text": "<|assistant|>"
        }
      ],
      "seed": null,
      "tokens": [
        {
          "id": 510,
          "logprob": -0.63183594,
          "special": false,
          "text": "The"
        },
        {
          "id": 3159,
          "logprob": -0.5488281,
          "special": false,
          "text": " word"
        },
        {
          "id": 346,
          "logprob": -0.045684814,
          "special": false,
          "text": " \""
        },
        {
          "id": 6441,
          "logprob": -0.00207901,
          "special": false,
          "text": "mem"
        },
        {
          "id": 70,
          "logprob": -1.335144e-05,
          "special": false,
          "text": "e"
        },
        {
          "id": 3,
          "logprob": -0.00097227097,
          "special": false,
          "text": "\""
        },
        {
          "id": 369,
          "logprob": -0.0892334,
          "special": false,
          "text": " was"
        },
        {
          "id": 806,
          "logprob": -0.12463379,
          "special": false,
          "text": " first"
        },
        {
          "id": 908,
          "logprob": -0.01737976,
          "special": false,
          "text": " used"
        },
        {
          "id": 275,
          "logprob": -0.50341797,
          "special": false,
          "text": " in"
        }
      ]
    },
    "generated_text": "The word \"meme\" was first used in"
  },
  {
    "details": {
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
      "prefill": [
        {
          "id": 50278,
          "logprob": null,
          "text": "<|prompter|>"
        },
        {
          "id": 1276,
          "logprob": -8.03125,
          "text": "What"
        },
        {
          "id": 310,
          "logprob": -5.421875,
          "text": " is"
        },
        {
          "id": 247,
          "logprob": -2.1601562,
          "text": " a"
        },
        {
          "id": 1167,
          "logprob": -5.4609375,
          "text": " mem"
        },
        {
          "id": 70,
          "logprob": -0.005657196,
          "text": "e"
        },
        {
          "id": 13,
          "logprob": -7.28125,
          "text": ","
        },
        {
          "id": 285,
          "logprob": -0.2980957,
          "text": " and"
        },
        {
          "id": 752,
          "logprob": -2.1679688,
          "text": " what"
        },
        {
          "id": 434,
          "logprob": -5.6210938,
          "text": "'s"
        },
        {
          "id": 253,
          "logprob": -0.81103516,
          "text": " the"
        },
        {
          "id": 2892,
          "logprob": -6.6640625,
          "text": " history"
        },
        {
          "id": 3212,
          "logprob": -2.265625,
          "text": " behind"
        },
        {
          "id": 436,
          "logprob": -11.5078125,
          "text": " this"
        },
        {
          "id": 3159,
          "logprob": -2.1582031,
          "text": " word"
        },
        {
          "id": 32,
          "logprob": -0.008720398,
          "text": "?"
        },
        {
          "id": 0,
          "logprob": -2.4726562,
          "text": "<|endoftext|>"
        },
        {
          "id": 50281,
          "logprob": -18.265625,
          "text": "<|assistant|>"
        }
      ],
      "seed": null,
      "tokens": [
        {
          "id": 510,
          "logprob": -0.63183594,
          "special": false,
          "text": "The"
        },
        {
          "id": 3159,
          "logprob": -0.5488281,
          "special": false,
          "text": " word"
        },
        {
          "id": 346,
          "logprob": -0.045684814,
          "special": false,
          "text": " \""
        },
        {
          "id": 6441,
          "logprob": -0.00207901,
          "special": false,
          "text": "mem"
        },
        {
          "id": 70,
          "logprob": -1.335144e-05,
          "special": false,
          "text": "e"
        },
        {
          "id": 3,
          "logprob": -0.00097227097,
          "special": false,
          "text": "\""
        },
        {
          "id": 369,
          "logprob": -0.0892334,
          "special": false,
          "text": " was"
        },
        {
          "id": 806,
          "logprob": -0.12463379,
          "special": false,
          "text": " first"
        },
        {
          "id": 908,
          "logprob": -0.01737976,
          "special": false,
          "text": " used"
        },
        {
          "id": 275,
          "logprob": -0.50341797,
          "special": false,
          "text": " in"
        }
      ]
    },
    "generated_text": "The word \"meme\" was first used in"
  },
  {
    "details": {
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
      "prefill": [
        {
          "id": 50278,
          "logprob": null,
          "text": "<|prompter|>"
        },
        {
          "id": 1276,
          "logprob": -8.03125,
          "text": "What"
        },
        {
          "id": 310,
          "logprob": -5.421875,
          "text": " is"
        },
        {
          "id": 247,
          "logprob": -2.1601562,
          "text": " a"
        },
        {
          "id": 1167,
          "logprob": -5.4609375,
          "text": " mem"
        },
        {
          "id": 70,
          "logprob": -0.005657196,
          "text": "e"
        },
        {
          "id": 13,
          "logprob": -7.28125,
          "text": ","
        },
        {
          "id": 285,
          "logprob": -0.2980957,
          "text": " and"
        },
        {
          "id": 752,
          "logprob": -2.1679688,
          "text": " what"
        },
        {
          "id": 434,
          "logprob": -5.6210938,
          "text": "'s"
        },
        {
          "id": 253,
          "logprob": -0.81103516,
          "text": " the"
        },
        {
          "id": 2892,
          "logprob": -6.6640625,
          "text": " history"
        },
        {
          "id": 3212,
          "logprob": -2.265625,
          "text": " behind"
        },
        {
          "id": 436,
          "logprob": -11.5078125,
          "text": " this"
        },
        {
          "id": 3159,
          "logprob": -2.1582031,
          "text": " word"
        },
        {
          "id": 32,
          "logprob": -0.008720398,
          "text": "?"
        },
        {
          "id": 0,
          "logprob": -2.4726562,
          "text": "<|endoftext|>"
        },
        {
          "id": 50281,
          "logprob": -18.265625,
          "text": "<|assistant|>"
        }
      ],
      "seed": null,
      "tokens": [
        {
          "id": 510,
          "logprob": -0.63183594,
          "special": false,
          "text": "The"
        },
        {
          "id": 3159,
          "logprob": -0.5488281,
          "special": false,
          "text": " word"
        },
        {
          "id": 346,
          "logprob": -0.045684814,
          "special": false,
          "text": " \""
        },
        {
          "id": 6441,
          "logprob": -0.00207901,
          "special": false,
          "text": "mem"
        },
        {
          "id": 70,
          "logprob": -1.335144e-05,
          "special": false,
          "text": "e"
        },
        {
          "id": 3,
          "logprob": -0.00097227097,
          "special": false,
          "text": "\""
        },
        {
          "id": 369,
          "logprob": -0.0892334,
          "special": false,
          "text": " was"
        },
        {
          "id": 806,
          "logprob": -0.12463379,
          "special": false,
          "text": " first"
        },
        {
          "id": 908,
          "logprob": -0.01737976,
          "special": false,
          "text": " used"
        },
        {
          "id": 275,
          "logprob": -0.50341797,
          "special": false,
          "text": " in"
        }
      ]
    },
    "generated_text": "The word \"meme\" was first used in"
  }
 ]
--- a/integration-tests/models/test_flash_neox.py
+++ b/integration-tests/models/test_flash_neox.py
@ -3,7 +3,7 @@ import pytest
@pytest.fixture(scope="module")
 def flash_neox_handle(launcher):
    # Module-scoped fixture body: enter the launcher context and yield its handle.
    # This hunk switches the model to stablelm-tuned-alpha-3b with num_shard=1.
-    with launcher("OpenAssistant/oasst-sft-1-pythia-12b", num_shard=2) as handle:
+    with launcher("stabilityai/stablelm-tuned-alpha-3b", num_shard=1) as handle:
        yield handle
@ -16,7 +16,7 @@ async def flash_neox(flash_neox_handle):
@pytest.mark.asyncio
 async def test_flash_neox(flash_neox, response_snapshot):
    response = await flash_neox.generate(
-        "<|prompter|>What is a meme, and what's the history behind this word?<|endoftext|><|assistant|>",
+        "<|USER|>What's your mood today?<|ASSISTANT|>",
        max_new_tokens=10,
    )
@ -28,12 +28,14 @@ async def test_flash_neox(flash_neox, response_snapshot):
 async def test_flash_neox_load(flash_neox, generate_load, response_snapshot):
    responses = await generate_load(
        flash_neox,
-        "<|prompter|>What is a meme, and what's the history behind this word?<|endoftext|><|assistant|>",
+        "<|USER|>What's your mood today?<|ASSISTANT|>",
        max_new_tokens=10,
        n=4,
    )
-    assert len(responses) == 4
+    generated_texts = [r.generated_text for r in responses]
-    assert all([r.generated_text == responses[0].generated_text for r in responses])
+
    assert len(generated_texts) == 4
    assert generated_texts, all([text == generated_texts[0] for text in generated_texts])
    assert responses == response_snapshot
--- a/integration-tests/models/test_flash_neox_sharded.py
+++ b/integration-tests/models/test_flash_neox_sharded.py
@ -0,0 +1,39 @@
 import pytest
@pytest.fixture(scope="module")
 def flash_neox_sharded_handle(launcher):
    """Enter the launcher context for the oasst pythia-12b model with
    num_shard=2 and yield the resulting handle."""
    model_id = "OpenAssistant/oasst-sft-1-pythia-12b"
    with launcher(model_id, num_shard=2) as sharded_handle:
        yield sharded_handle
@pytest.fixture(scope="module")
 async def flash_neox_sharded(flash_neox_sharded_handle):
    """Await the handle's health check, then return the handle's client."""
    handle = flash_neox_sharded_handle
    # 240 is forwarded as-is to health(); presumably a timeout in seconds -- TODO confirm.
    await handle.health(240)
    return handle.client
@pytest.mark.asyncio
 async def test_flash_neox(flash_neox_sharded, response_snapshot):
    """Generate 10 new tokens for a fixed prompt and compare against the snapshot."""
    prompt = "<|prompter|>What is a meme, and what's the history behind this word?<|endoftext|><|assistant|>"
    result = await flash_neox_sharded.generate(prompt, max_new_tokens=10)

    # The reported token count must equal the requested max_new_tokens.
    assert result.details.generated_tokens == 10
    assert result == response_snapshot
@pytest.mark.asyncio
 async def test_flash_neox_load(flash_neox_sharded, generate_load, response_snapshot):
    """Run the same prompt through generate_load with n=4 and check that the
    four responses agree with each other and with the snapshot."""
    prompt = "<|prompter|>What is a meme, and what's the history behind this word?<|endoftext|><|assistant|>"
    responses = await generate_load(
        flash_neox_sharded,
        prompt,
        max_new_tokens=10,
        n=4,
    )

    assert len(responses) == 4
    # Every response must carry the same generated text as the first one.
    assert all(
        [reply.generated_text == responses[0].generated_text for reply in responses]
    )
    assert responses == response_snapshot
--- a/integration-tests/pytest.ini
+++ b/integration-tests/pytest.ini
@ -0,0 +1,4 @@
 [pytest]
 asyncio_mode = auto
 markers =
    private: marks tests as requiring an admin hf token (deselect with '-m "not private"')
--- a/server/pyproject.toml
+++ b/server/pyproject.toml
@ -35,6 +35,9 @@ bnb = ["bitsandbytes"]
 grpcio-tools = "^1.51.1"
 pytest = "^7.3.0"
 [tool.pytest.ini_options]
 markers = ["private: marks tests as requiring an admin hf token (deselect with '-m \"not private\"')"]
 [build-system]
 requires = ["poetry-core>=1.0.0"]
 build-backend = "poetry.core.masonry.api"
--- a/server/text_generation_server/models/custom_modeling/flash_neox_modeling.py
+++ b/server/text_generation_server/models/custom_modeling/flash_neox_modeling.py
@ -362,7 +362,7 @@ class FlashGPTNeoXModel(FlashGPTNeoXPreTrainedModel):
            pretrained_model_name_or_path, load_in_8bit=False, *model_args, **kwargs
        )
-        model.post_load_weights(load_in_8bit)
+        model.post_load_weights("bitsandbytes" if load_in_8bit else None)
        return model
    def forward(
@ -466,7 +466,7 @@ class FlashGPTNeoXForCausalLM(FlashGPTNeoXPreTrainedModel):
        model = super(FlashGPTNeoXForCausalLM, cls).from_pretrained(
            pretrained_model_name_or_path, load_in_8bit=False, *model_args, **kwargs
        )
-        model.post_load_weights(load_in_8bit)
+        model.post_load_weights("bitsandbytes" if load_in_8bit else None)
        return model
    def forward(
--- a/server/text_generation_server/models/flash_neox.py
+++ b/server/text_generation_server/models/flash_neox.py
@ -88,7 +88,7 @@ class FlashNeoXSharded(FlashNeoX):
    def load_weights(
        model,
        filenames: List[str],
-        quantize: bool,
+        quantize: Optional[str],
        device: torch.device,
        dtype: torch.dtype,
        rank: int,
--- a/server/text_generation_server/models/flash_santacoder.py
+++ b/server/text_generation_server/models/flash_santacoder.py
@ -80,7 +80,7 @@ class FlashSantacoder(FlashCausalLM):
    def load_weights(
        model: FlashSantacoderForCausalLM,
        filenames: List[Path],
-        quantize: bool,
+        quantize: Optional[str],
        device: torch.device,
        dtype: torch.dtype,
        transpose: bool,
--- a/server/text_generation_server/models/opt.py
+++ b/server/text_generation_server/models/opt.py
@ -101,7 +101,7 @@ class OPTSharded(OPT):
    def load_weights(
        model,
        filenames: List[str],
-        quantize: bool,
+        quantize: Optional[str],
        device: torch.device,
        dtype: torch.dtype,
        rank: int,