# hf_text-generation-inference/server/tests/models/test_model.py

import pytest
import torch

from transformers import AutoTokenizer

from text_generation_server.models import Model


def get_test_model():
    """Build a throwaway ``Model`` subclass instance for the decoding tests.

    The tokenizer is loaded from the ``"huggingface/llama-7b"`` checkpoint,
    so calling this needs access to that checkpoint (or a local cache of it).
    The wrapped module is a 1x1 ``torch.nn.Linear`` on CPU in float32; the
    tests in this file only use ``tokenizer`` and ``decode_token`` on the
    returned object.

    Returns:
        An instance of a local ``Model`` subclass whose ``batch_type`` and
        ``generate_token`` raise ``NotImplementedError``.
    """

    class TestModel(Model):
        # Stub overrides only; presumably required because ``Model`` leaves
        # these abstract (verify against ``Model``). The tests here never
        # call them.
        def batch_type(self):
            raise NotImplementedError

        def generate_token(self, batch):
            raise NotImplementedError

    # Load the tokenizer first, exactly as the constructor call below
    # expects it to be ready.
    llama_tokenizer = AutoTokenizer.from_pretrained("huggingface/llama-7b")
    placeholder_module = torch.nn.Linear(1, 1)
    cpu = torch.device("cpu")

    # Arguments are positional, in the order ``Model.__init__`` takes them.
    # NOTE(review): the meaning of the ``False`` flag is not visible from
    # this file -- confirm against ``Model.__init__``.
    return TestModel(
        "test_model_id",
        placeholder_module,
        llama_tokenizer,
        False,
        torch.float32,
        cpu,
    )


@pytest.mark.private
def test_decode_streaming_english_spaces():
    """Streaming decode of an English sentence reproduces it exactly.

    First checks that the hard-coded ids are what the tokenizer produces for
    the sentence (without special tokens). Then feeds ``decode_token`` a
    prefix that grows by one id per step, threading the two offsets it
    returns back into the next call, and checks that the emitted pieces
    concatenate to the original sentence.
    """
    expected_text = "Hello here, this is a simple test"
    token_ids = [15043, 1244, 29892, 445, 338, 263, 2560, 1243]

    model = get_test_model()
    encoded = model.tokenizer(expected_text, add_special_tokens=False)
    assert token_ids == encoded["input_ids"]

    pieces = []
    text_offset, tok_offset = 0, 0
    # ``prefix_len`` runs 1..len(token_ids): each step exposes one more id.
    for prefix_len in range(1, len(token_ids) + 1):
        piece, text_offset, tok_offset = model.decode_token(
            token_ids[:prefix_len], text_offset, tok_offset
        )
        pieces.append(piece)

    assert "".join(pieces) == expected_text


@pytest.mark.private
def test_decode_streaming_chinese_utf8():
    """Streaming decode of a Chinese sentence reproduces it exactly.

    The sentence has 8 characters but the id list has 16 entries, so some
    characters are spread over several ids (presumably byte-level fallback
    tokens -- confirm against the tokenizer vocabulary). ``decode_token`` is
    fed a prefix that grows by one id per step, with the two offsets it
    returns threaded into the next call; the emitted pieces must concatenate
    to the original sentence.
    """
    expected_text = "我很感谢你的热情"
    token_ids = [
        30672,
        232, 193, 139,
        233, 135, 162,
        235, 179, 165,
        30919,
        30210,
        234, 134, 176,
        30993,
    ]

    model = get_test_model()

    pieces = []
    text_offset, tok_offset = 0, 0
    # ``prefix_len`` runs 1..len(token_ids): each step exposes one more id.
    for prefix_len in range(1, len(token_ids) + 1):
        piece, text_offset, tok_offset = model.decode_token(
            token_ids[:prefix_len], text_offset, tok_offset
        )
        pieces.append(piece)

    assert "".join(pieces) == expected_text
fix(server): fix decode token (#334) Fixes #333 --------- Co-authored-by: Nicolas Patry <patry.nicolas@protonmail.com> 2023-05-16 15:23:27 -06:00			`import pytest`
			`import torch`

			`from transformers import AutoTokenizer`

			`from text_generation_server.models import Model`


			`def get_test_model():`
			`class TestModel(Model):`
			`def batch_type(self):`
			`raise NotImplementedError`

			`def generate_token(self, batch):`
			`raise NotImplementedError`

			`tokenizer = AutoTokenizer.from_pretrained("huggingface/llama-7b")`

			`model = TestModel(`
Enable multiple LoRa adapters (#2010) * feat: first draft load multiple lora * feat: load weights within layer and refactor lora pass * fix: refactor and reduce lora math * feat: baseline impl single request multi lora support * feat: prefer lorax implementation and port loading logic * fix: prefer adapter_data and refactors * feat: perfer loraxs custom punica kernels and add mlp loras * fix: adjust batch for bgmv * fix: adjust adapter_segments logic when in batch * fix: refactor and move changes to v3 proto * fix: pass model_id for all flash causal lms * fix: pass model_id for all causal and seq2seq lms * fix: add model_id to model test * feat: add lora support to mistral and refactors * feat: prefer model id in request * fix: include rust code for adapter id * feat: bump launcher and add new lora docs * feat: support base model generation and refactors * fix: rename doc to retry ci build * feat: support if vlm models * fix: add adapter_data param and avoid missing layers * fix: add adapter_data param to phi and neox * fix: update all models forwards to include adapter_data * fix: add model_id to IdeficsCausalLM * Update lora.md Fixed a typo * Update lora.md Fixing spam image * fix: add lora kernel to dockerfile, support running without kernels and refactors * fix: avoid dockerfile conflict * fix: refactors and adjust flash llama lora logic * fix: skip llama test due to CI issue (temp) * fix: skip llama test CI (temp) 2 * fix: revert skips and prefer updated ci token for tests * fix: refactors and helpful comments * fix: add noop in TensorParallelAdapterRowLinear too * fix: refactor and move shard_lora_weights logic * fix: exit early if no adapter_data --------- Co-authored-by: Derek <datavistics@gmail.com> 2024-06-25 12:46:27 -06:00			`"test_model_id",`
			`torch.nn.Linear(1, 1),`
			`tokenizer,`
			`False,`
			`torch.float32,`
			`torch.device("cpu"),`
fix(server): fix decode token (#334) Fixes #333 --------- Co-authored-by: Nicolas Patry <patry.nicolas@protonmail.com> 2023-05-16 15:23:27 -06:00			`)`
			`return model`


			`@pytest.mark.private`
			`def test_decode_streaming_english_spaces():`
			`model = get_test_model()`
			`truth = "Hello here, this is a simple test"`
			`all_input_ids = [15043, 1244, 29892, 445, 338, 263, 2560, 1243]`
			`assert (`
			`all_input_ids == model.tokenizer(truth, add_special_tokens=False)["input_ids"]`
			`)`

			`decoded_text = ""`
			`offset = 0`
			`token_offset = 0`
			`for i in range(len(all_input_ids)):`
			`text, offset, token_offset = model.decode_token(`
			`all_input_ids[: i + 1], offset, token_offset`
			`)`
			`decoded_text += text`

			`assert decoded_text == truth`


			`@pytest.mark.private`
			`def test_decode_streaming_chinese_utf8():`
			`model = get_test_model()`
			`truth = "我很感谢你的热情"`
			`all_input_ids = [`
			`30672,`
			`232,`
			`193,`
			`139,`
			`233,`
			`135,`
			`162,`
			`235,`
			`179,`
			`165,`
			`30919,`
			`30210,`
			`234,`
			`134,`
			`176,`
			`30993,`
			`]`

			`decoded_text = ""`
			`offset = 0`
			`token_offset = 0`
			`for i in range(len(all_input_ids)):`
			`text, offset, token_offset = model.decode_token(`
			`all_input_ids[: i + 1], offset, token_offset`
			`)`
			`decoded_text += text`

			`assert decoded_text == truth`