hf_text-generation-inference/integration-tests/models/test_grammar_llama.py

import pytest
import json

from text_generation.types import GrammarType


@pytest.fixture(scope="module")
def non_flash_llama_grammar_handle(launcher):
    """Yield a handle to a TinyLlama server started for the grammar tests.

    The server is started through the ``launcher`` fixture with a single
    shard, grammar support left enabled and flash attention switched off.
    The handle is yielded from inside the launcher's ``with`` block, so the
    context manager is exited only after the module-scoped fixture is torn
    down.
    """
    model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
    launch_options = {
        "num_shard": 1,
        "disable_grammar_support": False,
        "use_flash_attention": False,
    }
    with launcher(model_id, **launch_options) as server:
        yield server


@pytest.fixture(scope="module")
async def non_flash_llama_grammar(non_flash_llama_grammar_handle):
    """Return the server handle's client after its health check completes.

    ``health(300)`` is awaited on the handle first; the argument is
    presumably a timeout -- confirm against the handle implementation.
    """
    server = non_flash_llama_grammar_handle
    await server.health(300)
    client = server.client
    return client


@pytest.mark.release
@pytest.mark.skip
@pytest.mark.asyncio
async def test_non_flash_llama_grammar_json(non_flash_llama_grammar, response_snapshot):
    """Check JSON-schema-guided generation on the non-flash llama client.

    A "Person" JSON schema is serialized with ``json.dumps`` and passed as a
    ``GrammarType.Json`` grammar to ``generate``. The test then asserts the
    generated token count, the exact generated text, and equality with the
    stored response snapshot. The test carries ``@pytest.mark.skip``.
    """
    # NOTE(review): the triple apostrophes in the description strings look
    # like an escaping artifact; they are kept verbatim because they are part
    # of the schema payload sent to the server.
    person_properties = {
        "firstName": {
            "type": "string",
            "description": "The person'''s first name.",
        },
        "lastName": {
            "type": "string",
            "description": "The person'''s last name.",
        },
        "hobby": {
            "description": "The person'''s hobby.",
            "type": "string",
        },
        "numCats": {
            "description": "The number of cats the person has.",
            "type": "integer",
            "minimum": 0,
        },
    }
    # Key order is kept as-is: json.dumps serializes in insertion order.
    person_schema = {
        "type": "object",
        "$id": "https://example.com/person.schema.json",
        "$schema": "https://json-schema.org/draft/2020-12/schema",
        "title": "Person",
        "properties": person_properties,
        "required": ["firstName", "lastName", "hobby", "numCats"],
    }
    json_grammar = {
        "type": GrammarType.Json,
        "value": json.dumps(person_schema),
    }

    response = await non_flash_llama_grammar.generate(
        "info: david holtz like trees and has two cats. ",
        max_new_tokens=100,
        decoder_input_details=True,
        seed=0,
        grammar=json_grammar,
    )

    expected_text = (
        '{"firstName":"David","hobby":"Trees","lastName":"Holtz","numCats":2}'
    )
    assert response.details.generated_tokens == 30
    assert response.generated_text == expected_text
    assert response == response_snapshot
Outlines guided generation (#1539) This WIP PR starts to add grammar support via outlines, currently this PR supports very simple regex grammars and does not optimize for precompiling or caching grammar fsm's. todo: - [X] add simple outlines guidance to `NextTokenChooser` - [X] update protos for grammar - [X] update generation params API - [X] constrain simple grammar - [ ] support parsing more complex grammar into fsm - [ ] support all outline support grammar types - [ ] explore optimizations to avoid recompiling grammars guided request ```bash curl -s 'http://localhost:3000/generate' \ --header 'Content-Type: application/json' \ --data-raw '{ "inputs": "make an email for david: \n", "parameters": { "max_new_tokens": 6, "grammar": "[\\w-]+@([\\w-]+\\.)+[\\w-]+" } }' \| jq ``` response ```json { "generated_text": "david@example.com" } ``` unguided request ```bash curl -s 'http://localhost:3000/generate' \ --header 'Content-Type: application/json' \ --data '{ "inputs": "make an email for david: \n", "parameters": { "max_new_tokens": 6 } }' \| jq ``` response ```json { "generated_text": " email = 'david" } ``` 2024-02-15 02:28:10 -07:00			`import pytest`
			`import json`

			`from text_generation.types import GrammarType`


			`@pytest.fixture(scope="module")`
fix: correctly index into mask when applying grammar (#1618) This PR fixes how the grammar mask is index when generating text and adds a new test to ensure the grammars work with non flash models 2024-03-01 10:22:01 -07:00			`def non_flash_llama_grammar_handle(launcher):`
Outlines guided generation (#1539) This WIP PR starts to add grammar support via outlines, currently this PR supports very simple regex grammars and does not optimize for precompiling or caching grammar fsm's. todo: - [X] add simple outlines guidance to `NextTokenChooser` - [X] update protos for grammar - [X] update generation params API - [X] constrain simple grammar - [ ] support parsing more complex grammar into fsm - [ ] support all outline support grammar types - [ ] explore optimizations to avoid recompiling grammars guided request ```bash curl -s 'http://localhost:3000/generate' \ --header 'Content-Type: application/json' \ --data-raw '{ "inputs": "make an email for david: \n", "parameters": { "max_new_tokens": 6, "grammar": "[\\w-]+@([\\w-]+\\.)+[\\w-]+" } }' \| jq ``` response ```json { "generated_text": "david@example.com" } ``` unguided request ```bash curl -s 'http://localhost:3000/generate' \ --header 'Content-Type: application/json' \ --data '{ "inputs": "make an email for david: \n", "parameters": { "max_new_tokens": 6 } }' \| jq ``` response ```json { "generated_text": " email = 'david" } ``` 2024-02-15 02:28:10 -07:00			`with launcher(`
fix: correctly index into mask when applying grammar (#1618) This PR fixes how the grammar mask is index when generating text and adds a new test to ensure the grammars work with non flash models 2024-03-01 10:22:01 -07:00			`"TinyLlama/TinyLlama-1.1B-Chat-v1.0",`
			`num_shard=1,`
			`disable_grammar_support=False,`
			`use_flash_attention=False,`
Outlines guided generation (#1539) This WIP PR starts to add grammar support via outlines, currently this PR supports very simple regex grammars and does not optimize for precompiling or caching grammar fsm's. todo: - [X] add simple outlines guidance to `NextTokenChooser` - [X] update protos for grammar - [X] update generation params API - [X] constrain simple grammar - [ ] support parsing more complex grammar into fsm - [ ] support all outline support grammar types - [ ] explore optimizations to avoid recompiling grammars guided request ```bash curl -s 'http://localhost:3000/generate' \ --header 'Content-Type: application/json' \ --data-raw '{ "inputs": "make an email for david: \n", "parameters": { "max_new_tokens": 6, "grammar": "[\\w-]+@([\\w-]+\\.)+[\\w-]+" } }' \| jq ``` response ```json { "generated_text": "david@example.com" } ``` unguided request ```bash curl -s 'http://localhost:3000/generate' \ --header 'Content-Type: application/json' \ --data '{ "inputs": "make an email for david: \n", "parameters": { "max_new_tokens": 6 } }' \| jq ``` response ```json { "generated_text": " email = 'david" } ``` 2024-02-15 02:28:10 -07:00			`) as handle:`
			`yield handle`


			`@pytest.fixture(scope="module")`
fix: correctly index into mask when applying grammar (#1618) This PR fixes how the grammar mask is index when generating text and adds a new test to ensure the grammars work with non flash models 2024-03-01 10:22:01 -07:00			`async def non_flash_llama_grammar(non_flash_llama_grammar_handle):`
			`await non_flash_llama_grammar_handle.health(300)`
			`return non_flash_llama_grammar_handle.client`
Outlines guided generation (#1539) This WIP PR starts to add grammar support via outlines, currently this PR supports very simple regex grammars and does not optimize for precompiling or caching grammar fsm's. todo: - [X] add simple outlines guidance to `NextTokenChooser` - [X] update protos for grammar - [X] update generation params API - [X] constrain simple grammar - [ ] support parsing more complex grammar into fsm - [ ] support all outline support grammar types - [ ] explore optimizations to avoid recompiling grammars guided request ```bash curl -s 'http://localhost:3000/generate' \ --header 'Content-Type: application/json' \ --data-raw '{ "inputs": "make an email for david: \n", "parameters": { "max_new_tokens": 6, "grammar": "[\\w-]+@([\\w-]+\\.)+[\\w-]+" } }' \| jq ``` response ```json { "generated_text": "david@example.com" } ``` unguided request ```bash curl -s 'http://localhost:3000/generate' \ --header 'Content-Type: application/json' \ --data '{ "inputs": "make an email for david: \n", "parameters": { "max_new_tokens": 6 } }' \| jq ``` response ```json { "generated_text": " email = 'david" } ``` 2024-02-15 02:28:10 -07:00

Add pytest release marker (#2114) * Add pytest release marker Annotate a test with `@pytest.mark.release` and it only gets run with `pytest integration-tests --release`. * Mark many models as `release` to speed up CI 2024-06-25 08:53:20 -06:00			`@pytest.mark.release`
fix: correctly index into mask when applying grammar (#1618) This PR fixes how the grammar mask is index when generating text and adds a new test to ensure the grammars work with non flash models 2024-03-01 10:22:01 -07:00			`@pytest.mark.skip`
Outlines guided generation (#1539) This WIP PR starts to add grammar support via outlines, currently this PR supports very simple regex grammars and does not optimize for precompiling or caching grammar fsm's. todo: - [X] add simple outlines guidance to `NextTokenChooser` - [X] update protos for grammar - [X] update generation params API - [X] constrain simple grammar - [ ] support parsing more complex grammar into fsm - [ ] support all outline support grammar types - [ ] explore optimizations to avoid recompiling grammars guided request ```bash curl -s 'http://localhost:3000/generate' \ --header 'Content-Type: application/json' \ --data-raw '{ "inputs": "make an email for david: \n", "parameters": { "max_new_tokens": 6, "grammar": "[\\w-]+@([\\w-]+\\.)+[\\w-]+" } }' \| jq ``` response ```json { "generated_text": "david@example.com" } ``` unguided request ```bash curl -s 'http://localhost:3000/generate' \ --header 'Content-Type: application/json' \ --data '{ "inputs": "make an email for david: \n", "parameters": { "max_new_tokens": 6 } }' \| jq ``` response ```json { "generated_text": " email = 'david" } ``` 2024-02-15 02:28:10 -07:00			`@pytest.mark.asyncio`
fix: correctly index into mask when applying grammar (#1618) This PR fixes how the grammar mask is index when generating text and adds a new test to ensure the grammars work with non flash models 2024-03-01 10:22:01 -07:00			`async def test_non_flash_llama_grammar_json(non_flash_llama_grammar, response_snapshot):`
			`response = await non_flash_llama_grammar.generate(`
Outlines guided generation (#1539) This WIP PR starts to add grammar support via outlines, currently this PR supports very simple regex grammars and does not optimize for precompiling or caching grammar fsm's. todo: - [X] add simple outlines guidance to `NextTokenChooser` - [X] update protos for grammar - [X] update generation params API - [X] constrain simple grammar - [ ] support parsing more complex grammar into fsm - [ ] support all outline support grammar types - [ ] explore optimizations to avoid recompiling grammars guided request ```bash curl -s 'http://localhost:3000/generate' \ --header 'Content-Type: application/json' \ --data-raw '{ "inputs": "make an email for david: \n", "parameters": { "max_new_tokens": 6, "grammar": "[\\w-]+@([\\w-]+\\.)+[\\w-]+" } }' \| jq ``` response ```json { "generated_text": "david@example.com" } ``` unguided request ```bash curl -s 'http://localhost:3000/generate' \ --header 'Content-Type: application/json' \ --data '{ "inputs": "make an email for david: \n", "parameters": { "max_new_tokens": 6 } }' \| jq ``` response ```json { "generated_text": " email = 'david" } ``` 2024-02-15 02:28:10 -07:00			`"info: david holtz like trees and has two cats. ",`
			`max_new_tokens=100,`
			`decoder_input_details=True,`
			`seed=0,`
			`grammar={`
fix: correctly index into mask when applying grammar (#1618) This PR fixes how the grammar mask is index when generating text and adds a new test to ensure the grammars work with non flash models 2024-03-01 10:22:01 -07:00			`"type": GrammarType.Json,`
Outlines guided generation (#1539) This WIP PR starts to add grammar support via outlines, currently this PR supports very simple regex grammars and does not optimize for precompiling or caching grammar fsm's. todo: - [X] add simple outlines guidance to `NextTokenChooser` - [X] update protos for grammar - [X] update generation params API - [X] constrain simple grammar - [ ] support parsing more complex grammar into fsm - [ ] support all outline support grammar types - [ ] explore optimizations to avoid recompiling grammars guided request ```bash curl -s 'http://localhost:3000/generate' \ --header 'Content-Type: application/json' \ --data-raw '{ "inputs": "make an email for david: \n", "parameters": { "max_new_tokens": 6, "grammar": "[\\w-]+@([\\w-]+\\.)+[\\w-]+" } }' \| jq ``` response ```json { "generated_text": "david@example.com" } ``` unguided request ```bash curl -s 'http://localhost:3000/generate' \ --header 'Content-Type: application/json' \ --data '{ "inputs": "make an email for david: \n", "parameters": { "max_new_tokens": 6 } }' \| jq ``` response ```json { "generated_text": " email = 'david" } ``` 2024-02-15 02:28:10 -07:00			`"value": json.dumps(`
			`{`
			`"type": "object",`
			`"$id": "https://example.com/person.schema.json",`
			`"$schema": "https://json-schema.org/draft/2020-12/schema",`
			`"title": "Person",`
			`"properties": {`
			`"firstName": {`
			`"type": "string",`
			`"description": "The person'''s first name.",`
			`},`
			`"lastName": {`
			`"type": "string",`
			`"description": "The person'''s last name.",`
			`},`
			`"hobby": {`
			`"description": "The person'''s hobby.",`
			`"type": "string",`
			`},`
			`"numCats": {`
			`"description": "The number of cats the person has.",`
			`"type": "integer",`
			`"minimum": 0,`
			`},`
			`},`
			`"required": ["firstName", "lastName", "hobby", "numCats"],`
			`}`
			`),`
			`},`
			`)`

			`assert response.details.generated_tokens == 30`
			`assert (`
			`response.generated_text`
fix(router): fix openapi and add jsonschema validation (#1578) 2024-02-21 03:05:32 -07:00			`== '{"firstName":"David","hobby":"Trees","lastName":"Holtz","numCats":2}'`
Outlines guided generation (#1539) This WIP PR starts to add grammar support via outlines, currently this PR supports very simple regex grammars and does not optimize for precompiling or caching grammar fsm's. todo: - [X] add simple outlines guidance to `NextTokenChooser` - [X] update protos for grammar - [X] update generation params API - [X] constrain simple grammar - [ ] support parsing more complex grammar into fsm - [ ] support all outline support grammar types - [ ] explore optimizations to avoid recompiling grammars guided request ```bash curl -s 'http://localhost:3000/generate' \ --header 'Content-Type: application/json' \ --data-raw '{ "inputs": "make an email for david: \n", "parameters": { "max_new_tokens": 6, "grammar": "[\\w-]+@([\\w-]+\\.)+[\\w-]+" } }' \| jq ``` response ```json { "generated_text": "david@example.com" } ``` unguided request ```bash curl -s 'http://localhost:3000/generate' \ --header 'Content-Type: application/json' \ --data '{ "inputs": "make an email for david: \n", "parameters": { "max_new_tokens": 6 } }' \| jq ``` response ```json { "generated_text": " email = 'david" } ``` 2024-02-15 02:28:10 -07:00			`)`
			`assert response == response_snapshot`