feat: simple mistral lora integration tests (#2180)

* feat: simple mistral lora integration tests

* fix: include args in docker launcher

* fix: disable cuda graphs with lora and warn

* fix: adjust docs and precommit issues

* fix: re update docs
drbh 2024-07-15 09:16:15 -04:00 committed by GitHub
parent dbb23fbfa8
commit 5a65066922
7 changed files with 967 additions and 0 deletions


@@ -333,6 +333,8 @@ def launcher(event_loop):
max_input_length: Optional[int] = None,
max_batch_prefill_tokens: Optional[int] = None,
max_total_tokens: Optional[int] = None,
lora_adapters: Optional[List[str]] = None,
cuda_graphs: Optional[List[int]] = None,
):
port = random.randint(8000, 10_000)
master_port = random.randint(10_000, 20_000)
@@ -379,6 +381,14 @@ def launcher(event_loop):
if max_total_tokens:
args.append("--max-total-tokens")
args.append(str(max_total_tokens))
if lora_adapters:
args.append("--lora-adapters")
args.append(",".join(lora_adapters))
if cuda_graphs:
args.append("--cuda-graphs")
args.append(",".join(map(str, cuda_graphs)))
print(" ".join(args), file=sys.stderr)
env["LOG_LEVEL"] = "info,text_generation_router=debug" env["LOG_LEVEL"] = "info,text_generation_router=debug"
@@ -418,6 +428,8 @@ def launcher(event_loop):
max_input_length: Optional[int] = None,
max_batch_prefill_tokens: Optional[int] = None,
max_total_tokens: Optional[int] = None,
lora_adapters: Optional[List[str]] = None,
cuda_graphs: Optional[List[int]] = None,
):
port = random.randint(8000, 10_000)
@@ -447,6 +459,12 @@ def launcher(event_loop):
if max_total_tokens:
args.append("--max-total-tokens")
args.append(str(max_total_tokens))
if lora_adapters:
args.append("--lora-adapters")
args.append(",".join(lora_adapters))
if cuda_graphs:
args.append("--cuda-graphs")
args.append(",".join(map(str, cuda_graphs)))
client = docker.from_env()
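For reference, the two new launcher options map onto CLI flags in a straightforward way; below is a minimal sketch of the same argument handling (the adapter names and graph sizes are illustrative, borrowed from the test fixture further down in this diff):

# Sketch only: mirrors how the fixture turns the new keyword arguments into CLI flags.
lora_adapters = ["predibase/dbpedia", "predibase/customer_support"]
cuda_graphs = [0]

args = []
if lora_adapters:
    args.extend(["--lora-adapters", ",".join(lora_adapters)])
if cuda_graphs:
    args.extend(["--cuda-graphs", ",".join(map(str, cuda_graphs))])

print(" ".join(args))
# --lora-adapters predibase/dbpedia,predibase/customer_support --cuda-graphs 0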


@@ -0,0 +1,251 @@
{
"details": {
"finish_reason": "length",
"generated_tokens": 40,
"prefill": [],
"seed": null,
"tokens": [
{
"id": 13,
"logprob": -0.27416992,
"special": false,
"text": "\n"
},
{
"id": 13,
"logprob": -0.17016602,
"special": false,
"text": "\n"
},
{
"id": 28737,
"logprob": -2.7109375,
"special": false,
"text": "I"
},
{
"id": 28809,
"logprob": -1.5,
"special": false,
"text": ""
},
{
"id": 28719,
"logprob": -0.34204102,
"special": false,
"text": "m"
},
{
"id": 459,
"logprob": -1.6914062,
"special": false,
"text": " not"
},
{
"id": 1864,
"logprob": -0.69140625,
"special": false,
"text": " sure"
},
{
"id": 513,
"logprob": -1.6171875,
"special": false,
"text": " if"
},
{
"id": 315,
"logprob": -1.3837891,
"special": false,
"text": " I"
},
{
"id": 541,
"logprob": -1.2226562,
"special": false,
"text": " can"
},
{
"id": 1567,
"logprob": -1.8652344,
"special": false,
"text": " come"
},
{
"id": 582,
"logprob": -0.0070228577,
"special": false,
"text": " up"
},
{
"id": 395,
"logprob": -0.0054092407,
"special": false,
"text": " with"
},
{
"id": 28705,
"logprob": -0.62597656,
"special": false,
"text": " "
},
{
"id": 28770,
"logprob": -0.0035572052,
"special": false,
"text": "3"
},
{
"id": 4842,
"logprob": -0.93603516,
"special": false,
"text": " unique"
},
{
"id": 3085,
"logprob": -0.028411865,
"special": false,
"text": " words"
},
{
"id": 369,
"logprob": -1.0400391,
"special": false,
"text": " that"
},
{
"id": 6685,
"logprob": -0.09710693,
"special": false,
"text": " describe"
},
{
"id": 528,
"logprob": -0.066467285,
"special": false,
"text": " me"
},
{
"id": 28725,
"logprob": -1.0722656,
"special": false,
"text": ","
},
{
"id": 562,
"logprob": -0.33422852,
"special": false,
"text": " but"
},
{
"id": 315,
"logprob": -0.5136719,
"special": false,
"text": " I"
},
{
"id": 28809,
"logprob": -0.8989258,
"special": false,
"text": ""
},
{
"id": 584,
"logprob": -0.2076416,
"special": false,
"text": "ll"
},
{
"id": 1464,
"logprob": -0.8808594,
"special": false,
"text": " try"
},
{
"id": 28723,
"logprob": -0.88427734,
"special": false,
"text": "."
},
{
"id": 13,
"logprob": -0.91064453,
"special": false,
"text": "\n"
},
{
"id": 13,
"logprob": -0.08105469,
"special": false,
"text": "\n"
},
{
"id": 28740,
"logprob": -1.8486328,
"special": false,
"text": "1"
},
{
"id": 28723,
"logprob": -0.111572266,
"special": false,
"text": "."
},
{
"id": 23626,
"logprob": -3.15625,
"special": false,
"text": " Creative"
},
{
"id": 13,
"logprob": -0.9194336,
"special": false,
"text": "\n"
},
{
"id": 28750,
"logprob": -0.24841309,
"special": false,
"text": "2"
},
{
"id": 28723,
"logprob": -9.393692e-05,
"special": false,
"text": "."
},
{
"id": 6785,
"logprob": -3.1386719,
"special": false,
"text": " Fun"
},
{
"id": 1780,
"logprob": -0.53564453,
"special": false,
"text": "ny"
},
{
"id": 13,
"logprob": -0.09033203,
"special": false,
"text": "\n"
},
{
"id": 28770,
"logprob": -0.00466156,
"special": false,
"text": "3"
},
{
"id": 28723,
"logprob": -0.00016450882,
"special": false,
"text": "."
}
]
},
"generated_text": "\n\nIm not sure if I can come up with 3 unique words that describe me, but Ill try.\n\n1. Creative\n2. Funny\n3."
}


@@ -0,0 +1,53 @@
{
"details": {
"finish_reason": "eos_token",
"generated_tokens": 7,
"prefill": [],
"seed": null,
"tokens": [
{
"id": 1,
"logprob": -0.49658203,
"special": true,
"text": "<s>"
},
{
"id": 28705,
"logprob": -0.0016384125,
"special": false,
"text": " "
},
{
"id": 1,
"logprob": -1.4931641,
"special": true,
"text": "<s>"
},
{
"id": 28705,
"logprob": -0.00075769424,
"special": false,
"text": " "
},
{
"id": 28740,
"logprob": -0.25024414,
"special": false,
"text": "1"
},
{
"id": 28740,
"logprob": -0.2631836,
"special": false,
"text": "1"
},
{
"id": 2,
"logprob": -0.0003285408,
"special": true,
"text": "</s>"
}
]
},
"generated_text": " 11"
}


@@ -0,0 +1,251 @@
{
"details": {
"finish_reason": "length",
"generated_tokens": 40,
"prefill": [],
"seed": null,
"tokens": [
{
"id": 13,
"logprob": -1.0488281,
"special": false,
"text": "\n"
},
{
"id": 13,
"logprob": -1.0800781,
"special": false,
"text": "\n"
},
{
"id": 27332,
"logprob": -2.1152344,
"special": false,
"text": "###"
},
{
"id": 28705,
"logprob": -1.6748047,
"special": false,
"text": " "
},
{
"id": 28740,
"logprob": -0.097229004,
"special": false,
"text": "1"
},
{
"id": 28723,
"logprob": -0.16467285,
"special": false,
"text": "."
},
{
"id": 7615,
"logprob": -2.2246094,
"special": false,
"text": " News"
},
{
"id": 13,
"logprob": -1.0488281,
"special": false,
"text": "\n"
},
{
"id": 27332,
"logprob": -0.69189453,
"special": false,
"text": "###"
},
{
"id": 28705,
"logprob": -0.013343811,
"special": false,
"text": " "
},
{
"id": 28750,
"logprob": -0.011230469,
"special": false,
"text": "2"
},
{
"id": 28723,
"logprob": -0.00096845627,
"special": false,
"text": "."
},
{
"id": 21095,
"logprob": -2.5605469,
"special": false,
"text": " Blog"
},
{
"id": 13,
"logprob": -0.19458008,
"special": false,
"text": "\n"
},
{
"id": 27332,
"logprob": -0.031280518,
"special": false,
"text": "###"
},
{
"id": 28705,
"logprob": -0.0030708313,
"special": false,
"text": " "
},
{
"id": 28770,
"logprob": -0.0029277802,
"special": false,
"text": "3"
},
{
"id": 28723,
"logprob": -0.0012350082,
"special": false,
"text": "."
},
{
"id": 20108,
"logprob": -2.1582031,
"special": false,
"text": " Article"
},
{
"id": 13,
"logprob": -0.05810547,
"special": false,
"text": "\n"
},
{
"id": 27332,
"logprob": -0.35083008,
"special": false,
"text": "###"
},
{
"id": 28705,
"logprob": -0.034332275,
"special": false,
"text": " "
},
{
"id": 28781,
"logprob": -0.009666443,
"special": false,
"text": "4"
},
{
"id": 28723,
"logprob": -0.0013113022,
"special": false,
"text": "."
},
{
"id": 8349,
"logprob": -2.6191406,
"special": false,
"text": " Review"
},
{
"id": 13,
"logprob": -0.04031372,
"special": false,
"text": "\n"
},
{
"id": 27332,
"logprob": -0.45239258,
"special": false,
"text": "###"
},
{
"id": 28705,
"logprob": -0.045410156,
"special": false,
"text": " "
},
{
"id": 28782,
"logprob": -0.0041236877,
"special": false,
"text": "5"
},
{
"id": 28723,
"logprob": -0.0010223389,
"special": false,
"text": "."
},
{
"id": 5299,
"logprob": -2.8066406,
"special": false,
"text": " Other"
},
{
"id": 13,
"logprob": -0.12054443,
"special": false,
"text": "\n"
},
{
"id": 13,
"logprob": -0.44580078,
"special": false,
"text": "\n"
},
{
"id": 13,
"logprob": -1.4921875,
"special": false,
"text": "\n"
},
{
"id": 13,
"logprob": -1.3574219,
"special": false,
"text": "\n"
},
{
"id": 13,
"logprob": -1.0039062,
"special": false,
"text": "\n"
},
{
"id": 13,
"logprob": -0.5859375,
"special": false,
"text": "\n"
},
{
"id": 13,
"logprob": -0.43481445,
"special": false,
"text": "\n"
},
{
"id": 13,
"logprob": -0.2783203,
"special": false,
"text": "\n"
},
{
"id": 13,
"logprob": -0.20410156,
"special": false,
"text": "\n"
}
]
},
"generated_text": "\n\n### 1. News\n### 2. Blog\n### 3. Article\n### 4. Review\n### 5. Other\n\n\n\n\n\n\n\n\n"
}


@@ -0,0 +1,251 @@
{
"details": {
"finish_reason": "length",
"generated_tokens": 40,
"prefill": [],
"seed": null,
"tokens": [
{
"id": 13,
"logprob": -0.31347656,
"special": false,
"text": "\n"
},
{
"id": 13,
"logprob": -0.27441406,
"special": false,
"text": "\n"
},
{
"id": 28737,
"logprob": -2.2285156,
"special": false,
"text": "I"
},
{
"id": 28809,
"logprob": -1.4677734,
"special": false,
"text": ""
},
{
"id": 28719,
"logprob": -0.31762695,
"special": false,
"text": "m"
},
{
"id": 264,
"logprob": -1.6865234,
"special": false,
"text": " a"
},
{
"id": 1215,
"logprob": -3.2695312,
"special": false,
"text": " very"
},
{
"id": 20640,
"logprob": -3.1230469,
"special": false,
"text": " passionate"
},
{
"id": 1338,
"logprob": -0.48339844,
"special": false,
"text": " person"
},
{
"id": 28723,
"logprob": -0.9970703,
"special": false,
"text": "."
},
{
"id": 315,
"logprob": -0.5498047,
"special": false,
"text": " I"
},
{
"id": 28809,
"logprob": -1.1923828,
"special": false,
"text": ""
},
{
"id": 28719,
"logprob": -0.080444336,
"special": false,
"text": "m"
},
{
"id": 1215,
"logprob": -1.8271484,
"special": false,
"text": " very"
},
{
"id": 12215,
"logprob": -2.8847656,
"special": false,
"text": " driven"
},
{
"id": 28723,
"logprob": -1.0927734,
"special": false,
"text": "."
},
{
"id": 315,
"logprob": -0.4584961,
"special": false,
"text": " I"
},
{
"id": 28809,
"logprob": -0.5019531,
"special": false,
"text": ""
},
{
"id": 28719,
"logprob": -0.030715942,
"special": false,
"text": "m"
},
{
"id": 1215,
"logprob": -0.96972656,
"special": false,
"text": " very"
},
{
"id": 7798,
"logprob": -2.8847656,
"special": false,
"text": " determined"
},
{
"id": 28723,
"logprob": -0.27319336,
"special": false,
"text": "."
},
{
"id": 13,
"logprob": -0.56396484,
"special": false,
"text": "\n"
},
{
"id": 13,
"logprob": -0.011016846,
"special": false,
"text": "\n"
},
{
"id": 3195,
"logprob": -0.7163086,
"special": false,
"text": "What"
},
{
"id": 349,
"logprob": -1.1611328,
"special": false,
"text": " is"
},
{
"id": 574,
"logprob": -0.515625,
"special": false,
"text": " your"
},
{
"id": 6656,
"logprob": -1.0253906,
"special": false,
"text": " favorite"
},
{
"id": 1970,
"logprob": -2.1738281,
"special": false,
"text": " thing"
},
{
"id": 684,
"logprob": -0.48364258,
"special": false,
"text": " about"
},
{
"id": 1250,
"logprob": -1.8876953,
"special": false,
"text": " being"
},
{
"id": 264,
"logprob": -0.41967773,
"special": false,
"text": " a"
},
{
"id": 8626,
"logprob": -2.9160156,
"special": false,
"text": " teacher"
},
{
"id": 28804,
"logprob": -0.11920166,
"special": false,
"text": "?"
},
{
"id": 13,
"logprob": -0.023727417,
"special": false,
"text": "\n"
},
{
"id": 13,
"logprob": -0.010848999,
"special": false,
"text": "\n"
},
{
"id": 28737,
"logprob": -1.0566406,
"special": false,
"text": "I"
},
{
"id": 2016,
"logprob": -0.7163086,
"special": false,
"text": " love"
},
{
"id": 272,
"logprob": -1.9169922,
"special": false,
"text": " the"
},
{
"id": 1639,
"logprob": -2.03125,
"special": false,
"text": " fact"
}
]
},
"generated_text": "\n\nIm a very passionate person. Im very driven. Im very determined.\n\nWhat is your favorite thing about being a teacher?\n\nI love the fact"
}


@@ -0,0 +1,134 @@
import pytest
import requests


@pytest.fixture(scope="module")
def lora_mistral_handle(launcher):
with launcher(
"mistralai/Mistral-7B-v0.1",
lora_adapters=[
"predibase/dbpedia",
"predibase/customer_support",
],
cuda_graphs=[0],
) as handle:
yield handle


@pytest.fixture(scope="module")
async def lora_mistral(lora_mistral_handle):
await lora_mistral_handle.health(300)
return lora_mistral_handle.client


@pytest.mark.asyncio
@pytest.mark.private
async def test_lora_mistral(lora_mistral, response_snapshot):
response = await lora_mistral.generate(
"Test request", max_new_tokens=10, decoder_input_details=True
)
assert response.details.generated_tokens == 10


classification_prompt = """You are given the title and the body of an article below. Please determine the type of the article.\n### Title: Great White Whale\n\n### Body: Great White Whale is the debut album by the Canadian rock band Secret and Whisper. The album was in the works for about a year and was released on February 12 2008. A music video was shot in Pittsburgh for the album's first single XOXOXO. The album reached number 17 on iTunes's top 100 albums in its first week on sale.\n\n### Article Type:"""


@pytest.mark.asyncio
@pytest.mark.private
async def test_lora_mistral_without_adapter(lora_mistral, response_snapshot):
response = requests.post(
f"{lora_mistral.base_url}/generate",
headers=lora_mistral.headers,
json={
"inputs": classification_prompt,
"parameters": {
"max_new_tokens": 40,
"details": True,
},
},
)
assert response.status_code == 200
data = response.json()
assert (
data["generated_text"]
== "\n\n### 1. News\n### 2. Blog\n### 3. Article\n### 4. Review\n### 5. Other\n\n\n\n\n\n\n\n\n"
)
assert data == response_snapshot


@pytest.mark.asyncio
@pytest.mark.private
async def test_lora_mistral_with_dbpedia_adapter(lora_mistral, response_snapshot):
response = requests.post(
f"{lora_mistral.base_url}/generate",
headers=lora_mistral.headers,
json={
"inputs": classification_prompt,
"parameters": {
"max_new_tokens": 40,
"adapter_id": "predibase/dbpedia",
"details": True,
},
},
)
assert response.status_code == 200
data = response.json()
assert data["generated_text"] == " 11"
assert data == response_snapshot


@pytest.mark.asyncio
@pytest.mark.private
async def test_lora_mistral_with_customer_support_adapter(
lora_mistral, response_snapshot
):
print(lora_mistral.base_url)
print(lora_mistral.headers)
response = requests.post(
f"{lora_mistral.base_url}/generate",
headers=lora_mistral.headers,
json={
"inputs": "What are 3 unique words that describe you?",
"parameters": {
"max_new_tokens": 40,
"adapter_id": "predibase/customer_support",
"details": True,
},
},
)
assert response.status_code == 200
data = response.json()
assert (
data["generated_text"]
== "\n\nIm not sure if I can come up with 3 unique words that describe me, but Ill try.\n\n1. Creative\n2. Funny\n3."
)
assert data == response_snapshot


@pytest.mark.asyncio
@pytest.mark.private
async def test_lora_mistral_without_customer_support_adapter(
lora_mistral, response_snapshot
):
response = requests.post(
f"{lora_mistral.base_url}/generate",
headers=lora_mistral.headers,
json={
"inputs": "What are 3 unique words that describe you?",
"parameters": {
"max_new_tokens": 40,
"details": True,
},
},
)
assert response.status_code == 200
data = response.json()
assert (
data["generated_text"]
== "\n\nIm a very passionate person. Im very driven. Im very determined.\n\nWhat is your favorite thing about being a teacher?\n\nI love the fact"
)
assert data == response_snapshot
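Outside the pytest harness, the same adapter routing can be exercised against any running server started with --lora-adapters. A minimal sketch, assuming a hypothetical local endpoint (substitute whatever host/port the launcher exposes):

import requests

# Hypothetical endpoint; point this at a server launched with --lora-adapters.
TGI_URL = "http://localhost:8080"

response = requests.post(
    f"{TGI_URL}/generate",
    json={
        "inputs": "What are 3 unique words that describe you?",
        "parameters": {
            "max_new_tokens": 40,
            # Selects one of the adapters passed at launch time; omit to use the base model.
            "adapter_id": "predibase/customer_support",
        },
    },
    timeout=60,
)
response.raise_for_status()
print(response.json()["generated_text"])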


@@ -91,6 +91,15 @@ def serve(
f"LoRA adapters are enabled. This is an experimental feature and may not work as expected."
)
# TODO: enable lora with cuda graphs. for now disable cuda graphs if lora is enabled
# and warn the user
if len(lora_adapter_ids) > 0 and os.getenv("CUDA_GRAPHS", None) is not None:
logger.warning(
f"LoRa adapter are not supported with CUDA Graphs. Disabling CUDA Graphs."
)
global CUDA_GRAPHS
CUDA_GRAPHS = None
# Downgrade enum into str for easier management later on
quantize = None if quantize is None else quantize.value
dtype = None if dtype is None else dtype.value
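For context, the guard added to serve() keys off the CUDA_GRAPHS environment variable rather than the already-parsed value. A standalone sketch of the same logic, with simplified names and a hypothetical default capture list (not the server's real globals):

import os

from loguru import logger

# Hypothetical default capture sizes; the real value lives in the server's globals.
CUDA_GRAPHS = [1, 2, 4, 8, 16, 32]


def maybe_disable_cuda_graphs(lora_adapter_ids):
    """Disable CUDA graph capture whenever LoRA adapters are requested."""
    global CUDA_GRAPHS
    if len(lora_adapter_ids) > 0 and os.getenv("CUDA_GRAPHS", None) is not None:
        logger.warning(
            "LoRA adapters are not supported with CUDA Graphs. Disabling CUDA Graphs."
        )
        CUDA_GRAPHS = None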