More tests.

This commit is contained in:
Nicolas Patry 2024-12-03 19:05:36 +01:00
parent 1352f70847
commit 13e6d522b7
No known key found for this signature in database
GPG Key ID: D2920555C90F704C
3 changed files with 135 additions and 0 deletions

94
load_tests/long.js Normal file
View File

@ -0,0 +1,94 @@
import { check } from 'k6';
import { scenario } from 'k6/execution';
import http from 'k6/http';
import { Trend, Counter } from 'k6/metrics';
const host = __ENV.HOST;
const model_id = __ENV.MODEL_ID;
const timePerToken = new Trend('time_per_token', true);
const tokens = new Counter('tokens');
const new_tokens = new Counter('new_tokens');
const input_tokens = new Counter('input_tokens');
const max_new_tokens = 50;
// const shareGPT = JSON.parse(open("ShareGPT_V3_unfiltered_cleaned_split.json"))
const shareGPT = JSON.parse(open("long.json"))
export function get_options() {
return {
thresholds: {
http_req_failed: ['rate==0'],
// time_per_token: [{
// threshold: `p(50)<${5 * reference_latency_ms}`,
// abortOnFail: true,
// delayAbortEval: '10s'
// }],
},
scenarios: {
// single_user: {
// executor: 'constant-arrival-rate',
// duration: '60s',
// preAllocatedVUs: 1,
// rate: 20,
// timeUnit: '1s',
// },
// load_test: {
// executor: 'constant-arrival-rate',
// duration: '60s',
// preAllocatedVUs: 100,
// rate: 1,
// timeUnit: '1s',
// },
// breakpoint: {
// executor: 'ramping-arrival-rate', //Assure load increase if the system slows
// preAllocatedVUs: 300,
// stages: [
// { duration: '60s', target: 100 }, // just slowly ramp-up to a HUGE load
// ],
// },
throughput: {
executor: 'shared-iterations',
vus: 10,
iterations: 10,
maxDuration: '120s',
},
},
};
}
function generate_payload(gpt, max_new_tokens) {
const input = gpt["conversations"][0]["value"];
return { "messages": [{ "role": "user", "content": input }], "temperature": 0, "model": `${model_id}`, "max_tokens": max_new_tokens }
}
export const options = get_options();
export default function run() {
const headers = { 'Content-Type': 'application/json' };
const query = shareGPT[scenario.iterationInTest % shareGPT.length];
const payload = JSON.stringify(generate_payload(query, max_new_tokens));
const res = http.post(`http://${host}/v1/chat/completions`, payload, {
headers,
});
if (res.status >= 400 && res.status < 500) {
return;
}
check(res, {
'Post status is 200': (res) => res.status === 200,
});
const duration = res.timings.duration;
if (res.status === 200) {
const body = res.json();
const completion_tokens = body.usage.completion_tokens;
const latency_ms_per_token = duration / completion_tokens;
timePerToken.add(latency_ms_per_token);
const prompt_tokens = body.usage.prompt_tokens;
input_tokens.add(prompt_tokens);
new_tokens.add(completion_tokens);
tokens.add(completion_tokens + prompt_tokens);
}
}

19
load_tests/long.py Normal file
View File

@ -0,0 +1,19 @@
import datasets
import json
dataset = datasets.load_dataset("ccdv/govreport-summarization")
max_new_tokens = 50
conversations = []
for i, item in enumerate(dataset["test"]):
report = item["report"]
messages = [{"from": "human", "value": f"Summarize this report: ```{report}```"}]
conversations.append({"conversations": messages})
with open("long.json", "w") as f:
json.dump(conversations, f, indent=4)

View File

@ -0,0 +1,22 @@
# https://www.gutenberg.org/cache/epub/103/pg103.txt
from openai import OpenAI
import os
import requests
if not os.path.exists("pg103.txt"):
response = requests.get("https://www.gutenberg.org/cache/epub/103/pg103.txt")
with open("pg103.txt", "w") as f:
f.write(response.text)
length = 130000
with open("pg103.txt", "r") as f:
data = f.read()
messages = [{"role": "user", "content": data[: length * 4]}]
client = OpenAI(base_url="http://localhost:8000/v1", api_key="w")
completion = client.chat.completions.create(
model="meta-llama/Llama-3.1-8B-Instruct", messages=messages, max_tokens=2
)