63 lines
1.8 KiB
JavaScript
63 lines
1.8 KiB
JavaScript
import {check} from 'k6';
|
|
import http from 'k6/http';
|
|
import {Trend} from 'k6/metrics';
|
|
|
|
const host = __ENV.HOST || '127.0.0.1:3000';
|
|
|
|
const totalTime = new Trend('total_time', true);
|
|
const validationTime = new Trend('validation_time', true);
|
|
const queueTime = new Trend('queue_time', true);
|
|
const inferenceTime = new Trend('inference_time', true);
|
|
const timePerToken = new Trend('time_per_token', true);
|
|
|
|
const example = {
|
|
payload: JSON.stringify({
|
|
inputs: '# This is a fibonacci function written in the Python programming language.' +
|
|
'def fibonacci',
|
|
parameters: {
|
|
details: true,
|
|
max_new_tokens: 60,
|
|
temperature: 0.2,
|
|
top_p: 0.95,
|
|
seed: 0,
|
|
},
|
|
}),
|
|
generated_tokens: 60
|
|
};
|
|
|
|
export const options = {
|
|
thresholds: {
|
|
http_req_failed: ['rate==0'],
|
|
time_per_token: ['p(95)<90'],
|
|
queue_time: ['p(95)<1500'],
|
|
},
|
|
scenarios: {
|
|
load_test: {
|
|
executor: 'constant-arrival-rate',
|
|
duration: '60s',
|
|
preAllocatedVUs: 100,
|
|
rate: 10,
|
|
timeUnit: '1s',
|
|
},
|
|
},
|
|
};
|
|
|
|
export default function () {
|
|
const headers = {'Content-Type': 'application/json'};
|
|
const res = http.post(`http://${host}/generate`, example.payload, {
|
|
headers,
|
|
});
|
|
|
|
check(res, {
|
|
'Post status is 200': (r) => res.status === 200,
|
|
'Post response generated tokens': (r) => res.status === 200 && res.json().details.generated_tokens === example.generated_tokens,
|
|
});
|
|
|
|
if (res.status === 200) {
|
|
totalTime.add(res.headers["X-Total-Time"]);
|
|
validationTime.add(res.headers["X-Validation-Time"]);
|
|
queueTime.add(res.headers["X-Queue-Time"]);
|
|
inferenceTime.add(res.headers["X-Inference-Time"]);
|
|
timePerToken.add(res.headers["X-Time-Per-Token"]);
|
|
}
|
|
} |