diff --git a/.github/workflows/load_test.yaml b/.github/workflows/load_test.yaml
index de900f26..d50c070f 100644
--- a/.github/workflows/load_test.yaml
+++ b/.github/workflows/load_test.yaml
@@ -20,6 +20,8 @@ env:
 
 jobs:
   load-tests:
+    permissions:
+      issues: write
     concurrency:
       group: ${{ github.workflow }}-${{ github.job }}-${{ github.head_ref || github.run_id }}
       cancel-in-progress: true
diff --git a/load_tests/Makefile b/load_tests/Makefile
index 19c8d440..b89ece93 100644
--- a/load_tests/Makefile
+++ b/load_tests/Makefile
@@ -7,7 +7,7 @@ download-dataset:
 		echo "Dataset already downloaded"; \
 	fi
 
-load-test: export PATH = $(shell pwd)/.bin/:$(PATH)
+load-test: export PATH := $(shell pwd)/.bin/:$(PATH)
 load-test: download-dataset
 	poetry install && poetry run python load_test.py
 
diff --git a/load_tests/benchmarks/engine.py b/load_tests/benchmarks/engine.py
index d3d9715f..615bfb72 100644
--- a/load_tests/benchmarks/engine.py
+++ b/load_tests/benchmarks/engine.py
@@ -83,7 +83,7 @@ class TGIDockerRunner(InferenceEngineRunner):
             volumes[v[0]] = {"bind": v[1], "mode": "rw"}
         self.container = run_docker(self.image, params,
                                     "Connected",
-                                    "Error",
+                                    "ERROR",
                                     volumes=volumes)
 
     def stop(self):
diff --git a/load_tests/benchmarks/k6.py b/load_tests/benchmarks/k6.py
index 4464391c..a79a2548 100644
--- a/load_tests/benchmarks/k6.py
+++ b/load_tests/benchmarks/k6.py
@@ -6,17 +6,11 @@
 from enum import Enum
 from typing import Any, Dict, List
 
 import numpy as np
-from jinja2 import Environment, PackageLoader, select_autoescape
 from loguru import logger
 from transformers import LlamaTokenizerFast
 
 from benchmarks.utils import kill
-env = Environment(
-    loader=PackageLoader("benchmarks"),
-    autoescape=select_autoescape()
-)
-
 
 class ExecutorInputType(Enum):
     CONSTANT_TOKENS = "constant_tokens"
@@ -148,8 +142,8 @@ class K6Benchmark:
         env_vars = []
         for key, val in self.k6_config.executor.variables.items():
             env_vars += ["-e", f"{key.upper()}={val}"]
-        env_vars += ["-e", f"MAX_NEW_TOKENS={self.k6_config.executor.variables['max_new_tokens']}"]
         env_vars += ["-e", f"INPUT_FILENAME={self.k6_config.executor.input_filename}"]
+        env_vars += ["-e", f"TEST_EXECUTOR={self.k6_config.executor.name}"]
         args = ["k6", "run", "--out", "json=results.json"] + env_vars + ["main.js"]
         logger.info(f"Running k6 with parameters: {args}")
         logger.info(f"K6Config is: {self.k6_config}")
diff --git a/load_tests/benchmarks/main.js b/load_tests/main.js
similarity index 96%
rename from load_tests/benchmarks/main.js
rename to load_tests/main.js
index 78361568..9d24b9b0 100644
--- a/load_tests/benchmarks/main.js
+++ b/load_tests/main.js
@@ -23,7 +23,6 @@ const max_new_tokens = parseInt(__ENV.MAX_NEW_TOKENS)
 const input_filename = __ENV.INPUT_FILENAME;
 if (input_filename === undefined) {
     throw new Error('INPUT_FILENAME must be defined');
-
 }
 
 const shareGPT = JSON.parse(open(input_filename))
@@ -138,17 +137,17 @@ export default function run() {
 }
 
 export function get_options() {
-    const test_type = __ENV.TEST_TYPE;
-    if (test_type === undefined) {
-        throw new Error('TEST_TYPE must be defined');
+    const test_executor = __ENV.TEST_EXECUTOR;
+    if (test_executor === undefined) {
+        throw new Error('TEST_EXECUTOR must be defined');
     }
-    switch (test_type) {
+    switch (test_executor) {
         case 'constant_arrival_rate':
             return get_constant_arrival_rate_options();
         case 'constant_vus':
             return get_constant_vus_options();
         default:
-            throw new Error('Invalid test type');
+            throw new Error('Invalid test executor');
     }
 }
 